Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import pandas as pd | |
| import sys | |
| # Add the directory containing api_results.py to the module search path | |
| sys.path.append(os.path.abspath('data/api-results')) | |
| # Now import the API results | |
| from api_results import gpt4, gpt4o, gpt35turbo, claude_opus, gemini_15_pro, gemini_pro_1, gemini_15_flash | |
| from models_info import model_info | |
# Accumulator for the table rows: one dict per model is appended below.
data: list = []
# Folder that is scanned for per-model evaluation result files (*.json).
directory = 'data/raw-eval-outputs'
def model_hyperlink(link, model_name):
    """Return an HTML anchor labelled ``model_name`` that opens ``link`` in a new tab.

    Both values are HTML-escaped before being interpolated, so a ``"`` or
    ``&`` in the URL, or a ``<``/``&`` in the display name, cannot break the
    generated markup. For values without such characters the output is
    identical to plain interpolation.

    Args:
        link: Target URL placed in the ``href`` attribute.
        model_name: Text shown as the link label.

    Returns:
        The ``<a ...>...</a>`` element as a string.
    """
    import html  # function-scope import keeps the block self-contained

    # str() mirrors f-string formatting for non-string values (e.g. NaN).
    safe_link = html.escape(str(link), quote=True)
    # Quotes are harmless in element content, so only &, < and > are escaped.
    safe_name = html.escape(str(model_name), quote=False)
    return (
        f'<a target="_blank" href="{safe_link}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        f'text-decoration-style: dotted;">{safe_name}</a>'
    )
def make_clickable_names(df):
    """Turn every ``Model`` value into an HTML link built from the row's ``Link``.

    The frame is modified in place and also returned, so the call can be
    used either as a statement or in an assignment.

    Args:
        df: DataFrame with a ``"Model"`` and a ``"Link"`` column.

    Returns:
        The same DataFrame object, with ``"Model"`` replaced by anchors.
    """
    # Pair the two columns positionally instead of using a row-wise
    # DataFrame.apply: this avoids building a Series per row and does not
    # depend on how apply() infers its result type for an empty frame.
    df["Model"] = [
        model_hyperlink(link, name)
        for link, name in zip(df["Link"], df["Model"])
    ]
    return df
# Collect one row per evaluated model from the JSON files in `directory`.
# The listing is sorted so the row order of the resulting table does not
# depend on the arbitrary order in which os.listdir() returns entries.
results_suffix = "_results.json"
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue

    filepath = os.path.join(directory, filename)
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open(filepath, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Strip only a trailing "_results.json"; any other *.json name is kept
    # unchanged and must then match a key of model_info as-is.
    if filename.endswith(results_suffix):
        model_name = filename[:-len(results_suffix)]
    else:
        model_name = filename

    # Each entry under "results" contributes its 'acc,none' value, stored as
    # a percentage rounded to two decimals under the entry's own key.
    results = json_data['results']
    row = {'Model': model_name}
    for key, value in results.items():
        row[key] = round(value['acc,none'] * 100, 2)

    # Tuning type and link come from the shared model_info table;
    # an unknown model name raises KeyError here.
    info = model_info[model_name]
    row['T'] = info['tuning']
    row['Link'] = info['link']
    data.append(row)
# API-served models: display name -> score mapping imported from api_results.
api_models = {
    'GPT-4': gpt4,
    'GPT-4o': gpt4o,
    'GPT-3.5 Turbo': gpt35turbo,
    'Claude Opus': claude_opus,
    'Gemini 1.5 Pro': gemini_15_pro,
    'Gemini Pro 1': gemini_pro_1,
    'Gemini 1.5 Flash': gemini_15_flash
}

# (output column, key in the API score mapping) pairs, in output order.
api_score_columns = (
    ('medmcqa_g2b', 'medmcqa_g2b'),
    ('medmcqa_orig_filtered', 'medmcqa_og'),
    ('medqa_4options_g2b', 'medqa_g2b'),
    ('medqa_4options_orig_filtered', 'medqa_og'),
)

for api_name, api_scores in api_models.items():
    # 'b4bqa' is the only optional score; a missing value is recorded as 0.
    api_row = {
        'Model': api_name,
        'b4bqa': round(api_scores.get('b4bqa', 0) * 100, 2),
    }
    # Remaining scores are required and stored as two-decimal percentages.
    for column, score_key in api_score_columns:
        api_row[column] = round(api_scores[score_key] * 100, 2)

    api_info = model_info[api_name]
    api_row['T'] = api_info['tuning']
    api_row['Link'] = api_info['link']
    data.append(api_row)
# Build the table from the collected rows and turn model names into links.
df = pd.DataFrame(data)
df = make_clickable_names(df)
# "Link" was only needed to build the anchors; it is not exported.
df = df.drop(columns=["Link"])

# Per-benchmark difference: g2b score minus orig_filtered score.
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)

# Fixed leading columns, followed by every remaining column in its
# existing order. The list is defined once and reused for both parts.
leading_cols = [
    "T",
    "Model",
    "b4bqa",
    "b4b",
    "medmcqa_g2b",
    "medmcqa_orig_filtered",
    "medmcqa_diff",
    "medqa_4options_g2b",
    "medqa_4options_orig_filtered",
    "medqa_diff",
]
cols = leading_cols + [col for col in df.columns if col not in leading_cols]
# reindex() rather than df[cols]: a leading column that no input supplied
# (e.g. "b4b", which the API rows never set) becomes an all-NaN column
# instead of raising KeyError, so the CSV schema stays stable.
df = df.reindex(columns=cols)

# Save the table, creating the output directory on first run.
output_csv = 'data/csv/models_data.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)
print(f"DataFrame saved to {output_csv}")