Upload folder using huggingface_hub
Browse files
- app/__pycache__/draw_diagram.cpython-310.pyc +0 -0
- app/__pycache__/pages.cpython-310.pyc +0 -0
- app/draw_diagram.py +1 -17
- app/pages.py +2 -5
app/__pycache__/draw_diagram.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/draw_diagram.cpython-310.pyc and b/app/__pycache__/draw_diagram.cpython-310.pyc differ
app/__pycache__/pages.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/pages.cpython-310.pyc and b/app/__pycache__/pages.cpython-310.pyc differ
app/draw_diagram.py CHANGED

@@ -65,22 +65,6 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
     min_value = round(min(chart_data.iloc[:, 1]) - 0.1*min(chart_data.iloc[:, 1]), 1)
     max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
 
-    display_names = {
-        'cross_mmlu'              : 'Cross-MMLU',
-        'cross_mmlu_no_prompt'    : 'Cross-MMLU-No-Prompt',
-        'cross_logiqa'            : 'Cross-LogiQA',
-        'cross_logiqa_no_prompt'  : 'Cross-LogiQA-No-Prompt',
-        'cross_xquad'             : 'Cross-XQUAD',
-        'cross_xquad_no_prompt'   : 'Cross-XQUAD-No-Prompt',
-        'sg_eval'                 : 'SG EVAL',
-        'sg_eval_v1_cleaned'      : 'SG EVAL V1 Cleaned',
-        'sg_eval_v2_mcq'          : 'SG EVAL V2 MCQ',
-        'sg_eval_v2_mcq_no_prompt': 'SG EVAL V2 MCQ No Prompt',
-        'sg_eval_v2_open'         : 'SG EVAL V2 Open Ended',
-        'us_eval'                 : 'US EVAL',
-        'cn_eval'                 : 'CN EVAL',
-        'ph_eval'                 : 'PH EVAL'
-    }
 
     data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
 
@@ -109,7 +93,7 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
             chart_data_table.columns[i]: "{:.3f}" for i in range(2, len(chart_data_table.columns))
         }
     ).highlight_max(
-        subset=[chart_data_table.columns[2]], color='
+        subset=[chart_data_table.columns[2]], color='#b0c1d7',
     )
 
     st.dataframe(
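Aside (not part of the commit): the second hunk completes a pandas Styler call whose color argument is cut off in this capture of the old line; the new line sets it to '#b0c1d7'. Below is a minimal, self-contained sketch of the pattern, assuming an invented leaderboard slice — only the .format / .highlight_max / st.dataframe chain mirrors the file.

import pandas as pd
import streamlit as st

# Hypothetical leaderboard slice; only the column layout (two label columns
# followed by numeric score columns) mirrors what the diff implies.
chart_data_table = pd.DataFrame({
    'Model'     : ['model-a', 'model-b', 'model-c'],
    'model_show': ['Model A', 'Model B', 'Model C'],
    'Accuracy'  : [0.7134, 0.6821, 0.7012],
    'F1'        : [0.7051, 0.6903, 0.6988],
})

styled = chart_data_table.style.format(
    # Three-decimal formatting for every score column (index 2 onward),
    # as in the diff's dict comprehension.
    {chart_data_table.columns[i]: "{:.3f}" for i in range(2, len(chart_data_table.columns))}
).highlight_max(
    # Shade the best value in the first score column; '#b0c1d7' is the
    # hex color the commit supplies for the previously truncated argument.
    subset=[chart_data_table.columns[2]], color='#b0c1d7',
)

st.dataframe(styled)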
app/pages.py CHANGED

@@ -130,8 +130,6 @@ def cultural_reasoning():
     filters_leveltwo = [
         'SG-EVAL-v2-MCQ',
         'SG-EVAL-v2-Open-Ended',
-        'SG-EVAL-v1-Cleaned',
-        'SG-EVAL-v1',
         'CN-EVAL',
         'PH-EVAL',
         'US-EVAL'

@@ -143,8 +141,6 @@ def cultural_reasoning():
 
     category_two_dict = {
         'SG-EVAL-v2-MCQ'        : 'sg_eval_v2_mcq_no_prompt',
-        'SG-EVAL-v1'            : 'sg_eval',
-        'SG-EVAL-v1-Cleaned'    : 'sg_eval_v1_cleaned',
         'SG-EVAL-v2-Open-Ended' : 'sg_eval_v2_open',
         'US-EVAL'               : 'us_eval',
         'CN-EVAL'               : 'cn_eval',

@@ -171,6 +167,7 @@ def general_reasoning():
     st.title("Task: General Reasoning")
 
     filters_levelone = ['Zero Shot', 'Few Shot']
+
     filters_leveltwo = [
         'IndoMMLU',
         'MMLU',

@@ -188,7 +185,7 @@ def general_reasoning():
         'IndoMMLU': 'indommlu_no_prompt',
         'MMLU'    : 'mmlu_no_prompt',
         'C-Eval'  : 'c_eval',
-        'CMMLU'   : '
+        'CMMLU'   : 'cmmlu_no_prompt',
         'ZBench'  : 'zbench',
    }
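Aside (not part of the commit): category_two_dict maps UI display names to dataset keys, and the last hunk fills in the missing CMMLU value. A hedged sketch of how such a mapping is presumably consumed — the selectbox label, st.write call, and variable names are invented; the file's actual widget code is not shown in this diff.

import streamlit as st

# The general_reasoning mapping as it stands after this commit.
category_two_dict = {
    'IndoMMLU': 'indommlu_no_prompt',
    'MMLU'    : 'mmlu_no_prompt',
    'C-Eval'  : 'c_eval',
    'CMMLU'   : 'cmmlu_no_prompt',  # value completed by this commit
    'ZBench'  : 'zbench',
}

# Presumed usage: the user picks a display name, which resolves to the
# dataset key that draw() loads results for.
choice = st.selectbox('Benchmark', list(category_two_dict))  # hypothetical label
dataset_key = category_two_dict[choice]  # e.g. 'CMMLU' -> 'cmmlu_no_prompt'
st.write(f"Loading results for: {dataset_key}")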