File size: 15,667 Bytes
4064f94
 
 
 
 
87de7de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f978d71
 
 
 
 
 
 
 
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f978d71
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f978d71
 
 
 
 
 
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f978d71
 
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45e871d
 
e1e407e
 
 
4064f94
e1e407e
 
 
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
a4cc6e6
 
4064f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45e871d
bcbd74b
45e871d
08cb016
e3610a1
4064f94
e3610a1
 
4064f94
08cb016
4064f94
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
import gradio as gr
import json
import pandas as pd
import numpy as np

# Load the PNG logo and return it as an inline (base64) <img> tag
def load_png_as_logo():
    """Return an HTML ``<img>`` tag embedding ``racine.png`` as base64.

    The file is read from the current working directory. On failure a
    warning is printed and an HTML comment string is returned instead, so
    the caller always receives a string it can place in a page.
    """
    logo_path = 'racine.png'
    try:
        import base64
        with open(logo_path, 'rb') as logo_file:
            raw_bytes = logo_file.read()
        encoded = base64.b64encode(raw_bytes).decode('utf-8')
    except FileNotFoundError:
        # A missing logo is reported but is not fatal.
        print("Warning: racine.png file not found")
        return "<!-- PNG file not found -->"
    except Exception as err:
        # Any other read/encode problem is reported the same way.
        print(f"Error loading PNG: {err}")
        return "<!-- Error loading PNG -->"

    # Inline data URI so the page needs no separate image request.
    return f'''<img src="data:image/png;base64,{encoded}" style="width: 120px; height: auto;" />'''

# Load the scores from JSON file
def load_scores():
    """Load and return the parsed contents of ``scores.json``.

    The file is read from the current working directory.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces).

    Raises:
        FileNotFoundError: If ``scores.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so non-ASCII names
    # decode the same way regardless of the platform's default locale.
    with open('scores.json', 'r', encoding='utf-8') as f:
        return json.load(f)

# Function to create dataframe for a specific language and sector filter
def create_language_df(scores, language, sector_filter='all'):
    """Build the leaderboard table for queries in a single language.

    Args:
        scores: Mapping of model name to that model's entry. Keys of an
            entry ending in ``_EN`` / ``_FR`` are sectors whose value is a
            mapping indexed by language code; optional ``license`` (or
            ``License``) and ``origin`` keys carry metadata. The sector
            list is taken from the first model's entry.
        language: Language code used to index each sector's scores.
        sector_filter: ``'en_only'`` keeps the ``_EN`` sectors,
            ``'fr_only'`` keeps the ``_FR`` sectors, any other value keeps
            both.

    Returns:
        A ``pandas.DataFrame`` sorted by average score (descending) with
        columns ``Model``, ``License``, ``Average``, the selected sectors,
        then the helper columns ``origin`` and ``coming_soon``. Scores are
        strings formatted to three decimals. An empty ``scores`` mapping
        yields an empty frame with the fixed columns.
    """
    meta_columns = ['origin', 'coming_soon']

    # Nothing to rank: return an empty, correctly-shaped frame instead of
    # failing while looking up the first model.
    if not scores:
        return pd.DataFrame(columns=['Model', 'License', 'Average'] + meta_columns)

    first_entry = next(iter(scores.values()))
    sectors_en = [key for key in first_entry if key.endswith('_EN')]
    sectors_fr = [key for key in first_entry if key.endswith('_FR')]

    if sector_filter == 'en_only':
        selected_sectors = sectors_en
    elif sector_filter == 'fr_only':
        selected_sectors = sectors_fr
    else:  # 'all'
        selected_sectors = sectors_en + sectors_fr

    rows = []
    for model, entry in scores.items():
        row = {'Model': model}

        # Accept either spelling of the license key so this table agrees
        # with create_average_language_df when the JSON provides a license.
        # NOTE(review): the fallback differs between the two functions
        # ("Apache 2.0"/"Qwen Research License" here vs "N/A" there) --
        # confirm which default is intended.
        if 'license' in entry:
            row['License'] = entry['license']
        elif 'License' in entry:
            row['License'] = entry['License']
        elif "jina" in model.lower():
            row['License'] = "Qwen Research License"
        else:
            row['License'] = "Apache 2.0"

        # Origin drives row styling; default to Chinese if not specified.
        row['origin'] = entry.get('origin', 'CN')

        # AMPERE-1 (but not AMPERE-1.1) is shown as a placeholder row.
        is_placeholder = "AMPERE-1" in model and "AMPERE-1.1" not in model
        row['coming_soon'] = is_placeholder

        if is_placeholder:
            row.update(dict.fromkeys(selected_sectors, ""))
            row['Average'] = ""
            # Infinity keeps the placeholder at the top of a descending sort.
            row['sort_value'] = float('inf')
        else:
            sector_scores = {
                sector: entry[sector][language] for sector in selected_sectors
            }
            row.update(
                {sector: f"{score:.3f}" for sector, score in sector_scores.items()}
            )
            # Guard against a filter that selects no sectors (would divide
            # by zero); such rows get an average of 0.
            if sector_scores:
                avg_score = (
                    sum(float(value) for value in sector_scores.values())
                    / len(sector_scores)
                )
            else:
                avg_score = 0.0
            row['Average'] = f"{avg_score:.3f}"
            row['sort_value'] = avg_score

        rows.append(row)

    # Final layout: identity columns, then sectors, then the helper columns.
    # Selecting by this list also drops the temporary sort key.
    columns = ['Model', 'License', 'Average', *selected_sectors, *meta_columns]
    df = pd.DataFrame(rows).sort_values('sort_value', ascending=False)
    return df[columns]

def create_average_language_df(scores):
    """Build the leaderboard table averaged over all query languages.

    For every model and sector the scores for ``en``, ``fr``, ``es``,
    ``de`` and ``it`` are averaged; the ``Average`` column is the mean of
    those per-sector means.

    Args:
        scores: Mapping of model name to that model's entry. Keys of an
            entry ending in ``_EN`` / ``_FR`` are sectors whose value is a
            mapping indexed by language code; optional ``License`` (or
            ``license``) and ``origin`` keys carry metadata. The sector
            list is taken from the first model's entry.

    Returns:
        A ``pandas.DataFrame`` sorted by average score (descending) with
        columns ``Model``, ``License``, ``Average``, the sectors, then the
        helper columns ``origin`` and ``coming_soon``. Scores are strings
        formatted to three decimals. An empty ``scores`` mapping yields an
        empty frame with the fixed columns.
    """
    languages = ['en', 'fr', 'es', 'de', 'it']
    meta_columns = ['origin', 'coming_soon']

    # Nothing to rank: return an empty, correctly-shaped frame instead of
    # failing while looking up the first model.
    if not scores:
        return pd.DataFrame(columns=['Model', 'License', 'Average'] + meta_columns)

    first_entry = next(iter(scores.values()))
    sectors_en = [key for key in first_entry if key.endswith('_EN')]
    sectors_fr = [key for key in first_entry if key.endswith('_FR')]
    all_sectors = sectors_en + sectors_fr

    rows = []
    for model, entry in scores.items():
        row = {'Model': model}

        # Accept either spelling of the license key so this table agrees
        # with create_language_df when the JSON provides a license.
        # NOTE(review): the fallback differs between the two functions
        # ("N/A" here vs "Apache 2.0"/"Qwen Research License" there) --
        # confirm which default is intended.
        if 'License' in entry:
            row['License'] = entry['License']
        elif 'license' in entry:
            row['License'] = entry['license']
        else:
            row['License'] = "N/A"

        # Origin drives row styling; default to Chinese if not specified.
        row['origin'] = entry.get('origin', 'CN')

        # AMPERE-1 (but not AMPERE-1.1) is shown as a placeholder row.
        is_placeholder = "AMPERE-1" in model and "AMPERE-1.1" not in model
        row['coming_soon'] = is_placeholder

        if is_placeholder:
            row.update(dict.fromkeys(all_sectors, ""))
            row['Average'] = ""
            # Infinity keeps the placeholder at the top of a descending sort.
            row['sort_value'] = float('inf')
        else:
            # Keep the unrounded per-sector means: the overall average and
            # the sort key must not be derived from the 3-decimal display
            # strings, which would accumulate rounding error.
            sector_means = [
                float(np.mean([entry[sector][lang] for lang in languages]))
                for sector in all_sectors
            ]
            for sector, sector_mean in zip(all_sectors, sector_means):
                row[sector] = f"{sector_mean:.3f}"

            avg_value = float(np.mean(sector_means)) if sector_means else 0
            row['Average'] = f"{avg_value:.3f}"
            row['sort_value'] = avg_value

        rows.append(row)

    # Final layout: identity columns, then sectors, then the helper columns.
    # Selecting by this list also drops the temporary sort key.
    columns = ['Model', 'License', 'Average', *all_sectors, *meta_columns]
    df = pd.DataFrame(rows).sort_values('sort_value', ascending=False)
    return df[columns]

def _render_leaderboard_table(df):
    """Render a leaderboard DataFrame as an HTML ``<table>`` string.

    The ``origin`` and ``coming_soon`` columns are not shown as cells:
    ``origin`` becomes the row's ``data-origin`` attribute (used by the CSS
    for background colour) and a truthy ``coming_soon`` blanks every cell
    except ``Model``, with "Coming Soon" placed in the ``Average`` cell.
    Cell values are inserted as-is (no HTML escaping).
    """
    hidden = ('origin', 'coming_soon')
    visible_cols = [col for col in df.columns if col not in hidden]

    # Collect fragments and join once instead of repeated string +=.
    parts = ["<table class='gradio-dataframe'><thead><tr>"]
    parts.extend(f"<th>{col}</th>" for col in visible_cols)
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        origin = row['origin'] if 'origin' in row else 'CN'
        coming_soon = row.get('coming_soon', False)
        parts.append(f"<tr data-origin='{origin}'>")

        for col in visible_cols:
            if not coming_soon or col == 'Model':
                parts.append(f"<td>{row[col]}</td>")
            elif col == 'Average':
                # Placeholder rows show an italic note instead of a score.
                parts.append("<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>")
            else:
                parts.append("<td></td>")

        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


def create_leaderboard():
    """Build the Gradio Blocks app for the leaderboard.

    Loads the scores via ``load_scores()``, then lays out a header, an
    explanatory section, one "Average Across Languages" tab and one tab
    per query language, each holding an HTML table and a colour legend,
    followed by a citation footer.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    scores = load_scores()
    languages = {
        'en': 'English',
        'fr': 'French',
        'es': 'Spanish',
        'de': 'German',
        'it': 'Italian'
    }

    # Colour legend shown under every table (blue = EU, red = China).
    # Defined once so all tabs stay in sync.
    legend_html = """
                <div style="margin-top: 20px; margin-bottom: 40px;">
                    <div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
                    <div style="display: flex; align-items: center; margin-bottom: 8px;">
                        <div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
                        <div>European Union</div>
                    </div>
                    <div style="display: flex; align-items: center;">
                        <div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
                        <div>China</div>
                    </div>
                </div>
                """
    
    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard", 
                theme='argilla/argilla-theme') as demo:
        
        # Header section with Racine.ai and title
        gr.HTML("""
            <div style="padding: 2em 2em 1em 2em; text-align: center;">
                <div style="font-size: 1.5em; font-weight: 600; color: #001f3f; margin-bottom: 0.5em;">
                    Racine.ai
                </div>
                <h1 style="font-size: 2.5em; font-weight: bold; margin: 0; color: #001f3f;">
                    Open VLM Retrieval Leaderboard
                </h1>
            </div>
            """)
        
        gr.Markdown("""
        This leaderboard presents the performance of various visual embedding models across different business sectors 
        and languages. The evaluation is based on retrieval accuracy for visual search tasks.
        
        ## Structure
        - **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
        - **Models**: Each row shows a different model's performance
        - **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
        - **Average**: Overall mean performance across all sectors for each model
        - **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models

        The leaderboard was created in collaboration with the <em>Intelligence Lab</em> of the <em>ECE - Ecole centrale d'électronique</em>.
        """)
        
        # Info box with custom styling
        gr.Markdown("""
        ### How to Read the Results
        - Select a language tab to see how models perform with queries in that language
        - All scores are normalized retrieval accuracy metrics
        - Background colors indicate model origins (Blue = EU, Red = Chinese)
        """)
        
        # Custom CSS for styling tables: rows are coloured by the
        # data-origin attribute written by _render_leaderboard_table.
        gr.HTML("""
        <style>
        table.gradio-dataframe tr[data-origin="EU"] {
            background-color: rgba(0, 0, 255, 0.2) !important;
        }
        table.gradio-dataframe tr[data-origin="CN"] {
            background-color: rgba(255, 0, 0, 0.2) !important;
        }
        </style>
        """)
        
        # Tabs section
        with gr.Tabs():
            # Add Average Languages tab first
            with gr.Tab("Average Across Languages"):
                gr.Markdown("""
                ### Average Performance Across Languages
                This table shows the average performance of each model for each sector,
                averaged across all query languages.
                """)
                
                avg_df = create_average_language_df(scores)
                gr.HTML(_render_leaderboard_table(avg_df))
                gr.HTML(legend_html)
            
            # Individual language tabs
            for lang_code, lang_name in languages.items():
                with gr.Tab(f"{lang_name} Queries"):
                    gr.Markdown(f"""
                    ### Performance with {lang_name} Queries
                    The table below shows how each model performs when the search queries are in {lang_name}.
                    """)
                    
                    lang_df = create_language_df(scores, lang_code, 'all')
                    gr.HTML(_render_leaderboard_table(lang_df))
                    gr.HTML(legend_html)
        
        # Footer section - Only citation
        gr.Markdown("""   
        If you use these benchmarks in your research, please cite:
```
        @article{visual_embeddings_benchmark_2025,
            title={Cross-lingual Visual Embeddings Benchmark},
            author={racine.ai},
            year={2025}
        }
```
        """)
    
    return demo

# Script entry point: build the Gradio app and launch it.
# The guard keeps a plain import of this module from starting the app.
if __name__ == "__main__":
    demo = create_leaderboard()
    demo.launch()