# Spaces: racineai / Open-VLM-Retrieval-Leaderboard (status: Running)
# File size: 15,667 Bytes

import gradio as gr
import json
import pandas as pd
import numpy as np

# Function to load and display PNG logo
def load_png_as_logo():
    """Return an HTML ``<img>`` tag embedding ``racine.png`` as a base64 data URI.

    The file is read from the current working directory. If it is missing, or
    any other error occurs while reading or encoding it, a message is printed
    and an HTML comment placeholder is returned instead of raising.
    """
    try:
        import base64

        with open('racine.png', 'rb') as logo_file:
            raw_bytes = logo_file.read()
        encoded_logo = base64.b64encode(raw_bytes).decode('utf-8')
    except FileNotFoundError:
        print("Warning: racine.png file not found")
        return "<!-- PNG file not found -->"
    except Exception as exc:
        print(f"Error loading PNG: {exc}")
        return "<!-- Error loading PNG -->"

    # Inline the image data so the tag is self-contained.
    return f'''<img src="data:image/png;base64,{encoded_logo}" style="width: 120px; height: auto;" />'''

# Load the scores from JSON file
def load_scores():
    """Load the benchmark scores from ``scores.json`` in the working directory.

    Returns:
        The parsed JSON content. The rest of this module indexes it as a
        mapping of model name -> per-model entry.

    Raises:
        FileNotFoundError: if ``scores.json`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # Be explicit about the encoding: without it, open() falls back to the
    # platform's locale encoding and non-ASCII names could be mis-decoded.
    with open('scores.json', 'r', encoding='utf-8') as f:
        return json.load(f)

# Function to create dataframe for a specific language and sector filter
def create_language_df(scores, language, sector_filter='all'):
    """Build the leaderboard table for queries in a single language.

    Args:
        scores: Mapping of model name -> per-model entry. Keys of an entry
            that end in ``_EN`` / ``_FR`` are treated as sectors and are
            indexed as ``entry[sector][language]`` to get a numeric score.
            Optional ``license`` (or ``License``) and ``origin`` keys are
            read when present.
        language: Language code used to index each sector's scores.
        sector_filter: ``'en_only'`` keeps only ``_EN`` sectors, ``'fr_only'``
            only ``_FR`` sectors; any other value keeps both.

    Returns:
        A ``pandas.DataFrame`` sorted by average score (descending) with the
        columns ``Model``, ``License``, ``Average``, one column per selected
        sector, then the helper columns ``origin`` and ``coming_soon``.
        Scores are formatted as strings with three decimals. An empty
        ``scores`` mapping yields an empty frame with the fixed columns.

    Raises:
        KeyError: if a regular model lacks a selected sector or the
            requested language for that sector.
    """
    hidden_cols = ['origin', 'coming_soon']

    # Nothing to rank: return an empty frame instead of failing on models[0].
    if not scores:
        return pd.DataFrame(columns=['Model', 'License', 'Average'] + hidden_cols)

    models = list(scores)
    # The sector list is taken from the first model's entry only.
    first_entry = scores[models[0]]
    sectors_en = [col for col in first_entry if col.endswith('_EN')]
    sectors_fr = [col for col in first_entry if col.endswith('_FR')]

    if sector_filter == 'en_only':
        selected_sectors = sectors_en
    elif sector_filter == 'fr_only':
        selected_sectors = sectors_fr
    else:  # 'all'
        selected_sectors = sectors_en + sectors_fr

    data = []
    for model in models:
        entry = scores[model]
        row = {'Model': model}

        # Prefer license info from the JSON. Both key spellings are accepted
        # because the sibling table builder reads 'License' from the same data.
        if 'license' in entry:
            row['License'] = entry['license']
        elif 'License' in entry:
            row['License'] = entry['License']
        elif "jina" in model.lower():
            row['License'] = "Qwen Research License"
        else:
            row['License'] = "Apache 2.0"

        # Origin is only used for row styling; default to Chinese if not specified.
        row['origin'] = entry['origin'] if 'origin' in entry else 'CN'

        # Special handling for AMPERE-1 (but not AMPERE-1.1): shown as a
        # placeholder row without scores.
        if "AMPERE-1" in model and "AMPERE-1.1" not in model:
            row['coming_soon'] = True
            for sector in selected_sectors:
                row[sector] = ""
            row['Average'] = ""
            # Infinite sort key places the placeholder at the top.
            row['sort_value'] = float('inf')
        else:
            row['coming_soon'] = False
            sector_scores = {sector: entry[sector][language] for sector in selected_sectors}
            row.update({sector: f"{score:.3f}" for sector, score in sector_scores.items()})

            # Average over the raw scores; fall back to 0 when no sector is
            # selected so we never divide by zero.
            if sector_scores:
                avg_score = sum(float(value) for value in sector_scores.values()) / len(sector_scores)
            else:
                avg_score = 0
            row['Average'] = f"{avg_score:.3f}"
            row['sort_value'] = avg_score

        data.append(row)

    df = pd.DataFrame(data)

    # Stable sort so models with equal averages keep their input order.
    df = df.sort_values('sort_value', ascending=False, kind='mergesort')
    df = df.drop('sort_value', axis=1)

    # Visible columns first (Model, License, Average, sectors), helper columns last.
    visible = [col for col in df.columns if col not in ['Model', 'License', 'Average'] + hidden_cols]
    cols = ['Model', 'License', 'Average'] + visible + [col for col in hidden_cols if col in df.columns]

    return df[cols]

def create_average_language_df(scores):
    """Build the leaderboard table averaged across all query languages.

    Args:
        scores: Mapping of model name -> per-model entry. Keys of an entry
            that end in ``_EN`` / ``_FR`` are treated as sectors and are
            indexed as ``entry[sector][lang]`` for each of the languages
            ``en``, ``fr``, ``es``, ``de`` and ``it``. Optional ``License``
            (or ``license``) and ``origin`` keys are read when present.

    Returns:
        A ``pandas.DataFrame`` sorted by overall average (descending) with the
        columns ``Model``, ``License``, ``Average``, one column per sector,
        then the helper columns ``origin`` and ``coming_soon``. Values are
        formatted as strings with three decimals. An empty ``scores`` mapping
        yields an empty frame with the fixed columns.

    Raises:
        KeyError: if a regular model lacks a sector or one of the languages.
    """
    hidden_cols = ['origin', 'coming_soon']

    # Nothing to rank: return an empty frame instead of failing on models[0].
    if not scores:
        return pd.DataFrame(columns=['Model', 'License', 'Average'] + hidden_cols)

    models = list(scores)
    languages = ['en', 'fr', 'es', 'de', 'it']
    # The sector list is taken from the first model's entry only.
    first_entry = scores[models[0]]
    sectors_en = [col for col in first_entry if col.endswith('_EN')]
    sectors_fr = [col for col in first_entry if col.endswith('_FR')]
    all_sectors = sectors_en + sectors_fr

    data = []
    for model in models:
        entry = scores[model]
        row = {'Model': model}

        # Prefer license info from the JSON. Both key spellings are accepted
        # because the per-language table builder reads 'license' from the
        # same data.
        if 'License' in entry:
            row['License'] = entry['License']
        elif 'license' in entry:
            row['License'] = entry['license']
        else:
            row['License'] = "N/A"

        # Origin is only used for row styling; default to Chinese if not specified.
        row['origin'] = entry['origin'] if 'origin' in entry else 'CN'

        # Special handling for AMPERE-1 (but not AMPERE-1.1): shown as a
        # placeholder row without scores.
        if "AMPERE-1" in model and "AMPERE-1.1" not in model:
            row['coming_soon'] = True
            for sector in all_sectors:
                row[sector] = ""
            row['Average'] = ""
            # Infinite sort key places the placeholder at the top.
            row['sort_value'] = float('inf')
        else:
            row['coming_soon'] = False
            # Per-sector mean over all languages.
            for sector in all_sectors:
                sector_avg = np.mean([entry[sector][lang] for lang in languages])
                row[sector] = f"{sector_avg:.3f}"

            # NOTE: the overall average is taken over the already-rounded
            # per-sector strings, not over the raw scores.
            sector_values = [float(row[sector]) for sector in all_sectors]
            avg_value = np.mean(sector_values) if sector_values else 0
            row['Average'] = f"{avg_value:.3f}"
            row['sort_value'] = avg_value

        data.append(row)

    df = pd.DataFrame(data)

    # Stable sort so models with equal averages keep their input order.
    df = df.sort_values('sort_value', ascending=False, kind='mergesort')
    df = df.drop('sort_value', axis=1)

    # Visible columns first (Model, License, Average, sectors), helper columns last.
    visible = [col for col in df.columns if col not in ['Model', 'License', 'Average'] + hidden_cols]
    cols = ['Model', 'License', 'Average'] + visible + [col for col in hidden_cols if col in df.columns]

    return df[cols]

# Legend explaining the row background colours; shown under every table.
_ORIGIN_LEGEND_HTML = """
<div style="margin-top: 20px; margin-bottom: 40px;">
    <div style="font-weight: bold; margin-bottom: 10px;">Model Origin:</div>
    <div style="display: flex; align-items: center; margin-bottom: 8px;">
        <div style="width: 20px; height: 20px; background-color: rgba(0, 0, 255, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
        <div>European Union</div>
    </div>
    <div style="display: flex; align-items: center;">
        <div style="width: 20px; height: 20px; background-color: rgba(255, 0, 0, 0.2); margin-right: 10px; border: 1px solid #ccc;"></div>
        <div>China</div>
    </div>
</div>
"""


def _render_table_html(df):
    """Render a leaderboard DataFrame as an HTML ``<table>`` string.

    The ``origin`` and ``coming_soon`` columns are not displayed: ``origin``
    becomes the ``data-origin`` attribute of each row (used by the page CSS
    for background colours) and a truthy ``coming_soon`` replaces the row's
    scores with an italic "Coming Soon" in the ``Average`` cell.
    """
    from html import escape

    hidden = ('origin', 'coming_soon')
    visible = [col for col in df.columns if col not in hidden]

    parts = ["<table class='gradio-dataframe'><thead><tr>"]
    parts.extend(f"<th>{escape(str(col))}</th>" for col in visible)
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        origin = row['origin'] if 'origin' in row else 'CN'
        coming_soon = row.get('coming_soon', False)
        # Escape with quote=True because the value sits inside an attribute.
        parts.append(f"<tr data-origin='{escape(str(origin), quote=True)}'>")

        for col in visible:
            if coming_soon and col != 'Model':
                if col == 'Average':
                    # "Coming Soon" text in italics instead of a score.
                    parts.append("<td><span style='font-style: italic; color: #666;'>Coming Soon</span></td>")
                else:
                    parts.append("<td></td>")
            else:
                parts.append(f"<td>{escape(str(row[col]))}</td>")

        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


def create_leaderboard():
    """Assemble the Gradio Blocks UI for the leaderboard.

    Loads the scores via ``load_scores()``, then builds a header, explanatory
    text, one "Average Across Languages" tab and one tab per query language,
    each containing an HTML table and a colour legend, followed by a citation
    footer.

    Returns:
        The ``gr.Blocks`` instance; it is not launched here.
    """
    scores = load_scores()
    languages = {
        'en': 'English',
        'fr': 'French',
        'es': 'Spanish',
        'de': 'German',
        'it': 'Italian'
    }

    with gr.Blocks(title="Visual Embeddings Retrieval Leaderboard",
                   theme='argilla/argilla-theme') as demo:

        # Header section with Racine.ai and title
        gr.HTML("""
            <div style="padding: 2em 2em 1em 2em; text-align: center;">
                <div style="font-size: 1.5em; font-weight: 600; color: #001f3f; margin-bottom: 0.5em;">
                    Racine.ai
                </div>
                <h1 style="font-size: 2.5em; font-weight: bold; margin: 0; color: #001f3f;">
                    Open VLM Retrieval Leaderboard
                </h1>
            </div>
            """)

        gr.Markdown("""
        This leaderboard presents the performance of various visual embedding models across different business sectors 
        and languages. The evaluation is based on retrieval accuracy for visual search tasks.
        
        ## Structure
        - **Sectors**: Each column represents a different business sector (e.g., Energy, Education) with documents in either English (_EN) or French (_FR)
        - **Models**: Each row shows a different model's performance
        - **Scores**: Values range from 0 to 1, where higher is better (1.000 being perfect retrieval)
        - **Average**: Overall mean performance across all sectors for each model
        - **Colors**: Blue backgrounds indicate EU models, red backgrounds indicate Chinese models

        The leaderboard was created in collaboration with the <em>Intelligence Lab</em> of the <em>ECE - Ecole centrale d'électronique</em>.
        """)

        # Info box on how to read the tables
        gr.Markdown("""
        ### How to Read the Results
        - Select a language tab to see how models perform with queries in that language
        - All scores are normalized retrieval accuracy metrics
        - Background colors indicate model origins (Blue = EU, Red = Chinese)
        """)

        # Custom CSS: colour table rows by their data-origin attribute
        gr.HTML("""
        <style>
        table.gradio-dataframe tr[data-origin="EU"] {
            background-color: rgba(0, 0, 255, 0.2) !important;
        }
        table.gradio-dataframe tr[data-origin="CN"] {
            background-color: rgba(255, 0, 0, 0.2) !important;
        }
        </style>
        """)

        # Tabs section
        with gr.Tabs():
            # Average-across-languages tab comes first
            with gr.Tab("Average Across Languages"):
                gr.Markdown("""
                ### Average Performance Across Languages
                This table shows the average performance of each model for each sector,
                averaged across all query languages.
                """)

                gr.HTML(_render_table_html(create_average_language_df(scores)))
                gr.HTML(_ORIGIN_LEGEND_HTML)

            # Individual language tabs
            for lang_code, lang_name in languages.items():
                with gr.Tab(f"{lang_name} Queries"):
                    gr.Markdown(f"""
                    ### Performance with {lang_name} Queries
                    The table below shows how each model performs when the search queries are in {lang_name}.
                    """)

                    gr.HTML(_render_table_html(create_language_df(scores, lang_code, 'all')))
                    gr.HTML(_ORIGIN_LEGEND_HTML)

        # Footer section - Only citation.
        # The fence lines are indented like the rest of the text so the whole
        # literal shares one left margin; with the fences at column 0 the
        # sentence kept its leading spaces (an indented code block in Markdown).
        gr.Markdown("""
        If you use these benchmarks in your research, please cite:
        ```
        @article{visual_embeddings_benchmark_2025,
            title={Cross-lingual Visual Embeddings Benchmark},
            author={racine.ai},
            year={2025}
        }
        ```
        """)

    return demo

# Create and launch the interface.
# Runs only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    # Build the Blocks UI (this reads scores.json via load_scores()).
    demo = create_leaderboard()
    demo.launch()