"""Gradio leaderboard app for the Diké project.

Reads two CSV result tables (base models and instruction-tuned models),
turns every model name into a Markdown hyperlink, and serves the tables in
searchable tabs next to an "About" tab.
"""

import gradio as gr
import pandas as pd


def model_hyperlink_md(link: str, name: str) -> str:
    """Return a Markdown hyperlink that displays *name* and targets *link*."""
    return f"[{name}]({link})"


def make_clickable_and_drop_links(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose "Model" cells are Markdown links.

    Each "Model" value is combined with the "Links" value of the same row via
    ``model_hyperlink_md``; the "Links" column is then dropped. The input
    frame is left untouched.

    Args:
        df: Table with at least the columns "Model" and "Links".

    Returns:
        A new DataFrame without the "Links" column.

    Raises:
        ValueError: If *df* has no "Links" column.
    """
    if "Links" not in df.columns:
        raise ValueError("CSV must include a 'Links' column.")
    linked = df.copy()
    # Built positionally with zip instead of DataFrame.apply(axis=1): apply
    # returns a DataFrame (not a Series) for an empty frame, and assigning its
    # result back aligns on index labels rather than on row position.
    linked["Model"] = [
        model_hyperlink_md(link, name)
        for link, name in zip(linked["Links"], linked["Model"])
    ]
    return linked.drop(columns=["Links"])


def datatypes_with_markdown(df: pd.DataFrame):
    """Return one datatype per column: "markdown" for "Model", else "str"."""
    return ["markdown" if column == "Model" else "str" for column in df.columns]


def filter_by_model_name(df: pd.DataFrame, names: pd.Series, query) -> pd.DataFrame:
    """Return the rows of *df* whose model name contains *query*.

    Args:
        df: Table to filter.
        names: Plain-text model names, sharing the index of *df*.
        query: Search text; ``None``, empty or whitespace-only returns *df*
            unfiltered.

    Returns:
        *df* itself when there is no query, otherwise the matching rows.
    """
    if not query or not query.strip():
        return df
    # regex=False: the query is user input, so characters such as "(" or "+"
    # must be matched literally instead of being compiled as a pattern.
    mask = names.str.contains(query.strip(), case=False, regex=False, na=False)
    return df[mask]


# ---------- load data ----------
BASE_CSV = "code_eval_board.csv"
INSTRUCT_CSV = "eval_instruct_lms.csv"

base_df_raw = pd.read_csv(BASE_CSV)
inst_df_raw = pd.read_csv(INSTRUCT_CSV)

base_df = make_clickable_and_drop_links(base_df_raw)
inst_df = make_clickable_and_drop_links(inst_df_raw)

# Searches run against the plain names rather than the Markdown-wrapped
# "Model" column, so a query cannot match the hidden link URL or the
# "[...](...)" syntax itself.
base_names = base_df_raw["Model"].astype(str)
inst_names = inst_df_raw["Model"].astype(str)

base_dtypes = datatypes_with_markdown(base_df)
inst_dtypes = datatypes_with_markdown(inst_df)

# ---------- css ----------
custom_css = """
.gradio-container {font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, sans-serif;}
#base-table a, #inst-table a {
  color: #2a7ae2 !important;
  text-decoration: underline dotted !important;
  text-underline-offset: 3px;
}
#base-table a:hover, #inst-table a:hover {
  color: #1e5bbf !important;
  text-decoration: underline solid !important;
}
"""

# ---------- static page text ----------
# NOTE(review): neither string below contains HTML tags, so a browser will
# collapse their line breaks into single spaces -- confirm whether markup
# was lost from these blocks.
ABOUT_HTML = """

📊 Benchmarks and Metrics Overview

The Diké Leaderboard evaluates the impact of quantization and compression on bias, fairness, ethics, and safety of large language models (LLMs). Each benchmark measures a specific social or ethical aspect of model behavior.

All evaluations are implemented via the LM Evaluation Harness and follow consistent zero-shot protocols.

"""

NOTES_HTML = """
Notes
• Click column headers to sort ascending/descending
• Model names are clickable links to Hugging Face pages

Part of the ⚖️ Diké Project.
"""

# ---------- app ----------
demo = gr.Blocks(css=custom_css)

with demo:
    # ---------- HEADER ----------
    # The header block is currently an empty HTML element.
    gr.HTML("")

    # ---------- TABS ----------
    with gr.Tabs():
        # ---------- TAB 1: Base LLMs ----------
        with gr.TabItem("🟢 Base LLMs Evaluation"):
            with gr.Row():
                base_search = gr.Textbox(
                    placeholder="🔍 Search base models...",
                    show_label=False,
                )

            def base_search_fn(q):
                """Filter the base-model table by the search text *q*."""
                return filter_by_model_name(base_df, base_names, q)

            base_table = gr.Dataframe(
                value=base_df,
                datatype=base_dtypes,
                interactive=False,
                sortable=True,
                elem_id="base-table",
            )
            # The filter runs when the user submits the textbox.
            base_search.submit(base_search_fn, base_search, base_table)

        # ---------- TAB 2: Instruction-tuned LLMs ----------
        with gr.TabItem("🔶 Instruction-tuned LLMs Evaluation"):
            with gr.Row():
                inst_search = gr.Textbox(
                    placeholder="🔍 Search instruction-tuned models...",
                    show_label=False,
                )

            def inst_search_fn(q):
                """Filter the instruction-tuned table by the search text *q*."""
                return filter_by_model_name(inst_df, inst_names, q)

            inst_table = gr.Dataframe(
                value=inst_df,
                datatype=inst_dtypes,
                interactive=False,
                sortable=True,
                elem_id="inst-table",
            )
            inst_search.submit(inst_search_fn, inst_search, inst_table)

        # ---------- TAB 3: About ----------
        with gr.TabItem("📘 About"):
            gr.HTML(ABOUT_HTML)

    # ---------- NOTES ----------
    # NOTE(review): placed below the tabs as a page footer; the original
    # nesting level of this block could not be determined -- confirm.
    gr.HTML(NOTES_HTML)


if __name__ == "__main__":
    # server_name "0.0.0.0" listens on all network interfaces.
    demo.launch(server_name="0.0.0.0", server_port=7860)