Spaces:
Runtime error
Runtime error
| # A simple script that loops over all public models and gets their library_name | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from collections import Counter | |
| from huggingface_hub import HfApi | |
| from datasets import load_dataset | |
# Hub client shared by the rest of the script.
api = HfApi()
# Created once at import time. Per the note in `get_current_nb_models`,
# `list_models()` returns a generator, so this object can only be iterated
# a single time.
list_models = api.list_models()
def fetch_dataset_and_init():
    """Build the per-library usage tables from the model-cards dataset.

    Loads the ``train`` split of ``librarian-bots/model_cards_with_metadata``,
    counts how often each ``library_name`` value occurs, and returns two
    DataFrames with the columns ``index`` (the library name) and ``count``:

    * ``df``     -- raw counts, sorted from most to least frequent;
    * ``df_log`` -- same rows, with ``count`` replaced by its natural log.

    Returns:
        A ``(df, df_log)`` tuple.
    """
    cards = load_dataset("librarian-bots/model_cards_with_metadata", split="train")
    tally = Counter(cards["library_name"])

    # Most frequently used libraries first.
    ranked = pd.Series(tally).sort_values(ascending=False)
    df = ranked.to_frame(name="count").reset_index()

    # Log-scaled variant; `assign` returns a new frame and leaves `df` intact.
    df_log = df.assign(count=np.log(df["count"]))
    return df, df_log
# Computed once at import time; `df` and `df_log` are read as module globals
# by `bar_plot_fn`, and `df` also by `get_current_nb_models`.
df, df_log = fetch_dataset_and_init()
def get_current_nb_models():
    """Return how many Hub models are not accounted for in ``df``.

    Counts every model currently listed on the Hub and subtracts the total
    of the ``count`` column of the module-level ``df``.

    Returns:
        The difference, formatted as a string for display in a text box.
    """
    # `list_models()` returns a generator, so it has to be counted by
    # iterating. Ask the API for a fresh one on every call: a generator
    # created once at import time is exhausted after the first click, and
    # every later click would count 0 models.
    total_models = sum(1 for _ in api.list_models())
    diff_models = total_models - df["count"].sum()
    return str(diff_models)
# Height and width handed to every `gr.BarPlot` built by `bar_plot_fn`.
plot_height = 512
plot_width = 1512
# Number of rows in `df` (one per distinct library_name value).
# NOTE(review): this name is rebound to the `gr.Slider` component when the
# UI is built further down, so this integer value is not used afterwards.
top_k = len(df)
def bar_plot_fn(display, top_k):
    """Build the bar plot for the selected display mode.

    Args:
        display: ``"simple"`` for raw counts or ``"log"`` for log counts.
        top_k: Number of leading rows (most used libraries) to plot.

    Returns:
        A ``gr.BarPlot`` over the first ``top_k`` rows of the matching
        table, or ``None`` when ``display`` is neither known mode.
    """
    # Pick the backing table first; both modes share the same plot settings.
    if display == "simple":
        source = df
    elif display == "log":
        source = df_log
    else:
        return None

    return gr.BarPlot(
        source[:top_k],
        x="index",
        y="count",
        tooltip=["index", "count"],
        height=plot_height,
        width=plot_width,
    )
# UI: plot controls, the bar plot itself, and a button that fetches the
# number of models without model cards on demand.
with gr.Blocks() as bar_plot:
    with gr.Column():
        # Controls that drive the plot.
        with gr.Column():
            display = gr.Dropdown(
                choices=["simple", "log"],
                value="simple",
                label="Type of Bar Plot",
            )
            top_k = gr.Slider(
                label="Select top-K most used library_name",
                value=len(df),
                minimum=1,
                maximum=len(df),
                step=1,
            )

        with gr.Column():
            plot = gr.BarPlot()

        with gr.Row():
            fetch_button = gr.Button(
                value="Fetch current number of models without model cards (takes up to 1min to fetch everything)"
            )
            text_box = gr.Textbox(value="", label="Number of models without model cards")

    # Redraw the plot whenever a control changes and once on page load;
    # all three events share the same callback wiring.
    _refresh = dict(fn=bar_plot_fn, inputs=[display, top_k], outputs=plot)
    top_k.change(**_refresh)
    display.change(**_refresh)
    fetch_button.click(get_current_nb_models, outputs=[text_box])
    bar_plot.load(**_refresh)

bar_plot.launch()