#!/usr/bin/env python3
"""Populate the Hugging Face dataset with mock data.

Builds a handful of randomly generated benchmark entries (MI355X vs. B200
figures plus a few perplexity-style scores), orders them by MI355X
throughput, and pushes the result to ``DATASET_ID`` on the Hugging Face Hub.
"""

import os
import random
from datetime import datetime, timedelta

from datasets import Dataset

# Configuration
DATASET_ID = "daniehua/dsr1-fp4-sgl-isl8192osl1024"
# Token taken from the HF_TOKEN environment variable. When the variable is
# unset or empty this is None, which lets push_to_hub fall back to the token
# cached by `huggingface-cli login`.
HF_TOKEN = os.environ.get("HF_TOKEN") or None
# Number of mock entries to generate.
NUM_ENTRIES = 2

# Generate mock data
mock_data = []
teams = ["Official Test1", "Official Test2"]
base_date = datetime.now() - timedelta(days=7)

for i in range(NUM_ENTRIES):
    team = random.choice(teams)
    # Entries start one week ago and are spaced half a day apart.
    timestamp = (base_date + timedelta(days=i / 2)).strftime("%Y-%m-%d %H:%M:%S")
    conc = random.choice([4, 8, 16, 32, 64])

    mi355x_e2e = random.randint(1000, 2000)
    mi355x_throughput = random.randint(1000, 2000)
    b200_e2e = random.randint(1000, 2000)
    b200_throughput = random.randint(1000, 2000)

    entry = {
        "team_name": team,
        "timestamp": timestamp,
        "conc": conc,
        "mi355x_e2e": mi355x_e2e,
        "mi355x_throughput": mi355x_throughput,
        "b200_e2e": b200_e2e,
        "b200_throughput": b200_throughput,
        # Ratios are the MI355X value divided by the B200 value; the B200
        # figures are always >= 1000, so the division cannot hit zero.
        "e2e_ratio": mi355x_e2e / b200_e2e,
        "throughput_ratio": mi355x_throughput / b200_throughput,
        # Placeholder scores: uniform random floats in [0.0, 1.0).
        "bits_per_byte": random.random(),
        "byte_perplexity": random.random(),
        "word_perplexity": random.random(),
    }
    mock_data.append(entry)

# Sort by MI355X throughput, highest first
mock_data.sort(key=lambda row: row["mi355x_throughput"], reverse=True)

# Create dataset and push to hub
print(f"Creating dataset with {len(mock_data)} entries...")
dataset = Dataset.from_list(mock_data)

print(f"Pushing to Hugging Face Hub: {DATASET_ID}")
# A None token makes the hub client use the cached login token.
dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)

print("Dataset populated successfully!")
print("\nSample entries:")