"""
Script to populate the Hugging Face dataset with mock data.
"""
|
|
|
|
|
from datasets import Dataset |
|
|
from datetime import datetime, timedelta |
|
|
import random |
|
|
import os |
|
|
|
|
|
|
|
|
# Repository id of the target dataset on the Hugging Face Hub.
DATASET_ID = "daniehua/dsr1-fp4-sgl-isl8192osl1024"

# Access token used for the upload. It is read from the HF_TOKEN environment
# variable so a secret never has to be written into this file; when the
# variable is unset the value is None, exactly as before.
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
|
|
|
|
|
def _make_mock_entry(run_index, start, team_names):
    """Build one mock benchmark row.

    Args:
        run_index: Zero-based row number; each step moves the timestamp
            half a day forward from ``start``.
        start: ``datetime`` used for the first row.
        team_names: Sequence of names from which ``team_name`` is drawn.

    Returns:
        A dict holding the twelve columns of a single row.
    """
    # The order of the random draws below is kept fixed so that a seeded
    # ``random`` module keeps producing the same rows.
    team = random.choice(team_names)
    timestamp = (start + timedelta(days=run_index / 2)).strftime(
        "%Y-%m-%d %H:%M:%S"
    )

    conc = random.choice([4, 8, 16, 32, 64])
    mi355x_e2e = random.randint(1000, 2000)
    mi355x_throughput = random.randint(1000, 2000)
    b200_e2e = random.randint(1000, 2000)
    b200_throughput = random.randint(1000, 2000)
    # Both denominators come from randint(1000, 2000), so they are never zero.
    e2e_ratio = mi355x_e2e / b200_e2e
    throughput_ratio = mi355x_throughput / b200_throughput
    bits_per_byte = random.random()
    byte_perplexity = random.random()
    word_perplexity = random.random()

    return {
        "team_name": team,
        "timestamp": timestamp,
        "conc": conc,
        "mi355x_e2e": mi355x_e2e,
        "mi355x_throughput": mi355x_throughput,
        "b200_e2e": b200_e2e,
        "b200_throughput": b200_throughput,
        "e2e_ratio": e2e_ratio,
        "throughput_ratio": throughput_ratio,
        "bits_per_byte": bits_per_byte,
        "byte_perplexity": byte_perplexity,
        "word_perplexity": word_perplexity,
    }


# Names the mock rows are attributed to, and the timestamp of the first row
# (one week before the script runs).
teams = ["Official Test1", "Official Test2"]
base_date = datetime.now() - timedelta(days=7)

# Two mock rows, spaced half a day apart.
mock_data = [_make_mock_entry(i, base_date, teams) for i in range(2)]

# Order rows by MI355X throughput, highest first.
mock_data.sort(key=lambda row: row["mi355x_throughput"], reverse=True)
|
|
|
|
|
|
|
|
# Turn the list of row dicts into a `datasets.Dataset`.
print(f"Creating dataset with {len(mock_data)} entries...")
dataset = Dataset.from_list(mock_data)

# Upload to the Hub repository named by DATASET_ID. This is a network call
# that authenticates with HF_TOKEN.
print(f"Pushing to Hugging Face Hub: {DATASET_ID}")
dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)

print("Dataset populated successfully!")
print("\nSample entries:")