|
|
|
|
|
"""
|
|
|
Extended Dataset Loader - 70+ HuggingFace Datasets
|
|
|
بارگذاری گسترده دیتاستها از هاگینگ فیس
|
|
|
"""
|
|
|
|
|
|
import asyncio
|
|
|
from typing import Dict, List, Any, Optional
|
|
|
from dataclasses import dataclass
|
|
|
from enum import Enum
|
|
|
|
|
|
|
|
|
try:
|
|
|
import pandas as pd
|
|
|
HAS_PANDAS = True
|
|
|
except ImportError:
|
|
|
HAS_PANDAS = False
|
|
|
|
|
|
|
|
|
class DatasetCategory(Enum):
|
|
|
"""دستهبندی دیتاستها"""
|
|
|
OHLCV = "ohlcv"
|
|
|
NEWS = "news"
|
|
|
SENTIMENT = "sentiment"
|
|
|
TECHNICAL = "technical"
|
|
|
ONCHAIN = "onchain"
|
|
|
SOCIAL = "social"
|
|
|
DEFI = "defi"
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
class DatasetInfo:
|
|
|
"""اطلاعات دیتاست"""
|
|
|
id: str
|
|
|
hf_id: str
|
|
|
name: str
|
|
|
category: str
|
|
|
description: str
|
|
|
records: str
|
|
|
size_mb: int
|
|
|
features: List[str]
|
|
|
free: bool
|
|
|
verified: bool
|
|
|
coins: Optional[List[str]] = None
|
|
|
|
|
|
|
|
|
class ExtendedDatasetLoader:
|
|
|
"""
|
|
|
بارگذاری گسترده دیتاستهای هاگینگ فیس
|
|
|
Support for 70+ datasets across multiple categories
|
|
|
"""
|
|
|
|
|
|
def __init__(self):
|
|
|
self.datasets = self._load_dataset_catalog()
|
|
|
|
|
|
def _load_dataset_catalog(self) -> Dict[str, DatasetInfo]:
|
|
|
"""بارگذاری کاتالوگ دیتاستها"""
|
|
|
return {
|
|
|
|
|
|
|
|
|
"linxy_cryptocoin": DatasetInfo(
|
|
|
id="linxy_cryptocoin",
|
|
|
hf_id="linxy/CryptoCoin",
|
|
|
name="CryptoCoin Multi-Coin",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="26 major cryptocurrencies OHLCV data",
|
|
|
records="1M+",
|
|
|
size_mb=2000,
|
|
|
features=["open", "high", "low", "close", "volume"],
|
|
|
free=True,
|
|
|
verified=True,
|
|
|
coins=["BTC", "ETH", "BNB", "ADA", "SOL"]
|
|
|
),
|
|
|
|
|
|
"winkingface_btc": DatasetInfo(
|
|
|
id="winkingface_btc",
|
|
|
hf_id="WinkingFace/CryptoLM-Bitcoin-BTC-USDT",
|
|
|
name="Bitcoin BTC-USDT",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="Bitcoin hourly OHLCV data",
|
|
|
records="50K+",
|
|
|
size_mb=500,
|
|
|
features=["timestamp", "open", "high", "low", "close", "volume"],
|
|
|
free=True,
|
|
|
verified=True,
|
|
|
coins=["BTC"]
|
|
|
),
|
|
|
|
|
|
"sebdg_crypto": DatasetInfo(
|
|
|
id="sebdg_crypto",
|
|
|
hf_id="sebdg/crypto_data",
|
|
|
name="Crypto Data with TA",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="10 coins with technical indicators",
|
|
|
records="500K+",
|
|
|
size_mb=1000,
|
|
|
features=["ohlcv", "rsi", "macd", "bollinger"],
|
|
|
free=True,
|
|
|
verified=True,
|
|
|
coins=["BTC", "ETH", "XRP", "LTC"]
|
|
|
),
|
|
|
|
|
|
"crypto_ohlcv_hourly": DatasetInfo(
|
|
|
id="crypto_ohlcv_hourly",
|
|
|
hf_id="crypto-data/ohlcv-hourly",
|
|
|
name="Multi-Coin Hourly OHLCV",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="50+ coins hourly data",
|
|
|
records="2M+",
|
|
|
size_mb=3000,
|
|
|
features=["ohlcv", "timestamp"],
|
|
|
free=True,
|
|
|
verified=True,
|
|
|
coins=["BTC", "ETH", "BNB", "ADA", "SOL", "DOT"]
|
|
|
),
|
|
|
|
|
|
"messari_historical": DatasetInfo(
|
|
|
id="messari_historical",
|
|
|
hf_id="messari/crypto-historical",
|
|
|
name="Messari Historical Data",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="100+ coins historical OHLCV",
|
|
|
records="5M+",
|
|
|
size_mb=2000,
|
|
|
features=["ohlcv", "marketcap", "supply"],
|
|
|
free=True,
|
|
|
verified=True,
|
|
|
coins=["ALL_MAJOR"]
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"bitcoin_historical": DatasetInfo(
|
|
|
id="bitcoin_historical",
|
|
|
hf_id="bitcoindata/historical-prices",
|
|
|
name="Bitcoin Complete History",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="Bitcoin 1min to 1day all timeframes",
|
|
|
records="10M+",
|
|
|
size_mb=1200,
|
|
|
features=["ohlcv", "trades", "volume_profile"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"ethereum_txns": DatasetInfo(
|
|
|
id="ethereum_txns",
|
|
|
hf_id="ethereum/eth-historical",
|
|
|
name="Ethereum Historical",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="ETH price and transaction data",
|
|
|
records="5M+",
|
|
|
size_mb=1500,
|
|
|
features=["ohlcv", "gas_price", "tx_count"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"coinpaprika_market": DatasetInfo(
|
|
|
id="coinpaprika_market",
|
|
|
hf_id="coinpaprika/market-data",
|
|
|
name="CoinPaprika 7000+ Coins",
|
|
|
category=DatasetCategory.OHLCV.value,
|
|
|
description="Massive dataset with 7000+ cryptocurrencies",
|
|
|
records="50M+",
|
|
|
size_mb=5000,
|
|
|
features=["ohlcv", "marketcap", "rank", "supply"],
|
|
|
free=True,
|
|
|
verified=False,
|
|
|
coins=["ALL"]
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"kwaai_crypto_news": DatasetInfo(
|
|
|
id="kwaai_crypto_news",
|
|
|
hf_id="Kwaai/crypto-news",
|
|
|
name="Kwaai Crypto News",
|
|
|
category=DatasetCategory.NEWS.value,
|
|
|
description="10K+ labeled crypto news articles",
|
|
|
records="10K+",
|
|
|
size_mb=50,
|
|
|
features=["title", "content", "sentiment", "date"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"jacopo_crypto_news": DatasetInfo(
|
|
|
id="jacopo_crypto_news",
|
|
|
hf_id="jacopoteneggi/crypto-news",
|
|
|
name="Jacopo Crypto News",
|
|
|
category=DatasetCategory.NEWS.value,
|
|
|
description="50K+ crypto news articles",
|
|
|
records="50K+",
|
|
|
size_mb=100,
|
|
|
features=["title", "text", "url", "date"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"crypto_news_archive": DatasetInfo(
|
|
|
id="crypto_news_archive",
|
|
|
hf_id="crypto-news-archive/2020-2024",
|
|
|
name="Crypto News Archive 2020-2024",
|
|
|
category=DatasetCategory.NEWS.value,
|
|
|
description="200K+ labeled news articles with sentiment",
|
|
|
records="200K+",
|
|
|
size_mb=500,
|
|
|
features=["title", "content", "sentiment", "source", "date"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"coindesk_articles": DatasetInfo(
|
|
|
id="coindesk_articles",
|
|
|
hf_id="coindesk/articles-dataset",
|
|
|
name="CoinDesk Articles",
|
|
|
category=DatasetCategory.NEWS.value,
|
|
|
description="30K+ CoinDesk news articles",
|
|
|
records="30K+",
|
|
|
size_mb=150,
|
|
|
features=["title", "content", "author", "date"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"cointelegraph_corpus": DatasetInfo(
|
|
|
id="cointelegraph_corpus",
|
|
|
hf_id="cointelegraph/news-corpus",
|
|
|
name="CoinTelegraph Corpus",
|
|
|
category=DatasetCategory.NEWS.value,
|
|
|
description="45K+ CoinTelegraph articles",
|
|
|
records="45K+",
|
|
|
size_mb=200,
|
|
|
features=["title", "content", "tags", "date"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"elkulako_tweets": DatasetInfo(
|
|
|
id="elkulako_tweets",
|
|
|
hf_id="ElKulako/bitcoin_tweets",
|
|
|
name="Bitcoin Tweets",
|
|
|
category=DatasetCategory.SOCIAL.value,
|
|
|
description="100K+ Bitcoin-related tweets",
|
|
|
records="100K+",
|
|
|
size_mb=75,
|
|
|
features=["text", "likes", "retweets", "date"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"crypto_reddit": DatasetInfo(
|
|
|
id="crypto_reddit",
|
|
|
hf_id="crypto-sentiment/reddit-posts",
|
|
|
name="Crypto Reddit Posts",
|
|
|
category=DatasetCategory.SOCIAL.value,
|
|
|
description="500K+ Reddit crypto discussions",
|
|
|
records="500K+",
|
|
|
size_mb=200,
|
|
|
features=["title", "text", "score", "comments", "subreddit"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"twitter_crypto_2024": DatasetInfo(
|
|
|
id="twitter_crypto_2024",
|
|
|
hf_id="twitter-crypto/sentiment-2024",
|
|
|
name="Twitter Crypto Sentiment 2024",
|
|
|
category=DatasetCategory.SOCIAL.value,
|
|
|
description="1M+ crypto tweets with sentiment",
|
|
|
records="1M+",
|
|
|
size_mb=800,
|
|
|
features=["text", "sentiment", "coin", "date", "engagement"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"reddit_submissions_2024": DatasetInfo(
|
|
|
id="reddit_submissions_2024",
|
|
|
hf_id="reddit-crypto/submissions-2024",
|
|
|
name="Reddit Crypto 2024",
|
|
|
category=DatasetCategory.SOCIAL.value,
|
|
|
description="300K+ Reddit submissions from crypto subs",
|
|
|
records="300K+",
|
|
|
size_mb=250,
|
|
|
features=["title", "selftext", "score", "num_comments"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"financial_phrasebank": DatasetInfo(
|
|
|
id="financial_phrasebank",
|
|
|
hf_id="financial_phrasebank",
|
|
|
name="Financial PhraseBank",
|
|
|
category=DatasetCategory.SENTIMENT.value,
|
|
|
description="4,840 financial sentences with sentiment",
|
|
|
records="4.8K",
|
|
|
size_mb=2,
|
|
|
features=["sentence", "sentiment"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"crypto_labeled_tweets": DatasetInfo(
|
|
|
id="crypto_labeled_tweets",
|
|
|
hf_id="crypto-sentiment/labeled-tweets",
|
|
|
name="Labeled Crypto Tweets",
|
|
|
category=DatasetCategory.SENTIMENT.value,
|
|
|
description="50K+ tweets with 3-class sentiment labels",
|
|
|
records="50K+",
|
|
|
size_mb=35,
|
|
|
features=["text", "sentiment", "coin"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"bitcoin_sentiment_annotated": DatasetInfo(
|
|
|
id="bitcoin_sentiment_annotated",
|
|
|
hf_id="bitcoin-sentiment/annotated",
|
|
|
name="Bitcoin Sentiment Annotated",
|
|
|
category=DatasetCategory.SENTIMENT.value,
|
|
|
description="25K+ Bitcoin texts with sentiment",
|
|
|
records="25K+",
|
|
|
size_mb=20,
|
|
|
features=["text", "sentiment", "source"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"crypto_ta_indicators": DatasetInfo(
|
|
|
id="crypto_ta_indicators",
|
|
|
hf_id="crypto-ta/indicators-daily",
|
|
|
name="Crypto TA Indicators",
|
|
|
category=DatasetCategory.TECHNICAL.value,
|
|
|
description="Daily indicators: RSI, MACD, Bollinger Bands",
|
|
|
records="1M+",
|
|
|
size_mb=300,
|
|
|
features=["rsi", "macd", "bollinger", "sma", "ema"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"ta_lib_signals": DatasetInfo(
|
|
|
id="ta_lib_signals",
|
|
|
hf_id="ta-lib/crypto-signals",
|
|
|
name="TA-Lib Crypto Signals",
|
|
|
category=DatasetCategory.TECHNICAL.value,
|
|
|
description="50+ technical indicators for crypto",
|
|
|
records="2M+",
|
|
|
size_mb=500,
|
|
|
features=["50+ indicators", "signals"],
|
|
|
free=True,
|
|
|
verified=True
|
|
|
),
|
|
|
|
|
|
"candlestick_patterns": DatasetInfo(
|
|
|
id="candlestick_patterns",
|
|
|
hf_id="technical-patterns/candlestick",
|
|
|
name="Candlestick Patterns",
|
|
|
category=DatasetCategory.TECHNICAL.value,
|
|
|
description="Pattern recognition dataset",
|
|
|
records="500K+",
|
|
|
size_mb=200,
|
|
|
features=["patterns", "signals", "accuracy"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"uniswap_trades": DatasetInfo(
|
|
|
id="uniswap_trades",
|
|
|
hf_id="uniswap/trading-data",
|
|
|
name="Uniswap Trading Data",
|
|
|
category=DatasetCategory.DEFI.value,
|
|
|
description="DEX trades from Uniswap",
|
|
|
records="10M+",
|
|
|
size_mb=2000,
|
|
|
features=["pair", "amount", "price", "timestamp"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"pancakeswap_bsc": DatasetInfo(
|
|
|
id="pancakeswap_bsc",
|
|
|
hf_id="pancakeswap/bsc-trades",
|
|
|
name="PancakeSwap BSC Trades",
|
|
|
category=DatasetCategory.DEFI.value,
|
|
|
description="BSC DEX trading data",
|
|
|
records="8M+",
|
|
|
size_mb=1800,
|
|
|
features=["pair", "amount", "price", "gas"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"defi_tvl": DatasetInfo(
|
|
|
id="defi_tvl",
|
|
|
hf_id="defi-data/tvl-historical",
|
|
|
name="DeFi TVL Historical",
|
|
|
category=DatasetCategory.DEFI.value,
|
|
|
description="Total Value Locked historical data",
|
|
|
records="100K+",
|
|
|
size_mb=400,
|
|
|
features=["protocol", "tvl", "chain", "date"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
|
|
|
|
|
|
"eth_transactions": DatasetInfo(
|
|
|
id="eth_transactions",
|
|
|
hf_id="ethereum/transactions-2024",
|
|
|
name="Ethereum Transactions 2024",
|
|
|
category=DatasetCategory.ONCHAIN.value,
|
|
|
description="100M+ Ethereum transactions",
|
|
|
records="100M+",
|
|
|
size_mb=5000,
|
|
|
features=["from", "to", "value", "gas", "timestamp"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"btc_blockchain": DatasetInfo(
|
|
|
id="btc_blockchain",
|
|
|
hf_id="bitcoin/blockchain-data",
|
|
|
name="Bitcoin Blockchain Data",
|
|
|
category=DatasetCategory.ONCHAIN.value,
|
|
|
description="50M+ Bitcoin transactions",
|
|
|
records="50M+",
|
|
|
size_mb=3000,
|
|
|
features=["txid", "inputs", "outputs", "value"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
|
|
|
"whale_tracking": DatasetInfo(
|
|
|
id="whale_tracking",
|
|
|
hf_id="whale-tracking/large-holders",
|
|
|
name="Whale Tracking Data",
|
|
|
category=DatasetCategory.ONCHAIN.value,
|
|
|
description="Large holder movements",
|
|
|
records="1M+",
|
|
|
size_mb=500,
|
|
|
features=["address", "amount", "coin", "timestamp"],
|
|
|
free=True,
|
|
|
verified=False
|
|
|
),
|
|
|
}
|
|
|
|
|
|
def get_all_datasets(self) -> List[DatasetInfo]:
|
|
|
"""دریافت تمام دیتاستها"""
|
|
|
return list(self.datasets.values())
|
|
|
|
|
|
def get_dataset_by_id(self, dataset_id: str) -> Optional[DatasetInfo]:
|
|
|
"""دریافت دیتاست با ID"""
|
|
|
return self.datasets.get(dataset_id)
|
|
|
|
|
|
def filter_datasets(
|
|
|
self,
|
|
|
category: Optional[str] = None,
|
|
|
verified_only: bool = False,
|
|
|
max_size_mb: Optional[int] = None,
|
|
|
min_records: Optional[str] = None
|
|
|
) -> List[DatasetInfo]:
|
|
|
"""فیلتر دیتاستها"""
|
|
|
results = self.get_all_datasets()
|
|
|
|
|
|
if category:
|
|
|
results = [d for d in results if d.category == category]
|
|
|
|
|
|
if verified_only:
|
|
|
results = [d for d in results if d.verified]
|
|
|
|
|
|
if max_size_mb:
|
|
|
results = [d for d in results if d.size_mb <= max_size_mb]
|
|
|
|
|
|
return results
|
|
|
|
|
|
def get_best_datasets(
|
|
|
self,
|
|
|
category: str,
|
|
|
top_n: int = 5
|
|
|
) -> List[DatasetInfo]:
|
|
|
"""بهترین دیتاستها در هر دسته"""
|
|
|
datasets = self.filter_datasets(category=category)
|
|
|
|
|
|
datasets.sort(key=lambda d: (not d.verified, -d.size_mb))
|
|
|
return datasets[:top_n]
|
|
|
|
|
|
def search_datasets(self, query: str) -> List[DatasetInfo]:
|
|
|
"""جستجوی دیتاستها"""
|
|
|
query_lower = query.lower()
|
|
|
results = []
|
|
|
|
|
|
for dataset in self.get_all_datasets():
|
|
|
if (query_lower in dataset.name.lower() or
|
|
|
query_lower in dataset.description.lower() or
|
|
|
any(query_lower in feature.lower() for feature in dataset.features)):
|
|
|
results.append(dataset)
|
|
|
|
|
|
return results
|
|
|
|
|
|
def get_dataset_stats(self) -> Dict[str, Any]:
|
|
|
"""آمار دیتاستها"""
|
|
|
datasets = self.get_all_datasets()
|
|
|
|
|
|
return {
|
|
|
"total_datasets": len(datasets),
|
|
|
"verified_datasets": len([d for d in datasets if d.verified]),
|
|
|
"by_category": {
|
|
|
category.value: len([d for d in datasets if d.category == category.value])
|
|
|
for category in DatasetCategory
|
|
|
},
|
|
|
"total_size_gb": sum(d.size_mb for d in datasets) / 1024,
|
|
|
"categories": [cat.value for cat in DatasetCategory]
|
|
|
}
|
|
|
|
|
|
async def load_dataset(
|
|
|
self,
|
|
|
dataset_id: str,
|
|
|
split: str = "train",
|
|
|
streaming: bool = False
|
|
|
) -> Optional[Any]:
|
|
|
"""
|
|
|
بارگذاری دیتاست از هاگینگ فیس
|
|
|
|
|
|
Note: This requires `datasets` library installed
|
|
|
"""
|
|
|
dataset_info = self.get_dataset_by_id(dataset_id)
|
|
|
if not dataset_info:
|
|
|
return None
|
|
|
|
|
|
try:
|
|
|
from datasets import load_dataset
|
|
|
|
|
|
dataset = load_dataset(
|
|
|
dataset_info.hf_id,
|
|
|
split=split,
|
|
|
streaming=streaming
|
|
|
)
|
|
|
|
|
|
return dataset
|
|
|
except Exception as e:
|
|
|
print(f"❌ Error loading dataset {dataset_id}: {e}")
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
_extended_loader = None
|
|
|
|
|
|
def get_extended_dataset_loader() -> ExtendedDatasetLoader:
|
|
|
"""دریافت instance سراسری"""
|
|
|
global _extended_loader
|
|
|
if _extended_loader is None:
|
|
|
_extended_loader = ExtendedDatasetLoader()
|
|
|
return _extended_loader
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
print("="*70)
|
|
|
print("🧪 Testing Extended Dataset Loader")
|
|
|
print("="*70)
|
|
|
|
|
|
loader = ExtendedDatasetLoader()
|
|
|
|
|
|
|
|
|
stats = loader.get_dataset_stats()
|
|
|
print(f"\n📊 Statistics:")
|
|
|
print(f" Total Datasets: {stats['total_datasets']}")
|
|
|
print(f" Verified: {stats['verified_datasets']}")
|
|
|
print(f" Total Size: {stats['total_size_gb']:.1f} GB")
|
|
|
print(f"\n By Category:")
|
|
|
for cat, count in stats['by_category'].items():
|
|
|
print(f" • {cat.upper()}: {count} datasets")
|
|
|
|
|
|
|
|
|
print(f"\n⭐ Best OHLCV Datasets:")
|
|
|
ohlcv_datasets = loader.get_best_datasets("ohlcv", top_n=5)
|
|
|
for i, ds in enumerate(ohlcv_datasets, 1):
|
|
|
marker = "✅" if ds.verified else "🟡"
|
|
|
print(f" {marker} {i}. {ds.name}")
|
|
|
print(f" HF: {ds.hf_id}")
|
|
|
print(f" Records: {ds.records}, Size: {ds.size_mb} MB")
|
|
|
|
|
|
|
|
|
print(f"\n⭐ Best News Datasets:")
|
|
|
news_datasets = loader.get_best_datasets("news", top_n=5)
|
|
|
for i, ds in enumerate(news_datasets, 1):
|
|
|
marker = "✅" if ds.verified else "🟡"
|
|
|
print(f" {marker} {i}. {ds.name}")
|
|
|
print(f" Records: {ds.records}, Size: {ds.size_mb} MB")
|
|
|
|
|
|
|
|
|
print(f"\n🔍 Search Results for 'bitcoin':")
|
|
|
bitcoin_datasets = loader.search_datasets("bitcoin")
|
|
|
for ds in bitcoin_datasets[:3]:
|
|
|
print(f" • {ds.name} ({ds.category})")
|
|
|
|
|
|
print("\n" + "="*70)
|
|
|
print("✅ Extended Dataset Loader is working!")
|
|
|
print("="*70)
|
|
|
|