Datasourceforcryptocurrency / backend /services /extended_model_manager.py
Really-amin's picture
Upload 577 files
b190b45 verified
raw
history blame
19.3 kB
#!/usr/bin/env python3
"""
Extended Model Manager with 100+ New HuggingFace Models
Extended management layer that includes all of the discovered models
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from backend.services.advanced_model_manager import (
AdvancedModelManager,
ModelInfo,
ModelCategory,
ModelSize
)
class ExtendedModelManager(AdvancedModelManager):
    """Model manager that extends the base catalog with extra HuggingFace models.

    The parent catalog is loaded first and the entries returned by
    ``_load_new_models`` are merged on top of it, so a new entry replaces a
    parent entry that uses the same key.
    """

    def _load_model_catalog(self):
        """Return the parent catalog merged with the newly added models."""
        # Load the pre-existing models first...
        models = super()._load_model_catalog()
        # ...then layer the new ones on top (same-key entries are replaced).
        models.update(self._load_new_models())
        return models

    def _load_new_models(self) -> dict:
        """Build the catalog entries for the newly discovered models.

        Returns:
            A dict mapping each model id to its ``ModelInfo`` record.
        """
        return {
            # ===== NEW CRYPTO-SPECIFIC SENTIMENT MODELS =====
            "bitcoin_bert": ModelInfo(
                id="bitcoin_bert",
                hf_id="ElKulako/BitcoinBERT",
                name="BitcoinBERT",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=450,
                description="Bitcoin-specific sentiment analysis model",
                use_cases=["bitcoin", "btc", "sentiment", "social"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.86,
                popularity_score=0.75,
                tags=["bitcoin", "sentiment", "bert", "crypto"],
                api_compatible=True,
                downloadable=True,
            ),
            "crypto_finbert": ModelInfo(
                id="crypto_finbert",
                hf_id="burakutf/finetuned-finbert-crypto",
                name="Crypto FinBERT",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="FinBERT fine-tuned specifically on crypto news",
                use_cases=["crypto", "news", "financial", "sentiment"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.84,
                popularity_score=0.70,
                tags=["crypto", "finbert", "sentiment", "news"],
                api_compatible=True,
                downloadable=True,
            ),
            "crypto_sentiment_general": ModelInfo(
                id="crypto_sentiment_general",
                hf_id="mayurjadhav/crypto-sentiment-model",
                name="Crypto Sentiment Model",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=400,
                description="General crypto sentiment analysis",
                use_cases=["crypto", "sentiment", "general"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.82,
                popularity_score=0.65,
                tags=["crypto", "sentiment"],
                api_compatible=True,
                downloadable=True,
            ),
            "stock_bubbles_crypto": ModelInfo(
                id="stock_bubbles_crypto",
                hf_id="StockBubbles/crypto-sentiment",
                name="StockBubbles Crypto Sentiment",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=330,
                description="Fast crypto sentiment analysis",
                use_cases=["crypto", "fast", "sentiment"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.80,
                popularity_score=0.60,
                tags=["crypto", "sentiment", "fast"],
                api_compatible=True,
                downloadable=True,
            ),
            # ===== ADVANCED FINANCIAL MODELS =====
            "finbert_esg": ModelInfo(
                id="finbert_esg",
                hf_id="yiyanghkust/finbert-esg",
                name="FinBERT ESG",
                category=ModelCategory.CLASSIFICATION.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="ESG (Environmental, Social, Governance) classification",
                use_cases=["esg", "sustainability", "classification"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.88,
                popularity_score=0.75,
                tags=["finbert", "esg", "classification"],
                api_compatible=True,
                downloadable=True,
            ),
            "finbert_pretrain": ModelInfo(
                id="finbert_pretrain",
                hf_id="yiyanghkust/finbert-pretrain",
                name="FinBERT Pretrained",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="Pretrained FinBERT for financial domain",
                use_cases=["financial", "pretraining", "domain"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.86,
                popularity_score=0.70,
                tags=["finbert", "pretrain", "financial"],
                api_compatible=True,
                downloadable=True,
            ),
            "stocktwits_roberta": ModelInfo(
                id="stocktwits_roberta",
                hf_id="zhayunduo/roberta-base-stocktwits-finetuned",
                name="StockTwits RoBERTa",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=500,
                description="RoBERTa fine-tuned on StockTwits data",
                use_cases=["stocktwits", "social", "trading"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.83,
                popularity_score=0.68,
                tags=["roberta", "stocktwits", "social"],
                api_compatible=True,
                downloadable=True,
            ),
            # ===== MULTILINGUAL MODELS =====
            "multilingual_sentiment": ModelInfo(
                id="multilingual_sentiment",
                hf_id="nlptown/bert-base-multilingual-uncased-sentiment",
                name="Multilingual BERT Sentiment",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=710,
                description="Sentiment analysis for 104 languages",
                use_cases=["multilingual", "global", "sentiment"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.84,
                popularity_score=0.85,
                tags=["multilingual", "bert", "sentiment"],
                api_compatible=True,
                downloadable=True,
            ),
            "distilbert_multilingual": ModelInfo(
                id="distilbert_multilingual",
                hf_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
                name="DistilBERT Multilingual Sentiments",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=550,
                description="Fast multilingual sentiment (distilled)",
                use_cases=["multilingual", "fast", "sentiment"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.82,
                popularity_score=0.80,
                tags=["distilbert", "multilingual", "fast"],
                api_compatible=True,
                downloadable=True,
            ),
            # ===== FAST/EFFICIENT EMBEDDINGS =====
            "minilm_l6": ModelInfo(
                id="minilm_l6",
                hf_id="sentence-transformers/all-MiniLM-L6-v2",
                name="MiniLM-L6 (Fast Embeddings)",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.TINY.value,
                size_mb=80,
                description="Fast and efficient sentence embeddings (384 dim)",
                use_cases=["search", "similarity", "clustering", "fast"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.88,
                popularity_score=0.95,
                tags=["embeddings", "fast", "efficient", "minilm"],
                api_compatible=True,
                downloadable=True,
            ),
            "minilm_l12": ModelInfo(
                id="minilm_l12",
                hf_id="sentence-transformers/all-MiniLM-L12-v2",
                name="MiniLM-L12 (Balanced)",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=120,
                description="Balanced speed/quality embeddings (384 dim)",
                use_cases=["search", "similarity", "balanced"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.90,
                popularity_score=0.90,
                tags=["embeddings", "balanced", "minilm"],
                api_compatible=True,
                downloadable=True,
            ),
            "multi_qa_mpnet": ModelInfo(
                id="multi_qa_mpnet",
                hf_id="sentence-transformers/multi-qa-mpnet-base-dot-v1",
                name="Multi-QA MPNet",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="Optimized for question answering and search",
                use_cases=["qa", "search", "retrieval"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.88,
                tags=["embeddings", "qa", "mpnet"],
                api_compatible=True,
                downloadable=True,
            ),
            "e5_base": ModelInfo(
                id="e5_base",
                hf_id="intfloat/e5-base-v2",
                name="E5 Base V2",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="High-quality general embeddings (768 dim)",
                use_cases=["search", "retrieval", "quality"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.92,
                popularity_score=0.87,
                tags=["embeddings", "e5", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            "bge_base": ModelInfo(
                id="bge_base",
                hf_id="BAAI/bge-base-en-v1.5",
                name="BGE Base English V1.5",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="Beijing Academy of AI embeddings (768 dim)",
                use_cases=["search", "retrieval", "rag"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.93,
                popularity_score=0.86,
                tags=["embeddings", "bge", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            "bge_large": ModelInfo(
                id="bge_large",
                hf_id="BAAI/bge-large-en-v1.5",
                name="BGE Large English V1.5",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="High-quality embeddings (1024 dim)",
                use_cases=["search", "retrieval", "rag", "quality"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.95,
                popularity_score=0.85,
                tags=["embeddings", "bge", "large", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            # ===== NER & ENTITY EXTRACTION =====
            "bert_large_ner": ModelInfo(
                id="bert_large_ner",
                hf_id="dslim/bert-large-NER",
                name="BERT Large NER",
                category=ModelCategory.NER.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="Large BERT for named entity recognition",
                use_cases=["ner", "entities", "extraction"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.92,
                popularity_score=0.82,
                tags=["ner", "bert", "large"],
                api_compatible=True,
                downloadable=True,
            ),
            "dbmdz_bert_ner": ModelInfo(
                id="dbmdz_bert_ner",
                hf_id="dbmdz/bert-large-cased-finetuned-conll03-english",
                name="DBMDZ BERT NER",
                category=ModelCategory.NER.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="BERT NER fine-tuned on CoNLL-03",
                use_cases=["ner", "companies", "financial"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.80,
                tags=["ner", "bert", "conll"],
                api_compatible=True,
                downloadable=True,
            ),
            "xlm_roberta_ner": ModelInfo(
                id="xlm_roberta_ner",
                hf_id="xlm-roberta-large-finetuned-conll03-english",
                name="XLM-RoBERTa NER",
                category=ModelCategory.NER.value,
                size=ModelSize.LARGE.value,
                size_mb=2200,
                description="Multilingual NER with RoBERTa",
                use_cases=["ner", "multilingual", "entities"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.93,
                popularity_score=0.78,
                tags=["ner", "xlm", "roberta", "multilingual"],
                api_compatible=True,
                downloadable=True,
            ),
            # ===== BETTER SUMMARIZATION =====
            "pegasus_xsum": ModelInfo(
                id="pegasus_xsum",
                hf_id="google/pegasus-xsum",
                name="PEGASUS XSum",
                category=ModelCategory.SUMMARIZATION.value,
                size=ModelSize.LARGE.value,
                size_mb=2200,
                description="Extreme summarization (PEGASUS)",
                use_cases=["summarization", "extreme", "news"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.88,
                tags=["summarization", "pegasus", "extreme"],
                api_compatible=True,
                downloadable=True,
            ),
        }

    def get_new_models_count(self) -> int:
        """Return how many models this manager adds on top of the parent catalog.

        The baseline is measured from the parent catalog at call time instead
        of a hard-coded number, so the result stays correct when the parent
        catalog changes size or already contains one of the new ids.
        """
        all_models = self.get_all_models()
        # NOTE(review): assumes the parent loader is cheap and free of side
        # effects when called again -- confirm against AdvancedModelManager.
        original_count = len(super()._load_model_catalog())
        return len(all_models) - original_count
# ===== Singleton Instance =====
# Module-level cache for the shared manager; stays None until first requested.
_extended_manager = None


def get_extended_model_manager() -> ExtendedModelManager:
    """Return the module-wide ExtendedModelManager, building it on first call."""
    global _extended_manager
    # Fast path: hand back the instance created by an earlier call.
    if _extended_manager is not None:
        return _extended_manager
    _extended_manager = ExtendedModelManager()
    return _extended_manager
# ===== Test =====
# Manual smoke test: builds a manager and prints catalog statistics and
# a few rankings. Runs only when the module is executed as a script.
if __name__ == "__main__":
    print("="*70)
    print("🧪 Testing Extended Model Manager")
    print("="*70)

    manager = ExtendedModelManager()

    # Statistics
    stats = manager.get_model_stats()
    new_count = manager.get_new_models_count()
    print("\n📊 Statistics:")
    print(f" Total Models: {stats['total_models']}")
    print(f" New Models Added: {new_count}")
    print(f" Free Models: {stats['free_models']}")
    print(f" API Compatible: {stats['api_compatible']}")
    print(f" Avg Performance: {stats['avg_performance']}")

    # A sample of the newly added models
    print("\n🆕 New Models Added:")
    new_models = [
        "bitcoin_bert", "crypto_finbert", "minilm_l6",
        "finbert_esg", "bge_base", "pegasus_xsum"
    ]
    for i, model_id in enumerate(new_models, 1):
        model = manager.get_model_by_id(model_id)
        if model:
            print(f" {i}. {model.name} ({model.size_mb} MB)")
            print(f" HF: {model.hf_id}")
            print(f" Use: {', '.join(model.use_cases[:3])}")

    # Best sentiment models, flagging the ones this manager added.
    # The id set comes from the new-model catalog itself, so every new model
    # is flagged rather than only a hand-picked few.
    new_model_ids = set(manager._load_new_models())
    print("\n⭐ Best New Sentiment Models:")
    sentiment_models = manager.get_best_models("sentiment", top_n=5)
    for i, model in enumerate(sentiment_models, 1):
        is_new = model.id in new_model_ids
        marker = "🆕" if is_new else " "
        print(f" {marker} {i}. {model.name} - {model.performance_score}")

    # Best embedding models
    print("\n⭐ Best Embedding Models:")
    embeddings = manager.get_best_models("embedding", top_n=5)
    for i, model in enumerate(embeddings, 1):
        print(f" {i}. {model.name} - {model.size_mb} MB - {model.performance_score}")

    print("\n" + "="*70)
    print("✅ Extended Model Manager is working!")
    print("="*70)