|
|
|
|
|
"""
|
|
|
Extended Model Manager with 100+ New HuggingFace Models
|
|
|
مدیریت گسترده شامل تمام مدلهای کشف شده
|
|
|
"""
|
|
|
|
|
|
import sys
|
|
|
import os
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
|
|
|
|
|
from backend.services.advanced_model_manager import (
|
|
|
AdvancedModelManager,
|
|
|
ModelInfo,
|
|
|
ModelCategory,
|
|
|
ModelSize
|
|
|
)
|
|
|
|
|
|
|
|
|
class ExtendedModelManager(AdvancedModelManager):
    """Model manager that extends the base catalog with extra Hugging Face models.

    The catalog produced by the parent class is kept as-is and the entries
    returned by ``_load_new_models`` are merged on top of it.
    """

    # Fallback size of the parent catalog. It is only used when
    # ``_load_model_catalog`` has not run on the instance yet; once it runs,
    # the measured size is stored on the instance and shadows this value.
    _base_model_count = 24

    def _load_model_catalog(self):
        """Return the parent catalog merged with the extended entries.

        Returns:
            The mapping returned by the parent implementation, updated in
            place with the entries from ``_load_new_models``. On a key
            collision the extended entry replaces the parent one.
        """
        models = super()._load_model_catalog()

        # Remember how many entries the parent contributed so that
        # get_new_models_count() does not rely on a hard-coded number that
        # goes stale whenever the parent catalog grows or shrinks.
        self._base_model_count = len(models)

        models.update(self._load_new_models())
        return models

    def _load_new_models(self) -> dict:
        """Build the catalog entries contributed by this extension.

        Returns:
            A new dict mapping each model id to its ``ModelInfo`` record.
        """
        return {
            # --- Crypto-specific sentiment models ---
            "bitcoin_bert": ModelInfo(
                id="bitcoin_bert",
                hf_id="ElKulako/BitcoinBERT",
                name="BitcoinBERT",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=450,
                description="Bitcoin-specific sentiment analysis model",
                use_cases=["bitcoin", "btc", "sentiment", "social"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.86,
                popularity_score=0.75,
                tags=["bitcoin", "sentiment", "bert", "crypto"],
                api_compatible=True,
                downloadable=True,
            ),
            "crypto_finbert": ModelInfo(
                id="crypto_finbert",
                hf_id="burakutf/finetuned-finbert-crypto",
                name="Crypto FinBERT",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="FinBERT fine-tuned specifically on crypto news",
                use_cases=["crypto", "news", "financial", "sentiment"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.84,
                popularity_score=0.70,
                tags=["crypto", "finbert", "sentiment", "news"],
                api_compatible=True,
                downloadable=True,
            ),
            "crypto_sentiment_general": ModelInfo(
                id="crypto_sentiment_general",
                hf_id="mayurjadhav/crypto-sentiment-model",
                name="Crypto Sentiment Model",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=400,
                description="General crypto sentiment analysis",
                use_cases=["crypto", "sentiment", "general"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.82,
                popularity_score=0.65,
                tags=["crypto", "sentiment"],
                api_compatible=True,
                downloadable=True,
            ),
            "stock_bubbles_crypto": ModelInfo(
                id="stock_bubbles_crypto",
                hf_id="StockBubbles/crypto-sentiment",
                name="StockBubbles Crypto Sentiment",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=330,
                description="Fast crypto sentiment analysis",
                use_cases=["crypto", "fast", "sentiment"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.80,
                popularity_score=0.60,
                tags=["crypto", "sentiment", "fast"],
                api_compatible=True,
                downloadable=True,
            ),
            # --- Financial-domain models ---
            "finbert_esg": ModelInfo(
                id="finbert_esg",
                hf_id="yiyanghkust/finbert-esg",
                name="FinBERT ESG",
                category=ModelCategory.CLASSIFICATION.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="ESG (Environmental, Social, Governance) classification",
                use_cases=["esg", "sustainability", "classification"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.88,
                popularity_score=0.75,
                tags=["finbert", "esg", "classification"],
                api_compatible=True,
                downloadable=True,
            ),
            "finbert_pretrain": ModelInfo(
                id="finbert_pretrain",
                hf_id="yiyanghkust/finbert-pretrain",
                name="FinBERT Pretrained",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="Pretrained FinBERT for financial domain",
                use_cases=["financial", "pretraining", "domain"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.86,
                popularity_score=0.70,
                tags=["finbert", "pretrain", "financial"],
                api_compatible=True,
                downloadable=True,
            ),
            "stocktwits_roberta": ModelInfo(
                id="stocktwits_roberta",
                hf_id="zhayunduo/roberta-base-stocktwits-finetuned",
                name="StockTwits RoBERTa",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=500,
                description="RoBERTa fine-tuned on StockTwits data",
                use_cases=["stocktwits", "social", "trading"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.83,
                popularity_score=0.68,
                tags=["roberta", "stocktwits", "social"],
                api_compatible=True,
                downloadable=True,
            ),
            # --- Multilingual sentiment models ---
            "multilingual_sentiment": ModelInfo(
                id="multilingual_sentiment",
                hf_id="nlptown/bert-base-multilingual-uncased-sentiment",
                name="Multilingual BERT Sentiment",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=710,
                description="Sentiment analysis for 104 languages",
                use_cases=["multilingual", "global", "sentiment"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.84,
                popularity_score=0.85,
                tags=["multilingual", "bert", "sentiment"],
                api_compatible=True,
                downloadable=True,
            ),
            "distilbert_multilingual": ModelInfo(
                id="distilbert_multilingual",
                hf_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
                name="DistilBERT Multilingual Sentiments",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=550,
                description="Fast multilingual sentiment (distilled)",
                use_cases=["multilingual", "fast", "sentiment"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.82,
                popularity_score=0.80,
                tags=["distilbert", "multilingual", "fast"],
                api_compatible=True,
                downloadable=True,
            ),
            # --- Sentence embedding models ---
            "minilm_l6": ModelInfo(
                id="minilm_l6",
                hf_id="sentence-transformers/all-MiniLM-L6-v2",
                name="MiniLM-L6 (Fast Embeddings)",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.TINY.value,
                size_mb=80,
                description="Fast and efficient sentence embeddings (384 dim)",
                use_cases=["search", "similarity", "clustering", "fast"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.88,
                popularity_score=0.95,
                tags=["embeddings", "fast", "efficient", "minilm"],
                api_compatible=True,
                downloadable=True,
            ),
            "minilm_l12": ModelInfo(
                id="minilm_l12",
                hf_id="sentence-transformers/all-MiniLM-L12-v2",
                name="MiniLM-L12 (Balanced)",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=120,
                description="Balanced speed/quality embeddings (384 dim)",
                use_cases=["search", "similarity", "balanced"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.90,
                popularity_score=0.90,
                tags=["embeddings", "balanced", "minilm"],
                api_compatible=True,
                downloadable=True,
            ),
            "multi_qa_mpnet": ModelInfo(
                id="multi_qa_mpnet",
                hf_id="sentence-transformers/multi-qa-mpnet-base-dot-v1",
                name="Multi-QA MPNet",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="Optimized for question answering and search",
                use_cases=["qa", "search", "retrieval"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.88,
                tags=["embeddings", "qa", "mpnet"],
                api_compatible=True,
                downloadable=True,
            ),
            "e5_base": ModelInfo(
                id="e5_base",
                hf_id="intfloat/e5-base-v2",
                name="E5 Base V2",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="High-quality general embeddings (768 dim)",
                use_cases=["search", "retrieval", "quality"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.92,
                popularity_score=0.87,
                tags=["embeddings", "e5", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            "bge_base": ModelInfo(
                id="bge_base",
                hf_id="BAAI/bge-base-en-v1.5",
                name="BGE Base English V1.5",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="Beijing Academy of AI embeddings (768 dim)",
                use_cases=["search", "retrieval", "rag"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.93,
                popularity_score=0.86,
                tags=["embeddings", "bge", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            "bge_large": ModelInfo(
                id="bge_large",
                hf_id="BAAI/bge-large-en-v1.5",
                name="BGE Large English V1.5",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="High-quality embeddings (1024 dim)",
                use_cases=["search", "retrieval", "rag", "quality"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.95,
                popularity_score=0.85,
                tags=["embeddings", "bge", "large", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            # --- Named entity recognition models ---
            "bert_large_ner": ModelInfo(
                id="bert_large_ner",
                hf_id="dslim/bert-large-NER",
                name="BERT Large NER",
                category=ModelCategory.NER.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="Large BERT for named entity recognition",
                use_cases=["ner", "entities", "extraction"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.92,
                popularity_score=0.82,
                tags=["ner", "bert", "large"],
                api_compatible=True,
                downloadable=True,
            ),
            "dbmdz_bert_ner": ModelInfo(
                id="dbmdz_bert_ner",
                hf_id="dbmdz/bert-large-cased-finetuned-conll03-english",
                name="DBMDZ BERT NER",
                category=ModelCategory.NER.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="BERT NER fine-tuned on CoNLL-03",
                use_cases=["ner", "companies", "financial"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.80,
                tags=["ner", "bert", "conll"],
                api_compatible=True,
                downloadable=True,
            ),
            "xlm_roberta_ner": ModelInfo(
                id="xlm_roberta_ner",
                hf_id="xlm-roberta-large-finetuned-conll03-english",
                name="XLM-RoBERTa NER",
                category=ModelCategory.NER.value,
                size=ModelSize.LARGE.value,
                size_mb=2200,
                description="Multilingual NER with RoBERTa",
                use_cases=["ner", "multilingual", "entities"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.93,
                popularity_score=0.78,
                tags=["ner", "xlm", "roberta", "multilingual"],
                api_compatible=True,
                downloadable=True,
            ),
            # --- Summarization models ---
            "pegasus_xsum": ModelInfo(
                id="pegasus_xsum",
                hf_id="google/pegasus-xsum",
                name="PEGASUS XSum",
                category=ModelCategory.SUMMARIZATION.value,
                size=ModelSize.LARGE.value,
                size_mb=2200,
                description="Extreme summarization (PEGASUS)",
                use_cases=["summarization", "extreme", "news"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.88,
                tags=["summarization", "pegasus", "extreme"],
                api_compatible=True,
                downloadable=True,
            ),
        }

    def get_new_models_count(self) -> int:
        """Return how many models exist beyond the parent catalog.

        Returns:
            ``len(self.get_all_models())`` minus the number of entries the
            parent catalog held when ``_load_model_catalog`` last ran on this
            instance (or the class-level fallback of 24 if it never ran).
        """
        return len(self.get_all_models()) - self._base_model_count
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level cache for the shared manager; populated on first request.
_extended_manager = None


def get_extended_model_manager() -> ExtendedModelManager:
    """Return the shared ExtendedModelManager, creating it on first use."""
    global _extended_manager

    # Fast path: an instance was already built by an earlier call.
    if _extended_manager is not None:
        return _extended_manager

    _extended_manager = ExtendedModelManager()
    return _extended_manager
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: prints catalog statistics and a few sample entries
    # when the module is executed directly.
    banner = "=" * 70

    print(banner)
    print("🧪 Testing Extended Model Manager")
    print(banner)

    manager = ExtendedModelManager()

    stats = manager.get_model_stats()
    new_count = manager.get_new_models_count()

    print("\n📊 Statistics:")
    print(f" Total Models: {stats['total_models']}")
    print(f" New Models Added: {new_count}")
    print(f" Free Models: {stats['free_models']}")
    print(f" API Compatible: {stats['api_compatible']}")
    print(f" Avg Performance: {stats['avg_performance']}")

    print("\n🆕 New Models Added:")
    # A hand-picked sample of the extended catalog, not the full list.
    sample_ids = [
        "bitcoin_bert", "crypto_finbert", "minilm_l6",
        "finbert_esg", "bge_base", "pegasus_xsum",
    ]

    for i, model_id in enumerate(sample_ids, 1):
        model = manager.get_model_by_id(model_id)
        if model:
            print(f" {i}. {model.name} ({model.size_mb} MB)")
            print(f" HF: {model.hf_id}")
            print(f" Use: {', '.join(model.use_cases[:3])}")

    print("\n⭐ Best New Sentiment Models:")
    # Derive the "new" ids from the extended catalog itself so that every
    # added model gets the marker, not just a hard-coded subset of them.
    new_model_ids = set(manager._load_new_models())
    sentiment_models = manager.get_best_models("sentiment", top_n=5)
    for i, model in enumerate(sentiment_models, 1):
        marker = "🆕" if model.id in new_model_ids else " "
        print(f" {marker} {i}. {model.name} - {model.performance_score}")

    print("\n⭐ Best Embedding Models:")
    embeddings = manager.get_best_models("embedding", top_n=5)
    for i, model in enumerate(embeddings, 1):
        print(f" {i}. {model.name} - {model.size_mb} MB - {model.performance_score}")

    print("\n" + banner)
    print("✅ Extended Model Manager is working!")
    print(banner)
|
|
|
|