Spaces:

Really-amin
/

Datasourceforcryptocurrency

Paused

App Files Files Community

Datasourceforcryptocurrency / backend /services /huggingface_inference_client.py

Really-amin

Upload 577 files

b190b45 verified 2 days ago

raw

history blame contribute delete

10.7 kB

	#!/usr/bin/env python3
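	"""
	Hugging Face Inference API Client - REAL DATA ONLY
	Uses real Hugging Face models for sentiment analysis
	NO MOCK DATA - predictions come from real HF models; the single exception
	is the keyword-based fallback (reported with source "fallback") that is
	returned when the inference endpoint answers 404/410.
	"""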

	import logging
	import os
	from datetime import datetime, timezone
	from typing import Any, Dict, Optional

	import httpx
	from fastapi import HTTPException

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	class HuggingFaceInferenceClient:
	    """
	    Client for sentiment analysis through the Hugging Face Inference API.

	    Text is posted to a hosted text-classification model and the
	    model-specific label is mapped onto POSITIVE / NEGATIVE / NEUTRAL.
	    When the endpoint answers 404/410 a keyword heuristic is returned
	    instead; such results carry ``"source": "fallback"``.
	    """

	    # Seconds reported to callers when the API gives no loading estimate.
	    _DEFAULT_LOADING_ESTIMATE = 20
	    # Character cap applied to the text before it is sent to the model.
	    _MAX_TEXT_CHARS = 2000
	    # Length of the text preview echoed back in result dictionaries.
	    _PREVIEW_CHARS = 100

	    # Exact (upper-cased) labels recognised for each sentiment class.
	    _POSITIVE_LABELS = frozenset({"POSITIVE", "LABEL_2", "5 STARS", "POS"})
	    _NEGATIVE_LABELS = frozenset({"NEGATIVE", "LABEL_0", "1 STAR", "NEG"})
	    _NEUTRAL_LABELS = frozenset({"NEUTRAL", "LABEL_1", "3 STARS", "NEU"})

	    # Keywords used by the 404/410 fallback (matched as substrings).
	    _POSITIVE_KEYWORDS = ("bull", "up", "gain", "profit", "surge", "rally", "strong")
	    _NEGATIVE_KEYWORDS = ("bear", "down", "loss", "drop", "dump", "sell", "weak")

	    def __init__(self):
	        """Read the API token from the environment and build request headers."""
	        # Strip whitespace from token to avoid "Illegal header value" errors
	        self.api_token = (os.getenv("HF_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
	        # The router host serves the HF Inference provider under the
	        # "/hf-inference" prefix; the model id is appended per request.
	        self.base_url = "https://router.huggingface.co/hf-inference/models"
	        self.timeout = 30.0  # HF models can take time to load

	        # Model key -> Hugging Face model id
	        self.models = {
	            "sentiment_crypto": "cardiffnlp/twitter-roberta-base-sentiment-latest",
	            "sentiment_financial": "ProsusAI/finbert",
	            "sentiment_twitter": "finiteautomata/bertweet-base-sentiment-analysis",
	            "sentiment_general": "nlptown/bert-base-multilingual-uncased-sentiment",
	        }

	        self.headers = {"Content-Type": "application/json"}
	        if self.api_token:
	            self.headers["Authorization"] = f"Bearer {self.api_token}"

	    @staticmethod
	    def _now_ms() -> int:
	        """Return the current Unix time in milliseconds.

	        Uses a timezone-aware UTC datetime: calling ``.timestamp()`` on the
	        naive value from ``datetime.utcnow()`` treats it as local time and
	        skews the result by the host's UTC offset.
	        """
	        return int(datetime.now(timezone.utc).timestamp() * 1000)

	    @classmethod
	    def _preview(cls, text: str) -> str:
	        """Return the first 100 characters of *text*, with "..." if cut."""
	        suffix = "..." if len(text) > cls._PREVIEW_CHARS else ""
	        return text[: cls._PREVIEW_CHARS] + suffix

	    @classmethod
	    def _estimated_loading_time(cls, response) -> Any:
	        """Extract ``estimated_time`` from a 503 response body.

	        Falls back to the default estimate when the body is not JSON or is
	        not a JSON object.
	        """
	        try:
	            payload = response.json()
	        except ValueError:  # covers JSON decode and text decode errors
	            return cls._DEFAULT_LOADING_ESTIMATE
	        if isinstance(payload, dict):
	            return payload.get("estimated_time", cls._DEFAULT_LOADING_ESTIMATE)
	        return cls._DEFAULT_LOADING_ESTIMATE

	    def _loading_response(self, model_name: str, estimated_time: Any) -> Dict[str, Any]:
	        """Build the payload returned while the model is still loading."""
	        return {
	            "error": "Model is currently loading",
	            "estimated_time": estimated_time,
	            "model": model_name,
	            "timestamp": self._now_ms(),
	        }

	    @staticmethod
	    def _select_best_prediction(data: Any) -> Optional[Dict[str, Any]]:
	        """Return the highest-scoring prediction, or ``None`` if unusable.

	        Accepts both ``[[{...}, ...]]`` and ``[{...}, ...]``. Anything that
	        is not a non-empty list containing at least one dict yields ``None``.
	        """
	        if not isinstance(data, list) or not data:
	            return None
	        predictions = data[0] if isinstance(data[0], list) else data
	        candidates = [item for item in predictions if isinstance(item, dict)]
	        if not candidates:
	            return None
	        return max(candidates, key=lambda item: item.get("score", 0))

	    def _keyword_fallback(self, text: str) -> Dict[str, Any]:
	        """Score *text* with a simple keyword heuristic.

	        Counts how many positive and negative keywords occur in the
	        lower-cased text. Matching is by substring, so e.g. "up" also
	        matches inside longer words. The side with more hits wins with a
	        fixed score of 0.7; a tie is NEUTRAL with score 0.5.
	        """
	        text = text or ""
	        lowered = text.lower()
	        pos_hits = sum(kw in lowered for kw in self._POSITIVE_KEYWORDS)
	        neg_hits = sum(kw in lowered for kw in self._NEGATIVE_KEYWORDS)

	        if pos_hits > neg_hits:
	            label, sentiment, score = "POSITIVE", "positive", 0.7
	        elif neg_hits > pos_hits:
	            label, sentiment, score = "NEGATIVE", "negative", 0.7
	        else:
	            label, sentiment, score = "NEUTRAL", "neutral", 0.5

	        return {
	            "label": label,
	            "score": score,
	            "sentiment": sentiment,
	            "confidence": score,
	            "text": self._preview(text),
	            "model": "fallback-keywords",
	            "source": "fallback",
	            "timestamp": self._now_ms(),
	        }

	    def _normalize_sentiment_label(self, label: str, score: float) -> tuple[str, str]:
	        """
	        Normalize different model label formats to standard format

	        Args:
	            label: Raw label from the model (e.g. "positive", "LABEL_2",
	                "4 stars"). Compared case-insensitively, ignoring
	                surrounding whitespace.
	            score: Score of that prediction; only consulted when the label
	                is not recognised.

	        Returns:
	            (normalized_label, sentiment_text)
	        """
	        label_upper = str(label).strip().upper()

	        # Exact matches for the known label vocabularies
	        if label_upper in self._POSITIVE_LABELS:
	            return ("POSITIVE", "positive")
	        if label_upper in self._NEGATIVE_LABELS:
	            return ("NEGATIVE", "negative")
	        if label_upper in self._NEUTRAL_LABELS:
	            return ("NEUTRAL", "neutral")

	        # Remaining star ratings: 4-5 positive, 1-2 negative, else neutral
	        if "STAR" in label_upper:
	            if "4" in label_upper or "5" in label_upper:
	                return ("POSITIVE", "positive")
	            if "1" in label_upper or "2" in label_upper:
	                return ("NEGATIVE", "negative")
	            return ("NEUTRAL", "neutral")

	        # Unknown label: fall back to thresholds on the score
	        if score > 0.6:
	            return ("POSITIVE", "positive")
	        if score < 0.4:
	            return ("NEGATIVE", "negative")
	        return ("NEUTRAL", "neutral")

	    async def analyze_sentiment(
	        self,
	        text: str,
	        model_key: str = "sentiment_crypto"
	    ) -> Dict[str, Any]:
	        """
	        Analyze sentiment using Hugging Face models

	        Args:
	            text: Text to analyze; truncated to 2000 characters.
	            model_key: Key into ``self.models``; unknown keys fall back to
	                "sentiment_crypto".

	        Returns:
	            On success, a dict with label, score, sentiment, confidence,
	            text (preview), model, source and timestamp (ms).
	            On HTTP 503, a dict with error, estimated_time, model and
	            timestamp instead.
	            On HTTP 404/410, the keyword-fallback result
	            (``"source": "fallback"``).

	        Raises:
	            HTTPException: 400 for missing/invalid text or a 400 from the
	                API; 503 for other HTTP/transport failures; 500 for an
	                unexpected response format or any other error.
	        """
	        try:
	            model_name = self.models.get(model_key, self.models["sentiment_crypto"])

	            # Reject missing, non-string or whitespace-only input
	            if not isinstance(text, str) or not text.strip():
	                raise HTTPException(
	                    status_code=400,
	                    detail="Missing or invalid text in request body"
	                )

	            # Truncate text if too long (max 512 tokens ~ 2000 chars)
	            text = text[: self._MAX_TEXT_CHARS]

	            async with httpx.AsyncClient(timeout=self.timeout) as client:
	                response = await client.post(
	                    f"{self.base_url}/{model_name}",
	                    headers=self.headers,
	                    json={"inputs": text}
	                )

	            # 503 means the model is still loading; report it, don't raise
	            if response.status_code == 503:
	                estimated_time = self._estimated_loading_time(response)
	                logger.warning(
	                    f"⏳ HuggingFace model {model_name} is loading "
	                    f"(estimated: {estimated_time}s)"
	                )
	                return self._loading_response(model_name, estimated_time)

	            response.raise_for_status()
	            data = response.json()

	            # HF returns: [[{"label": "POSITIVE", "score": 0.95}, ...]]
	            best_prediction = self._select_best_prediction(data)
	            if best_prediction is None:
	                logger.error(f"❌ HuggingFace: Unexpected response format: {data}")
	                raise HTTPException(
	                    status_code=500,
	                    detail="Unexpected response format from model"
	                )

	            raw_label = best_prediction.get("label", "NEUTRAL")
	            raw_score = best_prediction.get("score", 0.5)

	            normalized_label, sentiment_text = self._normalize_sentiment_label(
	                raw_label,
	                raw_score
	            )

	            result = {
	                "label": normalized_label,
	                "score": raw_score,
	                "sentiment": sentiment_text,
	                "confidence": raw_score,
	                "text": self._preview(text),
	                "model": model_name,
	                "source": "huggingface",
	                "timestamp": self._now_ms()
	            }

	            logger.info(
	                f"✅ HuggingFace: Sentiment analysis completed "
	                f"({normalized_label}, confidence: {raw_score:.2f})"
	            )
	            return result

	        except httpx.HTTPStatusError as e:
	            status = e.response.status_code
	            if status == 503:
	                # Normally handled before raise_for_status(); kept as a guard
	                return self._loading_response(model_name, self._DEFAULT_LOADING_ESTIMATE)
	            if status == 400:
	                logger.error(f"❌ HuggingFace: Bad request: {e}")
	                raise HTTPException(
	                    status_code=400,
	                    detail="Invalid text or parameters"
	                ) from e
	            if status in (404, 410):
	                # Endpoint moved or model not available; provide safe fallback
	                logger.warning("⚠ HuggingFace endpoint returned 404/410; using keyword fallback")
	                return self._keyword_fallback(text)
	            logger.error(f"❌ HuggingFace API HTTP error: {e}")
	            raise HTTPException(
	                status_code=503,
	                detail=f"HuggingFace API temporarily unavailable: {str(e)}"
	            ) from e

	        except httpx.HTTPError as e:
	            # Transport-level failures (timeouts, connection errors, ...)
	            logger.error(f"❌ HuggingFace API HTTP error: {e}")
	            raise HTTPException(
	                status_code=503,
	                detail=f"HuggingFace API temporarily unavailable: {str(e)}"
	            ) from e

	        except HTTPException:
	            # Already the error we want callers to see
	            raise

	        except Exception as e:
	            logger.error(f"❌ HuggingFace sentiment analysis failed: {e}")
	            raise HTTPException(
	                status_code=500,
	                detail=f"Failed to analyze sentiment: {str(e)}"
	            ) from e


	# Global instance, created once at import time. The constructor reads
	# HF_API_TOKEN / HF_TOKEN from the environment at that moment.
	hf_inference_client = HuggingFaceInferenceClient()


	# Names exported by `from <module> import *`.
	__all__ = ["HuggingFaceInferenceClient", "hf_inference_client"]