# Really-amin's picture
# Upload 577 files
# b190b45 verified
"""
News Provider - Cryptocurrency and financial news aggregation
Provides:
- Latest crypto news from NewsAPI
- Keyword-based news search
- News sentiment analysis (basic)
API Documentation: https://newsapi.org/docs
"""
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional

from .base import BaseProvider, create_success_response, create_error_response
class NewsProvider(BaseProvider):
    """NewsAPI REST API provider for cryptocurrency news.

    Wraps the ``everything``, ``top-headlines`` and ``top-headlines/sources``
    endpoints and attaches a basic keyword-based sentiment label to every
    article it returns.
    """

    # SECURITY: this key is committed to source control and must be treated
    # as compromised. It is kept only so existing callers that rely on the
    # default keep working; pass ``api_key`` explicitly and rotate this value.
    API_KEY = "968a5e25552b4cb5ba3280361d8444ab"

    # Default crypto-related keywords
    CRYPTO_KEYWORDS = [
        "bitcoin", "ethereum", "cryptocurrency", "crypto",
        "blockchain", "defi", "nft", "web3"
    ]

    # Upper bound applied to every ``pageSize`` sent to the API.
    _MAX_PAGE_SIZE = 100

    # Only the first few keywords are used when building an OR query.
    _MAX_QUERY_KEYWORDS = 5

    # Word lists for the keyword-based sentiment heuristic.
    _POSITIVE_WORDS = (
        "surge", "soar", "rally", "gain", "bullish", "growth", "rise",
        "breakthrough", "record", "milestone", "adoption", "success",
        "profit", "up", "high", "positive", "boost", "moon",
    )
    _NEGATIVE_WORDS = (
        "crash", "plunge", "drop", "fall", "bearish", "decline", "loss",
        "hack", "scam", "fraud", "ban", "regulation", "lawsuit", "risk",
        "down", "low", "negative", "warning", "concern", "fear",
    )

    def __init__(self, api_key: Optional[str] = None):
        """Configure the provider.

        Args:
            api_key: NewsAPI key; falls back to ``API_KEY`` when omitted.
        """
        super().__init__(
            name="newsapi",
            base_url="https://newsapi.org/v2",
            api_key=api_key or self.API_KEY,
            timeout=10.0,
            cache_ttl=60.0  # Cache news for 60 seconds
        )

    def _get_default_headers(self) -> Dict[str, str]:
        """Get headers with NewsAPI authorization"""
        return {
            "Accept": "application/json",
            "X-Api-Key": self.api_key
        }

    @classmethod
    def _clamp_page_size(cls, page_size: int) -> int:
        """Force ``page_size`` into the range 1.._MAX_PAGE_SIZE."""
        return max(1, min(page_size, cls._MAX_PAGE_SIZE))

    def _api_error(self, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return an error response if the payload's status is not "ok".

        Shared by every endpoint wrapper so that the API's ``message`` and
        ``code`` fields are always propagated the same way.

        Returns:
            ``None`` when ``data["status"] == "ok"``, otherwise the result of
            ``create_error_response``.
        """
        if data.get("status") == "ok":
            return None
        error_msg = data.get("message", "Unknown error")
        return create_error_response(self.name, error_msg, data.get("code"))

    async def get_latest_news(
        self,
        query: Optional[str] = None,
        page_size: int = 20,
        page: int = 1,
        language: str = "en",
        sort_by: str = "publishedAt"
    ) -> Dict[str, Any]:
        """
        Get latest cryptocurrency news.

        Args:
            query: Search query (default: crypto keywords)
            page_size: Number of articles per page (clamped to 1-100)
            page: Page number (values below 1 are treated as 1)
            language: Language filter (en, es, fr, etc.)
            sort_by: Sort order (publishedAt, relevancy, popularity)

        Returns:
            Standardized response with news articles. ``page`` and
            ``pageSize`` in the result are the values actually sent to the
            API, i.e. after clamping.
        """
        # Use default crypto keywords if no query provided
        search_query = query or " OR ".join(
            self.CRYPTO_KEYWORDS[:self._MAX_QUERY_KEYWORDS]
        )
        effective_page_size = self._clamp_page_size(page_size)
        effective_page = max(1, page)

        # Calculate date range (last 7 days for free tier). A timezone-aware
        # "now" is used because datetime.utcnow() is deprecated.
        from_date = (
            datetime.now(timezone.utc) - timedelta(days=7)
        ).strftime("%Y-%m-%d")

        params = {
            "q": search_query,
            "pageSize": effective_page_size,
            "page": effective_page,
            "language": language,
            "sortBy": sort_by,
            "from": from_date
        }

        response = await self.get("everything", params=params)
        if not response.get("success"):
            return response

        # ``or {}`` / ``or []`` guard against explicit nulls in the payload.
        data = response.get("data") or {}
        error = self._api_error(data)
        if error is not None:
            return error

        articles = data.get("articles") or []
        total_results = data.get("totalResults", 0)

        return create_success_response(
            self.name,
            {
                "articles": self._format_articles(articles),
                "count": len(articles),
                "totalResults": total_results,
                "query": search_query,
                "page": effective_page,
                "pageSize": effective_page_size
            }
        )

    def _format_article(self, article: Dict[str, Any]) -> Dict[str, Any]:
        """Project one raw article onto the output schema and add sentiment."""
        # Any of these fields may be present with a null value, so ``or`` is
        # used instead of a ``.get`` default (which only covers missing keys).
        source = article.get("source") or {}
        title = article.get("title") or ""
        description = article.get("description") or ""
        return {
            "title": article.get("title"),
            "description": article.get("description"),
            "content": article.get("content"),
            "author": article.get("author"),
            "source": {
                "id": source.get("id"),
                "name": source.get("name")
            },
            "url": article.get("url"),
            "urlToImage": article.get("urlToImage"),
            "publishedAt": article.get("publishedAt"),
            "sentiment": self._basic_sentiment(title + " " + description)
        }

    def _format_articles(self, articles: List[Dict]) -> List[Dict]:
        """Format news articles for clean output.

        Args:
            articles: Raw article dicts; ``None`` is treated as empty.

        Returns:
            One formatted dict per input article, in the same order.
        """
        return [self._format_article(article) for article in articles or []]

    @staticmethod
    def _word_forms(word: str) -> set:
        """Return ``word`` plus a few common inflected spellings of it.

        Covers plural/3rd-person (-s, -es), past (-ed, -d) and -ing forms,
        including a dropped final "e" (surge -> surging), y -> ies/ied
        (rally -> rallies) and a doubled final consonant (drop -> dropped).
        """
        forms = {word, word + "s", word + "es", word + "ed", word + "ing"}
        last = word[-1]
        if last == "e":
            forms.update((word + "d", word[:-1] + "ing"))
        elif last == "y":
            forms.update((word[:-1] + "ies", word[:-1] + "ied"))
        elif last not in "aiou":
            forms.update((word + last + "ed", word + last + "ing"))
        return forms

    @classmethod
    def _count_matches(cls, words, tokens: set) -> int:
        """Count how many entries of ``words`` occur in ``tokens``.

        Each word is counted at most once, however often it appears.
        """
        return sum(
            1 for word in words
            if not tokens.isdisjoint(cls._word_forms(word))
        )

    def _basic_sentiment(self, text: str) -> Dict[str, Any]:
        """
        Basic sentiment analysis using keyword matching.
        For advanced sentiment, use HFSentimentProvider.

        Matching is done on whole words (plus simple inflections) rather than
        raw substrings, so that e.g. "bank" does not count as "ban" and
        "against" does not count as "gain".

        Args:
            text: Text to classify; ``None`` or empty yields neutral.

        Returns:
            Dict with ``label`` ("positive", "negative" or "neutral") and
            ``score`` (share of matched words supporting the label, or 0.5
            for neutral).
        """
        # Replace every non-alphanumeric character with a space so that
        # punctuation never glues itself onto a word.
        normalized = "".join(
            ch if ch.isalnum() else " " for ch in (text or "").lower()
        )
        tokens = set(normalized.split())

        positive_count = self._count_matches(self._POSITIVE_WORDS, tokens)
        negative_count = self._count_matches(self._NEGATIVE_WORDS, tokens)
        total = positive_count + negative_count

        if total == 0:
            return {"label": "neutral", "score": 0.5}

        positive_ratio = positive_count / total
        if positive_ratio > 0.6:
            return {"label": "positive", "score": positive_ratio}
        if positive_ratio < 0.4:
            return {"label": "negative", "score": 1 - positive_ratio}
        return {"label": "neutral", "score": 0.5}

    async def get_top_headlines(
        self,
        category: str = "business",
        country: str = "us",
        page_size: int = 20
    ) -> Dict[str, Any]:
        """
        Get top headlines from news sources.

        Args:
            category: Category (business, technology, etc.)
            country: Country code (us, gb, etc.)
            page_size: Number of articles (clamped to 1-100)

        Returns:
            Standardized response with the formatted articles, their count,
            and the requested category and country.
        """
        params = {
            "category": category,
            "country": country,
            "pageSize": self._clamp_page_size(page_size)
        }

        response = await self.get("top-headlines", params=params)
        if not response.get("success"):
            return response

        data = response.get("data") or {}
        error = self._api_error(data)
        if error is not None:
            return error

        articles = data.get("articles") or []
        return create_success_response(
            self.name,
            {
                "articles": self._format_articles(articles),
                "count": len(articles),
                "category": category,
                "country": country
            }
        )

    async def search_news(
        self,
        keywords: List[str],
        page_size: int = 20,
        language: str = "en"
    ) -> Dict[str, Any]:
        """
        Search news by multiple keywords.

        Args:
            keywords: List of keywords to search. Blank entries are ignored
                and only the first five usable keywords are sent.
            page_size: Number of results
            language: Language filter

        Returns:
            The result of ``get_latest_news`` for the combined OR query, or
            an error response when no usable keyword was supplied.
        """
        # Each keyword is wrapped in double quotes below, so embedded quotes
        # are removed to keep the phrase query well-formed.
        cleaned = [
            str(k).replace('"', "").strip()
            for k in keywords or []
            if k is not None
        ]
        cleaned = [k for k in cleaned if k]

        if not cleaned:
            return create_error_response(
                self.name,
                "Missing keywords",
                "At least one keyword is required"
            )

        # Build OR query for keywords
        query = " OR ".join(
            f'"{k}"' for k in cleaned[:self._MAX_QUERY_KEYWORDS]
        )
        return await self.get_latest_news(
            query=query,
            page_size=page_size,
            language=language
        )

    async def get_crypto_news(self, page_size: int = 20) -> Dict[str, Any]:
        """
        Convenience method to get latest crypto-specific news.

        Args:
            page_size: Number of articles to request.
        """
        return await self.get_latest_news(
            query="cryptocurrency OR bitcoin OR ethereum OR crypto",
            page_size=page_size,
            sort_by="publishedAt"
        )

    async def get_news_sources(self, category: str = "business") -> Dict[str, Any]:
        """Get available news sources.

        Args:
            category: Source category to filter by. The language filter is
                fixed to "en".

        Returns:
            Standardized response with the formatted sources, their count,
            and the requested category.
        """
        params = {
            "category": category,
            "language": "en"
        }

        response = await self.get("top-headlines/sources", params=params)
        if not response.get("success"):
            return response

        data = response.get("data") or {}
        error = self._api_error(data)
        if error is not None:
            return error

        source_fields = (
            "id", "name", "description", "url",
            "category", "language", "country",
        )
        formatted_sources = [
            {field: source.get(field) for field in source_fields}
            for source in data.get("sources") or []
        ]

        return create_success_response(
            self.name,
            {
                "sources": formatted_sources,
                "count": len(formatted_sources),
                "category": category
            }
        )