"""
News Provider - Cryptocurrency and financial news aggregation

Provides:
- Latest crypto news from NewsAPI
- Keyword-based news search
- News sentiment analysis (basic)

API Documentation: https://newsapi.org/docs
"""

from __future__ import annotations

import os
import re
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional

from .base import BaseProvider, create_success_response, create_error_response


class NewsProvider(BaseProvider):
    """NewsAPI REST API provider for cryptocurrency news.

    Wraps the ``everything``, ``top-headlines`` and ``top-headlines/sources``
    endpoints and tags every returned article with a basic, keyword-based
    sentiment estimate.
    """

    # SECURITY(review): this key is committed to source control. It is kept
    # only as a backwards-compatible last-resort fallback; rotate it and supply
    # the real key through the constructor or the NEWSAPI_KEY environment
    # variable instead.
    API_KEY = "968a5e25552b4cb5ba3280361d8444ab"

    # Environment variable consulted when no key is passed to the constructor.
    API_KEY_ENV_VAR = "NEWSAPI_KEY"

    # Upper bound applied to every requested page size.
    MAX_PAGE_SIZE = 100

    # How many days back ``get_latest_news`` searches.
    LOOKBACK_DAYS = 7

    # Maximum number of terms OR-ed together in a generated query.
    MAX_QUERY_TERMS = 5

    CRYPTO_KEYWORDS = [
        "bitcoin", "ethereum", "cryptocurrency", "crypto",
        "blockchain", "defi", "nft", "web3",
    ]

    # Sentiment lexicons, defined once instead of being rebuilt on every call.
    POSITIVE_WORDS = (
        "surge", "soar", "rally", "gain", "bullish", "growth", "rise",
        "breakthrough", "record", "milestone", "adoption", "success",
        "profit", "up", "high", "positive", "boost", "moon",
    )
    NEGATIVE_WORDS = (
        "crash", "plunge", "drop", "fall", "bearish", "decline", "loss",
        "hack", "scam", "fraud", "ban", "regulation", "lawsuit", "risk",
        "down", "low", "negative", "warning", "concern", "fear",
    )

    # Lexicon words up to this length must match a whole token; longer words
    # match as a token prefix so inflections ("surges", "crashed") still count.
    _SHORT_WORD_MAX_LEN = 3

    # Fields copied verbatim from each NewsAPI source record.
    _SOURCE_FIELDS = (
        "id", "name", "description", "url", "category", "language", "country",
    )

    def __init__(self, api_key: Optional[str] = None):
        """Create the provider.

        Args:
            api_key: NewsAPI key. When omitted, the ``NEWSAPI_KEY`` environment
                variable is tried first, then the class-level ``API_KEY``.
        """
        resolved_key = (
            api_key
            or os.environ.get(self.API_KEY_ENV_VAR)
            or self.API_KEY
        )
        super().__init__(
            name="newsapi",
            base_url="https://newsapi.org/v2",
            api_key=resolved_key,
            timeout=10.0,
            cache_ttl=60.0,
        )

    def _get_default_headers(self) -> Dict[str, str]:
        """Get headers with NewsAPI authorization."""
        return {
            "Accept": "application/json",
            "X-Api-Key": self.api_key,
        }

    def _clamp_page_size(self, page_size: int) -> int:
        """Clamp a requested page size into ``1..MAX_PAGE_SIZE``."""
        return max(1, min(page_size, self.MAX_PAGE_SIZE))

    def _error_from(self, response: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return the error to hand back to the caller, or ``None`` if OK.

        A transport-level failure (``success`` falsy) is returned unchanged.
        A payload whose ``status`` is not ``"ok"`` is converted into an error
        response carrying the API's ``message`` and ``code``. When this returns
        ``None``, ``response["data"]`` is a dict with ``status == "ok"``.
        """
        if not response.get("success"):
            return response

        # ``or {}`` also covers an explicit ``"data": None``.
        data = response.get("data") or {}
        if data.get("status") != "ok":
            error_msg = data.get("message", "Unknown error")
            return create_error_response(self.name, error_msg, data.get("code"))
        return None

    async def get_latest_news(
        self,
        query: Optional[str] = None,
        page_size: int = 20,
        page: int = 1,
        language: str = "en",
        sort_by: str = "publishedAt"
    ) -> Dict[str, Any]:
        """
        Get latest cryptocurrency news from the past ``LOOKBACK_DAYS`` days.

        Args:
            query: Search query (default: the first five crypto keywords
                joined with ``OR``)
            page_size: Number of articles per page (clamped to 1..100)
            page: Page number
            language: Language filter (en, es, fr, etc.)
            sort_by: Sort order (publishedAt, relevancy, popularity)

        Returns:
            Standardized response with the formatted articles, their count,
            ``totalResults``, the query used, the page, and the page size
            actually sent to the API.
        """
        search_query = query or " OR ".join(
            self.CRYPTO_KEYWORDS[:self.MAX_QUERY_TERMS]
        )
        effective_page_size = self._clamp_page_size(page_size)

        # Timezone-aware replacement for the deprecated datetime.utcnow().
        window_start = datetime.now(timezone.utc) - timedelta(days=self.LOOKBACK_DAYS)
        from_date = window_start.strftime("%Y-%m-%d")

        params = {
            "q": search_query,
            "pageSize": effective_page_size,
            "page": page,
            "language": language,
            "sortBy": sort_by,
            "from": from_date,
        }

        response = await self.get("everything", params=params)
        error = self._error_from(response)
        if error is not None:
            return error

        data = response["data"]
        articles = data.get("articles") or []

        return create_success_response(
            self.name,
            {
                "articles": self._format_articles(articles),
                "count": len(articles),
                "totalResults": data.get("totalResults", 0),
                "query": search_query,
                "page": page,
                # Report the size that was actually requested from the API.
                "pageSize": effective_page_size,
            }
        )

    def _format_articles(self, articles: List[Dict]) -> List[Dict]:
        """Format news articles for clean output.

        Each article is reduced to a fixed set of keys and gains a
        ``sentiment`` entry computed from its title and description.
        """
        return [self._format_article(article) for article in articles]

    def _format_article(self, article: Dict) -> Dict:
        """Build the output record for a single raw article."""
        # The API may send explicit nulls, so ``.get(key, default)`` alone is
        # not enough to guarantee a str / dict here.
        source = article.get("source") or {}
        title = article.get("title") or ""
        description = article.get("description") or ""

        return {
            "title": article.get("title"),
            "description": article.get("description"),
            "content": article.get("content"),
            "author": article.get("author"),
            "source": {
                "id": source.get("id"),
                "name": source.get("name"),
            },
            "url": article.get("url"),
            "urlToImage": article.get("urlToImage"),
            "publishedAt": article.get("publishedAt"),
            "sentiment": self._basic_sentiment(f"{title} {description}"),
        }

    @classmethod
    def _count_lexicon_hits(cls, lexicon: tuple, tokens: set) -> int:
        """Count how many distinct lexicon words occur among ``tokens``.

        Short words ("up", "low", "ban") must equal a token, optionally with a
        plural "s"; otherwise they would fire inside unrelated words such as
        "update", "follow" or "bank". Longer words match as a token prefix so
        that inflected forms ("surges", "crashed", "profitable") still count.
        """
        hits = 0
        for word in lexicon:
            if len(word) <= cls._SHORT_WORD_MAX_LEN:
                matched = word in tokens or f"{word}s" in tokens
            else:
                matched = any(token.startswith(word) for token in tokens)
            if matched:
                hits += 1
        return hits

    def _basic_sentiment(self, text: str) -> Dict[str, Any]:
        """
        Basic sentiment analysis using keyword matching.

        For advanced sentiment, use HFSentimentProvider.

        Args:
            text: Free text to score; ``None`` or empty yields neutral.

        Returns:
            ``{"label": ..., "score": ...}`` where the label is "positive"
            when more than 60% of the matched lexicon words are positive,
            "negative" when fewer than 40% are, and "neutral" otherwise
            (including when nothing matched). Neutral always scores 0.5.
        """
        # Match on whole tokens rather than raw substrings.
        tokens = set(re.findall(r"[a-z0-9]+", (text or "").lower()))

        positive_count = self._count_lexicon_hits(self.POSITIVE_WORDS, tokens)
        negative_count = self._count_lexicon_hits(self.NEGATIVE_WORDS, tokens)

        total = positive_count + negative_count
        if total == 0:
            return {"label": "neutral", "score": 0.5}

        positive_ratio = positive_count / total
        if positive_ratio > 0.6:
            return {"label": "positive", "score": positive_ratio}
        if positive_ratio < 0.4:
            return {"label": "negative", "score": 1 - positive_ratio}
        return {"label": "neutral", "score": 0.5}

    async def get_top_headlines(
        self,
        category: str = "business",
        country: str = "us",
        page_size: int = 20
    ) -> Dict[str, Any]:
        """
        Get top headlines from news sources.

        Args:
            category: Category (business, technology, etc.)
            country: Country code (us, gb, etc.)
            page_size: Number of articles (clamped to 1..100)

        Returns:
            Standardized response with the formatted articles, their count,
            and the category and country that were requested.
        """
        params = {
            "category": category,
            "country": country,
            "pageSize": self._clamp_page_size(page_size),
        }

        response = await self.get("top-headlines", params=params)
        error = self._error_from(response)
        if error is not None:
            return error

        articles = response["data"].get("articles") or []

        return create_success_response(
            self.name,
            {
                "articles": self._format_articles(articles),
                "count": len(articles),
                "category": category,
                "country": country,
            }
        )

    async def search_news(
        self,
        keywords: List[str],
        page_size: int = 20,
        language: str = "en"
    ) -> Dict[str, Any]:
        """
        Search news by multiple keywords.

        Each keyword is searched as an exact phrase and the phrases are
        combined with ``OR``. Only the first five usable keywords are used.

        Args:
            keywords: List of keywords to search
            page_size: Number of results
            language: Language filter

        Returns:
            The ``get_latest_news`` response for the generated query, or an
            error response when no usable keyword was supplied.
        """
        # Drop blanks and embedded double quotes: every term is wrapped in
        # quotes below, so a stray quote would break the phrase syntax.
        terms = []
        for keyword in keywords or []:
            if keyword is None:
                continue
            term = str(keyword).replace('"', "").strip()
            if term:
                terms.append(term)

        if not terms:
            return create_error_response(
                self.name,
                "Missing keywords",
                "At least one keyword is required"
            )

        query = " OR ".join(
            f'"{term}"' for term in terms[:self.MAX_QUERY_TERMS]
        )

        return await self.get_latest_news(
            query=query,
            page_size=page_size,
            language=language
        )

    async def get_crypto_news(self, page_size: int = 20) -> Dict[str, Any]:
        """
        Convenience method to get latest crypto-specific news.

        Args:
            page_size: Number of articles to request.

        Returns:
            The ``get_latest_news`` response for a fixed crypto query, sorted
            by publication date.
        """
        return await self.get_latest_news(
            query="cryptocurrency OR bitcoin OR ethereum OR crypto",
            page_size=page_size,
            sort_by="publishedAt"
        )

    async def get_news_sources(self, category: str = "business") -> Dict[str, Any]:
        """Get available English-language news sources for a category.

        Args:
            category: Source category to list.

        Returns:
            Standardized response with the formatted sources, their count and
            the requested category.
        """
        params = {
            "category": category,
            "language": "en",
        }

        response = await self.get("top-headlines/sources", params=params)
        error = self._error_from(response)
        if error is not None:
            return error

        sources = response["data"].get("sources") or []
        formatted_sources = [
            {field: source.get(field) for field in self._SOURCE_FIELDS}
            for source in sources
        ]

        return create_success_response(
            self.name,
            {
                "sources": formatted_sources,
                "count": len(formatted_sources),
                "category": category,
            }
        )