# services/agent_chat.py
import os
import re
import json
from typing import Dict, List, Any, Optional

from smolagents import LiteLLMModel, tool, ToolCallingAgent
from smolagents.mcp_client import MCPClient as SmolMCPClient
from gradio_client import Client as GradioClient

from config.constants import AVAILABLE_MODELS_BY_PROVIDER
from services.db import save_analysis_report

MCP_SERVER_URL = os.getenv("MCP_SERVER_URL", "https://mcp-1st-birthday-gitrepo-inspector-mcp.hf.space/")

# ===================================================================
# TOOLS WITH CORRECT DOCSTRINGS
# ===================================================================

@tool
def get_issue_status(issue_number: int, repo_url: str) -> Dict[str, Any]:
    """
    Retrieves the latest AI analysis report for a specific GitHub issue from the database.

    Args:
        issue_number (int): The GitHub issue number (e.g. 12345).
        repo_url (str): Full repository URL (e.g. "https://github.com/gradio-app/gradio").

    Returns:
        dict: Structured report with keys:
            - issue_number (int)
            - title (str)
            - verdict (str)
            - report (str) - clean text without HTML
            - action (dict or None)
            - model_used (str)
            - has_report (bool)
            - error (str, optional)
    """
    try:
        client = GradioClient(MCP_SERVER_URL)
        result = client.predict(repo_url, issue_number, api_name="/get_issue_report")

        # Normalize MCP response
        if isinstance(result, str):
            data = json.loads(result) if "{" in result else {"error": result}
        else:
            data = result if isinstance(result, dict) else {"error": str(result)}

        if data.get("error"):
            return {"error": data["error"], "has_report": False}

        # Remove HTML tags for easy LLM reading
        raw_body = data.get("report", "")
        clean_body = re.sub(r'<[^>]+>', '', raw_body).strip()

        return {
            "issue_number": int(data.get("issue", issue_number)),
            "title": data.get("title", "No title"),
            "verdict": data.get("verdict", "Pending Analysis"),
            "report": clean_body,
            "action": data.get("action"),
            "model_used": data.get("llm_model", "unknown"),
            "has_report": True
        }
    except Exception as e:
        return {"error": str(e), "has_report": False}


@tool
def search_issues(
    repo_url: str,
    query: str = "",
    verdict: Optional[str] = None,
    author: Optional[str] = None,
    limit: int = 8
) -> List[Dict[str, Any]]:
    """
    Searches for GitHub issues matching the given criteria.

    Args:
        repo_url (str): Full repository URL.
        query (str, optional): Text to search in title/body.
        verdict (str, optional): AI verdict filter ("resolved", "duplicate", "unresolved", etc.).
        author (str, optional): GitHub username of the issue author.
        limit (int, optional): Maximum number of results (default: 8).

    Returns:
        list[dict]: List of issues, each containing:
            - issue_number (int)
            - title (str)
            - author (str)
            - verdict (str)
            - snippet (str)
    """
    try:
        client = GradioClient(MCP_SERVER_URL)
        # Positional args must match the /search_issues endpoint signature;
        # "open" presumably pins the issue state, None an unused filter slot.
        result = client.predict(
            repo_url, query, None, "open", verdict, author, limit,
            api_name="/search_issues"
        )

        # Normalize: the endpoint may return a JSON string or a parsed list
        if isinstance(result, str):
            data = json.loads(result) if ("[" in result or "{" in result) else []
        else:
            data = result if isinstance(result, list) else []

        return [
            {
                "issue_number": item.get("id"),
                "title": item.get("title", "No title"),
                "author": item.get("author", "unknown"),
                "verdict": item.get("verdict", "pending"),
                "snippet": (item.get("snippet") or "")[:150].replace("\n", " ")
            }
            for item in data
        ]
    except Exception as e:
        return [{"error": str(e)}]


@tool
def trigger_live_analysis(
    issue_number: int,
    repo_url: str,
    provider: str = "gemini",
    model: Optional[str] = None,
    github_token: Optional[str] = None
) -> str:
    """
    Forces a brand-new AI analysis of an issue, ignoring any cached data.
    Use ONLY when the user explicitly asks to re-analyze (e.g. says "now" or "again").

    Args:
        issue_number (int): The GitHub issue number.
        repo_url (str): Full repository URL.
        provider (str, optional): AI provider ("gemini", "openai", "nebius", "sambanova").
        model (str, optional): Specific model name. If None, uses the provider's first model.
        github_token (str, optional): GitHub token for private repos.

    Returns:
        str: Confirmation message or error.
    """
    try:
        if not model:
            model = AVAILABLE_MODELS_BY_PROVIDER.get(provider.lower(), ["gemini-2.0-flash"])[0]

        client = GradioClient(MCP_SERVER_URL)
        result = client.predict(
            repo_url, int(issue_number), provider, model, github_token, None,
            api_name="/analyze_github_issue"
        )

        html_report, thought = result[0], result[1]

        # Detect the verdict to save in the database. Check "Possibly Resolved"
        # before "Resolved": the latter is a substring of the former, so testing
        # it first would misclassify tentative verdicts as resolved.
        verdict = "unresolved"
        if "Possibly Resolved" in html_report:
            verdict = "possibly_resolved"
        elif "Resolved" in html_report:
            verdict = "resolved"
        elif "Duplicate" in html_report:
            verdict = "duplicate"

        save_analysis_report(
            repo_url=repo_url,
            issue_number=issue_number,
            provider=provider,
            model=model,
            verdict=verdict,
            body=html_report,
            thought=thought
        )

        return f"Live analysis completed using {provider} ({model}). Report updated!"
    except Exception as e:
        return f"Live analysis failed: {str(e)}"


# ===================================================================
# AGENT FACTORY
# ===================================================================

def create_dashboard_agent(gemini_api_key: str) -> ToolCallingAgent:
    print("Initializing GitRepo Inspector Agent...")

    model = LiteLLMModel(
        model_id="gemini/gemini-2.5-flash",
        temperature=0.1,
        max_tokens=2048,
        api_key=gemini_api_key
    )

    # Loads the clean prompt from the YAML config
    import yaml
    yaml_path = os.path.join(os.path.dirname(__file__), "../config/gitrepo_agent_prompt.yaml")
    with open(yaml_path, "r", encoding="utf-8") as f:
        prompt_templates = yaml.safe_load(f)

    # Auto-discovered MCP tools (web_search, etc.), minus the three endpoints
    # the hand-written tools above already wrap. The MCP client must stay
    # connected while the agent uses these tools.
    sse_url = f"{MCP_SERVER_URL.rstrip('/')}/gradio_api/mcp/sse"
    mcp_client = SmolMCPClient({"url": sse_url, "transport": "sse"})
    auto_tools = [
        t for t in mcp_client.get_tools()
        if t.name not in {"analyze_github_issue", "get_issue_report", "search_issues"}
    ]

    my_tools = [get_issue_status, search_issues, trigger_live_analysis]

    agent = ToolCallingAgent(
        tools=auto_tools + my_tools,
        model=model,
        prompt_templates=prompt_templates,
        max_steps=12,
        planning_interval=4
    )

    print("GitRepo Inspector Agent ready!")
    return agent
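

# ===================================================================
# USAGE SKETCH
# ===================================================================
# A minimal sketch of driving the agent from a script, assuming a
# GEMINI_API_KEY environment variable and an illustrative question;
# neither is defined elsewhere in this module.
if __name__ == "__main__":
    dashboard_agent = create_dashboard_agent(os.environ["GEMINI_API_KEY"])
    # run() executes the agent's tool-calling loop: plan, call tools, answer.
    answer = dashboard_agent.run(
        "What is the latest verdict on issue 12345 in "
        "https://github.com/gradio-app/gradio?"
    )
    print(answer)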