Krishna Chaitanya Cheedella
Refactor to use FREE HuggingFace models + OpenAI instead of OpenRouter
aa61236
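The Gradio app in this commit pulls its council configuration from `backend/config_free.py`, which is not shown here. Below is a minimal sketch of the shape that module would need, assuming each entry is a dict with an `"id"` key (the app derives display names via `id.split("/")[-1]`); the `"provider"` key and the exact model IDs are guesses based on the models listed in the app's description:

```python
# Hypothetical sketch of backend/config_free.py -- not the actual file from this commit.

# Council members: free Hugging Face models plus OpenAI models. The app only
# relies on the "id" key (display names come from id.split("/")[-1]); the
# "provider" key is an assumption about how the backend routes calls.
COUNCIL_MODELS = [
    {"id": "meta-llama/Llama-3.3-70B-Instruct", "provider": "huggingface"},
    {"id": "Qwen/Qwen2.5-72B-Instruct", "provider": "huggingface"},
    {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "provider": "huggingface"},
    {"id": "openai/gpt-4o-mini", "provider": "openai"},
    {"id": "openai/gpt-3.5-turbo", "provider": "openai"},
]

# The Chairman synthesizes the final answer in Stage 3 (this choice is assumed).
CHAIRMAN_MODEL = {"id": "openai/gpt-4o-mini", "provider": "openai"}
```

Carrying the provider on each entry would let the backend route the free Hugging Face models and the OpenAI models through different clients without touching the app code below.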
```python
import gradio as gr

from backend.council_free import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final_stream
from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL


async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.

    The council consists of multiple advanced LLMs (currently: {models}) that:
    1. Individually answer the question
    2. Rank each other's answers
    3. Synthesize a final best answer (Chairman: {chairman})

    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.

    Yields:
        Status updates and finally the synthesized answer.
| """.format( | |
| models=", ".join([m["id"].split("/")[-1] for m in COUNCIL_MODELS]), | |
| chairman=CHAIRMAN_MODEL["id"].split("/")[-1] | |
| ) | |
    try:
        buffer = ""

        # Stage 1: Collect individual responses
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        buffer += "## 🟡 Stage 1: Collecting individual responses from council members...\n\n"
        yield buffer

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            buffer += "\n❌ The council failed to generate a response."
            yield buffer
            return

        # Format Stage 1 results
        buffer += f"### ✅ Received {len(stage1_results)} responses:\n"
        for res in stage1_results:
            model_name = res["model"].split("/")[-1]
            preview = res["response"][:100].replace("\n", " ") + "..."
            buffer += f"- **{model_name}**: {preview}\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 2: Collect rankings
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        buffer += "## 🟡 Stage 2: Council members are ranking each other's responses...\n\n"
        yield buffer

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Format Stage 2 results
        buffer += "### ✅ Rankings Collected:\n"
        for res in stage2_results:
            model_name = res["model"].split("/")[-1]
            # Extract just the ranking part if possible, or just say "Ranked"
            buffer += f"- **{model_name}** has submitted their rankings.\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 3: Synthesize final answer
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        buffer += "## 🟡 Stage 3: Chairman is synthesizing the final answer...\n\n"
        yield buffer

        full_response = ""
        async for chunk in stage3_synthesize_final_stream(question, stage1_results, stage2_results):
            full_response += chunk
            yield buffer + full_response

        progress(1.0, desc="Complete!")

        if not full_response:
            buffer += "\n❌ The council failed to generate a final synthesis."
            yield buffer
            return

        # Let's keep the history but mark Stage 3 as done
        final_buffer = buffer.replace(
            "## 🟡 Stage 3: Chairman is synthesizing the final answer...", "## 🟢 Stage 3: Final Answer"
        )
        yield final_buffer + full_response

    except Exception as e:
        yield f"❌ Error consulting the council: {str(e)}"
| description = """ | |
| An LLM Council that consults multiple AI models to answer questions. Based on [LLM Council](https://github.com/machine-theory/lm-council) by Machine Theory | |
| and Andrej Karpathy. | |
| π― **Council Members**: Mix of FREE HuggingFace models + OpenAI models | |
| - Meta Llama 3.3 70B | |
| - Qwen 2.5 72B | |
| - Mixtral 8x7B | |
| - OpenAI GPT-4o-mini | |
| - OpenAI GPT-3.5-turbo | |
| π‘ **How it works**: | |
| 1. Each model answers your question independently | |
| 2. Models rank each other's responses anonymously | |
| 3. Chairman synthesizes the best final answer | |
| β±οΈ Takes ~1-2 minutes per question (3 stages) | |
| π° Uses mostly FREE models! | |
| """ | |

demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose as MCP
    demo.launch(mcp_server=True, show_error=True)
```
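`backend/council_free.py` is likewise not part of this excerpt. The sketch below is only a structural guess that matches the call sites above: `stage1_collect_responses(question)` returns a list of `{"model", "response"}` dicts, `stage2_collect_rankings(question, stage1_results)` returns a `(rankings, extra)` tuple, and `stage3_synthesize_final_stream(...)` is an async generator of text chunks. The `_chat` helper, the prompts, and routing every call through the OpenAI SDK are assumptions made for illustration; the real module presumably sends the free models to Hugging Face instead.

```python
# Hypothetical sketch of backend/council_free.py -- not the actual implementation.
import asyncio

from openai import AsyncOpenAI

from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL

client = AsyncOpenAI()  # the real module would also hold a Hugging Face client


async def _chat(model_id: str, prompt: str) -> str:
    """One non-streaming chat call; per-provider routing is omitted in this sketch."""
    resp = await client.chat.completions.create(
        model=model_id.split("/")[-1],
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content or ""


async def stage1_collect_responses(question: str) -> list[dict]:
    """Stage 1: every council member answers the question independently, in parallel."""
    async def ask(member: dict) -> dict:
        return {"model": member["id"], "response": await _chat(member["id"], question)}

    results = await asyncio.gather(*(ask(m) for m in COUNCIL_MODELS), return_exceptions=True)
    return [r for r in results if isinstance(r, dict)]  # drop members that errored


async def stage2_collect_rankings(question: str, stage1_results: list[dict]) -> tuple[list[dict], str]:
    """Stage 2: every member ranks the anonymised Stage 1 answers."""
    answers = "\n\n".join(f"Answer {i + 1}:\n{r['response']}" for i, r in enumerate(stage1_results))
    prompt = (
        f"Question: {question}\n\n"
        f"Rank the following anonymous answers from best to worst, with a short justification.\n\n{answers}"
    )

    async def rank(member: dict) -> dict:
        return {"model": member["id"], "ranking": await _chat(member["id"], prompt)}

    rankings = await asyncio.gather(*(rank(m) for m in COUNCIL_MODELS), return_exceptions=True)
    return [r for r in rankings if isinstance(r, dict)], answers


async def stage3_synthesize_final_stream(question: str, stage1_results: list[dict], stage2_results: list[dict]):
    """Stage 3: the Chairman streams a synthesis of all answers and rankings."""
    answers = "\n\n".join(r["response"] for r in stage1_results)
    rankings = "\n\n".join(r["ranking"] for r in stage2_results)
    stream = await client.chat.completions.create(
        model=CHAIRMAN_MODEL["id"].split("/")[-1],
        messages=[{
            "role": "user",
            "content": (
                f"Question: {question}\n\nCouncil answers:\n{answers}\n\n"
                f"Council rankings:\n{rankings}\n\nWrite the single best final answer."
            ),
        }],
        stream=True,
    )
    async for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta
```

Stages 1 and 2 fan out with `asyncio.gather` so members answer and rank in parallel, while Stage 3 streams so the `async for` loop in `ask_council` can surface the Chairman's synthesis as it is produced.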