Krishna Chaitanya Cheedella
Refactor to use FREE HuggingFace models + OpenAI instead of OpenRouter
aa61236
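The Gradio app in this commit pulls its council configuration from `backend/config_free.py`, which is not shown here. Below is a minimal sketch of the shape that module would need, assuming each entry is a dict with an `"id"` key (the app derives display names via `id.split("/")[-1]`); the `"provider"` key and the exact model IDs are guesses based on the models listed in the app's description:

```python
# Hypothetical sketch of backend/config_free.py -- not the actual file from this commit.

# Council members: free Hugging Face models plus OpenAI models. The app only
# relies on the "id" key (display names come from id.split("/")[-1]); the
# "provider" key is an assumption about how the backend routes calls.
COUNCIL_MODELS = [
    {"id": "meta-llama/Llama-3.3-70B-Instruct", "provider": "huggingface"},
    {"id": "Qwen/Qwen2.5-72B-Instruct", "provider": "huggingface"},
    {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "provider": "huggingface"},
    {"id": "openai/gpt-4o-mini", "provider": "openai"},
    {"id": "openai/gpt-3.5-turbo", "provider": "openai"},
]

# The Chairman synthesizes the final answer in Stage 3 (this choice is assumed).
CHAIRMAN_MODEL = {"id": "openai/gpt-4o-mini", "provider": "openai"}
```

Carrying the provider on each entry would let the backend route the free Hugging Face models and the OpenAI models through different clients without touching the app code below.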
```python
import gradio as gr

from backend.council_free import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final_stream
from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL


async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.

    The council consists of multiple advanced LLMs (currently: {models}) that:
    1. Individually answer the question
    2. Rank each other's answers
    3. Synthesize a final best answer (Chairman: {chairman})

    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.

    Yields:
        Status updates and finally the synthesized answer.
| """.format( | |
| models=", ".join([m["id"].split("/")[-1] for m in COUNCIL_MODELS]), | |
| chairman=CHAIRMAN_MODEL["id"].split("/")[-1] | |
| ) | |
    try:
        buffer = ""

        # Stage 1: Collect individual responses
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        buffer += "## 🟡 Stage 1: Collecting individual responses from council members...\n\n"
        yield buffer

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            buffer += "\n❌ The council failed to generate a response."
            yield buffer
            return

        # Format Stage 1 results
        buffer += f"### ✅ Received {len(stage1_results)} responses:\n"
        for res in stage1_results:
            model_name = res["model"].split("/")[-1]
            preview = res["response"][:100].replace("\n", " ") + "..."
            buffer += f"- **{model_name}**: {preview}\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 2: Collect rankings
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        buffer += "## 🟡 Stage 2: Council members are ranking each other's responses...\n\n"
        yield buffer

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Format Stage 2 results
        buffer += "### ✅ Rankings Collected:\n"
        for res in stage2_results:
            model_name = res["model"].split("/")[-1]
            # Extract just the ranking part if possible, or just say "Ranked"
            buffer += f"- **{model_name}** has submitted their rankings.\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 3: Synthesize final answer
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        buffer += "## 🟡 Stage 3: Chairman is synthesizing the final answer...\n\n"
        yield buffer

        full_response = ""
        async for chunk in stage3_synthesize_final_stream(question, stage1_results, stage2_results):
            full_response += chunk
            yield buffer + full_response

        progress(1.0, desc="Complete!")

        if not full_response:
            buffer += "\n❌ The council failed to generate a final synthesis."
            yield buffer
            return

        # Let's keep the history but mark Stage 3 as done
        final_buffer = buffer.replace(
            "## 🟡 Stage 3: Chairman is synthesizing the final answer...", "## 🟢 Stage 3: Final Answer"
        )
        yield final_buffer + full_response

    except Exception as e:
        yield f"❌ Error consulting the council: {str(e)}"
| description = """ | |
| An LLM Council that consults multiple AI models to answer questions. Based on [LLM Council](https://github.com/machine-theory/lm-council) by Machine Theory | |
| and Andrej Karpathy. | |
| π― **Council Members**: Mix of FREE HuggingFace models + OpenAI models | |
| - Meta Llama 3.3 70B | |
| - Qwen 2.5 72B | |
| - Mixtral 8x7B | |
| - OpenAI GPT-4o-mini | |
| - OpenAI GPT-3.5-turbo | |
| π‘ **How it works**: | |
| 1. Each model answers your question independently | |
| 2. Models rank each other's responses anonymously | |
| 3. Chairman synthesizes the best final answer | |
| β±οΈ Takes ~1-2 minutes per question (3 stages) | |
| π° Uses mostly FREE models! | |
| """ | |

demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose as MCP
    demo.launch(mcp_server=True, show_error=True)
```
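`backend/council_free.py` is likewise not part of this excerpt. The sketch below is only a structural guess that matches the call sites above: `stage1_collect_responses(question)` returns a list of `{"model", "response"}` dicts, `stage2_collect_rankings(question, stage1_results)` returns a `(rankings, extra)` tuple, and `stage3_synthesize_final_stream(...)` is an async generator of text chunks. The `_chat` helper, the prompts, and routing every call through the OpenAI SDK are assumptions made for illustration; the real module presumably sends the free models to Hugging Face instead.

```python
# Hypothetical sketch of backend/council_free.py -- not the actual implementation.
import asyncio

from openai import AsyncOpenAI

from backend.config_free import COUNCIL_MODELS, CHAIRMAN_MODEL

client = AsyncOpenAI()  # the real module would also hold a Hugging Face client


async def _chat(model_id: str, prompt: str) -> str:
    """One non-streaming chat call; per-provider routing is omitted in this sketch."""
    resp = await client.chat.completions.create(
        model=model_id.split("/")[-1],
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content or ""


async def stage1_collect_responses(question: str) -> list[dict]:
    """Stage 1: every council member answers the question independently, in parallel."""
    async def ask(member: dict) -> dict:
        return {"model": member["id"], "response": await _chat(member["id"], question)}

    results = await asyncio.gather(*(ask(m) for m in COUNCIL_MODELS), return_exceptions=True)
    return [r for r in results if isinstance(r, dict)]  # drop members that errored


async def stage2_collect_rankings(question: str, stage1_results: list[dict]) -> tuple[list[dict], str]:
    """Stage 2: every member ranks the anonymised Stage 1 answers."""
    answers = "\n\n".join(f"Answer {i + 1}:\n{r['response']}" for i, r in enumerate(stage1_results))
    prompt = (
        f"Question: {question}\n\n"
        f"Rank the following anonymous answers from best to worst, with a short justification.\n\n{answers}"
    )

    async def rank(member: dict) -> dict:
        return {"model": member["id"], "ranking": await _chat(member["id"], prompt)}

    rankings = await asyncio.gather(*(rank(m) for m in COUNCIL_MODELS), return_exceptions=True)
    return [r for r in rankings if isinstance(r, dict)], answers


async def stage3_synthesize_final_stream(question: str, stage1_results: list[dict], stage2_results: list[dict]):
    """Stage 3: the Chairman streams a synthesis of all answers and rankings."""
    answers = "\n\n".join(r["response"] for r in stage1_results)
    rankings = "\n\n".join(r["ranking"] for r in stage2_results)
    stream = await client.chat.completions.create(
        model=CHAIRMAN_MODEL["id"].split("/")[-1],
        messages=[{
            "role": "user",
            "content": (
                f"Question: {question}\n\nCouncil answers:\n{answers}\n\n"
                f"Council rankings:\n{rankings}\n\nWrite the single best final answer."
            ),
        }],
        stream=True,
    )
    async for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta
```

Stages 1 and 2 fan out with `asyncio.gather` so members answer and rank in parallel, while Stage 3 streams so the `async for` loop in `ask_council` can surface the Chairman's synthesis as it is produced.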