rahul7star's picture
Create app.py
e09cf5a verified
raw
history blame contribute delete
774 Bytes
import gradio as gr
import requests
# Completion endpoint of the llama.cpp HTTP server (assumed running on 7860,
# e.g. inside the same Docker container); the Gradio UI below uses 7861 to
# avoid a port clash.
LLAMA_API = "http://localhost:7860/completion" # llama.cpp server URL
def chat_with_model(message, history):
    """Send the conversation so far plus the new message to llama.cpp.

    Parameters:
        message: the user's latest message (str).
        history: prior turns as (user_text, assistant_text) pairs, the
            Gradio ChatInterface tuple format.

    Returns:
        The assistant's reply text (str); empty string if the server's
        JSON response has no "content" field.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not reply within the timeout.
    """
    # Flatten prior turns into a plain-text transcript prompt.
    full_prompt = "\n".join(f"User: {u}\nAssistant: {a}" for u, a in history)
    full_prompt += f"\nUser: {message}\nAssistant:"
    # Send to the llama.cpp server. A timeout keeps the UI from hanging
    # forever if the backend is down (requests never times out by default);
    # raise_for_status surfaces HTTP errors instead of silently returning "".
    resp = requests.post(
        LLAMA_API,
        json={
            "prompt": full_prompt,
            "n_predict": 256,
            "temperature": 0.7,
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json().get("content", "")
# Wire the request handler into a ready-made Gradio chat UI.
# (`fn` is the first positional parameter of ChatInterface.)
chat = gr.ChatInterface(
    chat_with_model,
    title="GPT-OSS 20B Chat",
    description="Chat with the local llama.cpp model running inside Docker",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the
    # container; port 7861 avoids clashing with llama.cpp on 7860.
    chat.launch(server_name="0.0.0.0", server_port=7861)