Spaces:
Runtime error
import gradio as gr
import requests

# Endpoint of the local llama.cpp HTTP server (its /completion API).
# NOTE(review): this targets port 7860 while the Gradio UI below launches on
# 7861. On Hugging Face Spaces the *app* itself must listen on 7860, so this
# split is a likely cause of the Space's "Runtime error" — confirm which
# process is meant to own which port in the container.
LLAMA_API = "http://localhost:7860/completion"
def chat_with_model(message, history):
    """Send one chat turn to the llama.cpp server and return its reply.

    Parameters
    ----------
    message : str
        The user's new message.
    history : list
        Prior turns. Supports both the legacy Gradio pair format
        ``[(user_text, assistant_text), ...]`` and the modern "messages"
        format ``[{"role": ..., "content": ...}, ...]`` (the default in
        newer gr.ChatInterface versions — the original code crashed on it).

    Returns
    -------
    str
        The model's completion text (empty string if the server response
        lacks a "content" field), or a human-readable error message if the
        request fails.
    """
    # Flatten the history into a plain "User:/Assistant:" transcript.
    turns = []
    for m in history:
        if isinstance(m, dict):
            # "messages" format: one dict per utterance.
            prefix = "User" if m.get("role") == "user" else "Assistant"
            turns.append(f"{prefix}: {m.get('content', '')}")
        else:
            # Legacy pair format: (user_text, assistant_text).
            turns.append(f"User: {m[0]}\nAssistant: {m[1]}")
    full_prompt = "\n".join(turns)
    full_prompt += f"\nUser: {message}\nAssistant:"

    try:
        resp = requests.post(
            LLAMA_API,
            json={
                "prompt": full_prompt,
                "n_predict": 256,
                "temperature": 0.7,
            },
            # Bug fix: the original had no timeout, so a hung llama.cpp
            # server froze the Gradio worker indefinitely.
            timeout=120,
        )
        # Bug fix: surface HTTP errors instead of silently returning "".
        resp.raise_for_status()
        return resp.json().get("content", "")
    except requests.RequestException as exc:
        # Show the failure in the chat instead of crashing the UI callback.
        return f"Error contacting model server: {exc}"
# Wire the chat callback into a ready-made Gradio chat UI.
chat = gr.ChatInterface(
    fn=chat_with_model,
    title="GPT-OSS 20B Chat",
    description="Chat with the local llama.cpp model running inside Docker",
)
if __name__ == "__main__":
    # Bind to all interfaces so the UI is reachable from outside the container.
    # NOTE(review): Hugging Face Spaces health-checks the app on port 7860;
    # serving the UI on 7861 will fail there — confirm the intended deployment.
    chat.launch(server_name="0.0.0.0", server_port=7861)