rahul7star's picture
Create app.py
e09cf5a verified
raw
history blame contribute delete
774 Bytes
import gradio as gr
import requests
# Completion endpoint of the llama.cpp HTTP server (assumed running on 7860,
# e.g. inside the same Docker container); the Gradio UI below uses 7861 to
# avoid a port clash.
LLAMA_API = "http://localhost:7860/completion" # llama.cpp server URL
def chat_with_model(message, history):
    """Send the conversation so far plus the new message to llama.cpp.

    Parameters:
        message: the user's latest message (str).
        history: prior turns as (user_text, assistant_text) pairs, the
            Gradio ChatInterface tuple format.

    Returns:
        The assistant's reply text (str); empty string if the server's
        JSON response has no "content" field.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not reply within the timeout.
    """
    # Flatten prior turns into a plain-text transcript prompt.
    full_prompt = "\n".join(f"User: {u}\nAssistant: {a}" for u, a in history)
    full_prompt += f"\nUser: {message}\nAssistant:"
    # Send to the llama.cpp server. A timeout keeps the UI from hanging
    # forever if the backend is down (requests never times out by default);
    # raise_for_status surfaces HTTP errors instead of silently returning "".
    resp = requests.post(
        LLAMA_API,
        json={
            "prompt": full_prompt,
            "n_predict": 256,
            "temperature": 0.7,
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json().get("content", "")
# Wire the request handler into a ready-made Gradio chat UI.
# (`fn` is the first positional parameter of ChatInterface.)
chat = gr.ChatInterface(
    chat_with_model,
    title="GPT-OSS 20B Chat",
    description="Chat with the local llama.cpp model running inside Docker",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the
    # container; port 7861 avoids clashing with llama.cpp on 7860.
    chat.launch(server_name="0.0.0.0", server_port=7861)