import gradio as gr
from inference_fine_tune import generate_response  # your generator-based inference code
# This function streams the response
def chat_interface(prompt):
    """Yield partial responses for *prompt* so Gradio streams the output.

    Gradio only streams when the event handler is a generator *function*
    (it checks ``inspect.isgeneratorfunction``), so the underlying
    generator must be driven with ``yield from`` — ``return``-ing it
    from a plain function would deliver the whole response at once
    (or show nothing, depending on output component).
    """
    yield from generate_response(prompt)
# Minimal chat UI: a prompt box, a response box, and a Send button.
with gr.Blocks() as demo:
    gr.Markdown("## Chat with the Model")
    with gr.Row():
        inp = gr.Textbox(label="Your Prompt", placeholder="Enter your message...", lines=3)
        out = gr.Textbox(label="Model Response", lines=10)
    btn = gr.Button("Send")
    # NOTE: gr.Button.click() has no `streaming` keyword — passing one
    # raises TypeError. Streaming is enabled automatically because
    # `chat_interface` is a generator function.
    btn.click(chat_interface, inputs=inp, outputs=out)

# Launch for Hugging Face Spaces.
# share=True is unsupported on Spaces (Gradio warns and ignores it);
# a plain launch() is what serves the app there.
demo.launch()