import gradio as gr
from huggingface_hub import InferenceClient


def respond(
    message: str,
    history: list[dict[str, str]],
    hf_token: gr.OAuthToken,
):
    """Stream an assistant reply for *message*, given the prior chat *history*.

    Args:
        message: The latest user message.
        history: Prior turns as OpenAI-style ``{"role": ..., "content": ...}`` dicts.
        hf_token: OAuth token injected by ``gr.LoginButton`` / ChatInterface.

    Yields:
        The accumulated assistant response text after each streamed chunk,
        so the UI renders the reply incrementally.
    """
    client = InferenceClient(
        token=hf_token.token,
        model="Qwen/Qwen3-8B",
        provider="featherless-ai",
    )

    messages = [*history, {"role": "user", "content": message}]

    response = ""
    # Loop variable renamed from `message` — the original shadowed the
    # function parameter, silently discarding the user's input mid-stream.
    for chunk in client.chat_completion(messages, stream=True):
        choices = chunk.choices
        # Some providers send keep-alive chunks with empty choices or an
        # empty delta; skip those rather than appending nothing noisily.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response


chatbot = gr.ChatInterface(
    respond,
    # NOTE(review): an empty-string tag pair looks unintended — Qwen3 emits
    # <think>...</think>; confirm whether ("<think>", "</think>") was meant.
    chatbot=gr.Chatbot(collapse_thinking=[("", "")]),
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()