import gradio as gr
from huggingface_hub import InferenceClient


def respond(
    message: str,
    history: list[dict[str, str]],
    hf_token: gr.OAuthToken,
):
    """Stream an assistant reply for *message*, given the prior chat *history*.

    Args:
        message: The latest user message.
        history: Prior turns as OpenAI-style ``{"role": ..., "content": ...}`` dicts.
        hf_token: OAuth token injected by ``gr.LoginButton`` / ChatInterface.

    Yields:
        The accumulated assistant response text after each streamed chunk,
        so the UI renders the reply incrementally.
    """
    client = InferenceClient(
        token=hf_token.token,
        model="Qwen/Qwen3-8B",
        provider="featherless-ai",
    )

    messages = [*history, {"role": "user", "content": message}]

    response = ""
    # Loop variable renamed from `message` — the original shadowed the
    # function parameter, silently discarding the user's input mid-stream.
    for chunk in client.chat_completion(messages, stream=True):
        choices = chunk.choices
        # Some providers send keep-alive chunks with empty choices or an
        # empty delta; skip those rather than appending nothing noisily.
        if choices and choices[0].delta.content:
            response += choices[0].delta.content
        yield response


chatbot = gr.ChatInterface(
    respond,
    # NOTE(review): an empty-string tag pair looks unintended — Qwen3 emits
    # <think>...</think>; confirm whether ("<think>", "</think>") was meant.
    chatbot=gr.Chatbot(collapse_thinking=[("", "")]),
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()