Spaces:

freddyaboulton
/

collapse-thinking-by-default

Sleeping

Update app.py

8ea488e verified about 2 months ago

1.01 kB

	import gradio as gr
	from huggingface_hub import InferenceClient


	def respond(
	    message,
	    history: list[dict[str, str]],
	    hf_token: gr.OAuthToken,
	):
	    """Stream a chat completion for the latest user message.

	    Args:
	        message: The new user message to send to the model.
	        history: Prior conversation turns as role/content dicts; they are
	            forwarded to the model unchanged, ahead of ``message``.
	        hf_token: OAuth token object whose ``.token`` authenticates the
	            inference client. Presumably injected by Gradio once the user
	            has signed in via the login button -- confirm against the
	            Gradio version in use.

	    Yields:
	        The accumulated response text, once per streamed chunk (including
	        chunks that carry no new content).
	    """
	    client = InferenceClient(token=hf_token.token, model="Qwen/Qwen3-8B", provider="featherless-ai")

	    # Build a fresh list so the caller's history is never mutated.
	    messages = [*history, {"role": "user", "content": message}]

	    response = ""

	    # `chunk` (not `message`) so the loop does not shadow the parameter.
	    for chunk in client.chat_completion(
	        messages,
	        stream=True,
	    ):
	        choices = chunk.choices
	        # Some chunks have no choices or an empty delta; skip their content
	        # but still yield so the UI keeps receiving the running text.
	        if choices and choices[0].delta.content:
	            response += choices[0].delta.content
	        yield response


	# Chat UI driven by `respond`; the Chatbot component is configured with
	# the <think>...</think> tag pair via `collapse_thinking`.
	chatbot = gr.ChatInterface(
	    fn=respond,
	    chatbot=gr.Chatbot(
	        collapse_thinking=[("<think>", "</think>")],
	    ),
	)

	# Page layout: a sidebar holding the login button, with the chat
	# interface rendered in the main area.
	# NOTE(review): nesting reconstructed from a flattened listing -- confirm
	# that `chatbot.render()` belongs directly under Blocks, not the sidebar.
	demo = gr.Blocks()
	with demo:
	    with gr.Sidebar():
	        gr.LoginButton()
	    chatbot.render()


	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()