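# Gradio chat demo for WeiboAI/VibeThinker-1.5B.
# Assumed dependencies (not declared in this file): gradio, torch, transformers,
# and accelerate (needed for device_map="auto").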
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


class VibeThinkerChat:
    def __init__(self, model_path="WeiboAI/VibeThinker-1.5B"):
        print("Loading model and tokenizer...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True
        )
        print("Model loaded successfully!")

    def generate_response(self, prompt, temperature=0.6, max_tokens=40960, top_p=0.95):
        messages = [
            {"role": "user", "content": prompt}
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generation_config = dict(
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=0  # 0 disables top-k filtering; top_k=1 would make sampling effectively greedy
        )
        generated_ids = self.model.generate(
            **model_inputs,  # pass input_ids and attention_mask together
            **generation_config
        )
        # Keep only the newly generated tokens, dropping the prompt
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response
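

# The class can also be used on its own, outside the Gradio app below; a minimal
# sketch (the example prompt is just an illustration):
#   chat = VibeThinkerChat()
#   print(chat.generate_response("Solve x^2 - 5x + 6 = 0.", max_tokens=1024))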

# Initialize model
chat_model = VibeThinkerChat()


def chat_interface(message, history, temperature, max_tokens):
    # Note: history is accepted for the Gradio callback signature but is not used,
    # so each reply is generated from the latest message only (single-turn).
    try:
        response = chat_model.generate_response(
            message,
            temperature=temperature,
            max_tokens=max_tokens
        )
        return response
    except Exception as e:
        return f"Error: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="VibeThinker-1.5B Chat") as demo:
    gr.Markdown("# 🧠 VibeThinker-1.5B Chat Interface")
    gr.Markdown("A 1.5B parameter reasoning model optimized for math and coding problems.")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(
                label="Your Message",
                placeholder="Ask a math or coding question...",
                lines=3
            )
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                clear = gr.Button("Clear")
        with gr.Column(scale=1):
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="Temperature",
                info="Recommended: 0.6 or 1.0"
            )
            max_tokens = gr.Slider(
                minimum=512,
                maximum=40960,
                value=4096,
                step=512,
                label="Max Tokens",
                info="Maximum response length"
            )

    def user_message(user_msg, history):
        return "", history + [[user_msg, None]]

    def bot_response(history, temp, max_tok):
        user_msg = history[-1][0]
        bot_msg = chat_interface(user_msg, history, temp, max_tok)
        history[-1][1] = bot_msg
        return history

    msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, temperature, max_tokens], chatbot
    )
    submit.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, temperature, max_tokens], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)


if __name__ == "__main__":
    demo.queue()
    demo.launch()
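
# To try this Space locally (assuming the file is saved as app.py and the
# dependencies noted at the top are installed): run `python app.py` and open
# the local URL that Gradio prints.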