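# Gradio chat demo for WeiboAI/VibeThinker-1.5B.
# Assumed dependencies (not declared in this file): gradio, torch, transformers,
# and accelerate (needed for device_map="auto").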
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


class VibeThinkerChat:
    def __init__(self, model_path="WeiboAI/VibeThinker-1.5B"):
        print("Loading model and tokenizer...")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True
        )
        print("Model loaded successfully!")

    def generate_response(self, prompt, temperature=0.6, max_tokens=40960, top_p=0.95):
        messages = [
            {"role": "user", "content": prompt}
        ]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
        generation_config = dict(
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=0  # 0 disables top-k filtering; top_k=1 would make sampling effectively greedy
        )
        generated_ids = self.model.generate(
            **model_inputs,  # pass input_ids and attention_mask together
            **generation_config
        )
        # Keep only the newly generated tokens, dropping the prompt
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response
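

# The class can also be used on its own, outside the Gradio app below; a minimal
# sketch (the example prompt is just an illustration):
#   chat = VibeThinkerChat()
#   print(chat.generate_response("Solve x^2 - 5x + 6 = 0.", max_tokens=1024))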

# Initialize model
chat_model = VibeThinkerChat()


def chat_interface(message, history, temperature, max_tokens):
    # Note: history is accepted for the Gradio callback signature but is not used,
    # so each reply is generated from the latest message only (single-turn).
    try:
        response = chat_model.generate_response(
            message,
            temperature=temperature,
            max_tokens=max_tokens
        )
        return response
    except Exception as e:
        return f"Error: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="VibeThinker-1.5B Chat") as demo:
    gr.Markdown("# 🧠 VibeThinker-1.5B Chat Interface")
    gr.Markdown("A 1.5B parameter reasoning model optimized for math and coding problems.")

    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(
                label="Your Message",
                placeholder="Ask a math or coding question...",
                lines=3
            )
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                clear = gr.Button("Clear")
        with gr.Column(scale=1):
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.6,
                step=0.1,
                label="Temperature",
                info="Recommended: 0.6 or 1.0"
            )
            max_tokens = gr.Slider(
                minimum=512,
                maximum=40960,
                value=4096,
                step=512,
                label="Max Tokens",
                info="Maximum response length"
            )

    def user_message(user_msg, history):
        return "", history + [[user_msg, None]]

    def bot_response(history, temp, max_tok):
        user_msg = history[-1][0]
        bot_msg = chat_interface(user_msg, history, temp, max_tok)
        history[-1][1] = bot_msg
        return history

    msg.submit(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, temperature, max_tokens], chatbot
    )
    submit.click(user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, temperature, max_tokens], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)


if __name__ == "__main__":
    demo.queue()
    demo.launch()
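
# To try this Space locally (assuming the file is saved as app.py and the
# dependencies noted at the top are installed): run `python app.py` and open
# the local URL that Gradio prints.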