import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import spaces

# Model configuration
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"  # Small, efficient open-source model
MAX_NEW_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.95

# Initialize model and tokenizer
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    trust_remote_code=True
)

# Create text generation pipeline; dtype and device placement are inherited
# from the already-loaded model, so they don't need to be passed again
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)


@spaces.GPU(duration=60)  # Request GPU for 60 seconds per call (for Hugging Face Spaces)
def generate_response(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a response using the LLM."""
    # Build conversation context
    messages = []

    # Add system message if provided
    if system_message:
        messages.append({"role": "system", "content": system_message})

    # Add conversation history
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply the model's chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Generate response
    try:
        outputs = pipe(
            text,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            return_full_text=False
        )
        response = outputs[0]["generated_text"]
        return response
    except Exception as e:
        return f"Error generating response: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="Open Source LLM Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Open Source LLM Chat

        This app uses **{model}**, an open-source language model.

        ### Features:
        - Interactive chat interface
        - Adjustable generation parameters
        - Custom system messages
        - Deployed on Hugging Face Spaces
        """.format(model=MODEL_NAME)
    )

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="Chat History",
                height=500,
                bubble_full_width=False
            )
            msg = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here and press Enter...",
                lines=2
            )
            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear Chat")

        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            system_message = gr.Textbox(
                label="System Message (Optional)",
                placeholder="You are a helpful AI assistant...",
                lines=3
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=2048,
                value=MAX_NEW_TOKENS,
                step=50,
                label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=TEMPERATURE,
                step=0.1,
                label="Temperature (Creativity)"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=TOP_P,
                step=0.05,
                label="Top-p (Nucleus Sampling)"
            )

            gr.Markdown(
                """
                ### Parameter Guide:
                - **Max Tokens**: Maximum length of the response
                - **Temperature**: Higher = more creative, lower = more focused
                - **Top-p**: Controls diversity of word choices
                """
            )

    gr.Markdown(
        """
        ---
        ### Tips:
        - Start with a clear, specific question
        - Adjust temperature for creative vs. factual responses
        - Use system messages to set the AI's behavior
        - Clear the chat if responses become inconsistent
        """
    )

    # Handle message submission
    def user_submit(message, history):
        # Clear the textbox and append the user turn with a pending response
        return "", history + [[message, None]]

    def bot_respond(history, system_message, max_tokens, temperature, top_p):
        # Nothing to do if there is no pending user turn
        if len(history) == 0 or history[-1][1] is not None:
            return history

        message = history[-1][0]
        bot_message = generate_response(
            message,
            history[:-1],  # Don't include the current message in the history
            system_message,
            max_tokens,
            temperature,
            top_p
        )
        history[-1][1] = bot_message
        return history

    # Wire up the interface
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    submit.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_message, max_tokens, temperature, top_p],
        chatbot
    )

    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the app
if __name__ == "__main__":
    demo.launch(
        share=False,
        show_error=True
    )
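# ---------------------------------------------------------------------------
# Deployment notes (a sketch, not part of the app logic above).
#
# The imports imply roughly this requirements.txt for the Space; the exact
# entries and pins are an assumption, so adjust to your environment:
#
#     gradio
#     torch
#     transformers
#     accelerate   # needed for device_map="auto"
#     spaces       # ZeroGPU helper, preinstalled on Hugging Face Spaces
#
# To try the app locally (assuming the dependencies above are installed):
#
#     python app.py
#
# then open the printed URL (Gradio defaults to http://127.0.0.1:7860).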