# opensource / app.py
# AI-RESEARCHER-2024's picture
# Create app.py
# 6cc0a7e verified
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import spaces
import os
# Model configuration: checkpoint id plus the default sampling settings that
# seed the UI sliders further down.
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"  # Small, efficient open-source model
MAX_NEW_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.95

# Load the tokenizer and the weights once, at import time.
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Half precision when CUDA is visible, full precision otherwise; the same
# dtype is handed to both the model loader and the pipeline.
_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=_DTYPE,
    device_map="auto",
    trust_remote_code=True,
)

# Wrap the loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=_DTYPE,
    device_map="auto",
)
def _build_messages(message, history, system_message):
    """Assemble the role/content message list for one generation request.

    Args:
        message: The user's current message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs from earlier
            turns, or ``None``.
        system_message: Optional system prompt; skipped when empty.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts ending with the
        current user message.
    """
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_msg, assistant_msg in history or []:
        # A turn may be half-filled (e.g. a reply that was never produced);
        # forwarding ``None`` or empty content would corrupt the prompt.
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    return messages


@spaces.GPU(duration=60)  # Request GPU for 60 seconds per call (for Hugging Face Spaces)
def generate_response(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a response using the LLM.

    Args:
        message: The user's current message.
        history: Earlier turns as ``(user_msg, assistant_msg)`` pairs.
        system_message: Optional system prompt.
        max_tokens: Upper bound on newly generated tokens; coerced to ``int``.
        temperature: Sampling temperature passed to the pipeline.
        top_p: Nucleus-sampling threshold passed to the pipeline.

    Returns:
        The generated text, or a string starting with
        ``"Error generating response: "`` if the pipeline call raises.
    """
    messages = _build_messages(message, history, system_message)

    # Render the conversation into a single prompt string that ends with the
    # generation prompt for the assistant's next turn.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    try:
        outputs = pipe(
            text,
            # UI sliders may deliver a float; the token budget must be an int.
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            return_full_text=False
        )
        return outputs[0]["generated_text"]
    except Exception as e:
        # Boundary handler: report the failure as the chat reply instead of
        # propagating it to the caller.
        return f"Error generating response: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="Open Source LLM Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Open Source LLM Chat
This app uses **{model}** - an open-source language model.
### Features:
- Interactive chat interface
- Adjustable generation parameters
- Custom system messages
- Deployed on Hugging Face Spaces
""".format(model=MODEL_NAME)
    )

    with gr.Row():
        # Left column: the conversation and the message input.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="Chat History",
                height=500,
                bubble_full_width=False
            )
            msg = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here and press Enter...",
                lines=2
            )
            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear Chat")

        # Right column: generation settings.
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            system_message = gr.Textbox(
                label="System Message (Optional)",
                placeholder="You are a helpful AI assistant...",
                lines=3
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=2048,
                value=MAX_NEW_TOKENS,
                step=50,
                label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=TEMPERATURE,
                step=0.1,
                label="Temperature (Creativity)"
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=TOP_P,
                step=0.05,
                label="Top-p (Nucleus Sampling)"
            )
            gr.Markdown(
                """
### Parameter Guide:
- **Max Tokens**: Maximum length of response
- **Temperature**: Higher = more creative, Lower = more focused
- **Top-p**: Controls diversity of word choices
"""
            )

    gr.Markdown(
        """
---
### Tips:
- Start with a clear, specific question
- Adjust temperature for creative vs. factual responses
- Use system messages to set the AI's behavior
- Clear chat if responses become inconsistent
"""
    )

    # Handle message submission
    def user_submit(message, history):
        """Queue the user's message as a pending turn and clear the textbox.

        Args:
            message: Text currently in the input box.
            history: Chat transcript as ``[user, assistant]`` pairs, or
                ``None`` when nothing has been said yet.

        Returns:
            A ``(textbox_value, history)`` pair. The textbox value is always
            ``""``; the history gains ``[message, None]`` unless the message
            is empty or whitespace-only, in which case it is left unchanged.
        """
        history = history or []
        if not message or not message.strip():
            # Nothing to send: do not add a blank turn to the transcript.
            return "", history
        return "", history + [[message, None]]

    def bot_respond(history, system_message, max_tokens, temperature, top_p):
        """Fill in the assistant reply for the pending last turn.

        Args:
            history: Chat transcript as ``[user, assistant]`` pairs.
            system_message: Optional system prompt.
            max_tokens: Maximum number of new tokens to generate.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling threshold.

        Returns:
            The history with the last turn's assistant slot filled in; the
            input is returned unchanged when there is no pending turn.
        """
        # No transcript, or the last turn already has an answer.
        if not history or history[-1][1] is not None:
            return history

        message = history[-1][0]
        bot_message = generate_response(
            message,
            history[:-1],  # Don't include the current message in history
            system_message,
            max_tokens,
            temperature,
            top_p
        )
        history[-1][1] = bot_message
        return history

    # Wire up the interface: pressing Enter in the textbox and clicking
    # "Send" run the same two-step chain (record the message, then reply).
    for trigger in (msg.submit, submit.click):
        trigger(
            user_submit,
            [msg, chatbot],
            [msg, chatbot],
            queue=False
        ).then(
            bot_respond,
            [chatbot, system_message, max_tokens, temperature, top_p],
            chatbot
        )

    clear.click(lambda: None, None, chatbot, queue=False)
# Launch the app
if __name__ == "__main__":
    # Start the server only when this file is run as a script.
    launch_kwargs = {"share": False, "show_error": True}
    demo.launch(**launch_kwargs)