Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import PeftModel | |
| import torch | |
# Hub identifiers for the base checkpoint and the LoRA adapter layered on it.
model_name = "unsloth/llama-3-8b-bnb-4bit"  # Replace with your base model
adapter_model_name = "DanielWong76/lora_model1"  # Replace with your LoRA adapter

# Load the base weights in float16 and let device_map="auto" decide placement,
# then wrap the base model with the adapter. The tokenizer comes from the
# base checkpoint.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, adapter_model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Alpaca-style template with three named slots; each section header sits on
# its own line and the text ends with a trailing newline.
alpaca_prompt = (
    "### Instruction:\n"
    "{instruction}\n"
    "### Input:\n"
    "{input}\n"
    "### Output:\n"
    "{output}\n"
)
| # Define the function to generate text | |
def generate_response(instruction, input_text):
    """Generate the model's answer for an Alpaca-style instruction/input pair.

    Args:
        instruction: Task description inserted under ``### Instruction:``.
        input_text: Additional context inserted under ``### Input:``.

    Returns:
        The text produced by the model after the prompt, with leading and
        trailing whitespace removed.
    """
    # Leave the output slot empty so the model continues after "### Output:".
    prompt = alpaca_prompt.format(
        instruction=instruction,
        input=input_text,
        output="",
    )

    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on "cuda". Send the inputs to whichever device the model is on
    # instead of hard-coding one (which raises on hosts without CUDA).
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Slice the prompt off by token count rather than searching
    # the decoded text for "### Output:", which would match too early if the
    # user's own text contains that marker.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
# Two free-text inputs, passed to generate_response in this order:
# first the instruction, then the accompanying input text.
instruction_box = gr.Textbox(lines=2, placeholder="Enter the instruction here...")
context_box = gr.Textbox(lines=5, placeholder="Enter the input text here...")

# Wire the textboxes to the generation function; the result is shown as text.
iface = gr.Interface(
    fn=generate_response,
    inputs=[instruction_box, context_box],
    outputs="text",
    title="Alpaca-Style Instruction-Input-Output Model",
)

# Start serving the Gradio app.
iface.launch()