#!pip install -U "transformers==4.40.0"
#!pip install -i https://pypi.org/simple/ bitsandbytes
#!pip install accelerate

import transformers
import torch

# 4-bit quantized Llama 3 8B Instruct checkpoint
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
)
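
# A note on memory (my ballpark, not measured here): the unsloth checkpoint is
# already stored in bitsandbytes 4-bit format, so load_in_4bit is largely
# belt-and-braces, and the weights should fit in roughly 6 GB of GPU memory.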

messages = [
    {"role": "system",
     "content": "You are an interviewer assessing whether the user would make a good manager. When the user says 'hi there!', begin the interview."},
    {"role": "user",
     "content": "hi there!"},
]

prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
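
# Llama 3's chat template ends each assistant turn with <|eot_id|> rather than
# the plain EOS token, so both ids are treated as stop tokens; with EOS alone
# the model tends to run past the end of its turn.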

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
print(outputs[0]["generated_text"][len(prompt):])
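
# An aside, not in the original: passing return_full_text=False to the pipeline
# call makes "generated_text" hold only the new completion, which avoids the
# prompt-slicing above.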

#!pip install gradio
import gradio as gr

# Reset the conversation state for the chat UI
messages = [
    {"role": "system",
     "content": "You are an interviewer assessing whether the user would make a good manager. When the user says 'hi there!', begin the interview."},
    {"role": "user",
     "content": "hi there!"},
]

def add_text(history, text):
    global messages  # the chat history sent to the model, defined globally above
    # Use a list, not a tuple: generate() mutates the second element in place
    history = history + [[text, '']]
    messages = messages + [{"role": "user", "content": text}]
    return history, ''
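
# Illustration (not from the original): after the user submits "How do I start?",
# the two parallel states look roughly like
#   history  -> [["How do I start?", ""]]                 # what the Chatbot renders
#   messages -> [system, "hi there!", "How do I start?"]  # what the model consumes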

def generate(history):
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response_msg = outputs[0]["generated_text"][len(prompt):]
    # Keep the model-side history in sync so later turns see the assistant's replies
    messages = messages + [{"role": "assistant", "content": response_msg}]
    # Yield character by character to simulate streaming in the Chatbot
    for char in response_msg:
        history[-1][1] += char
        yield history
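
# The loop above replays an already-finished string, which only looks like
# streaming. Below is a minimal sketch of true token-level streaming with
# transformers' TextIteratorStreamer; generate_streaming is a hypothetical
# drop-in replacement for generate(), not part of the original app.
from threading import Thread
from transformers import TextIteratorStreamer

def generate_streaming(history):
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # apply_chat_template already inserts <|begin_of_text|>, so skip the
    # tokenizer's own special tokens to avoid a doubled BOS
    inputs = pipeline.tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(pipeline.model.device)
    streamer = TextIteratorStreamer(
        pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    gen_kwargs = dict(**inputs, streamer=streamer, eos_token_id=terminators,
                      max_new_tokens=256, do_sample=True,
                      temperature=0.6, top_p=0.9)
    # model.generate() blocks, so run it in a thread and consume tokens here
    Thread(target=pipeline.model.generate, kwargs=gen_kwargs).start()
    for new_text in streamer:
        history[-1][1] += new_text
        yield history
    messages = messages + [{"role": "assistant", "content": history[-1][1]}]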

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )
    txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        generate, inputs=[chatbot], outputs=chatbot)

demo.queue()
demo.launch(debug=True)
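
# demo.queue() matters here: Gradio needs its request queue enabled before
# generator handlers like generate() can stream partial Chatbot updates.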

# Alternative backend, kept disabled: the same chat UI served by Groq's hosted
# Llama 3 instead of the local pipeline.
'''
import os
from groq import Groq
import gradio as gr

client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. Please reply with concise, efficient answers."
}

async def chat_groq(message, history):
    messages = [system_prompt]
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})
    messages.append({"role": "user", "content": str(message)})

    response_content = ''
    stream = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
        max_tokens=1024,
        temperature=1.3,
        stream=True
    )
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            response_content += content
            yield response_content

with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(chat_groq,
                     clear_btn=None,
                     undo_btn=None,
                     retry_btn=None,
                     )
demo.queue()
demo.launch()
'''
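
# To try the Groq variant, remove the surrounding triple quotes, add groq to
# the Space requirements, and set GROQ_API_KEY as an environment secret;
# "llama3-8b-8192" was Groq's hosted Llama 3 8B model id at the time of writing.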