"""Gradio chat UI that answers questions with retrieval-augmented generation.

At import time the module loads a local causal language model and tokenizer,
opens a persisted Chroma vector store, and builds a ``gr.Blocks`` app exposed
as ``demo``. Each submitted message is answered by ``test_rag``, which
retrieves documents from the vector store and asks the model to answer in
Spanish using only that retrieved text.
"""

import importlib.util
import os
import random
import subprocess
import sys
import time


def _ensure_installed(package, index_url=None):
    """Best-effort ``pip install`` of *package* when it is not importable.

    Args:
        package: Distribution / import name to look for and install.
        index_url: Optional package index passed to pip via ``-i``.
    """
    if importlib.util.find_spec(package) is not None:
        return
    command = [sys.executable, "-m", "pip", "install"]
    if index_url:
        command += ["-i", index_url]
    command.append(package)
    # check=False keeps this best-effort: a failed install does not abort
    # start-up here, the later import/load reports the real problem.
    subprocess.run(command, check=False)


# Install accelerate and bitsandbytes if missing. This runs before the
# third-party imports below so the freshly installed packages are visible to
# them. NOTE(review): declaring both in requirements.txt would be cleaner.
_ensure_installed("accelerate")
_ensure_installed("bitsandbytes", index_url="https://pypi.org/simple/")

import gradio as gr  # noqa: E402
import torch  # noqa: E402
import transformers  # noqa: E402
from transformers import AutoModelForCausalLM  # noqa: E402
from transformers import AutoModelForSequenceClassification  # noqa: E402,F401
from transformers import AutoTokenizer  # noqa: E402

try:  # Newer LangChain releases ship vector stores in langchain_community.
    from langchain_community.vectorstores import Chroma  # noqa: E402
except ImportError:  # Older LangChain layout.
    from langchain.vectorstores import Chroma  # noqa: E402

MODEL_DIR = "./modelo"
TOKENIZER_DIR = "./tokenizer"
CHROMA_DIR = "./chroma_db"

# Retrieved documents whose score is not below this value are ignored.
MAX_CONTEXT_SCORE = 7

# Reply used when no retrieved document passes the score filter.
NO_CONTEXT_ANSWER = "No tengo información para responder a esta pregunta"

# A "text-generation" pipeline needs a causal LM. dtype and device placement
# are given to from_pretrained because pipeline() does not apply them to a
# model object that is already instantiated. Half precision is only requested
# when a GPU is present.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
)

# NOTE(review): no embedding function is configured here, as in the original
# call. If the index in CHROMA_DIR was built with a specific embedding model,
# pass the same one as ``embedding_function=`` -- confirm against the script
# that created the index.
vectordb = Chroma(persist_directory=CHROMA_DIR)


def test_rag(pipeline, query):
    """Answer *query* using documents retrieved from the vector store.

    Args:
        pipeline: Text-generation pipeline; its ``tokenizer`` must provide
            ``apply_chat_template``.
        query: The user's question.

    Returns:
        A ``(answer, docs)`` tuple. ``docs`` is the raw list of
        ``(document, score)`` pairs returned by the vector store. ``answer``
        is the generated text, or a fixed fallback sentence when no document
        has a score below ``MAX_CONTEXT_SCORE``.
    """
    docs = vectordb.similarity_search_with_score(query)
    context = [
        doc.page_content for doc, score in docs if score < MAX_CONTEXT_SCORE
    ]
    if not context:
        return NO_CONTEXT_ANSWER, docs

    content = (
        "Basándote en la siguiente información: "
        + "\n".join(context)
        + "\n Responde en castellano a la pregunta: "
        + query
    )
    messages = [{"role": "user", "content": content}]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # return_full_text=False makes the pipeline return only the completion,
    # so the prompt does not have to be cut off by searching for a
    # template-specific marker such as "[/INST]".
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip(), docs


def respond(message, chat_history):
    """Handle a submitted chat message.

    Args:
        message: Text typed by the user.
        chat_history: List of ``(user_message, bot_answer)`` tuples shown in
            the chatbot; may be empty or ``None``.

    Returns:
        ``("", chat_history)``: an empty string to clear the textbox and the
        history, extended with the new exchange unless *message* was blank.
    """
    chat_history = chat_history or []
    if not message or not message.strip():
        # Nothing to ask; leave the conversation untouched.
        return "", chat_history
    answer, _docs = test_rag(query_pipeline, message)
    chat_history.append((message, answer))
    return "", chat_history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()