Spaces:

AI-RESEARCHER-2024
/

Computing-Chatbot

Sleeping

AI-RESEARCHER-2024 committed on Oct 31, 2024

Commit

0ddbfad

verified ·

1 Parent(s): 5d14fe6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,21 +2,31 @@ import os
 import gradio as gr
 from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from llama_index.llms.ollama import Ollama
-# Set up Ollama
-os.system('curl -fsSL https://ollama.com/install.sh | sh')
-os.system('ollama serve &')
-os.system('sleep 5')
-os.system('ollama pull llama3.2')
-os.system('ollama pull llama3.2')
 # Initialize embeddings and LLM
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
-llama = Ollama(
-    model="llama3.2",
-    request_timeout=1000,
-)
 def initialize_index():
     """Initialize the vector store index from PDF files in the data directory"""
@@ -34,7 +44,7 @@ def initialize_index():
     )
     # Return query engine with Llama
-    return index.as_query_engine(llm=llama)
 # Initialize the query engine at startup
 query_engine = initialize_index()

 import gradio as gr
 from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from llama_index.llms.llama_cpp import LlamaCPP
+from llama_index.llms.llama_cpp.llama_utils import (
+    messages_to_prompt,
+    completion_to_prompt,
+)
+model_url = 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf'
+llm = LlamaCPP(
+    # You can pass in the URL to a GGUF model to download it automatically
+    model_url=model_url,
+    temperature=0.1,
+    max_new_tokens=256,
+    context_window=2048,
+    # kwargs to pass to __call__()
+    generate_kwargs={},
+    # kwargs to pass to __init__()
+    # set to at least 1 to use GPU
+    model_kwargs={"n_gpu_layers": 1},
+    # transform inputs into Llama 2 prompt format (NOTE: model_url is Llama 3.2 Instruct; confirm this template fits)
+    messages_to_prompt=messages_to_prompt,
+    completion_to_prompt=completion_to_prompt,
+    verbose=True,
+)
 # Initialize embeddings and LLM
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
 def initialize_index():
     """Initialize the vector store index from PDF files in the data directory"""
     )
     # Return query engine with Llama
+    return index.as_query_engine(llm=llm)
 # Initialize the query engine at startup
 query_engine = initialize_index()