Spaces:

pradyutkrishna123
/

AI_BOT

Runtime error

App Files Files Community

pradyutkrishna123 committed 17 days ago

Commit

aa30c3a

verified ·

1 Parent(s): 40cf94e

Create app.py

Browse files

Files changed (1) hide show

app.py +102 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+# app.py: PDF Q&A with Airavata
+import gradio as gr
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+import requests
+import os
+# ----------------------------
+# 1. Load and split PDF
+# ----------------------------
+def load_and_chunk(pdf_path):
+    loader = PyPDFLoader(pdf_path)
+    docs = loader.load()
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = splitter.split_documents(docs)
+    texts = [c.page_content for c in chunks]
+    return texts
+# ----------------------------
+# 2. Build embedding index
+# ----------------------------
+def build_index(texts):
+    embed_model = SentenceTransformer('all-mpnet-base-v2')  # You can choose another model
+    embeddings = embed_model.encode(texts, convert_to_numpy=True)
+    index = faiss.IndexFlatL2(embeddings.shape[1])
+    index.add(embeddings)
+    return index, embeddings
+# ----------------------------
+# 3. Airavata API call
+# ----------------------------
+def call_airavata(prompt):
+    # Example: Using HuggingFace Inference API (replace with your key or local endpoint)
+    API_URL = "https://api-inference.huggingface.co/models/ai4bharat/airavata"  # Check actual endpoint
+    API_TOKEN = os.environ.get("HF_API_TOKEN")  # Set your token in environment
+    headers = {"Authorization": f"Bearer {API_TOKEN}"}
+    payload = {"inputs": prompt}
+    response = requests.post(API_URL, headers=headers, json=payload)
+    if response.status_code == 200:
+        result = response.json()
+        return result[0]['generated_text']
+    else:
+        return f"Error: {response.status_code} - {response.text}"
+# ----------------------------
+# 4. PDF Q&A function
+# ----------------------------
+texts, index = [], None
+def qa(pdf_file, question):
+    global texts, index
+    if pdf_file is not None:
+        # Load PDF and build index
+        texts = load_and_chunk(pdf_file.name)
+        index, _ = build_index(texts)
+    if not texts:
+        return "Please upload a PDF first."
+    # Embed the question
+    embed_model = SentenceTransformer('all-mpnet-base-v2')
+    q_emb = embed_model.encode([question], convert_to_numpy=True)
+    # Retrieve top 5 relevant chunks
+    D, I = index.search(q_emb, k=5)
+    context = "\n\n".join([texts[i] for i in I[0]])
+    # Build prompt for Airavata
+    prompt = f"""
+You are an AI assistant. Use the following document context to answer the question.
+Context:
+{context}
+Question: {question}
+Answer:
+"""
+    # Get answer from Airavata
+    answer = call_airavata(prompt)
+    return answer
+# ----------------------------
+# 5. Gradio Interface
+# ----------------------------
+demo = gr.Interface(
+    fn=qa,
+    inputs=[
+        gr.File(label="Upload PDF"),
+        gr.Textbox(label="Ask your question", placeholder="Type a question about the PDF...")
+    ],
+    outputs=gr.Textbox(label="Answer"),
+    title="PDF Q&A with Airavata",
+    description="Upload a PDF and ask questions. Airavata will answer based on the document."
+)
+demo.launch()