pradyutkrishna123 commited on
Commit
aa30c3a
·
verified ·
1 Parent(s): 40cf94e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pdf_airavata_qa.py
"""PDF question-answering app.

Splits an uploaded PDF into overlapping chunks, embeds them with a
SentenceTransformer model into a FAISS index, retrieves the chunks most
relevant to a question, and asks the Airavata model (via the HuggingFace
Inference API) to answer from that context.
"""

import os

import faiss
import gradio as gr
import numpy as np
import requests
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
12
# ----------------------------
# 1. Load and split PDF
# ----------------------------
def load_and_chunk(pdf_path):
    """Read the PDF at *pdf_path* and return its text as a list of chunks.

    Chunks are at most 1000 characters with 200 characters of overlap, so
    sentences that straddle a boundary appear in both neighbouring chunks.
    """
    pages = PyPDFLoader(pdf_path).load()
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return [doc.page_content for doc in chunker.split_documents(pages)]
22
+
23
# ----------------------------
# 2. Build embedding index
# ----------------------------
def build_index(texts):
    """Embed *texts* and return ``(faiss_index, embeddings)``.

    The index is a flat (exact-search) L2 index over the chunk embeddings.
    """
    model = SentenceTransformer('all-mpnet-base-v2')  # You can choose another model
    vectors = model.encode(texts, convert_to_numpy=True)
    dim = vectors.shape[1]
    store = faiss.IndexFlatL2(dim)
    store.add(vectors)
    return store, vectors
32
+
33
# ----------------------------
# 3. Airavata API call
# ----------------------------
def call_airavata(prompt):
    """Send *prompt* to the Airavata model via the HuggingFace Inference API.

    Returns the generated text on success, or a human-readable ``"Error: ..."``
    string on any failure (missing token, network error, HTTP error status,
    or an unexpected response payload) — callers display the result either way.
    """
    # Example: Using HuggingFace Inference API (replace with your key or local endpoint)
    API_URL = "https://api-inference.huggingface.co/models/ai4bharat/airavata"  # Check actual endpoint
    API_TOKEN = os.environ.get("HF_API_TOKEN")  # Set your token in environment
    if not API_TOKEN:
        # Fail fast with a clear message instead of a cryptic 401 from the API.
        return "Error: HF_API_TOKEN environment variable is not set."
    headers = {"Authorization": f"Bearer {API_TOKEN}"}

    payload = {"inputs": prompt}
    try:
        # Timeout keeps the Gradio worker from hanging forever on a stuck request.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"Error: request failed - {exc}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    result = response.json()
    # Expected shape: [{"generated_text": "..."}]. The API can also return an
    # {"error": ...} dict (e.g. model loading), which would otherwise raise here.
    try:
        return result[0]['generated_text']
    except (KeyError, IndexError, TypeError):
        return f"Error: unexpected response payload - {result!r}"
49
+
50
# ----------------------------
# 4. PDF Q&A function
# ----------------------------
texts, index = [], None  # current document's chunks and FAISS index (set on upload)
_embed_model = None      # lazily-created, shared question-embedding model


def _get_embed_model():
    """Return the shared SentenceTransformer, creating it on first use.

    Previously a new model was loaded on every question, which is slow and
    memory-hungry; the query model must match the one used in build_index.
    """
    global _embed_model
    if _embed_model is None:
        _embed_model = SentenceTransformer('all-mpnet-base-v2')
    return _embed_model


def qa(pdf_file, question):
    """Answer *question* using retrieval over the uploaded PDF + Airavata.

    If *pdf_file* is provided, (re)builds the chunk index first; otherwise
    answers against the most recently indexed document.
    """
    global texts, index
    if pdf_file is not None:
        # Load PDF and build index
        texts = load_and_chunk(pdf_file.name)
        index, _ = build_index(texts)

    if not texts or index is None:
        return "Please upload a PDF first."

    # Embed the question with the cached model
    q_emb = _get_embed_model().encode([question], convert_to_numpy=True)

    # Retrieve up to 5 relevant chunks. Cap k at the corpus size: FAISS pads
    # missing neighbours with -1, and texts[-1] would silently alias the last chunk.
    k = min(5, len(texts))
    D, I = index.search(q_emb, k=k)
    context = "\n\n".join(texts[i] for i in I[0] if 0 <= i < len(texts))

    # Build prompt for Airavata
    prompt = f"""
You are an AI assistant. Use the following document context to answer the question.

Context:
{context}

Question: {question}

Answer:
"""
    # Get answer from Airavata
    answer = call_airavata(prompt)
    return answer
87
+
88
# ----------------------------
# 5. Gradio Interface
# ----------------------------
pdf_input = gr.File(label="Upload PDF")
question_input = gr.Textbox(
    label="Ask your question",
    placeholder="Type a question about the PDF...",
)
answer_output = gr.Textbox(label="Answer")

# Wire the qa() function into a simple two-input, one-output web UI.
demo = gr.Interface(
    fn=qa,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="PDF Q&A with Airavata",
    description="Upload a PDF and ask questions. Airavata will answer based on the document.",
)

demo.launch()