# Scraped Hugging Face Spaces listing metadata: the Space reported
# "Runtime error" at capture time. (Original header was not valid Python.)
| # pdf_airavata_qa.py | |
| import gradio as gr | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| import requests | |
| import os | |
| # ---------------------------- | |
| # 1. Load and split PDF | |
| # ---------------------------- | |
def load_and_chunk(pdf_path):
    """Read the PDF at *pdf_path* and return its text as overlapping chunks.

    Pages are loaded with PyPDFLoader, then split into ~1000-character
    pieces with 200 characters of overlap so retrieval does not lose
    context at chunk boundaries.

    Returns a list of chunk strings.
    """
    documents = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return [chunk.page_content for chunk in splitter.split_documents(documents)]
| # ---------------------------- | |
| # 2. Build embedding index | |
| # ---------------------------- | |
def build_index(texts):
    """Embed *texts* and return (FAISS L2 index, embedding matrix).

    The index holds one vector per input chunk, in the same order as
    *texts*, so search result ids map directly back to chunk positions.
    """
    model = SentenceTransformer('all-mpnet-base-v2')  # You can choose another model
    vectors = model.encode(texts, convert_to_numpy=True)
    dim = vectors.shape[1]
    flat_index = faiss.IndexFlatL2(dim)
    flat_index.add(vectors)
    return flat_index, vectors
| # ---------------------------- | |
| # 3. Airavata API call | |
| # ---------------------------- | |
def call_airavata(prompt):
    """Send *prompt* to the Airavata model via the HF Inference API.

    Parameters
    ----------
    prompt : str
        The full prompt (context + question) to generate from.

    Returns
    -------
    str
        The generated text on success, or a human-readable ``"Error: ..."``
        string on any failure (missing token, network error, bad status,
        unexpected response shape). Errors are returned rather than raised
        so the Gradio handler never crashes.
    """
    # Example: Using HuggingFace Inference API (replace with your key or local endpoint)
    API_URL = "https://api-inference.huggingface.co/models/ai4bharat/airavata"  # Check actual endpoint
    API_TOKEN = os.environ.get("HF_API_TOKEN")  # Set your token in environment
    if not API_TOKEN:
        return "Error: HF_API_TOKEN environment variable is not set."
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    payload = {"inputs": prompt}
    try:
        # A timeout prevents a stalled inference endpoint from hanging the
        # Gradio worker forever; requests has no default timeout.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    except requests.RequestException as exc:
        # Connection/timeout/DNS failures previously propagated and crashed
        # the handler; surface them as an error message instead.
        return f"Error: request failed - {exc}"
    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"
    result = response.json()
    # Success shape is [{"generated_text": ...}], but the API can return
    # {"error": ...} (sometimes with status 200 while the model loads) —
    # guard against unexpected shapes instead of raising KeyError/TypeError.
    if isinstance(result, list) and result and isinstance(result[0], dict) \
            and 'generated_text' in result[0]:
        return result[0]['generated_text']
    return f"Error: unexpected response format - {result}"
| # ---------------------------- | |
| # 4. PDF Q&A function | |
| # ---------------------------- | |
# Module-level state shared across Gradio calls: chunk texts and the FAISS
# index for the most recently uploaded PDF.
texts, index = [], None

# Lazily-created embedding model, cached so we do not reload it on every
# question (instantiating a SentenceTransformer costs seconds and RAM).
_embed_model = None


def _get_embed_model():
    """Return the shared query-embedding model, loading it on first use."""
    global _embed_model
    if _embed_model is None:
        _embed_model = SentenceTransformer('all-mpnet-base-v2')
    return _embed_model


def qa(pdf_file, question):
    """Answer *question* from the uploaded PDF via retrieval + Airavata.

    Parameters
    ----------
    pdf_file : file-like object, filepath str, or None
        Newly uploaded PDF. When None, the previously indexed PDF (module
        state) is reused.
    question : str
        The user's question about the document.

    Returns
    -------
    str
        The model's answer, or an instruction/error message.
    """
    global texts, index
    if pdf_file is not None:
        # Newer Gradio versions pass a plain filepath string; older ones
        # pass an object with a .name attribute. Accept both.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        texts = load_and_chunk(pdf_path)
        index, _ = build_index(texts)
    if not texts or index is None:
        return "Please upload a PDF first."
    if not question or not question.strip():
        return "Please enter a question."

    # Embed the question with the cached model (previously a fresh
    # SentenceTransformer was instantiated on every call).
    q_emb = _get_embed_model().encode([question], convert_to_numpy=True)

    # Retrieve up to 5 relevant chunks. FAISS pads results with -1 when
    # fewer than k vectors are indexed; unfiltered, texts[-1] would then
    # silently fetch the wrong chunk. Clamp k and drop invalid ids.
    D, I = index.search(q_emb, k=min(5, len(texts)))
    context = "\n\n".join(texts[i] for i in I[0] if 0 <= i < len(texts))

    # Build prompt for Airavata
    prompt = f"""
You are an AI assistant. Use the following document context to answer the question.
Context:
{context}
Question: {question}
Answer:
"""
    # Get answer from Airavata
    return call_airavata(prompt)
# ----------------------------
# 5. Gradio Interface
# ----------------------------
# Build the UI components up front, then wire them into the Interface.
pdf_input = gr.File(label="Upload PDF")
question_input = gr.Textbox(
    label="Ask your question",
    placeholder="Type a question about the PDF...",
)
answer_output = gr.Textbox(label="Answer")

demo = gr.Interface(
    fn=qa,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="PDF Q&A with Airavata",
    description="Upload a PDF and ask questions. Airavata will answer based on the document.",
)
demo.launch()