Spaces:

sunbal7
/

PDFQueryApplication

Sleeping

App Files Files Community

sunbal7 committed on Jun 19

Commit

351c135

verified ·

1 Parent(s): 245f6f3

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -35

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from langchain_community.vectorstores import FAISS
 import requests
 import os
 import time
 # Page configuration
 st.set_page_config(
@@ -70,12 +71,21 @@ st.markdown("""
         background-color: #ffebee;
         border-left: 4px solid #f44336;
         padding: 10px;
     }
     .info {
         background-color: #e3f2fd;
         border-left: 4px solid #2196f3;
         padding: 10px;
     }
     @keyframes fadeIn {
@@ -94,17 +104,32 @@ if 'pages' not in st.session_state:
     st.session_state.pages = []
 if 'history' not in st.session_state:
     st.session_state.history = []
 # Load embedding model with caching
 @st.cache_resource
 def load_embedding_model():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 def query_hf_inference_api(prompt, max_tokens=200):
-    """Query Hugging Face Inference API with error handling and retry"""
-    MODEL = "google/flan-t5-large"  # Smaller, freely accessible model
     API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
     headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"} if os.getenv('HF_API_KEY') else {}
     payload = {
         "inputs": prompt,
         "parameters": {
@@ -122,22 +147,30 @@ def query_hf_inference_api(prompt, max_tokens=200):
             return result[0]['generated_text'] if result else ""
         elif response.status_code == 403:
-            st.error("403 Forbidden: Please check your Hugging Face API token and model access")
-            st.markdown("""
-            <div class="info">
-                <h4>How to fix this:</h4>
                 <ol>
-                    <li>Get your free Hugging Face token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
-                    <li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
-                    <li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">https://huggingface.co/google/flan-t5-large</a></li>
                 </ol>
             </div>
-            """, unsafe_allow_html=True)
             return ""
         elif response.status_code == 429:
             st.warning("Rate limit exceeded. Waiting and retrying...")
-            time.sleep(5)  # Wait 5 seconds before retrying
             return query_hf_inference_api(prompt, max_tokens)
         else:
@@ -173,14 +206,14 @@ def process_pdf(pdf_file):
     st.session_state.pdf_processed = True
     st.success("✅ PDF processed successfully!")
-def ask_question(question):
     """Answer a question using the vector store and Hugging Face API"""
     if not st.session_state.vector_store:
         return "PDF not processed yet", []
     # Find relevant passages
     docs = st.session_state.vector_store.similarity_search(question, k=3)
-    context = "\n\n".join([doc.page_content for doc in docs])
     # Format prompt for the model
     prompt = f"""
@@ -195,18 +228,19 @@ def ask_question(question):
     """
     # Query the model
-    answer = query_hf_inference_api(prompt)
     # Add to history
     st.session_state.history.append({
         "question": question,
         "answer": answer,
-        "sources": [doc.page_content for doc in docs]
     })
     return answer, docs
-def generate_qa_for_chapter(start_page, end_page):
     """Generate Q&A for specific chapter pages"""
     if start_page < 1 or end_page > len(st.session_state.pages) or start_page > end_page:
         st.error("Invalid page range")
@@ -227,7 +261,7 @@ def generate_qa_for_chapter(start_page, end_page):
         for i, chunk in enumerate(chunks):
             if i % 2 == 0:  # Generate question
                 prompt = f"Based on this text, generate one study question: {chunk[:500]}"
-                question = query_hf_inference_api(prompt, max_tokens=100)
                 if question and not question.endswith("?"):
                     question += "?"
                 if question:  # Only add if we got a valid question
@@ -235,7 +269,7 @@ def generate_qa_for_chapter(start_page, end_page):
             else:  # Generate answer
                 if qa_pairs:  # Ensure we have a question to answer
                     prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
-                    answer = query_hf_inference_api(prompt, max_tokens=200)
                     qa_pairs[-1] = (qa_pairs[-1][0], answer)
     return qa_pairs
@@ -243,19 +277,52 @@ def generate_qa_for_chapter(start_page, end_page):
 # App header
 st.markdown("<h1 class='header'>📚 PDF Study Assistant</h1>", unsafe_allow_html=True)
-# API Token Instructions
-if not os.getenv("HF_API_KEY"):
-    st.markdown("""
-    <div class="info">
-        <h4>Setup Required:</h4>
-        <p>This app requires a free Hugging Face API token to work:</p>
-        <ol>
-            <li>Get your token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
-            <li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
-            <li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">google/flan-t5-large</a></li>
-        </ol>
-    </div>
-    """, unsafe_allow_html=True)
 # PDF Upload Section
 with st.container():
@@ -268,6 +335,15 @@ if pdf_file:
         process_pdf(pdf_file)
     if st.session_state.pdf_processed:
         # Navigation tabs
         selected_tab = option_menu(
             None,
@@ -290,7 +366,7 @@ if pdf_file:
             if user_question:
                 with st.spinner("🤔 Thinking..."):
-                    answer, docs = ask_question(user_question)
                     if answer:
                         st.markdown(f"<div class='card'><b>Answer:</b> {answer}</div>", unsafe_allow_html=True)
@@ -308,7 +384,7 @@ if pdf_file:
                 end_page = st.number_input("End Page", min_value=1, max_value=len(st.session_state.pages), value=min(5, len(st.session_state.pages)))
             if st.button("Generate Q&A", key="generate_qa"):
-                qa_pairs = generate_qa_for_chapter(start_page, end_page)
                 if qa_pairs:
                     st.markdown(f"<h4>📖 Generated Questions for Pages {start_page}-{end_page}</h4>", unsafe_allow_html=True)
@@ -329,7 +405,7 @@ if pdf_file:
                 st.info("No questions asked yet.")
             else:
                 for i, item in enumerate(reversed(st.session_state.history)):
-                    with st.expander(f"Q{i+1}: {item['question']}"):
                         st.markdown(f"**Answer:** {item['answer']}")
                         st.markdown("**Source Passages:**")
                         for j, source in enumerate(item['sources']):
@@ -339,6 +415,6 @@ if pdf_file:
 st.markdown("---")
 st.markdown("""
 <div style="text-align: center; padding: 20px;">
-    Built with ❤️ for students | PDF Study Assistant v3.0
 </div>
 """, unsafe_allow_html=True)

 import requests
 import os
 import time
+import base64
 # Page configuration
 st.set_page_config(
         background-color: #ffebee;
         border-left: 4px solid #f44336;
         padding: 10px;
+        margin: 10px 0;
     }
     .info {
         background-color: #e3f2fd;
         border-left: 4px solid #2196f3;
         padding: 10px;
+        margin: 10px 0;
+    }
+    .success {
+        background-color: #e8f5e9;
+        border-left: 4px solid #4caf50;
+        padding: 10px;
+        margin: 10px 0;
     }
     @keyframes fadeIn {
     st.session_state.pages = []
 if 'history' not in st.session_state:
     st.session_state.history = []
+if 'token_valid' not in st.session_state:
+    st.session_state.token_valid = None
 # Load embedding model with caching
 @st.cache_resource
 def load_embedding_model():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+def check_token_validity():
+    """Check if the token is valid by making a simple API call"""
+    if not os.getenv("HF_API_KEY"):
+        return False
+    try:
+        headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
+        response = requests.get("https://huggingface.co/api/whoami", headers=headers)
+        return response.status_code == 200
+    except:
+        return False
 def query_hf_inference_api(prompt, max_tokens=200):
+    """Query Hugging Face Inference API with better error handling"""
+    MODEL = "google/flan-t5-base"  # Switch to base model for better accessibility
     API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
     headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"} if os.getenv('HF_API_KEY') else {}
     payload = {
         "inputs": prompt,
         "parameters": {
             return result[0]['generated_text'] if result else ""
         elif response.status_code == 403:
+            # Detailed debug information
+            st.session_state.token_valid = check_token_validity()
+            debug_info = f"""
+            <div class="error">
+                <h4>403 Forbidden Error</h4>
+                <p>Token is set: <strong>{'Yes' if os.getenv('HF_API_KEY') else 'No'}</strong></p>
+                <p>Token valid: <strong>{'Yes' if st.session_state.token_valid else 'No'}</strong></p>
+                <p>Model: {MODEL}</p>
+                <p>Possible solutions:</p>
                 <ol>
+                    <li>Visit the <a href="https://huggingface.co/google/flan-t5-base" target="_blank">model page</a> and click "Agree and access repository"</li>
+                    <li>Ensure your token has "read" permissions</li>
+                    <li>Wait 5-10 minutes after accepting terms</li>
+                    <li>Try a different model using the dropdown below</li>
                 </ol>
             </div>
+            """
+            st.markdown(debug_info, unsafe_allow_html=True)
             return ""
         elif response.status_code == 429:
             st.warning("Rate limit exceeded. Waiting and retrying...")
+            time.sleep(3)
             return query_hf_inference_api(prompt, max_tokens)
         else:
     st.session_state.pdf_processed = True
     st.success("✅ PDF processed successfully!")
+def ask_question(question, model_choice):
     """Answer a question using the vector store and Hugging Face API"""
     if not st.session_state.vector_store:
         return "PDF not processed yet", []
     # Find relevant passages
     docs = st.session_state.vector_store.similarity_search(question, k=3)
+    context = "\n\n".join([doc.page_content[:500] for doc in docs])
     # Format prompt for the model
     prompt = f"""
     """
     # Query the model
+    answer = query_hf_inference_api(prompt, model=model_choice)
     # Add to history
     st.session_state.history.append({
         "question": question,
         "answer": answer,
+        "sources": [doc.page_content for doc in docs],
+        "model": model_choice
     })
     return answer, docs
+def generate_qa_for_chapter(start_page, end_page, model_choice):
     """Generate Q&A for specific chapter pages"""
     if start_page < 1 or end_page > len(st.session_state.pages) or start_page > end_page:
         st.error("Invalid page range")
         for i, chunk in enumerate(chunks):
             if i % 2 == 0:  # Generate question
                 prompt = f"Based on this text, generate one study question: {chunk[:500]}"
+                question = query_hf_inference_api(prompt, model=model_choice, max_tokens=100)
                 if question and not question.endswith("?"):
                     question += "?"
                 if question:  # Only add if we got a valid question
             else:  # Generate answer
                 if qa_pairs:  # Ensure we have a question to answer
                     prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
+                    answer = query_hf_inference_api(prompt, model=model_choice, max_tokens=200)
                     qa_pairs[-1] = (qa_pairs[-1][0], answer)
     return qa_pairs
 # App header
 st.markdown("<h1 class='header'>📚 PDF Study Assistant</h1>", unsafe_allow_html=True)
+# Model selection
+MODEL_OPTIONS = {
+    "google/flan-t5-base": "T5 Base (Recommended)",
+    "google/flan-t5-large": "T5 Large (Requires Auth)",
+    "mrm8488/t5-base-finetuned-question-generation-ap": "Question Generation",
+    "declare-lab/flan-alpaca-base": "Alpaca Base"
+}
+# Debug info panel
+with st.expander("🔧 Debug Information", expanded=False):
+    st.subheader("Hugging Face Token Status")
+    # Check token validity
+    token_valid = check_token_validity()
+    st.session_state.token_valid = token_valid
+    col1, col2 = st.columns(2)
+    with col1:
+        st.write(f"Token is set: {'✅ Yes' if os.getenv('HF_API_KEY') else '❌ No'}")
+    with col2:
+        st.write(f"Token is valid: {'✅ Yes' if token_valid else '❌ No'}")
+    if os.getenv('HF_API_KEY'):
+        st.markdown("""
+        <div class="info">
+            <p>Your token is set but we're still having issues. Try these steps:</p>
+            <ol>
+                <li>Visit the <a href="https://huggingface.co/google/flan-t5-base" target="_blank">model page</a></li>
+                <li>Click "Agree and access repository"</li>
+                <li>Wait 5-10 minutes for changes to propagate</li>
+                <li>Try a different model from the dropdown</li>
+            </ol>
+        </div>
+        """, unsafe_allow_html=True)
+    else:
+        st.markdown("""
+        <div class="error">
+            <p>Token is not set! Add it in your Space secrets:</p>
+            <ol>
+                <li>Go to your Space → Settings → Secrets</li>
+                <li>Add <code>HF_API_KEY</code> with your token</li>
+                <li>Redeploy the Space</li>
+            </ol>
+            <p>Get your token: <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></p>
+        </div>
+        """, unsafe_allow_html=True)
 # PDF Upload Section
 with st.container():
         process_pdf(pdf_file)
     if st.session_state.pdf_processed:
+        # Model selection
+        st.subheader("Model Selection")
+        model_choice = st.selectbox(
+            "Choose AI model:",
+            options=list(MODEL_OPTIONS.keys()),
+            format_func=lambda x: MODEL_OPTIONS[x],
+            help="Some models require accepting terms on Hugging Face"
+        )
         # Navigation tabs
         selected_tab = option_menu(
             None,
             if user_question:
                 with st.spinner("🤔 Thinking..."):
+                    answer, docs = ask_question(user_question, model_choice)
                     if answer:
                         st.markdown(f"<div class='card'><b>Answer:</b> {answer}</div>", unsafe_allow_html=True)
                 end_page = st.number_input("End Page", min_value=1, max_value=len(st.session_state.pages), value=min(5, len(st.session_state.pages)))
             if st.button("Generate Q&A", key="generate_qa"):
+                qa_pairs = generate_qa_for_chapter(start_page, end_page, model_choice)
                 if qa_pairs:
                     st.markdown(f"<h4>📖 Generated Questions for Pages {start_page}-{end_page}</h4>", unsafe_allow_html=True)
                 st.info("No questions asked yet.")
             else:
                 for i, item in enumerate(reversed(st.session_state.history)):
+                    with st.expander(f"Q{i+1}: {item['question']} ({MODEL_OPTIONS.get(item['model'], item['model'])})"):
                         st.markdown(f"**Answer:** {item['answer']}")
                         st.markdown("**Source Passages:**")
                         for j, source in enumerate(item['sources']):
 st.markdown("---")
 st.markdown("""
 <div style="text-align: center; padding: 20px;">
+    Built with ❤️ for students | PDF Study Assistant v4.0
 </div>
 """, unsafe_allow_html=True)