Spaces:
Sleeping
Sleeping
import html
import json
import os

import fitz  # PyMuPDF
import requests
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from streamlit_option_menu import option_menu
# Page configuration: tab title and icon, wide layout, sidebar collapsed on load.
# NOTE(review): the icon literal looks mis-encoded in this copy of the file —
# confirm the intended emoji against the original source.
_page_settings = {
    "page_title": "PDF Study Assistant",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)
# Custom CSS for colorful design.
# The stylesheet lives in a named constant and is injected as raw HTML so the
# <style> element reaches the page unescaped.
_CUSTOM_CSS = """
<style>
:root {
--primary: #ff4b4b;
--secondary: #ff9a3d;
--accent1: #ffcb74;
--accent2: #3a86ff;
--background: #f0f2f6;
--card: #ffffff;
}
.stApp {
background: linear-gradient(135deg, var(--background) 0%, #e0e5ec 100%);
}
.stButton>button {
background: linear-gradient(to right, var(--secondary), var(--primary));
color: white;
border-radius: 12px;
padding: 8px 20px;
font-weight: 600;
}
.stTextInput>div>div>input {
border-radius: 12px;
border: 2px solid var(--accent2);
padding: 10px;
}
.card {
background: var(--card);
border-radius: 15px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
padding: 20px;
margin-bottom: 20px;
}
.header {
background: linear-gradient(to right, var(--accent2), var(--primary));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-align: center;
margin-bottom: 30px;
}
.tab-content {
animation: fadeIn 0.5s ease-in-out;
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.spinner {
display: flex;
justify-content: center;
align-items: center;
height: 100px;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Initialize session state.
# Each key is seeded only when absent, so values set during an earlier run of
# the script are kept. Factories (not shared literals) give every session its
# own fresh list for "pages" and "history".
_state_factories = {
    "pdf_processed": lambda: False,
    "vector_store": lambda: None,
    "pages": list,
    "history": list,
}
for _state_key, _make_default in _state_factories.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# Load embedding model with caching
@st.cache_resource
def load_embedding_model():
    """Return the sentence-transformers embedding model used for indexing.

    The function is wrapped in ``st.cache_resource`` so the
    ``HuggingFaceEmbeddings`` object is constructed once and reused by later
    calls instead of being rebuilt every time a PDF is processed.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
def query_hf_inference_api(prompt, model="google/flan-t5-xxl", max_tokens=200, timeout=60):
    """Query the Hugging Face Inference API directly and return the generated text.

    Args:
        prompt: Text sent as the ``inputs`` field of the request.
        model: Model identifier appended to the inference endpoint URL.
        max_tokens: Value sent as ``max_new_tokens``.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled endpoint cannot hang the app indefinitely.

    Returns:
        The ``generated_text`` of the first result, or ``""`` when the request
        fails or the response does not have the expected shape. Failures are
        reported to the user through ``st.error``.
    """
    api_url = f"https://api-inference.huggingface.co/models/{model}"
    # Only send an Authorization header when a key is configured; otherwise
    # the request would carry the literal string "Bearer None".
    api_key = os.getenv("HF_API_KEY")
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            # NOTE(review): with do_sample False the temperature is presumably
            # ignored by the backend — confirm before tuning it.
            "temperature": 0.5,
            "do_sample": False,
        },
    }
    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # Covers connection problems, timeouts, HTTP error statuses and
        # bodies that are not valid JSON.
        st.error(f"Error querying model: {str(e)}")
        return ""

    # Expected shape is a non-empty list of dicts holding "generated_text".
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", "")
    if isinstance(result, dict) and "error" in result:
        # Surface an API-level error message instead of an opaque KeyError.
        st.error(f"Error querying model: {result['error']}")
    return ""
def process_pdf(pdf_file):
    """Extract text from PDF and create vector store.

    Reads the uploaded file with PyMuPDF, stores the per-page text in
    ``st.session_state.pages``, splits the full text into overlapping chunks
    and indexes them in a FAISS store saved to
    ``st.session_state.vector_store``. On success
    ``st.session_state.pdf_processed`` is set to ``True``.

    Args:
        pdf_file: Uploaded file object; its ``read()`` must return the PDF bytes.

    Returns:
        None. When the PDF yields no text chunks, an error is shown and the
        session is left marked as not processed.
    """
    with st.spinner("π Reading PDF..."):
        # The context manager closes the document once the text is extracted.
        with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
            page_texts = [page.get_text() for page in doc]
        st.session_state.pages = page_texts
        text = "".join(page_texts)
    with st.spinner("π Processing text..."):
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = text_splitter.split_text(text)
        if not chunks:
            # A PDF without a text layer (e.g. scanned images) produces no
            # chunks; building an index from an empty list is not meaningful.
            st.session_state.vector_store = None
            st.session_state.pdf_processed = False
            st.error("No extractable text was found in this PDF.")
            return
        embeddings = load_embedding_model()
        st.session_state.vector_store = FAISS.from_texts(chunks, embeddings)
    st.session_state.pdf_processed = True
    st.success("β PDF processed successfully!")
def ask_question(question):
    """Answer a question from the indexed PDF and record it in the history.

    The three chunks most similar to ``question`` are fetched from the
    session's vector store, joined into a context block, and sent to the
    Hugging Face model together with the question. The exchange (question,
    answer, source passages) is appended to ``st.session_state.history``.

    Returns:
        ``(answer, docs)`` where ``docs`` are the retrieved documents, or
        ``("PDF not processed yet", [])`` when no vector store is available.
    """
    store = st.session_state.vector_store
    if not store:
        return "PDF not processed yet", []

    # Retrieve the passages that will ground the answer.
    docs = store.similarity_search(question, k=3)
    passages = [doc.page_content for doc in docs]
    context = "\n\n".join(passages)

    # Prompt text sent to the model.
    prompt = f"""
Based on the following context, answer the question.
If the answer isn't in the context, say "I don't know".
Context:
{context}
Question: {question}
Answer:
"""
    answer = query_hf_inference_api(prompt)

    # Keep a record of the exchange for the History tab.
    entry = {"question": question, "answer": answer, "sources": passages}
    st.session_state.history.append(entry)
    return answer, docs
def generate_qa_for_chapter(start_page, end_page):
    """Generate question/answer pairs for a range of pages.

    The text of pages ``start_page``..``end_page`` (1-based, inclusive) is
    split into chunks. Every other chunk seeds one study question, and the
    answer is generated from that same chunk so its context actually contains
    the material the question was drawn from.

    Args:
        start_page: First page of the range (1-based).
        end_page: Last page of the range (1-based, inclusive).

    Returns:
        A list of ``(question, answer)`` tuples. Empty when the page range is
        invalid (an error is shown) or no question could be generated.
    """
    if start_page < 1 or end_page > len(st.session_state.pages) or start_page > end_page:
        st.error("Invalid page range")
        return []
    chapter_text = "\n".join(st.session_state.pages[start_page-1:end_page])
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_text(chapter_text)
    qa_pairs = []
    with st.spinner(f"π§ Generating Q&A for pages {start_page}-{end_page}..."):
        # One pair per two chunks keeps roughly one model call per chunk.
        for chunk in chunks[::2]:
            excerpt = chunk[:500]
            question = query_hf_inference_api(
                f"Based on this text, generate one study question: {excerpt}",
                max_tokens=100,
            ).strip()
            if not question:
                # The model call failed or returned nothing; skip this chunk
                # rather than emit an empty question.
                continue
            if not question.endswith("?"):
                question += "?"
            answer = query_hf_inference_api(
                f"Answer this question: {question} using this context: {excerpt}",
                max_tokens=200,
            )
            # Record the pair; without this the result list stays empty.
            qa_pairs.append((question, answer))
    return qa_pairs
# App header
st.markdown("<h1 class='header'>π PDF Study Assistant</h1>", unsafe_allow_html=True)
# PDF Upload Section
with st.container():
    st.subheader("π€ Upload Your Textbook/Notes")
    # A non-empty label is supplied for accessibility; it remains visually
    # hidden because label_visibility is "collapsed".
    pdf_file = st.file_uploader("Upload a PDF", type="pdf", label_visibility="collapsed")
# Main content
if pdf_file:
    # Identify the upload by name and size so that replacing the PDF with a
    # different one triggers re-indexing instead of reusing the old store.
    upload_key = (pdf_file.name, pdf_file.size)
    if st.session_state.get("processed_file_key") != upload_key:
        st.session_state.pdf_processed = False
    if not st.session_state.pdf_processed:
        process_pdf(pdf_file)
        if st.session_state.pdf_processed:
            st.session_state.processed_file_key = upload_key
    if st.session_state.pdf_processed:
        # Navigation tabs
        selected_tab = option_menu(
            None,
            ["Ask Questions", "Generate Chapter Q&A", "History"],
            icons=["chat", "book", "clock-history"],
            menu_icon="cast",
            default_index=0,
            orientation="horizontal",
            styles={
                "container": {"padding": "0!important", "background-color": "#f9f9f9"},
                "nav-link": {"font-size": "16px", "font-weight": "bold"},
                "nav-link-selected": {"background": "linear-gradient(to right, #3a86ff, #ff4b4b)"},
            }
        )
        # Question Answering Tab
        if selected_tab == "Ask Questions":
            st.markdown("### π¬ Ask Questions About Your Document")
            user_question = st.text_input("Type your question here:", key="user_question")
            if user_question:
                with st.spinner("π€ Thinking..."):
                    answer, docs = ask_question(user_question)
                # Model output is untrusted text: escape it before embedding
                # it in markup rendered with unsafe_allow_html.
                st.markdown(
                    f"<div class='card'><b>Answer:</b> {html.escape(answer)}</div>",
                    unsafe_allow_html=True,
                )
                with st.expander("π See source passages"):
                    for i, doc in enumerate(docs):
                        st.markdown(f"**Passage {i+1}:** {doc.page_content[:500]}...")
        # Chapter Q&A Generation Tab
        elif selected_tab == "Generate Chapter Q&A":
            st.markdown("### π Generate Q&A for Specific Chapter")
            col1, col2 = st.columns(2)
            with col1:
                start_page = st.number_input("Start Page", min_value=1, max_value=len(st.session_state.pages), value=1)
            with col2:
                end_page = st.number_input("End Page", min_value=1, max_value=len(st.session_state.pages), value=min(5, len(st.session_state.pages)))
            if st.button("Generate Q&A", key="generate_qa"):
                qa_pairs = generate_qa_for_chapter(start_page, end_page)
                if qa_pairs:
                    st.markdown(f"<h4>π Generated Questions for Pages {start_page}-{end_page}</h4>", unsafe_allow_html=True)
                    for i, (question, answer) in enumerate(qa_pairs):
                        # Escape generated text for the same reason as above.
                        # The HTML is kept flush-left inside the literal so the
                        # emitted markup has no leading indentation.
                        st.markdown(f"""
<div class='card'>
<b>Q{i+1}:</b> {html.escape(question)}<br>
<b>A{i+1}:</b> {html.escape(answer)}
</div>
""", unsafe_allow_html=True)
                else:
                    st.warning("No Q&A pairs generated. Try a different page range.")
        # History Tab
        elif selected_tab == "History":
            st.markdown("### β³ Question History")
            if not st.session_state.history:
                st.info("No questions asked yet.")
            else:
                # Newest entries are listed first.
                for i, item in enumerate(reversed(st.session_state.history)):
                    with st.expander(f"Q{i+1}: {item['question']}"):
                        st.markdown(f"**Answer:** {item['answer']}")
                        st.markdown("**Source Passages:**")
                        for j, source in enumerate(item['sources']):
                            st.markdown(f"{j+1}. {source[:500]}...")
# Footer: a horizontal rule followed by a centered credit line rendered as raw HTML.
_FOOTER_HTML = """
<div style="text-align: center; padding: 20px;">
Built with β€οΈ for students | PDF Study Assistant v2.0
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)