Spaces:
Sleeping
| import streamlit as st | |
| import os | |
| import tempfile | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.chat_models import ChatOllama | |
| from langchain.chains import RetrievalQA | |
| from langchain.prompts import PromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| import base64 | |
# Page-level Streamlit configuration — must be the first st.* call in the script.
_PAGE_CONFIG = {
    "page_title": "EduQuery - Smart PDF Assistant",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_CONFIG)
# Custom CSS for colorful UI
def local_css(file_name):
    """Inject a local CSS file into the page as an inline <style> block.

    Args:
        file_name: Path to a CSS file relative to the working directory.

    A missing stylesheet is non-fatal: the app keeps running with default
    styling instead of crashing with FileNotFoundError (the original behavior).
    """
    try:
        with open(file_name, encoding="utf-8") as f:
            st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
    except FileNotFoundError:
        st.warning(f"Stylesheet '{file_name}' not found; using default styling.")

local_css("style.css")
# Header with gradient
_HEADER_HTML = """
<div class="header">
    <h1>π EduQuery</h1>
    <p>Smart PDF Assistant for Students</p>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# Initialize session state (survives Streamlit's per-interaction script reruns).
for _key, _default in (("vector_store", None), ("messages", [])):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Model selection
MODEL_NAME = "nous-hermes2"  # Best open-source model for instruction following
| # PDF Processing | |
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    Args:
        pdf_file: Streamlit UploadedFile holding the raw PDF bytes.

    Returns:
        A FAISS vector store over ~1000-char chunks (200-char overlap),
        embedded with BAAI/bge-base-en-v1.5.
    """
    # PyPDFLoader needs a real filesystem path, so spill the upload to disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    try:
        docs = PyPDFLoader(tmp_path).load()
    finally:
        # Always remove the temp file — the original leaked it when
        # PDF parsing raised, since unlink was only reached on success.
        os.unlink(tmp_path)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(docs)
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
    return FAISS.from_documents(chunks, embeddings)
| # RAG Setup | |
| def setup_qa_chain(vector_store): | |
| llm = ChatOllama(model=MODEL_NAME, temperature=0.3) | |
| custom_prompt = """ | |
| You are an expert academic assistant. Answer the question based only on the following context: | |
| {context} | |
| Question: {question} | |
| Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document". | |
| """ | |
| prompt = PromptTemplate( | |
| template=custom_prompt, | |
| input_variables=["context", "question"] | |
| ) | |
| retriever = vector_store.as_retriever(search_kwargs={"k": 3}) | |
| qa_chain = ( | |
| {"context": retriever, "question": RunnablePassthrough()} | |
| | prompt | |
| | llm | |
| | StrOutputParser() | |
| ) | |
| return qa_chain | |
| # Generate questions from chapter | |
| def generate_chapter_questions(vector_store, chapter_title): | |
| llm = ChatOllama(model=MODEL_NAME, temperature=0.7) | |
| prompt = PromptTemplate( | |
| input_variables=["chapter_title"], | |
| template=""" | |
| You are an expert educator. Generate 5 important questions and answers about '{chapter_title}' | |
| that would help students understand key concepts. Format as: | |
| Q1: [Question] | |
| A1: [Answer with page reference] | |
| Q2: [Question] | |
| A2: [Answer with page reference] | |
| ...""" | |
| ) | |
| chain = prompt | llm | StrOutputParser() | |
| return chain.invoke({"chapter_title": chapter_title}) | |
# File upload section
st.subheader("π€ Upload Your Textbook/Notes")
uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False)
if uploaded_file:
    # Streamlit reruns the whole script on every interaction; the original
    # re-chunked and re-embedded the PDF on each rerun. Only process when a
    # different file arrives.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("processed_file") != file_key:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
        st.session_state.processed_file = file_key
        st.success("PDF processed successfully! You can now ask questions.")
# Main content columns
col1, col2 = st.columns([1, 2])

# Chapter-based Q&A Generator
with col1:
    st.subheader("π Generate Chapter Questions")
    chapter_title = st.text_input("Enter chapter title/section name:")
    generate_clicked = st.button("Generate Q&A")
    has_store = st.session_state.vector_store is not None
    if generate_clicked and chapter_title and has_store:
        with st.spinner(f"Generating questions about {chapter_title}..."):
            questions = generate_chapter_questions(
                st.session_state.vector_store,
                chapter_title,
            )
        st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True)
    elif chapter_title and not has_store:
        # User typed a chapter name but hasn't uploaded a document yet.
        st.warning("Please upload a PDF first")
# Chat interface
with col2:
    st.subheader("π¬ Ask Anything About the Document")
    # Replay prior turns — Streamlit reruns the script on every interaction.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    if prompt := st.chat_input("Your question..."):
        if not st.session_state.vector_store:
            st.warning("Please upload a PDF first")
            st.stop()
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                # The original rebuilt the LLM + chain for every question.
                # Cache the chain, invalidating when a new vector store is set.
                if st.session_state.get("_qa_chain_src") is not st.session_state.vector_store:
                    st.session_state._qa_chain = setup_qa_chain(st.session_state.vector_store)
                    st.session_state._qa_chain_src = st.session_state.vector_store
                response = st.session_state._qa_chain.invoke(prompt)
                st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})
# Footer
st.markdown("---")
_FOOTER_HTML = """
<div class="footer">
    <p>EduQuery - Helping students learn smarter β’ Powered by Nous-Hermes2 and LangChain</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)