# Hugging Face Spaces page residue (app status metadata), kept as comments
# so the file remains valid Python:
# Spaces: Sleeping
| import streamlit as st | |
| import os | |
| import tempfile | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain_community.llms import HuggingFaceHub | |
| import base64 | |
| # Set page config with light purple theme | |
| st.set_page_config( | |
| page_title="EduQuery - Smart PDF Assistant", | |
| page_icon="π", | |
| layout="wide", | |
| initial_sidebar_state="collapsed" | |
| ) | |
# Embedded CSS for the light-purple UI theme.
# Injected via st.markdown with unsafe_allow_html=True; :root custom
# properties (--primary/--secondary/--light/--dark) are reused by the
# gradient header, buttons, inputs, chat bubbles, upload area and footer.
st.markdown("""
<style>
:root {
--primary: #8a4fff;
--secondary: #d0bcff;
--light: #f3edff;
--dark: #4a2b80;
}
body {
background-color: #f8f5ff;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.stApp {
max-width: 1200px;
margin: 0 auto;
padding: 2rem;
}
.header {
background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
color: white;
padding: 2rem;
border-radius: 15px;
margin-bottom: 2rem;
text-align: center;
box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
}
.header h1 {
font-size: 2.8rem;
margin-bottom: 0.5rem;
}
.stButton>button {
background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: bold;
font-size: 1rem;
transition: all 0.3s ease;
margin-top: 1rem;
}
.stButton>button:hover {
transform: scale(1.05);
box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
}
.stTextInput>div>div>input {
border-radius: 25px;
padding: 0.9rem 1.5rem;
border: 1px solid var(--secondary);
background-color: var(--light);
}
.stTextInput>div>div>input:focus {
border-color: var(--primary);
box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
}
.stChatMessage {
padding: 1.5rem;
border-radius: 20px;
margin-bottom: 1rem;
max-width: 80%;
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
}
.stChatMessage[data-testid="user"] {
background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
margin-left: auto;
color: #4a2b80;
}
.stChatMessage[data-testid="assistant"] {
background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
margin-right: auto;
color: #4a2b80;
border: 1px solid var(--secondary);
}
.upload-area {
background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
padding: 2rem;
border-radius: 15px;
text-align: center;
border: 2px dashed var(--primary);
margin-bottom: 2rem;
}
.chat-area {
background: white;
padding: 2rem;
border-radius: 15px;
box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
height: 500px;
overflow-y: auto;
}
.footer {
text-align: center;
color: #8a4fff;
padding-top: 2rem;
font-size: 0.9rem;
margin-top: 2rem;
border-top: 1px solid var(--secondary);
}
.spinner {
color: var(--primary) !important;
}
.stSpinner > div > div {
border-top-color: var(--primary) !important;
}
.token-input {
background: var(--light);
padding: 1rem;
border-radius: 15px;
margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# Gradient page header (styled by the .header CSS class).
# NOTE(review): "π" looks like a mojibake'd emoji from the original page —
# reproduced as-is to avoid guessing at the intended character.
_HEADER_HTML = """
<div class="header">
<h1>π EduQuery</h1>
<p>Smart PDF Assistant for Students</p>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# Seed the session-state slots on first run so the rest of the script can
# assume they exist; reruns leave existing values untouched.
for _key, _default in (
    ("vector_store", None),
    ("chat_history", []),
    ("qa_chain", None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# PDF Processing
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    Args:
        pdf_file: Streamlit ``UploadedFile`` holding the PDF bytes.

    Returns:
        A FAISS vector store over overlapping ~800-character chunks of the
        document, embedded with all-MiniLM-L6-v2.
    """
    # PyPDFLoader needs a real file path, so spill the upload to disk first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    try:
        loader = PyPDFLoader(tmp_path)
        pages = loader.load_and_split()
        # 150-char overlap keeps context across chunk boundaries.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
        )
        chunks = text_splitter.split_documents(pages)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        return FAISS.from_documents(chunks, embeddings)
    finally:
        # Always remove the temp file — the original leaked it whenever
        # loading/splitting/embedding raised before reaching os.unlink.
        os.unlink(tmp_path)
# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    """Create a conversational retrieval chain over *vector_store*.

    Args:
        vector_store: FAISS store produced by ``process_pdf``.
        hf_token: Optional Hugging Face API token. NOTE(review): the hosted
            Inference API generally requires a token — the token-less path
            may fail at call time and is only kept as a best-effort fallback.

    Returns:
        A ``ConversationalRetrievalChain``, or ``None`` if the LLM could not
        be constructed (an error is shown in the UI in that case).
    """
    repo_id = "google/flan-t5-xxl"
    # Build constructor kwargs once instead of duplicating the call in two
    # branches; include the token only when the user actually supplied one.
    llm_kwargs = {
        "repo_id": repo_id,
        "model_kwargs": {"temperature": 0.5, "max_new_tokens": 500},
    }
    if hf_token:
        llm_kwargs["huggingfacehub_api_token"] = hf_token
    try:
        llm = HuggingFaceHub(**llm_kwargs)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    # Buffer memory feeds the running chat history back into the chain.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff",
    )
# Optional Hugging Face token entry (password-masked text input inside a
# styled container; the label is hidden and replaced by the HTML heading).
_TOKEN_BOX_HTML = """
<div class="token-input">
<h3>π Hugging Face Token (Optional)</h3>
<p>For better models like Mistral, enter your <a href="https://huggingface.co/settings/tokens" target="_blank">Hugging Face token</a></p>
"""
st.markdown(_TOKEN_BOX_HTML, unsafe_allow_html=True)
hf_token = st.text_input(
    "",
    type="password",
    label_visibility="collapsed",
    placeholder="hf_xxxxxxxxxxxxxxxxxx",
)
st.markdown("</div>", unsafe_allow_html=True)
# PDF upload widget wrapped in the dashed-border .upload-area container.
_UPLOAD_BOX_HTML = """
<div class="upload-area">
<h3>π€ Upload Your Textbook/Notes</h3>
"""
st.markdown(_UPLOAD_BOX_HTML, unsafe_allow_html=True)
uploaded_file = st.file_uploader(
    "",
    type="pdf",
    accept_multiple_files=False,
    label_visibility="collapsed",
)
st.markdown("</div>", unsafe_allow_html=True)
if uploaded_file:
    # Streamlit reruns the whole script on every interaction (including each
    # chat message), and the original re-embedded the PDF every time — an
    # expensive repeated operation. Only (re)process when the upload or the
    # token actually changes, tracked via a session-state signature.
    _file_sig = (uploaded_file.name, uploaded_file.size, hf_token)
    if st.session_state.get("processed_file_sig") != _file_sig:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.qa_chain = setup_qa_chain(
                st.session_state.vector_store, hf_token
            )
        st.session_state.processed_file_sig = _file_sig
        if st.session_state.qa_chain:
            st.success("PDF processed successfully! You can now ask questions.")
# Chat panel: open the scrollable container, then replay the stored
# conversation so history survives Streamlit reruns.
st.markdown(
    """
<div class="chat-area">
<h3>π¬ Ask Anything About the Document</h3>
""",
    unsafe_allow_html=True,
)
for _entry in st.session_state.chat_history:
    with st.chat_message(_entry["role"]):
        st.markdown(_entry["content"])
# Handle a new user question from the chat input box.
user_question = st.chat_input("Your question...")
if user_question:
    # Guard clauses: a processed document and a working chain are required.
    if not st.session_state.vector_store:
        st.warning("Please upload a PDF first")
        st.stop()
    if not st.session_state.qa_chain:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()

    # Record and echo the user's message.
    st.session_state.chat_history.append(
        {"role": "user", "content": user_question}
    )
    with st.chat_message("user"):
        st.markdown(user_question)

    # Run the retrieval chain; surface any failure as the reply text rather
    # than crashing the app.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                chain_output = st.session_state.qa_chain(
                    {"question": user_question}
                )
                reply = chain_output["answer"]
            except Exception as exc:
                reply = f"Error: {str(exc)}"
            st.markdown(reply)

    # Persist the assistant's reply for replay on the next rerun.
    st.session_state.chat_history.append(
        {"role": "assistant", "content": reply}
    )
# Close the .chat-area container opened above.
st.markdown("</div>", unsafe_allow_html=True)

# Page footer.
_FOOTER_HTML = """
<div class="footer">
<p>EduQuery - Helping students learn smarter β’ Powered by Flan-T5 and LangChain</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)