# Hugging Face Spaces page residue (app status metadata), kept as comments
# so the file remains valid Python:
# Spaces: Sleeping
| import streamlit as st | |
| import os | |
| import tempfile | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain_community.llms import HuggingFaceHub | |
| import base64 | |
| # Set page config with light purple theme | |
| st.set_page_config( | |
| page_title="EduQuery - Smart PDF Assistant", | |
| page_icon="π", | |
| layout="wide", | |
| initial_sidebar_state="collapsed" | |
| ) | |
# Embedded CSS for the light-purple UI theme.
# Injected via st.markdown with unsafe_allow_html=True; :root custom
# properties (--primary/--secondary/--light/--dark) are reused by the
# gradient header, buttons, inputs, chat bubbles, upload area and footer.
st.markdown("""
<style>
:root {
--primary: #8a4fff;
--secondary: #d0bcff;
--light: #f3edff;
--dark: #4a2b80;
}
body {
background-color: #f8f5ff;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.stApp {
max-width: 1200px;
margin: 0 auto;
padding: 2rem;
}
.header {
background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
color: white;
padding: 2rem;
border-radius: 15px;
margin-bottom: 2rem;
text-align: center;
box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
}
.header h1 {
font-size: 2.8rem;
margin-bottom: 0.5rem;
}
.stButton>button {
background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: bold;
font-size: 1rem;
transition: all 0.3s ease;
margin-top: 1rem;
}
.stButton>button:hover {
transform: scale(1.05);
box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
}
.stTextInput>div>div>input {
border-radius: 25px;
padding: 0.9rem 1.5rem;
border: 1px solid var(--secondary);
background-color: var(--light);
}
.stTextInput>div>div>input:focus {
border-color: var(--primary);
box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
}
.stChatMessage {
padding: 1.5rem;
border-radius: 20px;
margin-bottom: 1rem;
max-width: 80%;
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
}
.stChatMessage[data-testid="user"] {
background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
margin-left: auto;
color: #4a2b80;
}
.stChatMessage[data-testid="assistant"] {
background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
margin-right: auto;
color: #4a2b80;
border: 1px solid var(--secondary);
}
.upload-area {
background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
padding: 2rem;
border-radius: 15px;
text-align: center;
border: 2px dashed var(--primary);
margin-bottom: 2rem;
}
.chat-area {
background: white;
padding: 2rem;
border-radius: 15px;
box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
height: 500px;
overflow-y: auto;
}
.footer {
text-align: center;
color: #8a4fff;
padding-top: 2rem;
font-size: 0.9rem;
margin-top: 2rem;
border-top: 1px solid var(--secondary);
}
.spinner {
color: var(--primary) !important;
}
.stSpinner > div > div {
border-top-color: var(--primary) !important;
}
.token-input {
background: var(--light);
padding: 1rem;
border-radius: 15px;
margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# Gradient page header (styled by the .header CSS class).
# NOTE(review): "π" looks like a mojibake'd emoji from the original page —
# reproduced as-is to avoid guessing at the intended character.
_HEADER_HTML = """
<div class="header">
<h1>π EduQuery</h1>
<p>Smart PDF Assistant for Students</p>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# Seed the session-state slots on first run so the rest of the script can
# assume they exist; reruns leave existing values untouched.
for _key, _default in (
    ("vector_store", None),
    ("chat_history", []),
    ("qa_chain", None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# PDF Processing
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    Args:
        pdf_file: Streamlit ``UploadedFile`` holding the PDF bytes.

    Returns:
        A FAISS vector store over overlapping ~800-character chunks of the
        document, embedded with all-MiniLM-L6-v2.
    """
    # PyPDFLoader needs a real file path, so spill the upload to disk first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    try:
        loader = PyPDFLoader(tmp_path)
        pages = loader.load_and_split()
        # 150-char overlap keeps context across chunk boundaries.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
        )
        chunks = text_splitter.split_documents(pages)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        return FAISS.from_documents(chunks, embeddings)
    finally:
        # Always remove the temp file — the original leaked it whenever
        # loading/splitting/embedding raised before reaching os.unlink.
        os.unlink(tmp_path)
# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    """Create a conversational retrieval chain over *vector_store*.

    Args:
        vector_store: FAISS store produced by ``process_pdf``.
        hf_token: Optional Hugging Face API token. NOTE(review): the hosted
            Inference API generally requires a token — the token-less path
            may fail at call time and is only kept as a best-effort fallback.

    Returns:
        A ``ConversationalRetrievalChain``, or ``None`` if the LLM could not
        be constructed (an error is shown in the UI in that case).
    """
    repo_id = "google/flan-t5-xxl"
    # Build constructor kwargs once instead of duplicating the call in two
    # branches; include the token only when the user actually supplied one.
    llm_kwargs = {
        "repo_id": repo_id,
        "model_kwargs": {"temperature": 0.5, "max_new_tokens": 500},
    }
    if hf_token:
        llm_kwargs["huggingfacehub_api_token"] = hf_token
    try:
        llm = HuggingFaceHub(**llm_kwargs)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    # Buffer memory feeds the running chat history back into the chain.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff",
    )
# Optional Hugging Face token entry (password-masked text input inside a
# styled container; the label is hidden and replaced by the HTML heading).
_TOKEN_BOX_HTML = """
<div class="token-input">
<h3>π Hugging Face Token (Optional)</h3>
<p>For better models like Mistral, enter your <a href="https://huggingface.co/settings/tokens" target="_blank">Hugging Face token</a></p>
"""
st.markdown(_TOKEN_BOX_HTML, unsafe_allow_html=True)
hf_token = st.text_input(
    "",
    type="password",
    label_visibility="collapsed",
    placeholder="hf_xxxxxxxxxxxxxxxxxx",
)
st.markdown("</div>", unsafe_allow_html=True)
# PDF upload widget wrapped in the dashed-border .upload-area container.
_UPLOAD_BOX_HTML = """
<div class="upload-area">
<h3>π€ Upload Your Textbook/Notes</h3>
"""
st.markdown(_UPLOAD_BOX_HTML, unsafe_allow_html=True)
uploaded_file = st.file_uploader(
    "",
    type="pdf",
    accept_multiple_files=False,
    label_visibility="collapsed",
)
st.markdown("</div>", unsafe_allow_html=True)
if uploaded_file:
    # Streamlit reruns the whole script on every interaction (including each
    # chat message), and the original re-embedded the PDF every time — an
    # expensive repeated operation. Only (re)process when the upload or the
    # token actually changes, tracked via a session-state signature.
    _file_sig = (uploaded_file.name, uploaded_file.size, hf_token)
    if st.session_state.get("processed_file_sig") != _file_sig:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.qa_chain = setup_qa_chain(
                st.session_state.vector_store, hf_token
            )
        st.session_state.processed_file_sig = _file_sig
        if st.session_state.qa_chain:
            st.success("PDF processed successfully! You can now ask questions.")
# Chat panel: open the scrollable container, then replay the stored
# conversation so history survives Streamlit reruns.
st.markdown(
    """
<div class="chat-area">
<h3>π¬ Ask Anything About the Document</h3>
""",
    unsafe_allow_html=True,
)
for _entry in st.session_state.chat_history:
    with st.chat_message(_entry["role"]):
        st.markdown(_entry["content"])
# Handle a new user question from the chat input box.
user_question = st.chat_input("Your question...")
if user_question:
    # Guard clauses: a processed document and a working chain are required.
    if not st.session_state.vector_store:
        st.warning("Please upload a PDF first")
        st.stop()
    if not st.session_state.qa_chain:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()

    # Record and echo the user's message.
    st.session_state.chat_history.append(
        {"role": "user", "content": user_question}
    )
    with st.chat_message("user"):
        st.markdown(user_question)

    # Run the retrieval chain; surface any failure as the reply text rather
    # than crashing the app.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                chain_output = st.session_state.qa_chain(
                    {"question": user_question}
                )
                reply = chain_output["answer"]
            except Exception as exc:
                reply = f"Error: {str(exc)}"
            st.markdown(reply)

    # Persist the assistant's reply for replay on the next rerun.
    st.session_state.chat_history.append(
        {"role": "assistant", "content": reply}
    )
# Close the .chat-area container opened above.
st.markdown("</div>", unsafe_allow_html=True)

# Page footer.
_FOOTER_HTML = """
<div class="footer">
<p>EduQuery - Helping students learn smarter β’ Powered by Flan-T5 and LangChain</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)