shallou committed on
Commit
4d4e63a
·
verified ·
1 Parent(s): c8f9bff

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ import pickle
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.llms import OpenAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
+ from langchain.callbacks import get_openai_callback
11
+ import os
12
+
13
+ load_dotenv()
14
+
15
def main():
    """Render the PDF chatbot page and answer questions about an uploaded PDF.

    Flow: the user uploads a PDF, its text is extracted and split into
    overlapping chunks, the chunks are embedded into a FAISS vector store
    (cached on disk as a pickle), and each question typed by the user is
    answered by a "stuff" QA chain over the three most similar chunks.

    Returns:
        None. All output goes to the Streamlit page; OpenAI usage reported by
        the callback is printed to stdout.
    """
    # Local import: only needed to derive the cache key below.
    import hashlib

    st.header("LLM-powered PDF Chatbot 💬")

    # upload a PDF file
    pdf = st.file_uploader("Upload your PDF", type='pdf')
    if pdf is None:
        return

    pdf_reader = PdfReader(pdf)

    # extract_text() may give back None/"" for pages without a text layer
    # (e.g. scanned images); treat those pages as empty instead of crashing.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)

    # Strip any directory part and the extension from the uploaded name so it
    # is safe to use as a local file stem.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    st.write(f'{store_name}')

    # Key the cache on the file *content* as well as its name; otherwise a
    # different PDF uploaded under the same name would be answered from the
    # previous document's embeddings.
    digest = hashlib.sha256(pdf.getvalue()).hexdigest()[:16]
    cache_path = f"{store_name}-{digest}.pkl"

    if os.path.exists(cache_path):
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # This is only acceptable while the working directory is writable by
        # this app alone; never point it at files from an untrusted source.
        with open(cache_path, "rb") as f:
            vector_store = pickle.load(f)
    else:
        if not chunks:
            # Nothing to embed: building an index from zero texts would fail.
            st.warning("No extractable text was found in this PDF.")
            return
        embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)
        with open(cache_path, "wb") as f:
            pickle.dump(vector_store, f)

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file:")
    if not query:
        return

    docs = vector_store.similarity_search(query=query, k=3)

    llm = OpenAI()
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=query)
        # Token/cost accounting for this call, logged to the server console.
        print(cb)
    st.write(response)


if __name__ == '__main__':
    main()
71
+
72
# Static footer markup: Bootstrap stylesheet link plus author credit with
# LinkedIn and GitHub icon links.
_FOOTER_HTML = """
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
<footer>
<div style='visibility: visible;margin-top:7rem;justify-content:center;display:flex;'>
<p style="font-size:1.1rem;">
Made by Mohamed Shaad
&nbsp;
<a href="https://www.linkedin.com/in/mohamedshaad">
<svg xmlns="http://www.w3.org/2000/svg" width="23" height="23" fill="white" class="bi bi-linkedin" viewBox="0 0 16 16">
<path d="M0 1.146C0 .513.526 0 1.175 0h13.65C15.474 0 16 .513 16 1.146v13.708c0 .633-.526 1.146-1.175 1.146H1.175C.526 16 0 15.487 0 14.854V1.146zm4.943 12.248V6.169H2.542v7.225h2.401zm-1.2-8.212c.837 0 1.358-.554 1.358-1.248-.015-.709-.52-1.248-1.342-1.248-.822 0-1.359.54-1.359 1.248 0 .694.521 1.248 1.327 1.248h.016zm4.908 8.212V9.359c0-.216.016-.432.08-.586.173-.431.568-.878 1.232-.878.869 0 1.216.662 1.216 1.634v3.865h2.401V9.25c0-2.22-1.184-3.252-2.764-3.252-1.274 0-1.845.7-2.165 1.193v.025h-.016a5.54 5.54 0 0 1 .016-.025V6.169h-2.4c.03.678 0 7.225 0 7.225h2.4z"/>
</svg>
</a>
&nbsp;
<a href="https://github.com/shaadclt">
<svg xmlns="http://www.w3.org/2000/svg" width="23" height="23" fill="white" class="bi bi-github" viewBox="0 0 16 16">
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
</svg>
</a>
</p>
</div>
</footer>
"""

# CSS template for the page background; ``{url}`` and ``{opacity}`` are
# filled in per call, doubled braces are literal CSS braces.
_BACKGROUND_CSS = """
<style>
body {{
background: url('{url}') no-repeat center center fixed;
background-size: cover;
opacity: {opacity};
}}
</style>
"""


def set_bg_from_url(url, opacity=1):
    """Emit the author footer, then a ``<style>`` rule setting the body background.

    Args:
        url: Address of the image inserted into the CSS ``url('...')`` value.
            It is interpolated as-is, without escaping.
        opacity: Value written to the CSS ``opacity`` property of ``body``.
    """
    # The footer is written first, then the stylesheet, as two separate
    # markdown elements.
    st.markdown(_FOOTER_HTML, unsafe_allow_html=True)

    # Set background image using HTML and CSS
    page_css = _BACKGROUND_CSS.format(url=url, opacity=opacity)
    st.markdown(page_css, unsafe_allow_html=True)


# Set background image from URL
set_bg_from_url("https://www.1access.com/wp-content/uploads/2019/10/GettyImages-1180389186.jpg", opacity=0.875)