app.py
import os
import warnings

warnings.filterwarnings("ignore")

from flask import Flask, request, render_template
from werkzeug.utils import secure_filename
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain_core.messages import HumanMessage, AIMessage
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings

from src.run_local import initialize_llm
# Download the embedding model on the first run; afterwards reuse the local copy
embedding_model_path = "BAAI_bge-base-en-v1.5"
if not os.path.exists(embedding_model_path):
    embedding_model = SentenceTransformer('BAAI/bge-base-en-v1.5', cache_folder=".")

embeddings = HuggingFaceEmbeddings(model_name=embedding_model_path,
                                   model_kwargs={'device': 'cpu'},
                                   encode_kwargs={'normalize_embeddings': True})
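# Note: normalize_embeddings=True yields unit-length vectors, so ranking by the store's
# default L2 distance is equivalent to ranking by cosine similarity for these embeddings.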
# In-memory chat history, shared across all requests handled by this process
chat_history = []
# Load the PDF file
def load_file(file_path):
    loader = PyPDFLoader(file_path)
    document = loader.load()
    return document
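# load_file returns a list of LangChain Document objects, typically one per PDF page,
# each with .page_content (the extracted text) and .metadata (e.g. source and page number).
# Illustrative check (the path is a hypothetical example, not shipped with this repo):
#   docs = load_file("data/example.pdf")
#   print(len(docs), docs[0].metadata)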
# Split the file into chunks and store them in the vector DB
def chunking_vectordb(document):
    # Split documents into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                                   chunk_overlap=200)
    text_chunk = text_splitter.split_documents(document)
    # Convert the text chunks into embeddings and create a FAISS vector store
    vector_store = FAISS.from_documents(text_chunk, embeddings)
    return vector_store
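# The 1000-character chunks with 200-character overlap keep some context across chunk
# boundaries. As a quick sanity check outside the web routes, the returned store can be
# queried directly (an illustrative sketch; the file path and query text are hypothetical):
#   vs = chunking_vectordb(load_file("data/example.pdf"))
#   hits = vs.similarity_search("What is this document about?", k=2)
#   print(hits[0].page_content[:200])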
template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow up Input: {question}
Standalone questions: """
CONDENSE_QUESTION_PROMPT = PromptTemplate(template= template, input_variables= ["question"])
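# ConversationalRetrievalChain first fills this prompt with the running chat history and
# the new user input, asks the LLM for a standalone rewrite (e.g. turning "what about its
# limitations?" into a question that names the topic explicitly), and only then retrieves
# chunks from the FAISS store using that rewritten question.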
application = Flask(__name__)
app = application
# Route for the home page
@app.route("/")
def index():
    return render_template("first.html")
@app.route("/start", methods= ["GET", "POST"])
def start():
if request.method == "POST":
os.makedirs("data", exist_ok=True)
file= request.files["file"]
print(file)
if file:
file_path = os.path.join("data/" + secure_filename(file.filename))
file.save(file_path)
document = load_file(file_path)
vector_store = chunking_vectordb(document)
vector_store.save_local("faiss")
return render_template("index.html")
@app.route("/get_answer", methods= ["GET", "POST"])
def get_answer():
if request.method == "POST":
user_input = request.form["question"]
llm = initialize_llm()
store = FAISS.load_local("faiss", embeddings, allow_dangerous_deserialization= True)
chain = ConversationalRetrievalChain.from_llm(llm= llm,retriever=store.as_retriever(search_kwargs={'k': 2}),
condense_question_prompt=CONDENSE_QUESTION_PROMPT,return_source_documents=True,
verbose=False)
result = chain.invoke({"question": user_input, "chat_history": chat_history})
chat_history.extend(
[
HumanMessage(content= user_input),
AIMessage(content=result["answer"])
]
)
print(f"Answer: {result['answer']}")
print(chat_history)
return render_template("index.html", results = str(result['answer']))
if __name__=="__main__":
app.run(host="0.0.0.0", port=5000, debug=True)
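# Run with `python app.py`; the app serves on http://0.0.0.0:5000 using Flask's debug
# server. Visiting "/" renders first.html (presumably the upload form posting to /start),
# after which questions can be asked from index.html via /get_answer.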