-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.py
99 lines (72 loc) · 3.27 KB
/
index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.tools.retriever import create_retriever_tool
from dotenv import load_dotenv
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.agents import AgentExecutor, create_tool_calling_agent
import os
# Module-level setup: both statements run once, at import time.
# NOTE(review): KMP_DUPLICATE_LIB_OK presumably works around an abort caused by
# more than one OpenMP runtime being loaded - confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
# Shared spaCy embedding model ("en_core_web_sm"). The same object is used when
# the FAISS index is built (vector_store) and when it is loaded (user_input).
embeddings = SpacyEmbeddings(model_name="en_core_web_sm")
def pdf_read(pdf_doc):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_doc: Iterable of PDF sources accepted by ``PdfReader`` (in this
            app, the files returned by the Streamlit uploader). ``None`` or
            an empty iterable is treated as "no documents".

    Returns:
        One string holding the text of all pages, in document and page
        order, or ``""`` when there is nothing to read.
    """
    if not pdf_doc:
        return ""

    page_texts = []
    for pdf in pdf_doc:
        # ``or ""`` keeps the join safe if extract_text() returns a falsy
        # value (e.g. None) for a page without extractable text.
        page_texts.extend(
            page.extract_text() or "" for page in PdfReader(pdf).pages
        )
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)
def get_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split text into overlapping chunks suitable for embedding.

    Args:
        text: The raw text to split.
        chunk_size: Maximum chunk size handed to
            ``RecursiveCharacterTextSplitter`` (default 1000, the value this
            function previously hard-coded).
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter (default 200, the previously hard-coded value).

    Returns:
        A list of text chunks; ``[]`` when ``text`` is empty or ``None``.
    """
    # Nothing to split: skip building the splitter altogether.
    if not text:
        return []

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
def vector_store(text_chunks):
    """Embed the chunks into a FAISS index and persist it to ``faiss_db``.

    Args:
        text_chunks: Non-empty list of text chunks to embed with the
            module-level ``embeddings`` model.

    Raises:
        ValueError: If ``text_chunks`` is empty, since an index cannot be
            built from no texts.
    """
    # Fail early with a clear message instead of an obscure error raised
    # from inside the FAISS index construction.
    if not text_chunks:
        raise ValueError("Cannot build the FAISS index: no text chunks were provided.")

    # Named ``index`` so the local does not shadow this function's own name.
    index = FAISS.from_texts(text_chunks, embedding=embeddings)
    index.save_local("faiss_db")
def get_conversational_chain(tools, ques):
    """Run a tool-calling agent on the question and show its reply.

    Args:
        tools: A single tool, or a list/tuple of tools, that the agent may
            call (this app passes the PDF retriever tool).
        ques: The user's question; passed to the agent as ``input``.

    Side effects:
        Prints the raw agent response to stdout and writes the reply to the
        Streamlit page. If no OpenAI API key is configured, shows a Streamlit
        error and returns without calling the model.
    """
    # Pull variables from a local .env file (if any) into the environment so
    # the key never has to be hard-coded in this file.
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set. Add it to your environment or to a .env file.")
        return

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, api_key=api_key)

    # Same system prompt text as before, including the line break after
    # "not in", written as adjacent literals to keep the source lines short.
    system_prompt = (
        "You are a helpful assistant. Answer the question as detailed as possible "
        "from the provided context, make sure to provide all the details, if the "
        "answer is not in\n"
        'provided context just say, "answer is not available in the context", '
        "don't provide the wrong answer"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("placeholder", "{chat_history}"),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ]
    )

    # Accept either one tool (the original calling convention) or a
    # ready-made collection of tools.
    tool_list = list(tools) if isinstance(tools, (list, tuple)) else [tools]

    agent = create_tool_calling_agent(llm, tool_list, prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tool_list, verbose=True)
    response = agent_executor.invoke({"input": ques})
    print(response)
    st.write("Reply: ", response['output'])
def user_input(user_question):
    """Answer a question using the FAISS index saved in ``faiss_db``.

    Loads the saved index, wraps its retriever as the ``pdf_extractor`` tool
    and hands that tool plus the question to ``get_conversational_chain``.

    Args:
        user_question: The question typed by the user.

    Side effects:
        Shows a Streamlit warning and returns early when no index has been
        saved yet (a question was asked before any PDF was processed).
    """
    # load_local fails if the index folder is missing, so check first and
    # tell the user what to do instead of surfacing a traceback.
    if not os.path.isdir("faiss_db"):
        st.warning("No PDF index found. Upload your PDF files and click 'Submit & Process' first.")
        return

    # SECURITY: allow_dangerous_deserialization=True permits unpickling data
    # stored alongside the index. Acceptable only because "faiss_db" is
    # written by this app itself (see vector_store); never load an index
    # obtained from an untrusted source this way.
    new_db = FAISS.load_local(
        "faiss_db",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    retriever = new_db.as_retriever()
    retrieval_chain = create_retriever_tool(
        retriever,
        "pdf_extractor",
        "This tool is to give answer to queries from the pdf",
    )
    get_conversational_chain(retrieval_chain, user_question)
def main():
    """Render the Streamlit page: a question box plus a PDF upload sidebar.

    A non-empty question is answered through ``user_input``. In the sidebar,
    clicking "Submit & Process" reads the uploaded PDFs, splits their text
    into chunks and saves the resulting index through ``vector_store``.
    """
    st.set_page_config("Chat PDF")
    st.header("RAG based Chat with PDF")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_doc = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
            # Only PDFs can be parsed by pdf_read, so reject other uploads.
            type=["pdf"],
        )
        if st.button("Submit & Process"):
            if not pdf_doc:
                # Clicking the button with nothing uploaded would otherwise
                # reach the index build with no text at all.
                st.warning("Please upload at least one PDF file before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = pdf_read(pdf_doc)
                    text_chunks = get_chunks(raw_text)
                    # Only build the index when there is something to embed.
                    if text_chunks:
                        vector_store(text_chunks)
                if text_chunks:
                    st.success("Done")
                else:
                    st.error("No extractable text was found in the uploaded PDF files.")


# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()