# main.py — minimal Retrieval-Augmented Generation (RAG) example built with LangChain.
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document
# Load environment variables (the OpenAI API key) from a .env file;
# see .env.example for the expected contents.
load_dotenv()

# ------------------------------------------------------------------
# 1. Build the Knowledge Base
# ------------------------------------------------------------------
# A production app would ingest PDFs, web pages, or database rows here.
# This demo hard-codes three short in-memory documents instead.
knowledge_docs = [
    Document(page_content="LangChain is a framework for developing applications powered by language models."),
    Document(page_content="The core implementation of LangChain is in Python and JavaScript."),
    Document(page_content="RAG stands for Retrieval-Augmented Generation."),
]

# Embed each document with OpenAI's embedding model and index the vectors
# in an in-memory ChromaDB store so they can be searched by semantic
# similarity rather than exact keywords.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=knowledge_docs, embedding=embedding_model)

# Wrap the store in a retriever: given a query string, it returns the
# most relevant documents. This is the "search engine" of the pipeline.
retriever = vectorstore.as_retriever()
# ------------------------------------------------------------------
# 2. Define the Components
# ------------------------------------------------------------------
# Building blocks for the RAG chain: a document formatter, a prompt
# template, and the language model itself.


def format_docs(input_docs: list[Document]) -> str:
    """Collapse retrieved documents into a single context string.

    Each document's text is separated from the next by a blank line so
    the LLM can tell individual sources apart.
    """
    snippets = [item.page_content for item in input_docs]
    return "\n\n".join(snippets)
# Prompt template: instructs the model to answer *only* from the supplied
# context — the core RAG guard against hallucination. Two slots are filled
# at run time: {context} (retrieved documents) and {question} (user input).
PROMPT_TEMPLATE = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

# The LLM that writes the final answer. gpt-4o-mini is fast and
# cost-effective; swap in gpt-4o, gpt-4, etc. for more capability.
llm = ChatOpenAI(model="gpt-4o-mini")
# ------------------------------------------------------------------
# 3. Assemble the Chain (LCEL — LangChain Expression Language)
# ------------------------------------------------------------------
# The pipe operator (|) composes runnables left to right:
#   1. The incoming question fans out into a dict:
#        "context"  -> retriever fetches docs, format_docs joins them
#        "question" -> RunnablePassthrough() forwards the raw question
#   2. The dict fills both slots of the prompt template.
#   3. The rendered prompt goes to the LLM.
#   4. StrOutputParser() extracts the model's reply as a plain string.
#
# Example for "What is LangChain?":
#   input -> {context: [relevant docs], question: "What is LangChain?"}
#   -> prompt with context + question -> LLM -> string answer
gather_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = gather_inputs | prompt | llm | StrOutputParser()
# ------------------------------------------------------------------
# 4. Run the RAG System
# ------------------------------------------------------------------
# Read a question from the console, push it through the full pipeline
# (retrieve -> format -> prompt -> generate -> parse), and print the
# resulting answer.
#
# Questions worth trying:
#   - "What is LangChain?"
#   - "What does RAG mean?"
#   - "What languages is LangChain available in?"
user_question = input("Your question: ")
answer = rag_chain.invoke(user_question)
print("\nAnswer:")
print(answer)