-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
98 lines (77 loc) · 3.45 KB
/
Copy pathapp.py
File metadata and controls
98 lines (77 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
import glob
from dotenv import load_dotenv
# Verified Modern LangChain Imports
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import gradio as gr
# 1. Environment configuration.
# Load variables from a local .env file (expected to hold the API key) before
# any client is constructed. override=True asks python-dotenv to let values
# from .env replace variables that are already set in the process environment.
load_dotenv(override=True)
def ingest_data():
    """Load every markdown file under ``data/`` into LangChain Documents.

    Task 1: Data Ingestion Pipeline.

    The ``data/`` directory (relative to the current working directory) is
    searched recursively for ``*.md`` entries. Matches are restricted to
    regular files and processed in sorted order: ``glob`` yields paths in a
    filesystem-dependent order, and a directory whose name ends in ``.md``
    would otherwise make ``open`` fail.

    Returns:
        A list with one ``Document`` per file. ``page_content`` is the whole
        file decoded as UTF-8 and ``metadata["source"]`` is the file path.
        If no markdown file is found, a single placeholder ``Document`` with
        source ``"none"`` is returned, so the result is never empty.

    Raises:
        OSError: If a matched file cannot be opened or read.
        UnicodeDecodeError: If a matched file is not valid UTF-8.
    """
    file_paths = sorted(
        path
        for path in glob.glob("data/**/*.md", recursive=True)
        if os.path.isfile(path)
    )
    if not file_paths:
        # Fallback keeps downstream code working when there is nothing to ingest.
        return [Document(page_content="I am a professional twin.", metadata={"source": "none"})]

    documents = []
    for path in file_paths:
        with open(path, "r", encoding="utf-8") as f:
            documents.append(Document(page_content=f.read(), metadata={"source": path}))
    return documents
def initialize_modern_rag():
    """Build the history-aware retrieval chain used to answer chat messages.

    Task 2: RAG Orchestration Setup.

    The pipeline is assembled in three parts:

    * the ingested documents are split into chunks, embedded and stored in a
      Chroma vector store that is exposed as a retriever;
    * Part A wraps that retriever so a follow-up question is first rewritten
      into a standalone question using the chat history;
    * Part B "stuffs" the retrieved chunks into a persona prompt and asks the
      chat model for the answer.

    Returns:
        The runnable created by ``create_retrieval_chain``. Invoke it with
        ``{"input": <question>, "chat_history": <list of messages>}``; the
        resulting dict carries the model's reply under ``"answer"``.
    """
    # A. Load and Split Documents
    docs = ingest_data()
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

    # Milestone 1 - split documents into overlapping chunks so that each
    # retrieved passage is small enough to fit comfortably in the prompt.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Every source file was empty; index the same placeholder text that
        # ingest_data() uses rather than building a store with no documents.
        chunks = [Document(page_content="I am a professional twin.", metadata={"source": "none"})]

    # B. Vector Storage
    # Milestone 2 - embed the chunks into Chroma and expose a retriever.
    vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # C. Language Model Initialization
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    # --- PART A: History Management ---
    # Milestone 3 - prompt that rewrites the latest user message into a
    # standalone question, so retrieval works for follow-ups like "and then?".
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question, which might "
        "reference context in the chat history, formulate a standalone "
        "question that can be understood without the chat history. "
        "Do NOT answer the question; just reformulate it if needed and "
        "otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Milestone 4 - retriever that applies the rewrite before searching.
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )

    # --- PART B: Answer Question (Persona & Context) ---
    # Milestone 5 - persona prompt. {context} receives the retrieved chunks;
    # the conversation so far is injected through the chat_history placeholder.
    qa_system_prompt = (
        "You are a professional digital twin: you speak in the first person "
        "on behalf of the person described in the retrieved context. "
        "Answer using only the context below and the conversation so far. "
        "If the answer is not in the context, say that you do not know "
        "instead of guessing. Keep answers concise and professional.\n\n"
        "Context:\n{context}"
    )
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Milestone 6 - chain that stuffs the retrieved documents into qa_prompt.
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    # --- PART C: Final RAG Chain ---
    # Milestone 7 - retrieval (Part A) feeding answer generation (Part B).
    return create_retrieval_chain(history_aware_retriever, question_answer_chain)
# Initialize global components.
# rag_chain holds whatever initialize_modern_rag() returns; it is built once,
# when this module is imported.
rag_chain = initialize_modern_rag()
# Running conversation log; chat_handler appends ("human", text) and
# ("assistant", text) tuples to it after every turn.
stateful_history = []
def chat_handler(message, history):
    """Answer one chat turn and record it in the module-level history.

    Args:
        message: The latest user message handed over by the chat UI.
        history: The conversation history supplied by the chat UI. It is not
            used; turns are tracked in ``stateful_history`` instead.

    Returns:
        The answer text for this turn. When ``rag_chain`` is ``None`` (the
        chain has not been built), a fixed placeholder message is returned.
    """
    # NOTE(review): stateful_history is a single module-level list, so every
    # caller of this handler in the same process shares one conversation.
    # Confirm whether per-session history (the `history` argument) is wanted.
    if rag_chain is None:
        # No chain available: keep the placeholder reply instead of crashing.
        response = {"answer": "I am waiting for you to complete the implementation in app.py!"}
    else:
        # Milestone 8 - pass a copy so the chain never sees the list being
        # mutated by the bookkeeping below.
        response = rag_chain.invoke(
            {"input": message, "chat_history": list(stateful_history)}
        )

    answer = response["answer"]

    # Update stateful history
    stateful_history.append(("human", message))
    stateful_history.append(("assistant", answer))
    return answer
# 3. UI Implementation
# Create the Blocks container with the Soft theme, then populate it: a title
# followed by a chat interface whose messages are routed to chat_handler.
demo = gr.Blocks(theme=gr.themes.Soft())
with demo:
    gr.Markdown("# 🤖 Project 2: RAG Orchestrator")
    gr.ChatInterface(fn=chat_handler, type="messages")

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()