Commit 7da7b78

docs: Add agent integration guide - comprehensive guide for AI agents using RAGPipe for knowledge retrieval and understanding
1 parent c867b6e commit 7da7b78

1 file changed

Lines changed: 333 additions & 0 deletions

File tree

docs/agent_integration.md

# Agent Integration with RAGPipe

RAGPipe is designed for AI agents that need to access and understand large knowledge bases. This guide shows how to integrate RAGPipe into your agent workflows.

## Quick Start

```python
import ragpipe

# Ingest your knowledge base
ragpipe.ingest(
    "./docs",
    sink="qdrant",
    collection="my-knowledge"
)

# Query from your agent
def ask_agent(question: str) -> str:
    results = ragpipe.query(
        question,
        sink="qdrant",
        collection="my-knowledge",
        top_k=3
    )

    context = "\n\n".join([r.content for r in results])

    # Send the question plus retrieved context to your LLM
    # (`llm` is whatever LLM client your agent already uses)
    prompt = f"""
Context:
{context}

Question: {question}
"""

    return llm.complete(prompt)
```

## Use Cases for Agents

### 1. Codebase Understanding

Agents can query your entire codebase to understand context before making changes:

```python
# Index your code
ragpipe.ingest(
    "./src",
    transforms=[
        ragpipe.RecursiveChunker(chunk_size=512),
        ragpipe.AutoEmbed()
    ],
    sink="qdrant"
)

# Agent queries before making changes
context = ragpipe.query("How does authentication work?", top_k=5)
# Agent uses this context to understand existing patterns
```

### 2. Documentation Assistant

Build agents that can answer questions about your docs:

```python
# Ingest all documentation
ragpipe.ingest(
    ragpipe.GitSource("https://github.com/owner/repo"),
    transforms=[
        ragpipe.HTMLCleaner(),
        ragpipe.RecursiveChunker(chunk_size=768),
        ragpipe.AutoEmbed()
    ],
    sink="qdrant"
)

# Agent answers user questions
def answer_docs_question(question: str) -> str:
    results = ragpipe.query(question, top_k=3)

    # Build the bullet list outside the f-string (f-string expressions
    # cannot contain backslashes before Python 3.12)
    bullets = "\n".join(f"- {r.content}" for r in results)

    response = f"""
Based on the documentation:

{bullets}

In summary: {summarize(results, question)}
"""

    return response
```

### 3. Continuous Learning

Agents can continuously update their knowledge base:

```python
def on_new_data(file_path: str):
    # Ingest new data immediately
    ragpipe.ingest(
        file_path,
        sink="qdrant",
        collection="live-knowledge"
    )

    # Agent now knows about this data
    print("Knowledge base updated. Agent is smarter now.")
```
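
The hook above still needs something to call it. A minimal sketch, assuming the `watchdog` package is installed and using an illustrative `./incoming-docs` directory, that ingests files as they land:

```python
import time

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class IngestOnCreate(FileSystemEventHandler):
    def on_created(self, event):
        if not event.is_directory:  # only ingest files, not new folders
            on_new_data(event.src_path)

observer = Observer()
observer.schedule(IngestOnCreate(), "./incoming-docs", recursive=True)  # assumed path
observer.start()
try:
    while True:
        time.sleep(1)  # keep the watcher alive
except KeyboardInterrupt:
    observer.stop()
observer.join()
```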

### 4. Multi-Agent Coordination

Multiple agents can share a knowledge base:

```python
# Shared knowledge index
ragpipe.ingest(
    "./shared-knowledge",
    sink="qdrant",
    collection="team-knowledge"
)

# All agents query the same knowledge base
def expert_agent(question: str, expertise: str):
    results = ragpipe.query(
        f"{question} (focus on {expertise})",
        sink="qdrant",
        collection="team-knowledge",
        top_k=5
    )

    # Specialized agent reasoning
    return specialized_reasoning(results, expertise)
```
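
A hypothetical way to use this: fan one question out to several experts that all read the shared collection (the expertise labels are illustrative):

```python
question = "How should we handle rate limiting?"
expertises = ["security", "performance", "api-design"]  # illustrative labels

# Each expert retrieves from the same "team-knowledge" collection
answers = {e: expert_agent(question, e) for e in expertises}
```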

## Agent-Friendly Features

### Zero Configuration

```python
# Just works - no setup needed
ragpipe.ingest("./docs")
ragpipe.query("How do I deploy?")
```

### Fast Queries

```python
# Sub-second queries for real-time agent responses
results = ragpipe.query(question, top_k=3)
# Typical: 50-200ms on Qdrant with 100K docs
```
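
Latency depends on your hardware and collection size; a quick sketch for measuring it yourself (assuming query results support `len()`):

```python
import time

start = time.perf_counter()
results = ragpipe.query("How do I deploy?", top_k=3)
elapsed_ms = (time.perf_counter() - start) * 1000
print(f"Retrieved {len(results)} results in {elapsed_ms:.0f} ms")
```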

### Multiple Embedding Models

```python
# Use different models for different use cases

# Fast for chat agents
ragpipe.ingest(
    "./docs",
    embed_model="sentence-transformers/all-MiniLM-L6-v2"  # Fast
)

# Accurate for code agents
ragpipe.ingest(
    "./src",
    embed_model="jinaai/jina-embeddings-v2-base-code"  # Code-aware
)
```

Query a collection with the same embedding model it was ingested with; vectors produced by different models are not comparable.

### Incremental Updates

```python
# Agents can update knowledge incrementally
ragpipe.ingest(
    "./new-docs",
    sink="qdrant",
    collection="my-knowledge",
    mode="append"  # Don't re-index everything
)
```

## Integration Patterns

### Pattern 1: Context-Enhanced Generation

```python
def agent_with_rag(query: str) -> str:
    # 1. Retrieve relevant context
    context = ragpipe.query(query, top_k=5)

    # 2. Build prompt with context
    prompt = f"""
You are an AI assistant with access to a knowledge base.

RELEVANT CONTEXT:
{format_context(context)}

USER QUESTION:
{query}

Answer based on the context above. If the context doesn't contain enough information, say so.
"""

    # 3. Generate response
    return llm.complete(prompt)
```
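
`format_context` is left to you. A minimal sketch, assuming each result exposes `.content` and, optionally, a `metadata` dict with a `source` entry:

```python
def format_context(results: list) -> str:
    # Number each chunk and keep a source hint when metadata is present
    blocks = []
    for i, r in enumerate(results, start=1):
        meta = getattr(r, "metadata", None) or {}
        source = meta.get("source", "unknown")
        blocks.append(f"[{i}] (source: {source})\n{r.content}")
    return "\n\n".join(blocks)
```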

### Pattern 2: Fact-Checking Agent

```python
def fact_check_agent(statement: str) -> dict:
    # Retrieve facts
    facts = ragpipe.query(extract_key_terms(statement), top_k=10)

    # Verify statement against facts
    verification = verify_against_facts(statement, facts)

    return {
        "statement": statement,
        "is_factual": verification.is_accurate,
        "confidence": verification.confidence,
        "supporting_evidence": [f.content for f in verification.supporting_facts]
    }
```

### Pattern 3: Exploration Agent

```python
def exploration_agent(topic: str) -> list[str]:
    # Explore related topics
    related = ragpipe.query(topic, top_k=20)

    # Extract key themes
    themes = extract_themes(related)

    # Return exploration paths
    return [f"{theme} - explore this topic" for theme in themes]
```

## Performance Tips for Agents

### 1. Use Efficient Chunk Sizes

```python
# For chat agents: smaller chunks, more context
ragpipe.ingest(
    docs,
    transforms=[ragpipe.RecursiveChunker(chunk_size=256)]
)

# For code agents: larger chunks, less noise
ragpipe.ingest(
    code,
    transforms=[ragpipe.RecursiveChunker(chunk_size=1024)]
)
```

### 2. Cache Common Queries

```python
from functools import lru_cache

@lru_cache(maxsize=100)
def cached_query(question: str) -> list:
    return ragpipe.query(question, top_k=5)
```
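
One caveat: `lru_cache` does not know when the collection changes, so clear it after re-ingesting to avoid serving stale context:

```python
# After updating the knowledge base, drop any cached query results
ragpipe.ingest("./new-docs", sink="qdrant", collection="my-knowledge", mode="append")
cached_query.cache_clear()
```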

### 3. Batch Queries

```python
# Agent can retrieve multiple contexts at once
def agent_research(questions: list[str]) -> dict[str, list]:
    return {q: ragpipe.query(q) for q in questions}
```
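
The comprehension above runs queries sequentially. If `ragpipe.query` is safe to call from multiple threads (an assumption worth verifying), a thread pool can overlap the round-trips to the vector store:

```python
from concurrent.futures import ThreadPoolExecutor

def agent_research_parallel(questions: list[str]) -> dict[str, list]:
    # Run up to 8 queries concurrently and key the results by question
    with ThreadPoolExecutor(max_workers=8) as pool:
        answers = pool.map(lambda q: ragpipe.query(q, top_k=5), questions)
        return dict(zip(questions, answers))
```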

## Full Example: Autonomous Documentation Agent

```python
import ragpipe
from some_llm_library import LLM  # placeholder: use your LLM client here

class DocumentationAgent:
    def __init__(self, docs_path: str):
        # Initialize knowledge base
        ragpipe.ingest(
            docs_path,
            transforms=[
                ragpipe.HTMLCleaner(),
                ragpipe.RecursiveChunker(chunk_size=768),
                ragpipe.AutoEmbed()
            ],
            sink="qdrant"
        )

        self.llm = LLM(model="gpt-4")

    def ask(self, question: str) -> str:
        # Retrieve context
        context = ragpipe.query(question, top_k=5)

        # Generate response
        prompt = self._build_prompt(question, context)
        response = self.llm.complete(prompt)

        return response

    def _build_prompt(self, question: str, context: list) -> str:
        return f"""
You are a documentation assistant.

RELEVANT DOCS:
{self._format_context(context)}

QUESTION:
{question}

Provide a helpful answer based on the documentation above.
Include code examples if relevant.
"""

    def _format_context(self, context: list) -> str:
        return "\n\n".join([f"- {doc.content}" for doc in context])

# Usage
agent = DocumentationAgent("./docs")
print(agent.ask("How do I deploy to production?"))
```

## Next Steps

- See [Examples](../examples/) for more patterns
- Check [Pipeline API](pipeline.md) for advanced usage
- Review [Sources](sources.md), [Transforms](transforms.md), and [Sinks](sinks.md)
