
Commit ae23d10

Added rag dependencies
1 parent 621db72 commit ae23d10

File tree: 3 files changed (+120 −8)


ReadMe.md (+114 −3)
@@ -1,7 +1,7 @@
 # llama-cpp-agent Framework

 ## Introduction
-The llama-cpp-agent framework is a tool designed for easy interaction with Large Language Models (LLMs). Allowing users to chat with LLM models, execute structured function calls and get structured output (objects).
+The llama-cpp-agent framework is a tool designed for easy interaction with Large Language Models (LLMs). It allows users to chat with LLM models, execute structured function calls, get structured output (objects) and perform retrieval augmented generation.

 It provides a simple yet robust interface and supports llama-cpp-python and OpenAI endpoints with GBNF grammar support (like the llama-cpp-python server) and the llama.cpp backend server.
 It works by generating a formal GGML-BNF grammar of the user defined structures and functions, which is then used by llama.cpp to generate text valid to that grammar. In contrast to most GBNF grammar generators it also supports nested objects, dictionaries, enums and lists of them.
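The grammar-generation step described in the line above can be illustrated with the `generate_gbnf_grammar_and_documentation` helper that the RAG example further down imports; a minimal sketch (the `Book` model is a hypothetical stand-in, not part of this commit):

```python
# Minimal sketch of the grammar-generation step: a Pydantic model is turned
# into a GBNF grammar (used to constrain llama.cpp sampling) plus a
# human-readable documentation string for the system prompt.
# The Book model is hypothetical; the helper is the same one the RAG
# example below imports.
from pydantic import BaseModel, Field

from llama_cpp_agent.gbnf_grammar_generator.gbnf_grammar_from_pydantic_models import (
    generate_gbnf_grammar_and_documentation,
)


class Book(BaseModel):
    """A single book entry."""
    title: str = Field(..., description="Title of the book.")
    author: str = Field(..., description="Author of the book.")


grammar, docs = generate_gbnf_grammar_and_documentation([Book])
print(grammar)  # GBNF rules constraining output to an object with "title" and "author"
print(docs)     # model documentation to paste into the system prompt
```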
@@ -10,6 +10,7 @@ It works by generating a formal GGML-BNF grammar of the user defined structures
 - **Simple Chat Interface**: Engage in seamless conversations with LLMs.
 - **Structured Output**: Get structured output (objects) from LLMs.
 - **Single and Parallel Function Calling**: Let the LLM execute functions.
+- **RAG - Retrieval Augmented Generation**: Perform retrieval augmented generation with ColBERT reranking.
 - **Flexibility**: Suited for various applications from casual chatting to specific function executions.

 ## Installation
@@ -75,7 +76,6 @@ while True:

 ```python
 # Example that uses the FunctionCallingAgent class to create a function calling agent.
-# Example that uses the FunctionCallingAgent class to create a function calling agent.
 import datetime
 from enum import Enum
 from typing import Union, Optional
@@ -111,7 +111,7 @@ class MathOperation(Enum):


 # llama-cpp-agent also supports "Instructor" library like function definitions as Pydantic models for function calling.
-# Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in system prompt.
+# Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in the system prompt.
 class calculator(BaseModel):
     """
     Perform a math operation on two numbers.
@@ -299,6 +299,117 @@ title='The Feynman Lectures on Physics' author='Richard Feynman, Robert B. Leigh

 ```

+### RAG - Retrieval Augmented Generation
+This example shows how to do RAG with ColBERT reranking.
+```python
+import json
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from ragatouille.utils import get_wikipedia_page
+
+from llama_cpp_agent.messages_formatter import MessagesFormatterType
+from llama_cpp_agent.llm_agent import LlamaCppAgent
+from llama_cpp_agent.gbnf_grammar_generator.gbnf_grammar_from_pydantic_models import (
+    generate_gbnf_grammar_and_documentation,
+)
+from llama_cpp_agent.providers.llama_cpp_endpoint_provider import (
+    LlamaCppEndpointSettings,
+)
+from llama_cpp_agent.rag.rag_colbert_reranker import RAGColbertReranker
+from llama_cpp_agent.rag.text_utils import RecursiveCharacterTextSplitter
+
+
+# Initialize the chromadb vector database with a ColBERT reranker.
+rag = RAGColbertReranker(persistent=False)
+
+# Initialize a recursive character text splitter with the correct chunk size of the embedding model.
+length_function = len
+splitter = RecursiveCharacterTextSplitter(
+    separators=["\n\n", "\n", " ", ""],
+    chunk_size=512,
+    chunk_overlap=0,
+    length_function=length_function,
+    keep_separator=True
+)
+
+# Use the ragatouille helper function to get the content of a Wikipedia page.
+page = get_wikipedia_page("Synthetic_diamond")
+
+# Split the text of the Wikipedia page into chunks for the vector database.
+splits = splitter.split_text(page)
+
+# Add the splits into the vector database.
+for split in splits:
+    rag.add_document(split)
+
+# Define the query we want to ask based on the retrieved information.
+query = "What is a BARS apparatus?"
+
+# Define a pydantic class to represent a query extension as additional queries to the original query.
+class QueryExtension(BaseModel):
+    """
+    Represents an extension of a query as additional queries.
+    """
+    queries: List[str] = Field(default_factory=list, description="List of queries.")
+
+
+# Generate a grammar and documentation of the query extension model.
+grammar, docs = generate_gbnf_grammar_and_documentation([QueryExtension])
+
+# Define a llama.cpp server endpoint.
+main_model = LlamaCppEndpointSettings(completions_endpoint_url="http://127.0.0.1:8080/completion")
+
+# Define a query extension agent which will extend the query with additional queries.
+query_extension_agent = LlamaCppAgent(
+    main_model,
+    debug_output=True,
+    system_prompt="You are a world class query extension algorithm capable of extending queries by writing new queries. Do not answer the queries, simply provide a list of additional queries in JSON format. Structure your output according to the following model:\n\n" + docs.strip(),
+    predefined_messages_formatter_type=MessagesFormatterType.MIXTRAL
+)
+
+# Perform the query extension with the agent.
+output = query_extension_agent.get_chat_response(
+    f"Consider the following query: {query}", grammar=grammar)
+
+# Load the query extension in JSON format and create an instance of the query extension model.
+queries = QueryExtension.model_validate(json.loads(output))
+
+# Define the final prompt for the query with the retrieved information.
+prompt = "Consider the following context:\n==========Context===========\n"
+
+# Retrieve the most fitting document chunks based on the original query and add them to the prompt.
+documents = rag.retrieve_documents(query, k=3)
+for doc in documents:
+    prompt += doc["content"] + "\n\n"
+
+# Retrieve the most fitting document chunks based on the extended queries and add them to the prompt.
+for qu in queries.queries:
+    documents = rag.retrieve_documents(qu, k=3)
+    for doc in documents:
+        if doc["content"] not in prompt:
+            prompt += doc["content"] + "\n\n"
+prompt += "\n======================\nQuestion: " + query
+
+# Define a new agent to answer the original query based on the retrieved information.
+agent_with_rag_information = LlamaCppAgent(
+    main_model,
+    debug_output=True,
+    system_prompt="You are an advanced AI assistant, trained by OpenAI. Only answer questions based on the context information provided.",
+    predefined_messages_formatter_type=MessagesFormatterType.MIXTRAL
+)
+
+# Ask the agent the original query with the generated prompt that contains the retrieved information.
+agent_with_rag_information.get_chat_response(prompt)
+
+```
+Example output
+```text
+BARS (Bridgman-Anvil High Pressure Reactor System) apparatus is a type of diamond-producing press used in the HPHT (High Pressure High Temperature) method for synthetic diamond growth. It consists of a ceramic cylindrical "synthesis capsule" placed in a cube of pressure-transmitting material, which is pressed by inner anvils and outer anvils. The whole assembly is locked in a disc-type barrel filled with oil, which pressurizes upon heating, and the oil pressure is transferred to the central cell. The BARS apparatus is claimed to be the most compact, efficient, and economical press design for diamond synthesis.
+```

 ### Knowledge Graph Creation Example
 This example, based on an example of the Instructor library for OpenAI,
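The core of the added example is the grammar-constrained query extension. A minimal sketch of that round trip (the raw `output` string here is hypothetical; in the example it comes from the agent, with the generated GBNF grammar guaranteeing JSON of this shape):

```python
# Illustrative round trip of the query extension step above. The output
# string is hypothetical; in the example it is produced by the agent, and
# the generated grammar ensures it parses as JSON matching the model.
import json
from typing import List

from pydantic import BaseModel, Field


class QueryExtension(BaseModel):
    """Represents an extension of a query as additional queries."""
    queries: List[str] = Field(default_factory=list, description="List of queries.")


output = '{"queries": ["How does a BARS press work?", "How are synthetic diamonds produced at high pressure?"]}'
queries = QueryExtension.model_validate(json.loads(output))
print(queries.queries)
# ['How does a BARS press work?', 'How are synthetic diamonds produced at high pressure?']
```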

examples/06_Rag/example_synthetic_diamonds_bars.py (+4 −4)

@@ -75,7 +75,7 @@ class QueryExtension(BaseModel):
 query_extension_agent = LlamaCppAgent(
     main_model,
     debug_output=True,
-    system_prompt="You are a world class query extension algorithm capable of extending queries by writing new queries. Do not answer the queries, simply provide a list of additional queries in JSON format. Structure your output according to the following model:\n\n" + docs,
+    system_prompt="You are a world class query extension algorithm capable of extending queries by writing new queries. Do not answer the queries, simply provide a list of additional queries in JSON format. Structure your output according to the following model:\n\n" + docs.strip(),
     predefined_messages_formatter_type=MessagesFormatterType.MIXTRAL
 )

@@ -90,13 +90,13 @@ class QueryExtension(BaseModel):
 prompt = "Consider the following context:\n==========Context===========\n"

 # Retrieve the most fitting document chunks based on the original query and add them to the prompt.
-documents = rag.retrieve_documents(query, k=2)
+documents = rag.retrieve_documents(query, k=3)
 for doc in documents:
     prompt += doc["content"] + "\n\n"

 # Retrieve the most fitting document chunks based on the extended queries and add them to the prompt.
 for qu in queries.queries:
-    documents = rag.retrieve_documents(qu, k=2)
+    documents = rag.retrieve_documents(qu, k=3)
     for doc in documents:
         if doc["content"] not in prompt:
             prompt += doc["content"] + "\n\n"

@@ -106,7 +106,7 @@ class QueryExtension(BaseModel):
 agent_with_rag_information = LlamaCppAgent(
     main_model,
     debug_output=True,
-    system_prompt="You are an advanced AI assistant, trained by OpenAI.",
+    system_prompt="You are an advanced AI assistant, trained by OpenAI. Only answer questions based on the context information provided.",
     predefined_messages_formatter_type=MessagesFormatterType.MIXTRAL
 )

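A note on the dedup logic in the loops changed above: repeats are filtered by substring-testing the growing prompt (`doc["content"] not in prompt`). An equivalent sketch using a set of seen chunks, assuming `retrieve_documents` returns dicts with a "content" key as the example implies:

```python
# Sketch of an alternative dedup strategy for the retrieval loops above:
# track already-added chunks in a set instead of substring-testing the
# growing prompt string. Assumes rag.retrieve_documents(...) returns a list
# of dicts with a "content" key, as the example code implies.
def build_context_prompt(rag, query: str, extra_queries: list[str], k: int = 3) -> str:
    seen: set[str] = set()   # chunk texts already included
    chunks: list[str] = []   # unique chunks, in retrieval order
    for q in [query] + extra_queries:
        for doc in rag.retrieve_documents(q, k=k):
            if doc["content"] not in seen:
                seen.add(doc["content"])
                chunks.append(doc["content"])
    return (
        "Consider the following context:\n==========Context===========\n"
        + "\n\n".join(chunks)
        + "\n======================\nQuestion: " + query
    )
```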
pyproject.toml (+2 −1)

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "llama-cpp-agent"
-version = "0.0.26"
+version = "0.0.27"
 description = "A framework for building LLM based AI agents with llama-cpp-python."

 readme = "ReadMe.md"

@@ -24,6 +24,7 @@ email = "[email protected]"

 [project.optional-dependencies]
 agent_memory = ["chromadb", "SQLAlchemy", "numpy", "scipy"]
+rag = ["ragatouille"]

 [project.urls]
 Homepage = "https://github.com/Maximilian-Winter/llama-cpp-agent"
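With the new optional-dependency group, the RAG extra should be installable via pip, assuming the 0.0.27 release is published with it:

```text
pip install "llama-cpp-agent[rag]"
```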

0 commit comments
