Skip to content

Commit e024112

Browse files
committed
Merge branch 'patch-6'
2 parents 52eca33 + fb1c17c commit e024112

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed

examples/lightrag_vllm_demo.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
"""
2+
LightRAG Demo with vLLM (LLM, Embeddings, and Reranker)
3+
4+
This example demonstrates how to use LightRAG with:
5+
- vLLM-served LLM (OpenAI-compatible API)
6+
- vLLM-served embedding model
7+
- Jina-compatible reranker (also vLLM-served)
8+
9+
Prerequisites:
10+
1. Create a .env file or export environment variables:
11+
- LLM_MODEL
12+
- LLM_BINDING_HOST
13+
- LLM_BINDING_API_KEY
14+
- EMBEDDING_MODEL
15+
- EMBEDDING_BINDING_HOST
16+
- EMBEDDING_BINDING_API_KEY
17+
- EMBEDDING_DIM
18+
- EMBEDDING_TOKEN_LIMIT
19+
- RERANK_MODEL
20+
- RERANK_BINDING_HOST
21+
- RERANK_BINDING_API_KEY
22+
23+
2. Prepare a text file to index (default: Data/book-small.txt)
24+
25+
3. Configure storage backends via environment variables or modify
26+
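the storage parameters in initialize_rag() below. The variables are
KV_STORAGE, DOC_STATUS_STORAGE, VECTOR_STORAGE and GRAPH_STORAGE; when
unset they default to PGKVStorage, PGDocStatusStorage, PGVectorStorage
and Neo4JStorage, so those backends must be reachable unless overridden.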
27+
28+
Usage:
29+
python examples/lightrag_vllm_demo.py
30+
"""
31+
32+
import os
33+
import asyncio
34+
from functools import partial
35+
from dotenv import load_dotenv
36+
37+
from lightrag import LightRAG, QueryParam
38+
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
39+
from lightrag.utils import EmbeddingFunc
40+
from lightrag.rerank import jina_rerank
41+
42+
load_dotenv()
43+
44+
# --------------------------------------------------
45+
# Constants
46+
# --------------------------------------------------
47+
48+
# Passed to LightRAG as its ``working_dir`` in initialize_rag().
WORKING_DIR = "./LightRAG_Data"
# Text file that main() checks for, reads, and indexes.
BOOK_FILE = "Data/book-small.txt"
50+
51+
# --------------------------------------------------
52+
# LLM function (vLLM, OpenAI-compatible)
53+
# --------------------------------------------------
54+
55+
56+
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, **kwargs
) -> str:
    """Send a completion request to the OpenAI-compatible LLM endpoint.

    The model name, base URL and API key are read from the environment on
    every call (``LLM_MODEL``, ``LLM_BINDING_HOST``, ``LLM_BINDING_API_KEY``);
    each has a fallback value used when the variable is unset.

    Args:
        prompt: Prompt text forwarded to the model.
        system_prompt: Optional system prompt.
        history_messages: Optional list of earlier messages. ``None`` (the
            default) is treated as an empty history.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``openai_complete_if_cache``.

    Returns:
        Whatever ``openai_complete_if_cache`` returns for this request.
    """
    # A ``[]`` default would be a single list object shared by every call;
    # create a fresh list per call instead.
    if history_messages is None:
        history_messages = []

    return await openai_complete_if_cache(
        model=os.getenv("LLM_MODEL", "Qwen/Qwen3-14B-AWQ"),
        prompt=prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        base_url=os.getenv("LLM_BINDING_HOST", "http://0.0.0.0:4646/v1"),
        api_key=os.getenv("LLM_BINDING_API_KEY", "not_needed"),
        timeout=600,
        **kwargs,
    )
69+
70+
71+
# --------------------------------------------------
72+
# Embedding function (vLLM)
73+
# --------------------------------------------------
74+
75+
# Embedding function handed to LightRAG. All settings are read from the
# environment once, at import time, with fallbacks when a variable is unset.
vLLM_emb_func = EmbeddingFunc(
    embedding_dim=int(os.getenv("EMBEDDING_DIM", 1024)),
    max_token_size=int(os.getenv("EMBEDDING_TOKEN_LIMIT", 4096)),
    model_name=os.getenv("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B"),
    send_dimensions=False,
    # NOTE(review): ``openai_embed.func`` presumably is the undecorated
    # embedding callable -- confirm against lightrag.llm.openai.
    func=partial(
        openai_embed.func,
        api_key=os.getenv("EMBEDDING_BINDING_API_KEY", "not_needed"),
        base_url=os.getenv("EMBEDDING_BINDING_HOST", "http://0.0.0.0:1234/v1"),
        model=os.getenv("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B"),
    ),
)
90+
91+
# --------------------------------------------------
92+
# Reranker (Jina-compatible, vLLM-served)
93+
# --------------------------------------------------
94+
95+
# ``jina_rerank`` with model, endpoint and API key pre-bound from the
# environment at import time. No fallback is given for the API key, so it is
# ``None`` when RERANK_BINDING_API_KEY is unset.
jina_rerank_model_func = partial(
    jina_rerank,
    base_url=os.getenv("RERANK_BINDING_HOST", "http://0.0.0.0:3535/v1/rerank"),
    api_key=os.getenv("RERANK_BINDING_API_KEY"),
    model=os.getenv("RERANK_MODEL", "Qwen/Qwen3-Reranker-0.6B"),
)
104+
105+
# --------------------------------------------------
106+
# Initialize RAG
107+
# --------------------------------------------------
108+
109+
110+
async def initialize_rag():
    """Build the demo LightRAG instance and initialize its storages.

    The LLM, embedding and rerank callables defined in this module are wired
    in, and each storage backend name is taken from its environment variable
    (``KV_STORAGE``, ``DOC_STATUS_STORAGE``, ``VECTOR_STORAGE``,
    ``GRAPH_STORAGE``) with a fallback when unset.

    Returns:
        The LightRAG instance, after ``initialize_storages()`` has completed.
    """
    # Storage backends (configurable via environment or modify here)
    storage_backends = {
        "kv_storage": os.getenv("KV_STORAGE", "PGKVStorage"),
        "doc_status_storage": os.getenv("DOC_STATUS_STORAGE", "PGDocStatusStorage"),
        "vector_storage": os.getenv("VECTOR_STORAGE", "PGVectorStorage"),
        "graph_storage": os.getenv("GRAPH_STORAGE", "Neo4JStorage"),
    }

    instance = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=vLLM_emb_func,
        rerank_model_func=jina_rerank_model_func,
        **storage_backends,
    )
    await instance.initialize_storages()
    return instance
125+
126+
127+
# --------------------------------------------------
128+
# Main
129+
# --------------------------------------------------
130+
131+
132+
async def main():
    """Index BOOK_FILE, then run one hybrid query with reranking enabled.

    Any exception raised along the way is reported on stdout instead of being
    propagated. Storages are finalized whenever a LightRAG instance was
    created, whether or not the run succeeded.
    """
    rag = None
    try:
        # Validate the book file exists before any storage is initialized.
        if not os.path.exists(BOOK_FILE):
            raise FileNotFoundError(
                f"'{BOOK_FILE}' not found. Please provide a text file to index."
            )

        rag = await initialize_rag()

        # --------------------------------------------------
        # Data Ingestion
        # --------------------------------------------------
        print(f"Indexing {BOOK_FILE}...")
        # Read the text first so the file handle is closed before awaiting
        # the insert, rather than being held open across it.
        with open(BOOK_FILE, "r", encoding="utf-8") as f:
            book_text = f.read()
        await rag.ainsert(book_text)
        print("Indexing complete.")

        # --------------------------------------------------
        # Query
        # --------------------------------------------------
        query = (
            "What are the main themes of the book, and how do the key characters "
            "evolve throughout the story?"
        )

        print("\nHybrid Search with Reranking:")
        result = await rag.aquery(
            query,
            param=QueryParam(
                mode="hybrid",
                stream=False,
                enable_rerank=True,
            ),
        )

        print("\nResult:\n", result)

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Identity check: only "was an instance created?" matters here, not
        # whatever truth value the object might define.
        if rag is not None:
            await rag.finalize_storages()
176+
177+
178+
if __name__ == "__main__":
    # Script entry point: drive the async demo to completion on a new event
    # loop. main() reports its own errors on stdout, so the closing message
    # below is printed after failed runs as well as successful ones.
    asyncio.run(main())
    print("\nDone!")

0 commit comments

Comments
 (0)