Skip to content

Commit 505c054

Browse files
committed
Create a new vector store when VECTOR_STORE_ID is not provided, and write the new ID back to .env once it has been created
1 parent 64e408b commit 505c054

3 files changed

Lines changed: 55 additions & 26 deletions

File tree

agents/community/langgraph_agentic_rag/data/load_documents.py

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""
2-
Script to load documents from text files into Milvus Lite vector store.
2+
Script to load documents from text files into a vector store via LlamaStack.
33
4-
This script reads text files from the data directory, splits them into chunks,
5-
creates embeddings, and stores them in a Milvus Lite vector database.
4+
If VECTOR_STORE_ID is set, documents are added to the existing store.
5+
Otherwise a new vector store is created using VECTOR_STORE_NAME,
6+
its ID is printed and written back into the .env file.
67
"""
78

89
import uuid
10+
from pathlib import Path
911

1012
from dotenv import load_dotenv
1113
from langchain_community.document_loaders import TextLoader
@@ -18,16 +20,37 @@
1820
load_dotenv(verbose=True)
1921

2022

23+
def update_env_file(key: str, value: str, env_path=None):
    """Update or add a ``key=value`` pair in a .env file.

    Every existing assignment of ``key`` is rewritten in place; if none
    exists, ``key=value`` is appended. Comment lines and other keys are
    left untouched. The file is created when it does not exist.

    Args:
        key: Name of the environment variable to set.
        value: Value to store for ``key``.
        env_path: Optional path of the .env file to edit. Defaults to the
            ``.env`` file in the parent of this script's directory
            (data/ -> langgraph_agentic_rag/.env).
    """
    if env_path is None:
        # data/ -> langgraph_agentic_rag/.env
        env_path = Path(__file__).resolve().parent.parent / ".env"
    else:
        env_path = Path(env_path)

    new_line = f"{key}={value}"

    if not env_path.exists():
        env_path.write_text(new_line + "\n", encoding="utf-8")
        return

    lines = env_path.read_text(encoding="utf-8").splitlines()
    found = False
    for i, line in enumerate(lines):
        # Compare the whole name left of "=" so that "KEY", "KEY=..." and
        # "KEY = ..." all match, while "KEY_OTHER=..." and "# KEY=..." do not.
        name = line.partition("=")[0].strip()
        if name == key:
            # No early exit: a duplicate assignment further down would
            # otherwise keep its stale value.
            lines[i] = new_line
            found = True
    if not found:
        lines.append(new_line)

    env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
42+
43+
2144
def load_and_index_documents(
2245
docs_to_load: str = None,
2346
embedding_model: str = None,
2447
base_url: str = None,
2548
api_key: str = None,
26-
chunk_size: int = 512, # Increased from 64 to 512 for better context
27-
chunk_overlap: int = 128, # Increased from 32 to 128 for better overlap
49+
chunk_size: int = 512,
50+
chunk_overlap: int = 128,
2851
):
2952
"""
30-
Load documents from directory and index them in Milvus Lite.
53+
Load documents from directory and index them in a vector store.
3154
3255
Args:
3356
docs_to_load: Directory containing text files to load
@@ -54,28 +77,30 @@ def load_and_index_documents(
5477
api_key=api_key,
5578
)
5679

80+
vector_store_id = getenv("VECTOR_STORE_ID")
5781
vector_store_name = getenv("VECTOR_STORE_NAME") or "my_vector_store"
5882
provider_id = "milvus"
5983
embedding_dimension = 768
6084

61-
# Delete any existing vector stores with the same name, then create a fresh one
62-
vector_store_list = client.vector_stores.list()
63-
64-
for vs in vector_store_list.data:
65-
if vs.name == vector_store_name:
66-
print(f"Deleting existing vector store: {vs.id} ({vs.name})")
67-
client.vector_stores.delete(vector_store_id=vs.id)
68-
69-
vector_store = client.vector_stores.create(
70-
name=vector_store_name,
71-
extra_body={
72-
"provider_id": provider_id,
73-
"embedding_model": embedding_model,
74-
"embedding_dimension": embedding_dimension,
75-
},
76-
)
85+
if vector_store_id:
86+
# Use existing vector store
87+
print(f"Using existing vector store: {vector_store_id}")
88+
else:
89+
# Create a new vector store
90+
vector_store = client.vector_stores.create(
91+
name=vector_store_name,
92+
extra_body={
93+
"provider_id": provider_id,
94+
"embedding_model": embedding_model,
95+
"embedding_dimension": embedding_dimension,
96+
},
97+
)
98+
vector_store_id = vector_store.id
99+
print(f"Vector store created: id={vector_store_id} name={vector_store_name}")
77100

78-
print(f"Vector store created: {vector_store.id} ({vector_store_name})")
101+
# Persist the new ID to .env
102+
update_env_file("VECTOR_STORE_ID", vector_store_id)
103+
print(f"Updated .env with VECTOR_STORE_ID={vector_store_id}")
79104

80105
print("Loading documents from directory...")
81106
loader = TextLoader(docs_to_load)
@@ -131,9 +156,11 @@ def load_and_index_documents(
131156
print("\nLoading chunks to Vector Store...")
132157
client.vector_io.insert(
133158
chunks=formatted_chunks,
134-
vector_store_id=vector_store.id,
159+
vector_store_id=vector_store_id,
135160
)
136161

162+
print(f"Done! {len(formatted_chunks)} chunks inserted into vector store {vector_store_id}")
163+
137164

138165
if __name__ == "__main__":
139-
load_and_index_documents()
166+
load_and_index_documents()

agents/community/langgraph_agentic_rag/deploy.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
set -e # Exit on error
1515

1616
source .env
17-
export CONTAINER_IMAGE BASE_URL MODEL_ID EMBEDDING_MODEL VECTOR_STORE_NAME
17+
export CONTAINER_IMAGE BASE_URL MODEL_ID EMBEDDING_MODEL VECTOR_STORE_NAME VECTOR_STORE_ID
1818

1919
## ============================================
2020
# DOCKER BUILD

agents/community/langgraph_agentic_rag/k8s/deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ spec:
4747
value: "${EMBEDDING_MODEL}"
4848
- name: VECTOR_STORE_NAME
4949
value: "${VECTOR_STORE_NAME}"
50+
- name: VECTOR_STORE_ID
51+
value: "${VECTOR_STORE_ID}"
5052
- name: VECTOR_STORE_PATH
5153
value: "/app/data/vector_store"
5254
volumeMounts:

0 commit comments

Comments
 (0)