File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -91,7 +91,7 @@ def load_and_index_documents(
9191 chunks = []
9292 for doc in all_chunks :
9393 content = doc .page_content .strip ()
94- if content and not all (c in "=-_*#\n \r \t " for c in content ):
94+ if content and not all (c in "=-_*#| \n \r \t " for c in content ):
9595 chunks .append (content )
9696 print (f"Created { len (chunks )} chunks (filtered out empty/separator chunks)" )
9797
@@ -131,8 +131,6 @@ def load_and_index_documents(
131131 vector_store_id = vector_store .id ,
132132 )
133133
134- print ("\n =" )
135-
136134
137135if __name__ == "__main__" :
138136 load_and_index_documents ()
Original file line number Diff line number Diff line change 1414set -e # Exit on error
1515
1616source .env
17- export CONTAINER_IMAGE BASE_URL MODEL_ID EMBEDDING_MODEL
17+ export CONTAINER_IMAGE BASE_URL MODEL_ID EMBEDDING_MODEL VECTOR_STORE_NAME
1818
1919# # ============================================
2020# DOCKER BUILD
Original file line number Diff line number Diff line change @@ -40,14 +40,22 @@ def get_retriever_components(
4040 api_key = getenv ("API_KEY" ),
4141 )
4242
43- # Get the vector store ID
43+ # Get the vector store ID by name
4444 vector_store_list = client .vector_stores .list ()
45+ vector_store_id = None
4546
4647 for vs in vector_store_list .data :
4748 if vs .name == vector_store_name :
4849 print (f"Your Vector Store: { vs .id } ({ vs .name } )" )
4950 vector_store_id = vs .id
5051
52+ if not vector_store_id :
53+ available = [f"{ vs .name } ({ vs .id } )" for vs in vector_store_list .data ]
54+ raise RuntimeError (
55+ f"Vector store '{ vector_store_name } ' not found. "
56+ f"Available: { available } . Run load_documents.py first."
57+ )
58+
5159 # Cache the components
5260 _client_cache = client
5361 _vector_store_id_cache = vector_store_id
@@ -105,7 +113,7 @@ def retriever_tool(query: str) -> str:
105113 for i , chunk in enumerate (response .chunks , 1 ):
106114 # Skip chunks that are empty or just separators/whitespace
107115 content = chunk .content .strip ()
108- if not content or all (c in "=-_*#" for c in content ):
116+ if not content or all (c in "=-_*#| " for c in content ):
109117 continue
110118
111119 # Extract source from chunk metadata (Pydantic object)
You can’t perform that action at this time.
0 commit comments