Add /stream (SSE) endpoint to the RAG agent and resolve the vector store by VECTOR_STORE_ID only

MRGuziX · MRGuziX · commit ed9568f4184a · 2026-03-06T14:26:01.000+01:00
diff --git a/agents/community/langgraph_agentic_rag/README.md b/agents/community/langgraph_agentic_rag/README.md
@@ -222,12 +222,32 @@ oc get route langgraph-agentic-rag -o jsonpath='{.spec.host}'
 
 Send a test request:
 
+/chat endpoint
+
 ```bash
 curl -X POST https://<YOUR_ROUTE_URL>/chat \
   -H "Content-Type: application/json" \
   -d '{"message": "What is LangChain?"}'
 ```
 
+/stream endpoint —
+raw Server-Sent Events output:
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "What is LangChain?"}'
+```
+
+/stream endpoint — streamed token text only (requires `jq`):
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "What is LangChain?"}' |
+   jq -R -r -j --stream 'scan("^data:(.*)")[] | fromjson.content // empty'
+```
+
 ## Agent-Specific Documentation
 
 ### Additional Resources
diff --git a/agents/community/langgraph_agentic_rag/data/load_documents.py b/agents/community/langgraph_agentic_rag/data/load_documents.py
@@ -2,7 +2,7 @@
 Script to load documents from text files into a vector store via LlamaStack.
 
 If VECTOR_STORE_ID is set, documents are added to the existing store.
-Otherwise a new vector store is created using VECTOR_STORE_NAME,
+Otherwise, a new vector store is created (no name is supplied),
 its ID is printed and written back into the .env file.
 """
 
@@ -22,7 +22,9 @@
 
 def update_env_file(key: str, value: str):
     """Update or add a key=value pair in the .env file next to this script."""
-    env_path = Path(__file__).resolve().parent.parent / ".env"  # data/ -> langgraph_agentic_rag/.env
+    env_path = (
+        Path(__file__).resolve().parent.parent / ".env"
+    )  # data/ -> langgraph_agentic_rag/.env
     if not env_path.exists():
         env_path.write_text(f"{key}={value}\n")
         return
@@ -78,7 +80,6 @@ def load_and_index_documents(
     )
 
     vector_store_id = getenv("VECTOR_STORE_ID")
-    vector_store_name = getenv("VECTOR_STORE_NAME") or "my_vector_store"
     provider_id = "milvus"
     embedding_dimension = 768
 
@@ -88,19 +89,19 @@ def load_and_index_documents(
     else:
         # Create a new vector store
         vector_store = client.vector_stores.create(
-            name=vector_store_name,
             extra_body={
                 "provider_id": provider_id,
                 "embedding_model": embedding_model,
                 "embedding_dimension": embedding_dimension,
             },
         )
         vector_store_id = vector_store.id
-        print(f"Vector store created: id={vector_store_id} name={vector_store_name}")
+        print(f"Vector store created: id={vector_store_id} name={vector_store.name}")
 
         # Persist the new ID to .env
         update_env_file("VECTOR_STORE_ID", vector_store_id)
         print(f"Updated .env with VECTOR_STORE_ID={vector_store_id}")
+        print("NOTE!: Please use `source ./init.sh' to update the env variables.")
 
     print("Loading documents from directory...")
     loader = TextLoader(docs_to_load)
@@ -159,8 +160,10 @@ def load_and_index_documents(
         vector_store_id=vector_store_id,
     )
 
-    print(f"Done! {len(formatted_chunks)} chunks inserted into vector store {vector_store_id}")
+    print(
+        f"Done! {len(formatted_chunks)} chunks inserted into vector store {vector_store_id}"
+    )
 
 
 if __name__ == "__main__":
-    load_and_index_documents()
+    load_and_index_documents()
diff --git a/agents/community/langgraph_agentic_rag/k8s/deployment.yaml b/agents/community/langgraph_agentic_rag/k8s/deployment.yaml
@@ -25,58 +25,56 @@ spec:
         app: langgraph-agentic-rag
     spec:
       containers:
-      - name: langgraph-agentic-rag
-        image: "${CONTAINER_IMAGE}"
-        ports:
-        - containerPort: 8080
-          name: http
-        env:
-        - name: PORT
-          value: "8080"
-        # API_KEY from Secret (secure)
-        - name: API_KEY
-          valueFrom:
-            secretKeyRef:
-              name: langgraph-agentic-rag-secrets
-              key: api-key
-        - name: BASE_URL
-          value: "${BASE_URL}"
-        - name: MODEL_ID
-          value: "${MODEL_ID}"
-        - name: EMBEDDING_MODEL
-          value: "${EMBEDDING_MODEL}"
-        - name: VECTOR_STORE_NAME
-          value: "${VECTOR_STORE_NAME}"
-        - name: VECTOR_STORE_ID
-          value: "${VECTOR_STORE_ID}"
-        - name: VECTOR_STORE_PATH
-          value: "/app/data/vector_store"
-        volumeMounts:
-        - name: vector-store
-          mountPath: /app/data
-        resources:
-          requests:
-            memory: "512Mi"
-            cpu: "200m"
-          limits:
-            memory: "1Gi"
-            cpu: "1000m"
-        livenessProbe:
-          httpGet:
-            path: /health
-            port: 8080
-          initialDelaySeconds: 15
-          periodSeconds: 120
-          timeoutSeconds: 5
-          failureThreshold: 3
-        readinessProbe:
-          httpGet:
-            path: /health
-            port: 8080
-          initialDelaySeconds: 10
-          periodSeconds: 30
-          timeoutSeconds: 3
-          failureThreshold: 3
+        - name: langgraph-agentic-rag
+          image: "${CONTAINER_IMAGE}"
+          ports:
+            - containerPort: 8080
+              name: http
+          env:
+            - name: PORT
+              value: "8080"
+            # API_KEY from Secret (secure)
+            - name: API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: langgraph-agentic-rag-secrets
+                  key: api-key
+            - name: BASE_URL
+              value: "${BASE_URL}"
+            - name: MODEL_ID
+              value: "${MODEL_ID}"
+            - name: EMBEDDING_MODEL
+              value: "${EMBEDDING_MODEL}"
+            - name: VECTOR_STORE_ID
+              value: "${VECTOR_STORE_ID}"
+            - name: VECTOR_STORE_PATH
+              value: "/app/data/vector_store"
+          volumeMounts:
+            - name: vector-store
+              mountPath: /app/data
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "200m"
+            limits:
+              memory: "1Gi"
+              cpu: "1000m"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 15
+            periodSeconds: 120
+            timeoutSeconds: 5
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 3
+            failureThreshold: 3
       volumes:
-      - name: vector-store
-        emptyDir: {}
+        - name: vector-store
+          emptyDir: { }
diff --git a/agents/community/langgraph_agentic_rag/main.py b/agents/community/langgraph_agentic_rag/main.py
@@ -3,6 +3,7 @@
 from os import getenv
 
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from pydantic import BaseModel
 
@@ -141,6 +142,69 @@ async def chat(request: ChatRequest):
         )
 
 
+@app.post("/stream")
+async def stream(request: ChatRequest):
+    """
+    Stream the agent's response to a user message as Server-Sent Events (SSE);
+    raises HTTPException (503) before streaming if the agent is not initialized.
+
+    Event types (each `data:` payload is JSON):
+        - token: non-empty text chunk from the chat model, {"content": ...}
+        - tool_call: tool call requested by the model, {"name": ..., "args": ...}
+        - tool_result: stringified tool output, {"name": ..., "output": ...}
+        - done: empty payload {}, sent after the event loop finishes normally
+        - error: {"detail": ...}, sent instead of done if an exception is raised
+
+    Args:
+        request: ChatRequest containing the user message
+    """
+    global agent_graph
+
+    if agent_graph is None:
+        raise HTTPException(status_code=503, detail="Agent not initialized")
+
+    async def event_generator():
+        try:
+            messages = [HumanMessage(content=request.message)]
+
+            async for event in agent_graph.astream_events(
+                {"messages": messages},
+                config={"recursion_limit": 15},
+                version="v2",
+            ):
+                kind = event["event"]
+
+                # LLM streaming tokens (chunks with empty content are skipped)
+                if kind == "on_chat_model_stream":
+                    chunk = event["data"]["chunk"]
+                    if chunk.content:
+                        yield f"event: token\ndata: {json.dumps({'content': chunk.content})}\n\n"
+
+                # Complete tool calls, one event per call (after the LLM finishes generating)
+                elif kind == "on_chat_model_end":
+                    message = event["data"]["output"]
+                    if hasattr(message, "tool_calls") and message.tool_calls:
+                        for tc in message.tool_calls:
+                            yield f"event: tool_call\ndata: {json.dumps({'name': tc['name'], 'args': tc['args']})}\n\n"
+
+                # Tool execution results (outputs with a .content attribute are unwrapped)
+                elif kind == "on_tool_end":
+                    output = event["data"].get("output", "")
+                    if hasattr(output, "content"):
+                        output = output.content
+                    yield f"event: tool_result\ndata: {json.dumps({'name': event.get('name', ''), 'output': str(output)})}\n\n"
+
+            yield "event: done\ndata: {}\n\n"
+
+        except Exception as e:
+            yield f"event: error\ndata: {json.dumps({'detail': str(e)})}\n\n"
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
+    )
+
+
 @app.get("/health")
 async def health():
     """Return service health and whether the agent graph has been initialized."""
diff --git a/agents/community/langgraph_agentic_rag/pyproject.toml b/agents/community/langgraph_agentic_rag/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "milvus-lite>=2.5.1",
     "python-dotenv>=1.2.1",
     "setuptools>=80.9.0,<82.0.0",
+    "ipykernel>=7.2.0",
 ]
 
 [project.optional-dependencies]
@@ -32,4 +33,4 @@ where = ["src"]
 
 [build-system]
 requires = ["setuptools>=80.9.0"]
-build-backend = "setuptools.build_meta"
+build-backend = "setuptools.build_meta"
diff --git a/agents/community/langgraph_agentic_rag/src/langgraph_agentic_rag/tools.py b/agents/community/langgraph_agentic_rag/src/langgraph_agentic_rag/tools.py
@@ -32,29 +32,20 @@ def get_retriever_components(
     # Get configuration from environment if not provided
     if not base_url:
         base_url = getenv("BASE_URL")
-    vector_store_name = getenv("VECTOR_STORE_NAME")
+    vector_store_id = getenv("VECTOR_STORE_ID")
+    if not vector_store_id:
+        raise RuntimeError(
+            "VECTOR_STORE_ID env var is not set. Run load_documents.py first."
+            "or check if you provided right ID"
+        )
 
     # Initialize LlamaStack client
     client = LlamaStackClient(
         base_url=base_url,
         api_key=getenv("API_KEY"),
     )
 
-    # Get the vector store ID by name
-    vector_store_list = client.vector_stores.list()
-    vector_store_id = None
-
-    for vs in vector_store_list.data:
-        if vs.name == vector_store_name:
-            print(f"Your Vector Store: {vs.id} ({vs.name})")
-            vector_store_id = vs.id
-
-    if not vector_store_id:
-        available = [f"{vs.name} ({vs.id})" for vs in vector_store_list.data]
-        raise RuntimeError(
-            f"Vector store '{vector_store_name}' not found. "
-            f"Available: {available}. Run load_documents.py first."
-        )
+    print(f"Using vector store: {vector_store_id}")
 
     # Cache the components
     _client_cache = client