Skip to content

Feature/dev w serve #1991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a78d60f
improve the rag streaming API
emrgnt-cmplxty Feb 12, 2025
dd9373a
fix stream logic
emrgnt-cmplxty Feb 12, 2025
c0c4fe9
fix stream logic
emrgnt-cmplxty Feb 12, 2025
5e80754
fix stream logic
emrgnt-cmplxty Feb 12, 2025
a5205e4
cleanup retrieval client logic, split fn logic
emrgnt-cmplxty Feb 12, 2025
e3fb3f3
cleanup retrieval client logic, split fn logic
emrgnt-cmplxty Feb 12, 2025
d4f7b9a
add test rag stream
emrgnt-cmplxty Feb 12, 2025
ce3ceb5
up
emrgnt-cmplxty Feb 12, 2025
cf6c5d1
re-introduce hyde / rag-fusion
emrgnt-cmplxty Feb 13, 2025
4423704
up
emrgnt-cmplxty Feb 13, 2025
b092112
up
emrgnt-cmplxty Feb 13, 2025
77a0ed3
Merge pull request #1973 from SciPhi-AI/feature/add-back-hyde-rag-fusion
emrgnt-cmplxty Feb 13, 2025
3c63139
improve response formatting for agent workflows
emrgnt-cmplxty Feb 13, 2025
74625bb
add additional agent tests
emrgnt-cmplxty Feb 13, 2025
f41cef2
update serve
emrgnt-cmplxty Feb 20, 2025
df193bf
up
emrgnt-cmplxty Feb 20, 2025
473a60f
fix citations by removing alias
emrgnt-cmplxty Feb 20, 2025
27747bc
up
emrgnt-cmplxty Feb 20, 2025
f3c19a8
fix minor logic bugs
emrgnt-cmplxty Mar 6, 2025
17a09da
updates
emrgnt-cmplxty Mar 7, 2025
7e22db8
up
emrgnt-cmplxty Mar 7, 2025
8a47f2a
up
emrgnt-cmplxty Mar 7, 2025
2d4a9a2
up
emrgnt-cmplxty Mar 7, 2025
7406f08
up
emrgnt-cmplxty Mar 7, 2025
f34ed8f
up
emrgnt-cmplxty Mar 8, 2025
f53b75e
up
emrgnt-cmplxty Mar 8, 2025
7b3666c
up
emrgnt-cmplxty Mar 8, 2025
bf6c265
up
emrgnt-cmplxty Mar 8, 2025
79c1c83
up
emrgnt-cmplxty Mar 8, 2025
aade1e9
up
emrgnt-cmplxty Mar 8, 2025
1a99e00
up
emrgnt-cmplxty Mar 10, 2025
50f5bd6
up
emrgnt-cmplxty Mar 10, 2025
922135d
up
emrgnt-cmplxty Mar 11, 2025
9471c7a
update impls
emrgnt-cmplxty Mar 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 211 additions & 18 deletions js/sdk/src/v3/clients/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@ import {
} from "../../types";
import { ensureSnakeCase } from "../../utils";

/**
 * Converts one raw Server-Sent Event record into `{ event, data }`,
 * JSON-decoding the payload.
 *
 * Returns `null` for the terminal "done" event some SSE servers emit,
 * and for payloads that fail to parse as JSON (the failure is logged
 * rather than thrown so a single bad line does not kill the stream).
 */
function parseSseEvent(raw: { event: string; data: string }) {
  // The "done" sentinel marks end-of-stream and carries no payload.
  if (raw.event === "done") {
    return null;
  }

  try {
    return {
      event: raw.event,
      data: JSON.parse(raw.data),
    };
  } catch (err) {
    console.error("Failed to parse SSE line:", raw.data, err);
    return null;
  }
}

export class RetrievalClient {
constructor(private client: r2rClient) {}

Expand Down Expand Up @@ -96,22 +112,149 @@ export class RetrievalClient {
}
}

private async streamRag(
private async *streamRag(
ragData: Record<string, any>,
): Promise<ReadableStream<Uint8Array>> {
return this.client.makeRequest<ReadableStream<Uint8Array>>(
"POST",
"retrieval/rag",
{
data: ragData,
headers: {
"Content-Type": "application/json",
): AsyncGenerator<any, void, unknown> {
// 1) Make the streaming request with responseType: "stream"
const responseStream =
await this.client.makeRequest<ReadableStream<Uint8Array>>(
"POST",
"retrieval/rag",
{
data: ragData,
headers: { "Content-Type": "application/json" },
responseType: "stream", // triggers streaming code in BaseClient
},
responseType: "stream",
},
);
);

if (!responseStream) {
throw new Error("No response stream received");
}

const reader = responseStream.getReader();
const textDecoder = new TextDecoder("utf-8");

let buffer = "";
let currentEventType = "unknown";

while (true) {
// 2) Read the next chunk
const { value, done } = await reader.read();
if (done) {
break; // end of the stream
}
// 3) Decode from bytes to text
const chunkStr = textDecoder.decode(value, { stream: true });
// 4) Append to our buffer (which might already have a partial line)
buffer += chunkStr;

// 5) Split by newline
const lines = buffer.split("\n");

// Keep the last partial line in `buffer`
buffer = lines.pop() || "";

// 6) Process each complete line
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || trimmed.startsWith(":")) {
// SSE "heartbeat" or empty line
continue;
}
if (trimmed.startsWith("event:")) {
// e.g. event: final_answer
currentEventType = trimmed.slice("event:".length).trim();
} else if (trimmed.startsWith("data:")) {
// e.g. data: {"generated_answer":"DeepSeek R1 ..."}
const dataStr = trimmed.slice("data:".length).trim();
const parsedEvent = parseSseEvent({ event: currentEventType, data: dataStr });
if (parsedEvent !== null) {
yield parsedEvent;
}
}
}
}

// End of stream, if there's leftover in buffer, handle if needed
}

// // In retrieval.ts:
// private async *streamRag(
// ragData: Record<string, any>,
// ): AsyncGenerator<any, void, unknown> {
// // 1) Make the streaming request -> returns a browser ReadableStream<Uint8Array>
// const responseStream =
// await this.client.makeRequest<ReadableStream<Uint8Array>>(
// "POST",
// "retrieval/rag",
// {
// data: ragData,
// headers: { "Content-Type": "application/json" },
// responseType: "stream",
// },
// );

// if (!responseStream) {
// throw new Error("No response stream received");
// }

// // 2) Get a reader from the stream
// const reader = responseStream.getReader();
// const textDecoder = new TextDecoder("utf-8");

// let buffer = "";
// let currentEventType = "unknown";

// // 3) Read chunks until done
// while (true) {
// const { value, done } = await reader.read();
// if (done) {
// break;
// }
// // Decode the chunk into a string
// const chunkStr = textDecoder.decode(value, { stream: true });
// buffer += chunkStr;

// // 4) Split on newlines
// const lines = buffer.split("\n");
// buffer = lines.pop() || ""; // keep the partial line in the buffer

// for (const line of lines) {
// const trimmed = line.trim();
// if (!trimmed || trimmed.startsWith(":")) {
// // SSE heartbeats or blank lines
// continue;
// }
// if (trimmed.startsWith("event:")) {
// currentEventType = trimmed.slice("event:".length).trim();
// } else if (trimmed.startsWith("data:")) {
// const dataStr = trimmed.slice("data:".length).trim();
// // Attempt to parse the SSE event
// const eventObj = parseSseEvent({ event: currentEventType, data: dataStr });
// if (eventObj != null) {
// yield eventObj;
// }
// }
// }
// }
// }

// private async streamRag(
// ragData: Record<string, any>,
// ): Promise<ReadableStream<Uint8Array>> {
// return this.client.makeRequest<ReadableStream<Uint8Array>>(
// "POST",
// "retrieval/rag",
// {
// data: ragData,
// headers: {
// "Content-Type": "application/json",
// },
// responseType: "stream",
// },
// );
// }

/**
* Engage with an intelligent RAG-powered conversational agent for complex
* information retrieval and analysis.
Expand Down Expand Up @@ -214,22 +357,72 @@ export class RetrievalClient {
}
}

private async streamAgent(
private async *streamAgent(
agentData: Record<string, any>,
): Promise<ReadableStream<Uint8Array>> {
return this.client.makeRequest<ReadableStream<Uint8Array>>(
): AsyncGenerator<any, void, unknown> {
// 1) Make a streaming request to your "retrieval/agent" endpoint
// We'll get back a browser `ReadableStream<Uint8Array>` or a Node stream (depending on environment).
const responseStream = await this.client.makeRequest<ReadableStream<Uint8Array>>(
"POST",
"retrieval/agent",
{
data: agentData,
headers: {
"Content-Type": "application/json",
},
headers: { "Content-Type": "application/json" },
responseType: "stream",
},
);

if (!responseStream) {
throw new Error("No response stream received from agent endpoint");
}

// 2) Prepare to read the SSE stream line-by-line
const reader = responseStream.getReader();
const textDecoder = new TextDecoder("utf-8");

let buffer = "";
let currentEventType = "unknown";

// 3) Read chunks until the stream closes
while (true) {
const { value, done } = await reader.read();
if (done) {
break; // end of stream
}
// Convert bytes to text
const chunkStr = textDecoder.decode(value, { stream: true });
buffer += chunkStr;

// SSE messages are separated by newlines
const lines = buffer.split("\n");
// The last element might be a partial line, so re-buffer it
buffer = lines.pop() || "";

for (const line of lines) {
const trimmed = line.trim();
// Ignore empty lines or lines starting with ":"
if (!trimmed || trimmed.startsWith(":")) {
continue;
}
if (trimmed.startsWith("event:")) {
// e.g. "event: message"
currentEventType = trimmed.slice("event:".length).trim();
} else if (trimmed.startsWith("data:")) {
// e.g. "data: {...}"
const dataStr = trimmed.slice("data:".length).trim();
const parsed = parseSseEvent({ event: currentEventType, data: dataStr });
if (parsed !== null) {
yield parsed;
}
}
}
}

// If anything remains in `buffer`, handle it if needed.
// In most SSE flows, we expect the final chunk to end with a newline.
}


/**
* Generate completions for a list of messages.
*
Expand Down
8 changes: 4 additions & 4 deletions llms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ generation_config = { model = "openai/gpt-4o-mini" }

[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search", "web_search"]
tools = ["search_file_knowledge", "web_search"]

[database.graph_creation_settings]
entity_types = []
Expand Down Expand Up @@ -2266,15 +2266,15 @@ The RAG agent is configured through the `r2r.toml` file. By default, it uses loc
```toml
[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search"]
tools = ["search_file_knowledge"]
```

**Enable Web Search:**

```toml
[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search", "web_search"]
tools = ["search_file_knowledge", "web_search"]
```

### Using the RAG Agent
Expand Down Expand Up @@ -11006,7 +11006,7 @@ default_max_collections_per_user = 10

[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search"]
tools = ["search_file_knowledge"]

[agent.generation_config]
model = "openai/gpt-4o"
Expand Down
2 changes: 1 addition & 1 deletion py/all_possible_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ audio_lm = ""
[agent]
agent_static_prompt = "static_rag_agent"
agent_dynamic_prompt = "dynamic_rag_agent"
tools = ["local_search", "content"]
tools = ["search_file_knowledge", "content"]

################################################################################
# Authentication Settings (AuthConfig)
Expand Down
16 changes: 11 additions & 5 deletions py/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@


__all__ = [
"CitationEvent",
"Citation",
"R2RAgent",
"R2RStreamingAgent",
"SearchResultsCollector",
"R2RRAGAgent",
"R2RXMLToolsRAGAgent",
"R2RStreamingRAGAgent",
"R2RStreamingReasoningRAGAgent",
"R2RXMLToolsStreamingReasoningRAGAgent",
"R2RXMLToolsStreamingRAGAgent",
"AsyncSyncMeta",
"syncable",
"MessageType",
Expand All @@ -61,12 +62,12 @@
"RAGCompletion",
"Prompt",
"AggregateSearchResult",
"WebSearchResponse",
"WebSearchResult",
"GraphSearchResult",
"ChunkSearchSettings",
"GraphSearchSettings",
"ChunkSearchResult",
"WebSearchResult",
"WebPageSearchResult",
"SearchSettings",
"select_search_filters",
"SearchMode",
Expand Down Expand Up @@ -106,6 +107,11 @@
"generate_id",
"increment_version",
"validate_uuid",
"yield_sse_event",
"convert_nonserializable_objects",
"num_tokens",
"num_tokens_from_messages",
"SearchResultsCollector",
"R2RProviders",
"R2RApp",
"R2RBuilder",
Expand Down
14 changes: 6 additions & 8 deletions py/core/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
# Public surface of the agent package: base agent classes plus the
# concrete RAG agent variants.
# NOTE(review): reconstructed from a marker-less diff view — old and new
# import lines were interleaved; confirm against the repository that the
# retained names match the post-change file exactly.
from .base import R2RAgent, R2RStreamingAgent, R2RXMLStreamingAgent
from .rag import (
    R2RRAGAgent,
    R2RStreamingRAGAgent,
    R2RXMLToolsRAGAgent,
    R2RXMLToolsStreamingRAGAgent,
    SearchResultsCollector,
)

__all__ = [
    # Base
    "R2RAgent",
    "R2RStreamingAgent",
    "R2RXMLStreamingAgent",
    # RAG Agents
    "SearchResultsCollector",
    "R2RRAGAgent",
    "R2RXMLToolsRAGAgent",
    "R2RStreamingRAGAgent",
    "R2RXMLToolsStreamingRAGAgent",
]
Loading
Loading