Skip to content

Feature/dev w serve #1991

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a78d60f
improve the rag streaming API
emrgnt-cmplxty Feb 12, 2025
dd9373a
fix stream logic
emrgnt-cmplxty Feb 12, 2025
c0c4fe9
fix stream logic
emrgnt-cmplxty Feb 12, 2025
5e80754
fix stream logic
emrgnt-cmplxty Feb 12, 2025
a5205e4
cleanup retrieval client logic, split fn logic
emrgnt-cmplxty Feb 12, 2025
e3fb3f3
cleanup retrieval client logic, split fn logic
emrgnt-cmplxty Feb 12, 2025
d4f7b9a
add test rag stream
emrgnt-cmplxty Feb 12, 2025
ce3ceb5
up
emrgnt-cmplxty Feb 12, 2025
cf6c5d1
re-introduce hyde / rag-fusion
emrgnt-cmplxty Feb 13, 2025
4423704
up
emrgnt-cmplxty Feb 13, 2025
b092112
up
emrgnt-cmplxty Feb 13, 2025
77a0ed3
Merge pull request #1973 from SciPhi-AI/feature/add-back-hyde-rag-fusion
emrgnt-cmplxty Feb 13, 2025
3c63139
improve response formatting for agent workflows
emrgnt-cmplxty Feb 13, 2025
74625bb
add additional agent tests
emrgnt-cmplxty Feb 13, 2025
f41cef2
update serve
emrgnt-cmplxty Feb 20, 2025
df193bf
up
emrgnt-cmplxty Feb 20, 2025
473a60f
fix citations by removing alias
emrgnt-cmplxty Feb 20, 2025
27747bc
up
emrgnt-cmplxty Feb 20, 2025
f3c19a8
fix minor logic bugs
emrgnt-cmplxty Mar 6, 2025
17a09da
updates
emrgnt-cmplxty Mar 7, 2025
7e22db8
up
emrgnt-cmplxty Mar 7, 2025
8a47f2a
up
emrgnt-cmplxty Mar 7, 2025
2d4a9a2
up
emrgnt-cmplxty Mar 7, 2025
7406f08
up
emrgnt-cmplxty Mar 7, 2025
f34ed8f
up
emrgnt-cmplxty Mar 8, 2025
f53b75e
up
emrgnt-cmplxty Mar 8, 2025
7b3666c
up
emrgnt-cmplxty Mar 8, 2025
bf6c265
up
emrgnt-cmplxty Mar 8, 2025
79c1c83
up
emrgnt-cmplxty Mar 8, 2025
aade1e9
up
emrgnt-cmplxty Mar 8, 2025
1a99e00
up
emrgnt-cmplxty Mar 10, 2025
50f5bd6
up
emrgnt-cmplxty Mar 10, 2025
922135d
up
emrgnt-cmplxty Mar 11, 2025
9471c7a
update impls
emrgnt-cmplxty Mar 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 211 additions & 18 deletions js/sdk/src/v3/clients/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@ import {
} from "../../types";
import { ensureSnakeCase } from "../../utils";

/**
 * Converts one raw Server-Sent Event record into `{ event, data }`,
 * JSON-decoding the payload.
 *
 * Returns `null` for the terminal "done" event some SSE servers emit,
 * and for payloads that fail to parse as JSON (the failure is logged
 * rather than thrown so a single bad line does not kill the stream).
 */
function parseSseEvent(raw: { event: string; data: string }) {
  // The "done" sentinel marks end-of-stream and carries no payload.
  if (raw.event === "done") {
    return null;
  }

  try {
    return {
      event: raw.event,
      data: JSON.parse(raw.data),
    };
  } catch (err) {
    console.error("Failed to parse SSE line:", raw.data, err);
    return null;
  }
}

export class RetrievalClient {
constructor(private client: r2rClient) {}

Expand Down Expand Up @@ -96,22 +112,149 @@ export class RetrievalClient {
}
}

private async streamRag(
private async *streamRag(
ragData: Record<string, any>,
): Promise<ReadableStream<Uint8Array>> {
return this.client.makeRequest<ReadableStream<Uint8Array>>(
"POST",
"retrieval/rag",
{
data: ragData,
headers: {
"Content-Type": "application/json",
): AsyncGenerator<any, void, unknown> {
// 1) Make the streaming request with responseType: "stream"
const responseStream =
await this.client.makeRequest<ReadableStream<Uint8Array>>(
"POST",
"retrieval/rag",
{
data: ragData,
headers: { "Content-Type": "application/json" },
responseType: "stream", // triggers streaming code in BaseClient
},
responseType: "stream",
},
);
);

if (!responseStream) {
throw new Error("No response stream received");
}

const reader = responseStream.getReader();
const textDecoder = new TextDecoder("utf-8");

let buffer = "";
let currentEventType = "unknown";

while (true) {
// 2) Read the next chunk
const { value, done } = await reader.read();
if (done) {
break; // end of the stream
}
// 3) Decode from bytes to text
const chunkStr = textDecoder.decode(value, { stream: true });
// 4) Append to our buffer (which might already have a partial line)
buffer += chunkStr;

// 5) Split by newline
const lines = buffer.split("\n");

// Keep the last partial line in `buffer`
buffer = lines.pop() || "";

// 6) Process each complete line
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || trimmed.startsWith(":")) {
// SSE "heartbeat" or empty line
continue;
}
if (trimmed.startsWith("event:")) {
// e.g. event: final_answer
currentEventType = trimmed.slice("event:".length).trim();
} else if (trimmed.startsWith("data:")) {
// e.g. data: {"generated_answer":"DeepSeek R1 ..."}
const dataStr = trimmed.slice("data:".length).trim();
const parsedEvent = parseSseEvent({ event: currentEventType, data: dataStr });
if (parsedEvent !== null) {
yield parsedEvent;
}
}
}
}

// End of stream, if there's leftover in buffer, handle if needed
}

// // In retrieval.ts:
// private async *streamRag(
// ragData: Record<string, any>,
// ): AsyncGenerator<any, void, unknown> {
// // 1) Make the streaming request -> returns a browser ReadableStream<Uint8Array>
// const responseStream =
// await this.client.makeRequest<ReadableStream<Uint8Array>>(
// "POST",
// "retrieval/rag",
// {
// data: ragData,
// headers: { "Content-Type": "application/json" },
// responseType: "stream",
// },
// );

// if (!responseStream) {
// throw new Error("No response stream received");
// }

// // 2) Get a reader from the stream
// const reader = responseStream.getReader();
// const textDecoder = new TextDecoder("utf-8");

// let buffer = "";
// let currentEventType = "unknown";

// // 3) Read chunks until done
// while (true) {
// const { value, done } = await reader.read();
// if (done) {
// break;
// }
// // Decode the chunk into a string
// const chunkStr = textDecoder.decode(value, { stream: true });
// buffer += chunkStr;

// // 4) Split on newlines
// const lines = buffer.split("\n");
// buffer = lines.pop() || ""; // keep the partial line in the buffer

// for (const line of lines) {
// const trimmed = line.trim();
// if (!trimmed || trimmed.startsWith(":")) {
// // SSE heartbeats or blank lines
// continue;
// }
// if (trimmed.startsWith("event:")) {
// currentEventType = trimmed.slice("event:".length).trim();
// } else if (trimmed.startsWith("data:")) {
// const dataStr = trimmed.slice("data:".length).trim();
// // Attempt to parse the SSE event
// const eventObj = parseSseEvent({ event: currentEventType, data: dataStr });
// if (eventObj != null) {
// yield eventObj;
// }
// }
// }
// }
// }

// private async streamRag(
// ragData: Record<string, any>,
// ): Promise<ReadableStream<Uint8Array>> {
// return this.client.makeRequest<ReadableStream<Uint8Array>>(
// "POST",
// "retrieval/rag",
// {
// data: ragData,
// headers: {
// "Content-Type": "application/json",
// },
// responseType: "stream",
// },
// );
// }

/**
* Engage with an intelligent RAG-powered conversational agent for complex
* information retrieval and analysis.
Expand Down Expand Up @@ -214,22 +357,72 @@ export class RetrievalClient {
}
}

private async streamAgent(
private async *streamAgent(
agentData: Record<string, any>,
): Promise<ReadableStream<Uint8Array>> {
return this.client.makeRequest<ReadableStream<Uint8Array>>(
): AsyncGenerator<any, void, unknown> {
// 1) Make a streaming request to your "retrieval/agent" endpoint
// We'll get back a browser `ReadableStream<Uint8Array>` or a Node stream (depending on environment).
const responseStream = await this.client.makeRequest<ReadableStream<Uint8Array>>(
"POST",
"retrieval/agent",
{
data: agentData,
headers: {
"Content-Type": "application/json",
},
headers: { "Content-Type": "application/json" },
responseType: "stream",
},
);

if (!responseStream) {
throw new Error("No response stream received from agent endpoint");
}

// 2) Prepare to read the SSE stream line-by-line
const reader = responseStream.getReader();
const textDecoder = new TextDecoder("utf-8");

let buffer = "";
let currentEventType = "unknown";

// 3) Read chunks until the stream closes
while (true) {
const { value, done } = await reader.read();
if (done) {
break; // end of stream
}
// Convert bytes to text
const chunkStr = textDecoder.decode(value, { stream: true });
buffer += chunkStr;

// SSE messages are separated by newlines
const lines = buffer.split("\n");
// The last element might be a partial line, so re-buffer it
buffer = lines.pop() || "";

for (const line of lines) {
const trimmed = line.trim();
// Ignore empty lines or lines starting with ":"
if (!trimmed || trimmed.startsWith(":")) {
continue;
}
if (trimmed.startsWith("event:")) {
// e.g. "event: message"
currentEventType = trimmed.slice("event:".length).trim();
} else if (trimmed.startsWith("data:")) {
// e.g. "data: {...}"
const dataStr = trimmed.slice("data:".length).trim();
const parsed = parseSseEvent({ event: currentEventType, data: dataStr });
if (parsed !== null) {
yield parsed;
}
}
}
}

// If anything remains in `buffer`, handle it if needed.
// In most SSE flows, we expect the final chunk to end with a newline.
}


/**
* Generate completions for a list of messages.
*
Expand Down
8 changes: 4 additions & 4 deletions llms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@ generation_config = { model = "openai/gpt-4o-mini" }

[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search", "web_search"]
tools = ["search_file_knowledge", "web_search"]

[database.graph_creation_settings]
entity_types = []
Expand Down Expand Up @@ -2266,15 +2266,15 @@ The RAG agent is configured through the `r2r.toml` file. By default, it uses loc
```toml
[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search"]
tools = ["search_file_knowledge"]
```

**Enable Web Search:**

```toml
[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search", "web_search"]
tools = ["search_file_knowledge", "web_search"]
```

### Using the RAG Agent
Expand Down Expand Up @@ -11006,7 +11006,7 @@ default_max_collections_per_user = 10

[agent]
agent_static_prompt = "rag_agent"
tools = ["local_search"]
tools = ["search_file_knowledge"]

[agent.generation_config]
model = "openai/gpt-4o"
Expand Down
2 changes: 1 addition & 1 deletion py/all_possible_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ audio_lm = ""
[agent]
agent_static_prompt = "static_rag_agent"
agent_dynamic_prompt = "dynamic_rag_agent"
tools = ["local_search", "content"]
tools = ["search_file_knowledge", "content"]

################################################################################
# Authentication Settings (AuthConfig)
Expand Down
16 changes: 11 additions & 5 deletions py/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@


__all__ = [
"CitationEvent",
"Citation",
"R2RAgent",
"R2RStreamingAgent",
"SearchResultsCollector",
"R2RRAGAgent",
"R2RXMLToolsRAGAgent",
"R2RStreamingRAGAgent",
"R2RStreamingReasoningRAGAgent",
"R2RXMLToolsStreamingReasoningRAGAgent",
"R2RXMLToolsStreamingRAGAgent",
"AsyncSyncMeta",
"syncable",
"MessageType",
Expand All @@ -61,12 +62,12 @@
"RAGCompletion",
"Prompt",
"AggregateSearchResult",
"WebSearchResponse",
"WebSearchResult",
"GraphSearchResult",
"ChunkSearchSettings",
"GraphSearchSettings",
"ChunkSearchResult",
"WebSearchResult",
"WebPageSearchResult",
"SearchSettings",
"select_search_filters",
"SearchMode",
Expand Down Expand Up @@ -106,6 +107,11 @@
"generate_id",
"increment_version",
"validate_uuid",
"yield_sse_event",
"convert_nonserializable_objects",
"num_tokens",
"num_tokens_from_messages",
"SearchResultsCollector",
"R2RProviders",
"R2RApp",
"R2RBuilder",
Expand Down
14 changes: 6 additions & 8 deletions py/core/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
# Public surface of the agent package: base agent classes plus the
# concrete RAG agent variants.
# NOTE(review): reconstructed from a marker-less diff view — old and new
# import lines were interleaved; confirm against the repository that the
# retained names match the post-change file exactly.
from .base import R2RAgent, R2RStreamingAgent, R2RXMLStreamingAgent
from .rag import (
    R2RRAGAgent,
    R2RStreamingRAGAgent,
    R2RXMLToolsRAGAgent,
    R2RXMLToolsStreamingRAGAgent,
    SearchResultsCollector,
)

__all__ = [
    # Base
    "R2RAgent",
    "R2RStreamingAgent",
    "R2RXMLStreamingAgent",
    # RAG Agents
    "SearchResultsCollector",
    "R2RRAGAgent",
    "R2RXMLToolsRAGAgent",
    "R2RStreamingRAGAgent",
    "R2RXMLToolsStreamingRAGAgent",
]
Loading
Loading