apache
diff --git a/‎externals/kyuubi-data-agent-engine/src/main/java/org/apache/kyuubi/engine/dataagent/runtime/LlmStreamClient.java‎
Lines changed: 188 additions & 0 deletions b/‎externals/kyuubi-data-agent-engine/src/main/java/org/apache/kyuubi/engine/dataagent/runtime/LlmStreamClient.java‎
Lines changed: 188 additions & 0 deletions
diff --git a/‎externals/kyuubi-data-agent-engine/src/main/java/org/apache/kyuubi/engine/dataagent/runtime/MiddlewareDispatcher.java‎
Lines changed: 198 additions & 0 deletions b/‎externals/kyuubi-data-agent-engine/src/main/java/org/apache/kyuubi/engine/dataagent/runtime/MiddlewareDispatcher.java‎
Lines changed: 198 additions & 0 deletions
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.engine.dataagent.runtime;
+
+import com.openai.client.OpenAIClient;
+import com.openai.core.http.StreamResponse;
+import com.openai.models.chat.completions.ChatCompletionAssistantMessageParam;
+import com.openai.models.chat.completions.ChatCompletionChunk;
+import com.openai.models.chat.completions.ChatCompletionCreateParams;
+import com.openai.models.chat.completions.ChatCompletionMessageFunctionToolCall;
+import com.openai.models.chat.completions.ChatCompletionMessageParam;
+import com.openai.models.chat.completions.ChatCompletionMessageToolCall;
+import com.openai.models.chat.completions.ChatCompletionStreamOptions;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.kyuubi.engine.dataagent.runtime.event.ContentDelta;
+import org.apache.kyuubi.engine.dataagent.tool.ToolRegistry;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Streams one chat completion call and assembles the assistant content plus any streamed tool
+ * calls into a {@link StreamResult}.
+ */
+final class LlmStreamClient {
+
+  private static final Logger LOG = LoggerFactory.getLogger(LlmStreamClient.class);
+
+  private final OpenAIClient client;
+  private final ToolRegistry toolRegistry;
+
+  LlmStreamClient(OpenAIClient client, ToolRegistry toolRegistry) {
+    this.client = client;
+    this.toolRegistry = toolRegistry;
+  }
+
+  /**
+   * Stream LLM response, emitting ContentDelta through {@code ctx} for each text chunk. Assembles
+   * tool calls directly from streamed chunks with no non-streaming fallback.
+   *
+   * @param ctx run context that receives {@link ContentDelta} events and token usage
+   * @param messages messages added to the request in list order
+   * @param effectiveModel model name set on the request
+   * @return accumulated content plus the assembled tool calls ({@code null} when none streamed)
+   */
+  StreamResult stream(
+      AgentRunContext ctx, List<ChatCompletionMessageParam> messages, String effectiveModel) {
+    ChatCompletionCreateParams.Builder paramsBuilder =
+        ChatCompletionCreateParams.builder()
+            .model(effectiveModel)
+            .streamOptions(ChatCompletionStreamOptions.builder().includeUsage(true).build());
+    for (ChatCompletionMessageParam msg : messages) {
+      paramsBuilder.addMessage(msg);
+    }
+    toolRegistry.addToolsTo(paramsBuilder);
+
+    LOG.info("LLM request: model={}", effectiveModel);
+    StreamAccumulator acc = new StreamAccumulator();
+    // try-with-resources closes the streaming response even if a chunk handler throws.
+    try (StreamResponse<ChatCompletionChunk> stream =
+        client.chat().completions().createStreaming(paramsBuilder.build())) {
+      stream.stream().forEach(chunk -> consumeChunk(ctx, chunk, acc));
+    }
+    return new StreamResult(acc.content.toString(), acc.buildToolCalls());
+  }
+
+  /**
+   * Fold one streaming chunk into {@code acc}, emitting per-token {@link ContentDelta}s. The
+   * server-echoed model is logged once (on the first chunk seen), and usage is forwarded to {@code
+   * ctx} whenever a chunk carries it.
+   */
+  private void consumeChunk(AgentRunContext ctx, ChatCompletionChunk chunk, StreamAccumulator acc) {
+    if (!acc.serverModelLogged) {
+      LOG.info("LLM response: server-echoed model={}", chunk.model());
+      acc.serverModelLogged = true;
+    }
+    chunk
+        .usage()
+        .ifPresent(u -> ctx.addTokenUsage(u.promptTokens(), u.completionTokens(), u.totalTokens()));
+
+    for (ChatCompletionChunk.Choice c : chunk.choices()) {
+      c.delta()
+          .content()
+          .ifPresent(
+              text -> {
+                acc.content.append(text);
+                ctx.emit(new ContentDelta(text));
+              });
+      c.delta().toolCalls().ifPresent(acc::mergeToolCallDeltas);
+    }
+  }
+
+  /** Fields of one tool call collected so far; {@code id} / {@code name} stay null until seen. */
+  private static final class PendingToolCall {
+    String id;
+    String name;
+    final StringBuilder arguments = new StringBuilder();
+  }
+
+  /**
+   * Mutable accumulator for a single streaming LLM turn. Tool calls are keyed by the chunk's {@code
+   * index} because provider SDKs may deliver a single logical call across multiple chunks and only
+   * surface the {@code id}/{@code name} on the first one. Tracking by index (rather than by id)
+   * also means a call whose id never arrives is still emitted, with a synthesized id, instead of
+   * being dropped.
+   */
+  private static final class StreamAccumulator {
+    /** Arguments payload used when a tool call streamed no argument text at all. */
+    private static final String EMPTY_ARGUMENTS = "{}";
+
+    final StringBuilder content = new StringBuilder();
+    final Map<Integer, PendingToolCall> pendingCalls = new HashMap<>();
+    boolean serverModelLogged = false;
+
+    /**
+     * Merge a chunk's tool-call deltas into the pending calls. Empty {@code id}, {@code name} and
+     * argument fragments are ignored so that a blank value on a later chunk cannot overwrite the
+     * real value delivered earlier, and so that a delta carrying no data registers no call.
+     */
+    void mergeToolCallDeltas(List<ChatCompletionChunk.Choice.Delta.ToolCall> deltas) {
+      for (ChatCompletionChunk.Choice.Delta.ToolCall tc : deltas) {
+        int idx = (int) tc.index();
+        tc.id().filter(id -> !id.isEmpty()).ifPresent(id -> pendingCall(idx).id = id);
+        tc.function()
+            .ifPresent(
+                fn -> {
+                  fn.name()
+                      .filter(name -> !name.isEmpty())
+                      .ifPresent(name -> pendingCall(idx).name = name);
+                  fn.arguments()
+                      .filter(args -> !args.isEmpty())
+                      .ifPresent(args -> pendingCall(idx).arguments.append(args));
+                });
+      }
+    }
+
+    /** Return the pending call for {@code idx}, creating it on first use. */
+    private PendingToolCall pendingCall(int idx) {
+      return pendingCalls.computeIfAbsent(idx, k -> new PendingToolCall());
+    }
+
+    /**
+     * Materialize accumulated deltas into SDK tool-call objects, ordered by ascending stream index.
+     * Returns {@code null} (not an empty list) if no tool calls were seen, matching the existing
+     * {@link StreamResult} contract. A missing id is synthesized, a missing name becomes the empty
+     * string, and missing arguments become {@code "{}"}.
+     */
+    List<ChatCompletionMessageToolCall> buildToolCalls() {
+      if (pendingCalls.isEmpty()) return null;
+      // HashMap iteration order is unspecified; sort so calls come out in stream-index order.
+      List<Integer> indexes = new ArrayList<>(pendingCalls.keySet());
+      indexes.sort(Integer::compare);
+      List<ChatCompletionMessageToolCall> out = new ArrayList<>(indexes.size());
+      for (Integer idx : indexes) {
+        PendingToolCall call = pendingCalls.get(idx);
+        String id = call.id == null ? synthId() : call.id;
+        String name = call.name == null ? "" : call.name;
+        String args =
+            call.arguments.length() == 0 ? EMPTY_ARGUMENTS : call.arguments.toString();
+        out.add(
+            ChatCompletionMessageToolCall.ofFunction(
+                ChatCompletionMessageFunctionToolCall.builder()
+                    .id(id)
+                    .function(
+                        ChatCompletionMessageFunctionToolCall.Function.builder()
+                            .name(name)
+                            .arguments(args)
+                            .build())
+                    .build()));
+      }
+      return out;
+    }
+
+    /**
+     * Synthesize an id for tool calls whose id never arrived on the stream (some OpenAI-compatible
+     * providers omit it). The id has to be stable within a turn and unique across turns so the
+     * assistant/tool_result pairing downstream holds; it is generated once per call when the turn
+     * is materialized, from a random UUID.
+     */
+    private static String synthId() {
+      return "local_" + java.util.UUID.randomUUID().toString().replace("-", "").substring(0, 24);
+    }
+  }
+
+  /** Result of a streaming LLM call, assembled from chunks. */
+  static final class StreamResult {
+    /** Concatenation of every streamed content fragment; empty when none was streamed. */
+    final String content;
+
+    /** Assembled tool calls, or {@code null} when the stream carried none. */
+    final List<ChatCompletionMessageToolCall> toolCalls;
+
+    StreamResult(String content, List<ChatCompletionMessageToolCall> toolCalls) {
+      this.content = content;
+      this.toolCalls = toolCalls;
+    }
+
+    /** Returns true when the turn produced neither content nor tool calls. */
+    boolean isEmpty() {
+      return content.isEmpty() && (toolCalls == null || toolCalls.isEmpty());
+    }
+
+    /**
+     * Build the SDK assistant message corresponding to this streamed result. Content and tool calls
+     * are only set on the builder when non-empty.
+     */
+    ChatCompletionAssistantMessageParam toAssistantMessage() {
+      ChatCompletionAssistantMessageParam.Builder b = ChatCompletionAssistantMessageParam.builder();
+      if (!content.isEmpty()) {
+        b.content(content);
+      }
+      if (toolCalls != null && !toolCalls.isEmpty()) {
+        b.toolCalls(toolCalls);
+      }
+      return b.build();
+    }
+  }
+}
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.engine.dataagent.runtime;
+
+import com.openai.models.chat.completions.ChatCompletionAssistantMessageParam;
+import com.openai.models.chat.completions.ChatCompletionMessageParam;
+import java.util.List;
+import org.apache.kyuubi.engine.dataagent.runtime.event.AgentEvent;
+import org.apache.kyuubi.engine.dataagent.runtime.middleware.AgentMiddleware;
+import org.apache.kyuubi.engine.dataagent.runtime.middleware.ApprovalMiddleware;
+import org.apache.kyuubi.engine.dataagent.runtime.middleware.Decision;
+import org.apache.kyuubi.engine.dataagent.runtime.middleware.ToolInvocation;
+import org.apache.kyuubi.engine.dataagent.tool.ToolRegistry;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Composite {@link AgentMiddleware} — folds a list of middlewares into one. Hook ordering follows
+ * the onion model: {@code before*} / {@code on*Start} run first-to-last, {@code after*} / {@code
+ * on*Finish} run last-to-first.
+ *
+ * <p>Component middlewares are internal framework code. If one throws during ordinary hook
+ * dispatch, the agent run fails via {@link ReactAgent#run}; lifecycle cleanup hooks ({@link
+ * #onAgentFinish}, {@link #onSessionClose}, {@link #onStop}) swallow exceptions so later
+ * middlewares still get a chance to release state.
+ */
+final class MiddlewareDispatcher implements AgentMiddleware {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MiddlewareDispatcher.class);
+
+  private final List<AgentMiddleware> middlewares;
+  private final ApprovalMiddleware approvalMiddleware;
+
+  MiddlewareDispatcher(List<AgentMiddleware> middlewares) {
+    this.middlewares = middlewares;
+    this.approvalMiddleware = findApprovalMiddleware(middlewares);
+  }
+
+  /**
+   * Resolve a pending approval request. Not part of {@link AgentMiddleware} — special accessor for
+   * the approval flow.
+   */
+  boolean resolveApproval(String requestId, boolean approved) {
+    if (approvalMiddleware == null) return false;
+    return approvalMiddleware.resolve(requestId, approved);
+  }
+
+  @Override
+  public void onRegister(ToolRegistry registry) {
+    for (AgentMiddleware mw : middlewares) {
+      mw.onRegister(registry);
+    }
+  }
+
+  @Override
+  public void onAgentStart(AgentRunContext ctx) {
+    for (AgentMiddleware mw : middlewares) {
+      mw.onAgentStart(ctx);
+    }
+  }
+
+  @Override
+  public void onAgentFinish(AgentRunContext ctx) {
+    // Runs even when the agent body threw, so swallow here to ensure every middleware's cleanup
+    // gets a chance to run; otherwise we'd leak session state in later middlewares.
+    for (int i = middlewares.size() - 1; i >= 0; i--) {
+      try {
+        middlewares.get(i).onAgentFinish(ctx);
+      } catch (Exception e) {
+        LOG.warn("Middleware onAgentFinish error", e);
+      }
+    }
+  }
+
+  @Override
+  public void onSessionClose(String sessionId) {
+    for (AgentMiddleware mw : middlewares) {
+      try {
+        mw.onSessionClose(sessionId);
+      } catch (Exception e) {
+        LOG.warn("Middleware onSessionClose error", e);
+      }
+    }
+  }
+
+  @Override
+  public void onStop() {
+    for (AgentMiddleware mw : middlewares) {
+      try {
+        mw.onStop();
+      } catch (Exception e) {
+        LOG.warn("Middleware onStop error", e);
+      }
+    }
+  }
+
+  /**
+   * Fold {@code onEvent} in onion order. Returns PROCEED if untouched, REPLACE with the final event
+   * if any middleware rewrote it, or ABORT if any short-circuited.
+   */
+  @Override
+  public Decision<AgentEvent> onEvent(AgentRunContext ctx, AgentEvent event) {
+    AgentEvent current = event;
+    for (AgentMiddleware mw : middlewares) {
+      Decision<AgentEvent> d = mw.onEvent(ctx, current);
+      if (d.kind() == Decision.Kind.ABORT) return d;
+      if (d.kind() == Decision.Kind.REPLACE) current = d.replacement();
+    }
+    return Decision.of(event, current);
+  }
+
+  /**
+   * Fold {@code beforeLlmCall} in onion order so later middlewares see rewritten messages. Returns
+   * PROCEED if untouched, REPLACE with the final value if any did, or ABORT if any short-circuited.
+   */
+  @Override
+  public Decision<List<ChatCompletionMessageParam>> beforeLlmCall(
+      AgentRunContext ctx, List<ChatCompletionMessageParam> messages) {
+    List<ChatCompletionMessageParam> current = messages;
+    for (AgentMiddleware mw : middlewares) {
+      Decision<List<ChatCompletionMessageParam>> d = mw.beforeLlmCall(ctx, current);
+      if (d.kind() == Decision.Kind.ABORT) return d;
+      if (d.kind() == Decision.Kind.REPLACE) current = d.replacement();
+    }
+    return Decision.of(messages, current);
+  }
+
+  /**
+   * Fold {@code afterLlmCall} in reverse onion order so earlier middlewares see rewritten
+   * responses. Returns the final response, or ABORT if any middleware short-circuits.
+   */
+  @Override
+  public Decision<ChatCompletionAssistantMessageParam> afterLlmCall(
+      AgentRunContext ctx, ChatCompletionAssistantMessageParam response) {
+    ChatCompletionAssistantMessageParam current = response;
+    for (int i = middlewares.size() - 1; i >= 0; i--) {
+      Decision<ChatCompletionAssistantMessageParam> d =
+          middlewares.get(i).afterLlmCall(ctx, current);
+      if (d.kind() == Decision.Kind.ABORT) return d;
+      if (d.kind() == Decision.Kind.REPLACE) current = d.replacement();
+    }
+    return Decision.of(response, current);
+  }
+
+  /**
+   * Fold {@code beforeToolCall} in onion order so later middlewares can further rewrite. Returns
+   * PROCEED if untouched, REPLACE with the final invocation otherwise, or ABORT if any middleware
+   * denies the call.
+   */
+  @Override
+  public Decision<ToolInvocation> beforeToolCall(AgentRunContext ctx, ToolInvocation call) {
+    ToolInvocation current = call;
+    for (AgentMiddleware mw : middlewares) {
+      Decision<ToolInvocation> d = mw.beforeToolCall(ctx, current);
+      if (d.kind() == Decision.Kind.ABORT) return d;
+      if (d.kind() == Decision.Kind.REPLACE) current = d.replacement();
+    }
+    return Decision.of(call, current);
+  }
+
+  /**
+   * Fold {@code afterToolCall} in reverse onion order so earlier middlewares see rewritten results.
+   * Returns the final result, or ABORT if any middleware short-circuits — caller decides how to
+   * surface the abort (typically: use {@code reason()} as the result text the LLM sees).
+   */
+  @Override
+  public Decision<String> afterToolCall(AgentRunContext ctx, ToolInvocation call, String result) {
+    String current = result;
+    for (int i = middlewares.size() - 1; i >= 0; i--) {
+      Decision<String> d = middlewares.get(i).afterToolCall(ctx, call, current);
+      if (d.kind() == Decision.Kind.ABORT) return d;
+      if (d.kind() == Decision.Kind.REPLACE) current = d.replacement();
+    }
+    return Decision.of(result, current);
+  }
+
+  private static ApprovalMiddleware findApprovalMiddleware(List<AgentMiddleware> middlewares) {
+    for (AgentMiddleware mw : middlewares) {
+      if (mw instanceof ApprovalMiddleware) return (ApprovalMiddleware) mw;
+    }
+    return null;
+  }
+}