Skip to content

Commit 1b7f04b

Browse files
Merge remote-tracking branch 'origin/master' into spring-ai/per-model-timeouts
# Conflicts: # temporal-spring-ai/build.gradle # temporal-spring-ai/src/main/java/io/temporal/springai/model/ActivityChatModel.java
2 parents d9baf99 + 9eae4a8 commit 1b7f04b

12 files changed

Lines changed: 1164 additions & 11 deletions

File tree

temporal-spring-ai/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,22 @@ public class MyTools {
114114

115115
Auto-detected and executed as Nexus operations, similar to activity stubs.
116116

117+
## Media in messages
118+
119+
If you attach media (images, audio, etc.) to a `UserMessage` or an `AssistantMessage`, prefer passing it by URI rather than raw bytes:
120+
121+
```java
122+
// Good — only the URL crosses the activity boundary.
123+
Media image = new Media(MimeTypeUtils.IMAGE_PNG, URI.create("https://cdn.example.com/pic.png"));
124+
125+
// Works, but size-limited — see below.
126+
Media image = new Media(MimeTypeUtils.IMAGE_PNG, new ByteArrayResource(bytes));
127+
```
128+
129+
Raw `byte[]` media gets serialized into every chat activity's input *and* result payload, which end up inside Temporal workflow history events. Server-side history events have a fixed 2 MiB size limit; to leave headroom for messages, tool definitions, and options, the plugin enforces a **1 MiB default cap** on inline media bytes and fails fast with an `IllegalArgumentException` pointing you at the URI alternative.
130+
131+
Override the cap by setting the system property `io.temporal.springai.maxMediaBytes` before your worker starts (pass a positive integer; `0` disables the check). For anything larger than a small thumbnail, the URI route is the right answer — have an activity write the bytes to blob storage, then pass only the URL into the conversation.
132+
117133
## Optional Integrations
118134

119135
Auto-configured when their dependencies are on the classpath:

temporal-spring-ai/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ dependencies {
4646
testImplementation "org.mockito:mockito-core:${mockitoVersion}"
4747
testImplementation 'org.springframework.boot:spring-boot-starter-test'
4848
testImplementation 'org.springframework.ai:spring-ai-rag'
49+
// Needed only so McpPluginTest can mock/reference McpSyncClient directly.
50+
testImplementation 'org.springframework.ai:spring-ai-mcp'
4951
// Needed only so tests can reference Spring AI's NonTransientAiException to
5052
// verify the plugin's default retry classification.
5153
testImplementation 'org.springframework.ai:spring-ai-retry'

temporal-spring-ai/src/main/java/io/temporal/springai/activity/ChatModelActivityImpl.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ private ChatModelTypes.MediaContent fromMedia(Media media) {
239239
if (media.getData() instanceof String uri) {
240240
return new ChatModelTypes.MediaContent(mimeType, uri);
241241
} else if (media.getData() instanceof byte[] data) {
242+
ChatModelTypes.checkMediaSize(data);
242243
return new ChatModelTypes.MediaContent(mimeType, data);
243244
}
244245
throw new IllegalArgumentException(

temporal-spring-ai/src/main/java/io/temporal/springai/model/ActivityChatModel.java

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import javax.annotation.Nullable;
1616
import org.springframework.ai.chat.messages.*;
1717
import org.springframework.ai.chat.metadata.ChatResponseMetadata;
18+
import org.springframework.ai.chat.metadata.DefaultUsage;
19+
import org.springframework.ai.chat.metadata.RateLimit;
20+
import org.springframework.ai.chat.metadata.Usage;
1821
import org.springframework.ai.chat.model.ChatModel;
1922
import org.springframework.ai.chat.model.ChatResponse;
2023
import org.springframework.ai.chat.model.Generation;
@@ -401,6 +404,7 @@ private ChatModelTypes.MediaContent toMediaContent(Media media) {
401404
if (media.getData() instanceof String uri) {
402405
return new ChatModelTypes.MediaContent(mimeType, uri);
403406
} else if (media.getData() instanceof byte[] data) {
407+
ChatModelTypes.checkMediaSize(data);
404408
return new ChatModelTypes.MediaContent(mimeType, data);
405409
}
406410
throw new IllegalArgumentException(
@@ -415,11 +419,70 @@ private ChatResponse toResponse(ChatModelTypes.ChatModelActivityOutput output) {
415419

416420
var builder = ChatResponse.builder().generations(generations);
417421
if (output.metadata() != null) {
418-
builder.metadata(ChatResponseMetadata.builder().model(output.metadata().model()).build());
422+
builder.metadata(toResponseMetadata(output.metadata()));
419423
}
420424
return builder.build();
421425
}
422426

427+
private ChatResponseMetadata toResponseMetadata(
428+
ChatModelTypes.ChatModelActivityOutput.ChatResponseMetadata md) {
429+
ChatResponseMetadata.Builder b = ChatResponseMetadata.builder().model(md.model());
430+
Usage usage = toUsage(md.usage());
431+
if (usage != null) {
432+
b.usage(usage);
433+
}
434+
RateLimit rateLimit = toRateLimit(md.rateLimit());
435+
if (rateLimit != null) {
436+
b.rateLimit(rateLimit);
437+
}
438+
return b.build();
439+
}
440+
441+
private Usage toUsage(ChatModelTypes.ChatModelActivityOutput.ChatResponseMetadata.Usage u) {
442+
if (u == null) {
443+
return null;
444+
}
445+
return new DefaultUsage(u.promptTokens(), u.completionTokens(), u.totalTokens());
446+
}
447+
448+
private RateLimit toRateLimit(
449+
ChatModelTypes.ChatModelActivityOutput.ChatResponseMetadata.RateLimit r) {
450+
if (r == null) {
451+
return null;
452+
}
453+
return new RateLimit() {
454+
@Override
455+
public Long getRequestsLimit() {
456+
return r.requestLimit();
457+
}
458+
459+
@Override
460+
public Long getRequestsRemaining() {
461+
return r.requestRemaining();
462+
}
463+
464+
@Override
465+
public java.time.Duration getRequestsReset() {
466+
return r.requestReset();
467+
}
468+
469+
@Override
470+
public Long getTokensLimit() {
471+
return r.tokenLimit();
472+
}
473+
474+
@Override
475+
public Long getTokensRemaining() {
476+
return r.tokenRemaining();
477+
}
478+
479+
@Override
480+
public java.time.Duration getTokensReset() {
481+
return r.tokenReset();
482+
}
483+
};
484+
}
485+
423486
private AssistantMessage toAssistantMessage(ChatModelTypes.Message message) {
424487
List<AssistantMessage.ToolCall> toolCalls = List.of();
425488
if (!CollectionUtils.isEmpty(message.toolCalls())) {

temporal-spring-ai/src/main/java/io/temporal/springai/model/ChatModelTypes.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
55
import com.fasterxml.jackson.annotation.JsonInclude;
66
import com.fasterxml.jackson.annotation.JsonProperty;
7+
import io.temporal.failure.ApplicationFailure;
78
import java.time.Duration;
89
import java.util.List;
910
import javax.annotation.Nullable;
@@ -16,6 +17,50 @@
1617
*/
1718
public final class ChatModelTypes {
1819

20+
/**
21+
* Maximum size, in bytes, of a single {@link MediaContent#data()} byte array carried across the
22+
* chat activity boundary. Bytes above this threshold land inside workflow history events, which
23+
* have a fixed 2 MiB per-event limit on the Temporal server. 1 MiB leaves headroom for the rest
24+
* of a chat payload (messages, tool definitions, options).
25+
*
26+
* <p>Users who want to raise or lower the cap can set the system property {@code
27+
* io.temporal.springai.maxMediaBytes} to a positive integer before the chat activity runs; values
28+
* &lt;= 0 disable the guard entirely. For most workloads, pass media by URI instead — write the
29+
* bytes to a binary store from an activity, and pass only the URL across the conversation.
30+
*/
31+
public static final long MAX_MEDIA_BYTES_IN_HISTORY =
32+
Long.getLong("io.temporal.springai.maxMediaBytes", 1L * 1024 * 1024);
33+
34+
/** Failure type on the {@link ApplicationFailure} thrown by {@link #checkMediaSize(byte[])}. */
35+
public static final String MEDIA_SIZE_EXCEEDED_FAILURE_TYPE = "MediaSizeExceeded";
36+
37+
/**
38+
* Throws a non-retryable {@link ApplicationFailure} if {@code data} exceeds {@link
39+
* #MAX_MEDIA_BYTES_IN_HISTORY}. Non-retryable because this is a permanent, programmer-level error
40+
* — retrying the same oversized payload will never succeed, and using a plain {@link
41+
* RuntimeException} here would cause the workflow task to be retried forever (or the activity to
42+
* churn through its {@code maxAttempts}) rather than surfacing the real problem. The failure
43+
* message points the caller at the URI-based {@code Media} constructor. Pass-through otherwise.
44+
*/
45+
public static void checkMediaSize(byte[] data) {
46+
if (data == null) {
47+
return;
48+
}
49+
long limit = MAX_MEDIA_BYTES_IN_HISTORY;
50+
if (limit > 0 && data.length > limit) {
51+
throw ApplicationFailure.newNonRetryableFailure(
52+
"Media byte[] is "
53+
+ data.length
54+
+ " bytes, which exceeds the "
55+
+ limit
56+
+ "-byte limit for inline media in Temporal workflow history. Pass the media by "
57+
+ "URI instead: store the bytes outside the workflow (e.g. S3) and construct "
58+
+ "Media(mimeType, URI). Set the system property "
59+
+ "'io.temporal.springai.maxMediaBytes' to override this limit (or 0 to disable).",
60+
MEDIA_SIZE_EXCEEDED_FAILURE_TYPE);
61+
}
62+
}
63+
1964
private ChatModelTypes() {}
2065

2166
/**

temporal-spring-ai/src/main/java/io/temporal/springai/plugin/McpPlugin.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import io.temporal.worker.Worker;
77
import java.util.ArrayList;
88
import java.util.List;
9+
import java.util.Map;
910
import javax.annotation.Nonnull;
1011
import org.slf4j.Logger;
1112
import org.slf4j.LoggerFactory;
@@ -39,22 +40,22 @@ public void setApplicationContext(ApplicationContext applicationContext) throws
3940
this.applicationContext = applicationContext;
4041
}
4142

42-
@SuppressWarnings("unchecked")
4343
private List<McpSyncClient> getMcpClients() {
4444
if (!mcpClients.isEmpty()) {
4545
return mcpClients;
4646
}
47+
if (applicationContext == null) {
48+
return mcpClients;
49+
}
4750

48-
if (applicationContext != null && applicationContext.containsBean("mcpSyncClients")) {
49-
try {
50-
Object bean = applicationContext.getBean("mcpSyncClients");
51-
if (bean instanceof List<?> clientList && !clientList.isEmpty()) {
52-
mcpClients = (List<McpSyncClient>) clientList;
53-
log.info("Found {} MCP client(s) in ApplicationContext", mcpClients.size());
54-
}
55-
} catch (Exception e) {
56-
log.debug("Failed to get mcpSyncClients bean: {}", e.getMessage());
51+
try {
52+
Map<String, McpSyncClient> beans = applicationContext.getBeansOfType(McpSyncClient.class);
53+
if (!beans.isEmpty()) {
54+
mcpClients = List.copyOf(beans.values());
55+
log.info("Discovered {} MCP client bean(s): {}", beans.size(), beans.keySet());
5756
}
57+
} catch (Exception e) {
58+
log.debug("Failed to look up McpSyncClient beans: {}", e.getMessage());
5859
}
5960

6061
return mcpClients;

0 commit comments

Comments
 (0)