Skip to content

Commit cf8091d

Browse files
authored
feat(chat): improve media handling and caching for user messages (#81)
1 parent 0518038 commit cf8091d

File tree

7 files changed

+280
-36
lines changed

7 files changed

+280
-36
lines changed

electron/gateway/manager.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ export class GatewayManager extends EventEmitter {
656656
/**
657657
* Wait for Gateway to be ready by checking if the port is accepting connections
658658
*/
659-
private async waitForReady(retries = 120, interval = 1000): Promise<void> {
659+
private async waitForReady(retries = 600, interval = 1000): Promise<void> {
660660
for (let i = 0; i < retries; i++) {
661661
// Early exit if the gateway process has already exited
662662
if (this.process && (this.process.exitCode !== null || this.process.signalCode !== null)) {

electron/main/ipc-handlers.ts

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -441,27 +441,38 @@ function registerGatewayHandlers(
441441
}) => {
442442
try {
443443
let message = params.message;
444-
const imageAttachments: Array<{ type: string; mimeType: string; fileName: string; content: string }> = [];
444+
// The Gateway processes image attachments through TWO parallel paths:
445+
// Path A: `attachments` param → parsed via `parseMessageWithAttachments` →
446+
// injected as inline vision content when the model supports images.
447+
// Format: { content: base64, mimeType: string, fileName?: string }
448+
// Path B: `[media attached: ...]` in message text → Gateway's native image
449+
// detection (`detectAndLoadPromptImages`) reads the file from disk and
450+
// injects it as inline vision content. Also works for history messages.
451+
// We use BOTH paths for maximum reliability.
452+
const imageAttachments: Array<Record<string, unknown>> = [];
445453
const fileReferences: string[] = [];
446454

447455
if (params.media && params.media.length > 0) {
448456
for (const m of params.media) {
449457
logger.info(`[chat:sendWithMedia] Processing file: ${m.fileName} (${m.mimeType}), path: ${m.filePath}, exists: ${existsSync(m.filePath)}, isVision: ${VISION_MIME_TYPES.has(m.mimeType)}`);
458+
459+
// Always add file path reference so the model can access it via tools
460+
fileReferences.push(
461+
`[media attached: ${m.filePath} (${m.mimeType}) | ${m.filePath}]`,
462+
);
463+
450464
if (VISION_MIME_TYPES.has(m.mimeType)) {
451-
// Raster image — inline as base64 vision attachment
465+
// Send as base64 attachment in the format the Gateway expects:
466+
// { content: base64String, mimeType: string, fileName?: string }
467+
// The Gateway normalizer looks for `a.content` (NOT `a.source.data`).
452468
const fileBuffer = readFileSync(m.filePath);
453-
logger.info(`[chat:sendWithMedia] Read ${fileBuffer.length} bytes, base64 length: ${fileBuffer.toString('base64').length}`);
469+
const base64Data = fileBuffer.toString('base64');
470+
logger.info(`[chat:sendWithMedia] Read ${fileBuffer.length} bytes, base64 length: ${base64Data.length}`);
454471
imageAttachments.push({
455-
type: 'image',
472+
content: base64Data,
456473
mimeType: m.mimeType,
457474
fileName: m.fileName,
458-
content: fileBuffer.toString('base64'),
459475
});
460-
} else {
461-
// Non-vision file — reference by path (same format as channel inbound media)
462-
fileReferences.push(
463-
`[media attached: ${m.filePath} (${m.mimeType}) | ${m.filePath}]`,
464-
);
465476
}
466477
}
467478
}
@@ -483,9 +494,9 @@ function registerGatewayHandlers(
483494
rpcParams.attachments = imageAttachments;
484495
}
485496

486-
logger.info(`[chat:sendWithMedia] Sending: message="${message.substring(0, 100)}", imageAttachments=${imageAttachments.length}, fileRefs=${fileReferences.length}`);
497+
logger.info(`[chat:sendWithMedia] Sending: message="${message.substring(0, 100)}", attachments=${imageAttachments.length}, fileRefs=${fileReferences.length}`);
487498

488-
// Use a longer timeout when attachments are present (120s vs default 30s)
499+
// Use a longer timeout when images are present (120s vs default 30s)
489500
const timeoutMs = imageAttachments.length > 0 ? 120000 : 30000;
490501
const result = await gatewayManager.rpc('chat.send', rpcParams, timeoutMs);
491502
logger.info(`[chat:sendWithMedia] RPC result: ${JSON.stringify(result)}`);
@@ -1557,4 +1568,26 @@ function registerFileHandlers(): void {
15571568

15581569
return { id, fileName: payload.fileName, mimeType, fileSize, stagedPath, preview };
15591570
});
1571+
1572+
// Load thumbnails for file paths on disk (used to restore previews in history).
//
// Resolves to a map keyed by each requested `filePath`:
//   - `preview`:  result of `generateImagePreview` for `image/*` MIME types, otherwise null
//   - `fileSize`: size reported by `statSync`, or 0 when the file is missing or unreadable
//
// The handler is best-effort: a failure on one entry never rejects the whole call.
ipcMain.handle('media:getThumbnails', async (_, paths: Array<{ filePath: string; mimeType: string }>) => {
  const results: Record<string, { preview: string | null; fileSize: number }> = {};

  // The payload arrives over IPC, so the annotation above is not enforced at runtime.
  if (!Array.isArray(paths)) {
    return results;
  }

  for (const entry of paths) {
    const filePath: unknown = entry?.filePath;
    if (typeof filePath !== 'string') {
      // Without a usable key there is nothing to report for this entry.
      continue;
    }

    try {
      if (!existsSync(filePath)) {
        results[filePath] = { preview: null, fileSize: 0 };
        continue;
      }

      const stat = statSync(filePath);
      const mimeType: unknown = entry.mimeType;
      const preview: string | null =
        typeof mimeType === 'string' && mimeType.startsWith('image/')
          ? generateImagePreview(filePath, mimeType)
          : null;

      results[filePath] = { preview, fileSize: stat.size };
    } catch (error: unknown) {
      // Keep the best-effort contract, but leave a trace instead of failing silently.
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn(`[media:getThumbnails] Failed to load thumbnail for ${filePath}: ${reason}`);
      results[filePath] = { preview: null, fileSize: 0 };
    }
  }

  return results;
});
15601593
}

electron/preload/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,10 @@ const electronAPI = {
115115
'log:getFilePath',
116116
'log:getDir',
117117
'log:listFiles',
118-
// File staging
118+
// File staging & media
119119
'file:stage',
120120
'file:stageBuffer',
121+
'media:getThumbnails',
121122
// Chat send with media (reads staged files in main process)
122123
'chat:sendWithMedia',
123124
// OpenClaw extras

src/pages/Chat/ChatMessage.tsx

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,35 +105,41 @@ export const ChatMessage = memo(function ChatMessage({
105105
/>
106106
)}
107107

108-
{/* Images (from assistant/channel content blocks) */}
108+
{/* Images from content blocks (Gateway session data — persists across history reloads) */}
109109
{images.length > 0 && (
110110
<div className="flex flex-wrap gap-2">
111111
{images.map((img, i) => (
112112
<img
113-
key={i}
113+
key={`content-${i}`}
114114
src={`data:${img.mimeType};base64,${img.data}`}
115115
alt="attachment"
116-
className="max-w-xs rounded-lg border"
116+
className={cn(
117+
'rounded-lg border',
118+
isUser ? 'max-w-[200px] max-h-48' : 'max-w-xs',
119+
)}
117120
/>
118121
))}
119122
</div>
120123
)}
121124

122-
{/* File attachments (user-uploaded files) */}
125+
{/* File attachments (local preview — shown before history reload) */}
126+
{/* Only show _attachedFiles images if no content-block images (avoid duplicates) */}
123127
{attachedFiles.length > 0 && (
124128
<div className="flex flex-wrap gap-2">
125-
{attachedFiles.map((file, i) => (
126-
file.mimeType.startsWith('image/') && file.preview ? (
129+
{attachedFiles.map((file, i) => {
130+
// Skip image attachments if we already have images from content blocks
131+
if (file.mimeType.startsWith('image/') && file.preview && images.length > 0) return null;
132+
return file.mimeType.startsWith('image/') && file.preview ? (
127133
<img
128-
key={i}
134+
key={`local-${i}`}
129135
src={file.preview}
130136
alt={file.fileName}
131-
className="max-w-xs max-h-48 rounded-lg border"
137+
className="max-w-[200px] max-h-48 rounded-lg border"
132138
/>
133139
) : (
134-
<FileCard key={i} file={file} />
135-
)
136-
))}
140+
<FileCard key={`local-${i}`} file={file} />
141+
);
142+
})}
137143
</div>
138144
)}
139145
</div>

src/pages/Chat/message-utils.ts

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,38 @@
55
*/
66
import type { RawMessage, ContentBlock } from '@/stores/chat';
77

8+
/**
 * Clean Gateway metadata from user message text for display.
 *
 * Strips, in this order:
 *  1. `[media attached: ...]` references (with any whitespace before them),
 *  2. `[message_id: ...]` tags (with any whitespace before them),
 *  3. a leading timestamp prefix such as `[Fri 2026-02-13 22:39 GMT+8]`,
 * and finally trims the result.
 *
 * @param text Raw user message text.
 * @returns The text with the metadata above removed.
 */
function cleanUserText(text: string): string {
  return text
    // Remove [media attached: path (mime) | path] references
    .replace(/\s*\[media attached:[^\]]*\]/g, '')
    // Remove [message_id: uuid]
    .replace(/\s*\[message_id:\s*[^\]]+\]/g, '')
    // Remove the timestamp prefix. Leading whitespace is tolerated because the
    // removals above can leave a newline in front of it (e.g. when a media tag
    // preceded the timestamp), which would otherwise defeat the `^` anchor.
    .replace(/^\s*\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+[^\]]+\]\s*/i, '')
    .trim();
}
23+
824
/**
925
* Extract displayable text from a message's content field.
1026
* Handles both string content and array-of-blocks content.
27+
* For user messages, strips Gateway-injected metadata.
1128
*/
1229
export function extractText(message: RawMessage | unknown): string {
1330
if (!message || typeof message !== 'object') return '';
1431
const msg = message as Record<string, unknown>;
1532
const content = msg.content;
33+
const isUser = msg.role === 'user';
1634

17-
if (typeof content === 'string') {
18-
return content.trim().length > 0 ? content : '';
19-
}
35+
let result = '';
2036

21-
if (Array.isArray(content)) {
37+
if (typeof content === 'string') {
38+
result = content.trim().length > 0 ? content : '';
39+
} else if (Array.isArray(content)) {
2240
const parts: string[] = [];
2341
for (const block of content as ContentBlock[]) {
2442
if (block.type === 'text' && block.text) {
@@ -28,15 +46,18 @@ export function extractText(message: RawMessage | unknown): string {
2846
}
2947
}
3048
const combined = parts.join('\n\n');
31-
return combined.trim().length > 0 ? combined : '';
49+
result = combined.trim().length > 0 ? combined : '';
50+
} else if (typeof msg.text === 'string') {
51+
// Fallback: try .text field
52+
result = msg.text.trim().length > 0 ? msg.text : '';
3253
}
3354

34-
// Fallback: try .text field
35-
if (typeof msg.text === 'string') {
36-
return msg.text.trim().length > 0 ? msg.text : '';
55+
// Strip Gateway metadata from user messages for clean display
56+
if (isUser && result) {
57+
result = cleanUserText(result);
3758
}
3859

39-
return '';
60+
return result;
4061
}
4162

4263
/**
@@ -64,6 +85,35 @@ export function extractThinking(message: RawMessage | unknown): string | null {
6485
return combined.length > 0 ? combined : null;
6586
}
6687

88+
/**
 * Extract media file references from Gateway-formatted user message text.
 *
 * Scans the message text for `[media attached: <path> (<mime>) | <anything>]`
 * patterns. String content is scanned as-is; for array content, the `text` of
 * every `type: 'text'` block is joined with newlines first.
 *
 * @param message A raw message; anything that is not an object with
 *   `role === 'user'` yields an empty array.
 * @returns One `{ filePath, mimeType }` entry per reference, in order of appearance.
 */
export function extractMediaRefs(message: RawMessage | unknown): Array<{ filePath: string; mimeType: string }> {
  if (!message || typeof message !== 'object') return [];
  const msg = message as Record<string, unknown>;
  if (msg.role !== 'user') return [];
  const content = msg.content;

  let text = '';
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    const parts: string[] = [];
    for (const block of content as unknown[]) {
      // Skip malformed blocks (null, primitives) rather than throwing on them.
      if (!block || typeof block !== 'object') continue;
      const { type, text: blockText } = block as { type?: unknown; text?: unknown };
      if (type === 'text' && typeof blockText === 'string' && blockText) {
        parts.push(blockText);
      }
    }
    text = parts.join('\n');
  }

  const refs: Array<{ filePath: string; mimeType: string }> = [];
  // The path is captured lazily up to the LAST "(mime) |" before the closing
  // bracket, so paths containing spaces or parentheses
  // (e.g. "/home/me/My Files/shot (1).png") are captured whole.
  const regex = /\[media attached:\s*([^\]]+?)\s*\(([^()\]]+)\)\s*\|[^\]]*\]/g;
  let match: RegExpExecArray | null;
  while ((match = regex.exec(text)) !== null) {
    refs.push({ filePath: match[1], mimeType: match[2].trim() });
  }
  return refs;
}
116+
67117
/**
68118
* Extract image attachments from a message.
69119
* Returns array of { mimeType, data } for base64 images.

src/pages/Setup/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ function RuntimeContent({ onStatusChange }: RuntimeContentProps) {
519519
}
520520
return prev;
521521
});
522-
}, 120 * 1000); // 120 seconds — enough for gateway to fully initialize
522+
}, 600 * 1000); // 600 seconds — enough for gateway to fully initialize
523523

524524
return () => {
525525
if (gatewayTimeoutRef.current) {

0 commit comments

Comments
 (0)