Skip to content

Add initial support for images in the ai chat #15410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion packages/ai-anthropic/src/node/anthropic-language-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ import {
LanguageModelTextResponse,
TokenUsageService,
TokenUsageParams,
UserRequest
UserRequest,
LLMImageData
} from '@theia/ai-core';
import { CancellationToken, isArray } from '@theia/core';
import { Anthropic } from '@anthropic-ai/sdk';
Expand All @@ -48,6 +49,10 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont
return [{ id: message.id, input: message.input, name: message.name, type: 'tool_use' }];
} else if (LanguageModelMessage.isToolResultMessage(message)) {
return [{ type: 'tool_result', tool_use_id: message.tool_use_id }];
} else if (LanguageModelMessage.isImageMessage(message)) {
if (LLMImageData.isBase64ImageData(message.image)) {
return [{ type: 'image', source: { type: 'base64', media_type: message.image.mediaType, data: message.image.imageData } }];
}
}
throw new Error(`Unknown message type:'${JSON.stringify(message)}'`);
};
Expand Down
53 changes: 53 additions & 0 deletions packages/ai-chat-ui/src/browser/ImagePreview.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// *****************************************************************************
// Copyright (C) 2025 EclipseSource GmbH.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// http://www.eclipse.org/legal/epl-2.0.
//
// This Source Code may also be made available under the following Secondary
// Licenses when the conditions for such availability set forth in the Eclipse
// Public License v. 2.0 are satisfied: GNU General Public License, version 2
// with the GNU Classpath Exception which is available at
// https://www.gnu.org/software/classpath/license.html.
//
// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0
// *****************************************************************************
import { nls } from '@theia/core';
import * as React from '@theia/core/shared/react';

/**
 * An image that was pasted into the chat input and is awaiting submission.
 */
export interface PastedImage {
    /** Identifier used as React key and to address the image when removing it. */
    id: string;
    /** Image content as a base64 string, without the `data:<type>;base64,` prefix. */
    data: string;
    /** Name of the image; used as the alternative text of the preview. */
    name: string;
    /** Media type of the image. */
    type:
        | 'image/jpeg'
        | 'image/png'
        | 'image/gif'
        | 'image/webp';
}

/**
 * Properties accepted by the `ImagePreview` component.
 */
interface ImagePreviewProps {
/** Images to render, one preview item per entry. */
images: PastedImage[];
/** Invoked with the id of the image whose remove action was clicked. */
onRemove: (id: string) => void;
}
/**
 * Renders a preview strip for the images pasted into the chat input.
 *
 * Every image is displayed through a base64 data URL assembled from its media type and data,
 * and comes with a remove control that reports the image id through `onRemove`.
 * Nothing is rendered when `images` is empty.
 *
 * The remove control is exposed as a button: it has the `button` role, is reachable with the
 * Tab key, carries an accessible label, and reacts to Enter and Space in addition to clicks.
 */
export const ImagePreview: React.FC<ImagePreviewProps> = ({ images, onRemove }) => {
    if (images.length === 0) { return undefined; }

    const removeLabel = nls.localizeByDefault('Remove');

    return (
        <div className='theia-ChatInput-ImagePreview'>
            {images.map(img => (
                <div key={img.id} className='theia-ChatInput-ImagePreview-Item'>
                    <img src={`data:${img.type};base64,${img.data}`} alt={img.name} />
                    <div className='theia-ChatInput-ImagePreview-Actions'>
                        <span
                            className='codicon codicon-close action'
                            role='button'
                            tabIndex={0}
                            title={removeLabel}
                            aria-label={removeLabel}
                            onClick={e => {
                                e.stopPropagation();
                                onRemove(img.id);
                            }}
                            onKeyDown={e => {
                                // Mirror native button behavior: Enter and Space activate the control.
                                if (e.key === 'Enter' || e.key === ' ') {
                                    // Prevent Space from scrolling and keep the key away from ancestor handlers.
                                    e.preventDefault();
                                    e.stopPropagation();
                                    onRemove(img.id);
                                }
                            }} />
                    </div>
                </div>
            ))}
        </div>
    );
};
146 changes: 126 additions & 20 deletions packages/ai-chat-ui/src/browser/chat-input-widget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,24 @@ import { IMouseEvent } from '@theia/monaco-editor-core';
import { MonacoEditor } from '@theia/monaco/lib/browser/monaco-editor';
import { MonacoEditorProvider } from '@theia/monaco/lib/browser/monaco-editor-provider';
import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution';
import { AIVariableResolutionRequest } from '@theia/ai-core';
import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core';
import { FrontendVariableService } from '@theia/ai-core/lib/browser';
import { ContextVariablePicker } from './context-variable-picker';
import { ChangeSetActionRenderer, ChangeSetActionService } from './change-set-actions/change-set-action-service';
import { ImagePreview, PastedImage } from './ImagePreview';

type Query = (query: string) => Promise<void>;
type Unpin = () => void;
type Cancel = (requestModel: ChatRequestModel) => void;
type DeleteChangeSet = (requestModel: ChatRequestModel) => void;
type DeleteChangeSetElement = (requestModel: ChatRequestModel, index: number) => void;

// Interface for the payload submitted to the AI
// interface ChatPayload {
// text: string;
// images?: PastedImage[];
// }

export const AIChatInputConfiguration = Symbol('AIChatInputConfiguration');
export interface AIChatInputConfiguration {
showContext?: boolean;
Expand Down Expand Up @@ -114,13 +121,54 @@ export class AIChatInputWidget extends ReactWidget {
this.update();
}

// Images pasted into the input; handed to the rendered chat input for preview.
// The array is replaced with a new one whenever an image is added or removed.
private _pastedImages: PastedImage[] = [];
/** Returns the current list of pasted images. */
public get pastedImages(): PastedImage[] {
return this._pastedImages;
}

/**
 * Initialization hook marked with `@postConstruct()`: assigns the widget id,
 * marks the title as not closable and calls `update()`.
 */
@postConstruct()
protected init(): void {
this.id = AIChatInputWidget.ID;
this.title.closable = false;
this.update();
}

/**
 * Reads an image file taken from the clipboard and, once it is loaded, appends it to the
 * pasted images and updates the widget.
 *
 * Files whose media type is not one of the types allowed by `PastedImage['type']` are ignored
 * (with a console warning) instead of being stored under a type they do not have.
 *
 * @param blob the image file to read
 */
private processImageFromClipboard(blob: File): void {
    // Validate the media type at runtime rather than force-casting it.
    const supportedTypes: ReadonlyArray<PastedImage['type']> = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'];
    const mediaType = supportedTypes.find(type => type === blob.type);
    if (!mediaType) {
        console.warn(`Ignoring pasted image with unsupported media type '${blob.type}'.`);
        return;
    }

    const reader = new FileReader();
    reader.onload = () => {
        const dataUrl = reader.result;
        // `readAsDataURL` produces a string; bail out on anything else or on an empty result.
        if (typeof dataUrl !== 'string' || dataUrl.length === 0) { return; }

        // Extract the base64 data by removing the data URL prefix.
        // Format is like: data:image/png;base64,BASE64DATA
        const imageData = dataUrl.substring(dataUrl.indexOf(',') + 1);

        // A timestamp alone can repeat when two images finish loading within the same millisecond,
        // so a random suffix is appended to keep ids (React keys, removal handles) distinct.
        const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;

        const newImage: PastedImage = {
            id: `img-${uniqueSuffix}`,
            data: imageData, // Store just the base64 data without the prefix
            // Fall back to a generated name whose extension matches the actual media type.
            name: blob.name || `pasted-image-${uniqueSuffix}.${mediaType.substring('image/'.length)}`,
            type: mediaType
        };

        // Replace the array instead of mutating it.
        this._pastedImages = [...this._pastedImages, newImage];

        this.update();
    };
    reader.onerror = () => {
        console.error('Failed to read pasted image.', reader.error);
    };

    reader.readAsDataURL(blob);
}

/**
 * Discards the pasted image carrying the given id and calls `update()`.
 * An id that matches no image leaves the list contents as they were.
 *
 * @param id identifier of the image to discard
 */
public removeImage(id: string): void {
    const remaining = this._pastedImages.filter(({ id: candidateId }) => candidateId !== id);
    this._pastedImages = remaining;
    this.update();
}

protected override onActivateRequest(msg: Message): void {
super.onActivateRequest(msg);
this.editorReady.promise.then(() => {
Expand Down Expand Up @@ -157,6 +205,9 @@ export class AIChatInputWidget extends ReactWidget {
showPinnedAgent={this.configuration?.showPinnedAgent}
labelProvider={this.labelProvider}
actionService={this.changeSetActionService}
pastedImages={this._pastedImages}
onRemoveImage={this.removeImage.bind(this)}
onImagePasted={this.processImageFromClipboard.bind(this)}
/>
);
}
Expand Down Expand Up @@ -229,7 +280,7 @@ export class AIChatInputWidget extends ReactWidget {

interface ChatInputProperties {
onCancel: (requestModel: ChatRequestModel) => void;
onQuery: (query: string) => void;
onQuery: (query?: string, images?: LLMImageData[]) => void;
onUnpin: () => void;
onDragOver: (event: React.DragEvent) => void;
onDrop: (event: React.DragEvent) => void;
Expand All @@ -249,6 +300,9 @@ interface ChatInputProperties {
showPinnedAgent?: boolean;
labelProvider: LabelProvider;
actionService: ChangeSetActionService;
pastedImages: PastedImage[];
onRemoveImage: (id: string) => void;
onImagePasted: (blob: File) => void;
}

const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInputProperties) => {
Expand All @@ -274,6 +328,38 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
// eslint-disable-next-line no-null/no-null
const placeholderRef = React.useRef<HTMLDivElement | null>(null);
const editorRef = React.useRef<MonacoEditor | undefined>(undefined);
// eslint-disable-next-line no-null/no-null
const containerRef = React.useRef<HTMLDivElement>(null);

// Handle paste events on the container
const handlePaste = React.useCallback((e: ClipboardEvent) => {
if (!e.clipboardData?.items) { return; }

for (const item of e.clipboardData.items) {
if (item.type.startsWith('image/')) {
const blob = item.getAsFile();
if (blob) {
e.preventDefault();
e.stopPropagation();
props.onImagePasted(blob);
break;
}
}
}
}, [props.onImagePasted]);

// Set up paste handler on the container div
React.useEffect(() => {
const container = containerRef.current;
if (container) {
container.addEventListener('paste', handlePaste, true);

return () => {
container.removeEventListener('paste', handlePaste, true);
};
}
return undefined;
}, [handlePaste]);

React.useEffect(() => {
const uri = new URI(`ai-chat:/input.${CHAT_VIEW_LANGUAGE_EXTENSION}`);
Expand Down Expand Up @@ -397,7 +483,7 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
responseListenerRef.current?.dispose();
responseListenerRef.current = undefined;
};
}, [props.chatModel]);
}, [props.chatModel, props.actionService, props.labelProvider]);

React.useEffect(() => {
const disposable = props.actionService.onDidChange(() => {
Expand All @@ -406,18 +492,28 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
setChangeSetUI(current => !current ? current : { ...current, actions: newActions });
});
return () => disposable.dispose();
});
}, [props.actionService, props.chatModel.changeSet]);

// // Extract image references from text
// const extractImageReferences = (text: string): string[] => {
// const regex = /!\[.*?\]\((img-\d+)\)/g;
// const matches = [...text.matchAll(regex)];
// return matches.map(match => match[1]);
// };

const submit = React.useCallback(function submit(value: string): void {
if (!value || value.trim().length === 0) {
if ((!value || value.trim().length === 0) && props.pastedImages.length === 0) {
return;
}

setInProgress(true);
props.onQuery(value);
props.onQuery(value, props.pastedImages.map(p => ({ imageData: p.data, mediaType: p.type })));

if (editorRef.current) {
editorRef.current.document.textEditorModel.setValue('');
}
}, [props.context, props.onQuery, editorRef]);
}// Clear pasted images after submission
props.pastedImages.forEach(image => props.onRemoveImage(image.id));
}, [props.onQuery, props.pastedImages]);

const onKeyDown = React.useCallback((event: React.KeyboardEvent) => {
if (!props.isEnabled) {
Expand Down Expand Up @@ -517,20 +613,30 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu

const contextUI = buildContextUI(props.context, props.labelProvider, props.onDeleteContextElement);

return <div className='theia-ChatInput' onDragOver={props.onDragOver} onDrop={props.onDrop} >
{changeSetUI?.elements &&
<ChangeSetBox changeSet={changeSetUI} />
}
<div className='theia-ChatInput-Editor-Box'>
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
</div>
{props.context && props.context.length > 0 &&
<ChatContext context={contextUI.context} />
return (
<div
className='theia-ChatInput'
onDragOver={props.onDragOver}
onDrop={props.onDrop}
ref={containerRef}
>
{changeSetUI?.elements &&
<ChangeSetBox changeSet={changeSetUI} />
}
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
<div className='theia-ChatInput-Editor-Box'>
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
</div>
{props.pastedImages.length > 0 &&
<ImagePreview images={props.pastedImages} onRemove={props.onRemoveImage} />
}
{props.context && props.context.length > 0 &&
<ChatContext context={contextUI.context} />
}
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
</div>
</div>
</div>;
);
};

const noPropagation = (handler: () => void) => (e: React.MouseEvent) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ import { nls } from '@theia/core/lib/common/nls';
import { ChatNodeToolbarActionContribution } from '../chat-node-toolbar-action-contribution';
import { ChatResponsePartRenderer } from '../chat-response-part-renderer';
import { useMarkdownRendering } from '../chat-response-renderer/markdown-part-renderer';
import { AIVariableService } from '@theia/ai-core';
import { AIVariableService, LLMImageData } from '@theia/ai-core';
import { ProgressMessage } from '../chat-progress-message';

// TODO Instead of directly operating on the ChatRequestModel we could use an intermediate view model
Expand Down Expand Up @@ -410,6 +410,8 @@ const ChatRequestRender = (
openerService: OpenerService
}) => {
const parts = node.request.message.parts;
const images = node.request.images || [];

return (
<div className="theia-RequestNode">
<p>
Expand Down Expand Up @@ -442,6 +444,20 @@ const ChatRequestRender = (
}
})}
</p>
{images.length > 0 && (
<div className="theia-RequestNode-Images">
{images.map((img, index) => (
<div key={`img-${index}`} className="theia-RequestNode-ImageContainer">
{LLMImageData.isBase64ImageData(img) ?
<img
src={`data:${img.mediaType};base64,${img.imageData}`}
alt={`Image ${index + 1}`}
className="theia-RequestNode-Image"
/> : undefined}
</div>
))}
</div>
)}
</div>
);
};
Expand Down
2 changes: 1 addition & 1 deletion packages/ai-chat-ui/src/browser/chat-view-contribution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ export class ChatViewMenuContribution implements MenuContribution, CommandContri

protected getCopyText(arg: RequestNode | ResponseNode): string {
if (isRequestNode(arg)) {
return arg.request.request.text;
return arg.request.request.text ?? '';
} else if (isResponseNode(arg)) {
return arg.response.response.asDisplayString();
}
Expand Down
8 changes: 4 additions & 4 deletions packages/ai-chat-ui/src/browser/chat-view-widget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { inject, injectable, postConstruct } from '@theia/core/shared/inversify'
import { AIChatInputWidget } from './chat-input-widget';
import { ChatViewTreeWidget } from './chat-tree-view/chat-view-tree-widget';
import { AIActivationService } from '@theia/ai-core/lib/browser/ai-activation-service';
import { AIVariableResolutionRequest } from '@theia/ai-core';
import { AIVariableResolutionRequest, LLMImageData } from '@theia/ai-core';

export namespace ChatViewWidget {
export interface State {
Expand Down Expand Up @@ -164,10 +164,10 @@ export class ChatViewWidget extends BaseWidget implements ExtractableWidget, Sta
return this.onStateChangedEmitter.event;
}

protected async onQuery(query: string): Promise<void> {
if (query.length === 0) { return; }
protected async onQuery(query?: string, imageData?: LLMImageData[]): Promise<void> {
if ((!query || query.length === 0) && (!imageData || imageData.length === 0)) { return; }

const chatRequest: ChatRequest = { text: query };
const chatRequest: ChatRequest = { text: query, images: imageData };
const requestProgress = await this.chatService.sendRequest(this.chatSession.id, chatRequest);
requestProgress?.responseCompleted.then(responseModel => {
if (responseModel.isError) {
Expand Down
Loading
Loading