Skip to content

Add initial support for images in the ai chat #15410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions packages/ai-anthropic/src/node/anthropic-language-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ import {
LanguageModelTextResponse,
TokenUsageService,
TokenUsageParams,
UserRequest
UserRequest,
ImageContent,
ImageMimeType
} from '@theia/ai-core';
import { CancellationToken, isArray } from '@theia/core';
import { Anthropic } from '@anthropic-ai/sdk';
import type { Message, MessageParam } from '@anthropic-ai/sdk/resources';
import { Message, MessageParam, Base64ImageSource } from '@anthropic-ai/sdk/resources';

export const DEFAULT_MAX_TOKENS = 4096;

Expand All @@ -48,10 +50,31 @@ const createMessageContent = (message: LanguageModelMessage): MessageParam['cont
return [{ id: message.id, input: message.input, name: message.name, type: 'tool_use' }];
} else if (LanguageModelMessage.isToolResultMessage(message)) {
return [{ type: 'tool_result', tool_use_id: message.tool_use_id }];
} else if (LanguageModelMessage.isImageMessage(message)) {
if (ImageContent.isBase64(message.image)) {
return [{ type: 'image', source: { type: 'base64', media_type: mimeTypeToMediaType(message.image.mimeType), data: message.image.base64data } }];
} else {
return [{ type: 'image', source: { type: 'url', url: message.image.url } }];
}
}
throw new Error(`Unknown message type:'${JSON.stringify(message)}'`);
};

/**
 * Maps an image MIME type to the `media_type` literal used for a base64 image source.
 *
 * GIF, JPEG, PNG and WebP are passed through unchanged. Every other value
 * falls back to `'image/jpeg'`.
 *
 * @param mimeType the MIME type declared for the image content
 * @returns the matching media type, or `'image/jpeg'` when there is no exact match
 */
function mimeTypeToMediaType(mimeType: ImageMimeType): Base64ImageSource['media_type'] {
    // An array lookup (rather than an object map) keeps the comparison a strict
    // string equality, so inherited property names can never match.
    const passThrough: ReadonlyArray<Base64ImageSource['media_type']> = [
        'image/gif',
        'image/jpeg',
        'image/png',
        'image/webp'
    ];
    const matched = passThrough.find(candidate => candidate === mimeType);
    return matched ?? 'image/jpeg';
}

type NonThinkingParam = Exclude<Anthropic.Messages.ContentBlockParam, Anthropic.Messages.ThinkingBlockParam | Anthropic.Messages.RedactedThinkingBlockParam>;
function isNonThinkingParam(
content: Anthropic.Messages.ContentBlockParam
Expand Down
150 changes: 115 additions & 35 deletions packages/ai-chat-ui/src/browser/chat-input-widget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,25 @@
// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0
// *****************************************************************************
import {
ChangeSet, ChangeSetElement, ChatAgent, ChatChangeEvent, ChatModel, ChatRequestModel,
ChatService, ChatSuggestion, EditableChatRequestModel, ChatHierarchyBranch
ChangeSet, ChangeSetElement, ChatAgent, ChatChangeEvent, ChatHierarchyBranch,
ChatModel, ChatRequestModel, ChatService, ChatSuggestion, EditableChatRequestModel
} from '@theia/ai-chat';
import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service';
import { ImageContextVariable } from '@theia/ai-chat/lib/common/image-context-variable';
import { AIVariableResolutionRequest } from '@theia/ai-core';
import { FrontendVariableService } from '@theia/ai-core/lib/browser';
import { DisposableCollection, InMemoryResources, URI, nls } from '@theia/core';
import { ContextMenuRenderer, LabelProvider, Message, OpenerService, ReactWidget } from '@theia/core/lib/browser';
import { Deferred } from '@theia/core/lib/common/promise-util';
import { inject, injectable, optional, postConstruct } from '@theia/core/shared/inversify';
import * as React from '@theia/core/shared/react';
import { IMouseEvent } from '@theia/monaco-editor-core';
import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor';
import { MonacoEditorProvider } from '@theia/monaco/lib/browser/monaco-editor-provider';
import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution';
import { AIVariableResolutionRequest } from '@theia/ai-core';
import { FrontendVariableService } from '@theia/ai-core/lib/browser';
import { ContextVariablePicker } from './context-variable-picker';
import { SimpleMonacoEditor } from '@theia/monaco/lib/browser/simple-monaco-editor';
import { ChangeSetActionRenderer, ChangeSetActionService } from './change-set-actions/change-set-action-service';
import { ChangeSetDecoratorService } from '@theia/ai-chat/lib/browser/change-set-decorator-service';
import { ChatInputAgentSuggestions } from './chat-input-agent-suggestions';
import { CHAT_VIEW_LANGUAGE_EXTENSION } from './chat-view-language-contribution';
import { ContextVariablePicker } from './context-variable-picker';

type Query = (query: string) => Promise<void>;
type Unpin = () => void;
Expand Down Expand Up @@ -185,6 +186,7 @@ export class AIChatInputWidget extends ReactWidget {
onCancel={this._onCancel.bind(this)}
onDragOver={this.onDragOver.bind(this)}
onDrop={this.onDrop.bind(this)}
onPaste={this.onPaste.bind(this)}
onDeleteChangeSet={this._onDeleteChangeSet.bind(this)}
onDeleteChangeSetElement={this._onDeleteChangeSetElement.bind(this)}
onAddContextElement={this.addContextElement.bind(this)}
Expand Down Expand Up @@ -257,6 +259,30 @@ export class AIChatInputWidget extends ReactWidget {
});
}

/**
 * Handles a paste into the chat input.
 *
 * The native paste is suppressed and the event is passed to the variable
 * service. Every variable in the result is added to the chat context, and
 * any text in the result is inserted into the editor.
 *
 * @param event the clipboard event raised on the chat input container
 */
protected onPaste(event: ClipboardEvent): void {
    event.preventDefault();
    event.stopPropagation();

    this.variableService.getPasteResult(event, { type: 'ai-chat-input-widget' }).then(result => {
        result.variables.forEach(variable => this.addContext(variable));

        if (!result.text) {
            return;
        }
        const control = this.editorRef?.getControl();
        if (!control) {
            return;
        }
        // The default paste was prevented above, so the editor will not replace an
        // active selection by itself: target the selection, falling back to the caret.
        const position = control.getPosition();
        const range = control.getSelection() ?? (position ? {
            startLineNumber: position.lineNumber,
            startColumn: position.column,
            endLineNumber: position.lineNumber,
            endColumn: position.column
        } : undefined);
        if (range) {
            control.executeEdits('paste', [{ range, text: result.text }]);
        }
    }).catch(error => {
        // Without a handler a failing paste resolution is an unhandled rejection
        // and the paste silently does nothing.
        console.error('Failed to process paste in the chat input', error);
    });
}

protected async openContextElement(request: AIVariableResolutionRequest): Promise<void> {
const session = this.chatService.getSessions().find(candidate => candidate.model.id === this._chatModel.id);
const context = { session };
Expand Down Expand Up @@ -306,6 +332,7 @@ interface ChatInputProperties {
onUnpin: () => void;
onDragOver: (event: React.DragEvent) => void;
onDrop: (event: React.DragEvent) => void;
onPaste: (event: ClipboardEvent) => void;
onDeleteChangeSet: (sessionId: string) => void;
onDeleteChangeSetElement: (sessionId: string, uri: URI) => void;
onAddContextElement: () => void;
Expand Down Expand Up @@ -355,6 +382,25 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
// eslint-disable-next-line no-null/no-null
const placeholderRef = React.useRef<HTMLDivElement | null>(null);
const editorRef = React.useRef<SimpleMonacoEditor | undefined>(undefined);
// eslint-disable-next-line no-null/no-null
const containerRef = React.useRef<HTMLDivElement>(null);

// Handle paste events on the container
const handlePaste = React.useCallback((event: ClipboardEvent) => {
props.onPaste(event);
}, [props.onPaste]);

// Set up paste handler on the container div
React.useEffect(() => {
const container = containerRef.current;
if (container) {
container.addEventListener('paste', handlePaste, true);
return () => {
container.removeEventListener('paste', handlePaste, true);
};
}
return undefined;
}, [handlePaste]);

React.useEffect(() => {
const uri = props.uri;
Expand Down Expand Up @@ -472,7 +518,7 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
setChangeSetUI(current => !current ? current : { ...current, actions: newActions });
});
return () => disposable.dispose();
});
}, [props.actionService, props.chatModel.changeSet]);

React.useEffect(() => {
const disposable = props.decoratorService.onDidChangeDecorations(() => {
Expand All @@ -498,8 +544,13 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu
if (!value || value.trim().length === 0) {
return;
}

props.onQuery(value);
setValue('');

if (editorRef.current) {
editorRef.current.document.textEditorModel.setValue('');
}
}, [props.context, props.onQuery, setValue]);

const onKeyDown = React.useCallback((event: React.KeyboardEvent) => {
Expand Down Expand Up @@ -620,21 +671,23 @@ const ChatInput: React.FunctionComponent<ChatInputProperties> = (props: ChatInpu

const contextUI = buildContextUI(props.context, props.labelProvider, props.onDeleteContextElement, props.onOpenContextElement);

return <div className='theia-ChatInput' onDragOver={props.onDragOver} onDrop={props.onDrop} >
{props.showSuggestions !== false && <ChatInputAgentSuggestions suggestions={props.suggestions} opener={props.openerService} />}
{props.showChangeSet && changeSetUI?.elements &&
<ChangeSetBox changeSet={changeSetUI} />
}
<div className='theia-ChatInput-Editor-Box'>
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
</div>
{props.context && props.context.length > 0 &&
<ChatContext context={contextUI.context} />
return (
<div className='theia-ChatInput' onDragOver={props.onDragOver} onDrop={props.onDrop} ref={containerRef}>
{props.showSuggestions !== false && <ChatInputAgentSuggestions suggestions={props.suggestions} opener={props.openerService} />}
{props.showChangeSet && changeSetUI?.elements &&
<ChangeSetBox changeSet={changeSetUI} />
}
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
<div className='theia-ChatInput-Editor-Box'>
<div className='theia-ChatInput-Editor' ref={editorContainerRef} onKeyDown={onKeyDown} onFocus={handleInputFocus} onBlur={handleInputBlur}>
<div ref={placeholderRef} className='theia-ChatInput-Editor-Placeholder'>{nls.localizeByDefault('Ask a question')}</div>
</div>
{props.context && props.context.length > 0 &&
<ChatContext context={contextUI.context} />
}
<ChatInputOptions leftOptions={leftOptions} rightOptions={rightOptions} />
</div>
</div>
</div>;
);
};

const noPropagation = (handler: () => void) => (e: React.MouseEvent) => {
Expand Down Expand Up @@ -816,6 +869,7 @@ function buildContextUI(
}
return {
context: context.map((element, index) => ({
variable: element,
name: labelProvider.getName(element),
iconClass: labelProvider.getIcon(element),
nameClass: element.variable.name,
Expand All @@ -829,6 +883,7 @@ function buildContextUI(

interface ChatContextUI {
context: {
variable: AIVariableResolutionRequest,
name: string;
iconClass: string;
nameClass: string;
Expand All @@ -842,20 +897,45 @@ interface ChatContextUI {
const ChatContext: React.FunctionComponent<ChatContextUI> = ({ context }) => (
<div className="theia-ChatInput-ChatContext">
<ul>
{context.map((element, index) => (
<li key={index} className="theia-ChatInput-ChatContext-Element" title={element.details} onClick={() => element.open?.()}>
<div className={`theia-ChatInput-ChatContext-Icon ${element.iconClass}`} />
<div className="theia-ChatInput-ChatContext-labelParts">
<span className={`theia-ChatInput-ChatContext-title ${element.nameClass}`}>
{element.name}
</span>
<span className='theia-ChatInput-ChatContext-additionalInfo'>
{element.additionalInfo}
</span>
{context.map((element, index) => {
if (ImageContextVariable.isImageContextRequest(element.variable)) {
const variable = ImageContextVariable.parseRequest(element.variable)!;
return <li key={index} className="theia-ChatInput-ChatContext-Element theia-ChatInput-ImageContext-Element"
title={variable.name ?? variable.wsRelativePath} onClick={() => element.open?.()}>
<div className="theia-ChatInput-ChatContext-Row">
<div className={`theia-ChatInput-ChatContext-Icon ${element.iconClass}`} />
<div className="theia-ChatInput-ChatContext-labelParts">
<span className={`theia-ChatInput-ChatContext-title ${element.nameClass}`}>
{variable.name ?? variable.wsRelativePath?.split('/').pop()}
</span>
<span className='theia-ChatInput-ChatContext-additionalInfo'>
{element.additionalInfo}
</span>
</div>
<span className="codicon codicon-close action" title={nls.localizeByDefault('Delete')} onClick={e => { e.stopPropagation(); element.delete(); }} />
</div>
<div className="theia-ChatInput-ChatContext-ImageRow">
<div className='theia-ChatInput-ImagePreview-Item'>
<img src={`data:${variable.mimeType};base64,${variable.data}`} alt={variable.name} />
</div>
</div>
</li>;
}
return <li key={index} className="theia-ChatInput-ChatContext-Element" title={element.details} onClick={() => element.open?.()}>
<div className="theia-ChatInput-ChatContext-Row">
<div className={`theia-ChatInput-ChatContext-Icon ${element.iconClass}`} />
<div className="theia-ChatInput-ChatContext-labelParts">
<span className={`theia-ChatInput-ChatContext-title ${element.nameClass}`}>
{element.name}
</span>
<span className='theia-ChatInput-ChatContext-additionalInfo'>
{element.additionalInfo}
</span>
</div>
<span className="codicon codicon-close action" title={nls.localizeByDefault('Delete')} onClick={e => { e.stopPropagation(); element.delete(); }} />
</div>
<span className="codicon codicon-close action" title={nls.localizeByDefault('Delete')} onClick={e => { e.stopPropagation(); element.delete(); }} />
</li>
))}
</li>;
})}
</ul>
</div>
);
2 changes: 1 addition & 1 deletion packages/ai-chat-ui/src/browser/chat-view-contribution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ export class ChatViewMenuContribution implements MenuContribution, CommandContri

protected getCopyText(arg: RequestNode | ResponseNode): string {
if (isRequestNode(arg)) {
return arg.request.request.text;
return arg.request.request.text ?? '';
} else if (isResponseNode(arg)) {
return arg.response.response.asDisplayString();
}
Expand Down
4 changes: 2 additions & 2 deletions packages/ai-chat-ui/src/browser/chat-view-widget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ export class ChatViewWidget extends BaseWidget implements ExtractableWidget, Sta
return this.onStateChangedEmitter.event;
}

protected async onQuery(query: string | ChatRequest): Promise<void> {
const chatRequest: ChatRequest = typeof query === 'string' ? { text: query } : { ...query };
protected async onQuery(query?: string | ChatRequest): Promise<void> {
const chatRequest: ChatRequest = !query ? { text: '' } : typeof query === 'string' ? { text: query } : { ...query };
if (chatRequest.text.length === 0) { return; }

const requestProgress = await this.chatService.sendRequest(this.chatSession.id, chatRequest);
Expand Down
40 changes: 37 additions & 3 deletions packages/ai-chat-ui/src/browser/style/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -200,22 +200,28 @@ div:last-child > .theia-ChatNode {
margin: 0;
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 6px;
}

.theia-ChatInput-ChatContext-Element {
display: flex;
align-items: center;
flex-direction: column;
border-radius: calc(var(--theia-ui-padding) * 2 / 3);
border: var(--theia-border-width) solid var(--theia-dropdown-border);
padding: 2px 4px 2px 6px;
height: 18px;
line-height: 16px;
min-width: 0;
user-select: none;
cursor: pointer;
}

/* Row inside a context element: a flex container that vertically centers
   its children and spans the full width of its parent. */
.theia-ChatInput-ChatContext-Row {
display: flex;
align-items: center;
width: 100%;
}

.theia-ChatInput-ChatContext-labelParts {
flex: 1;
min-width: 0;
Expand Down Expand Up @@ -497,6 +503,32 @@ div:last-child > .theia-ChatNode {
padding-left: 8px !important;
}

/* Fixed-size (120px x 100px) bordered frame for an image preview;
   overflow is hidden so content is clipped to the rounded corners. */
.theia-ChatInput-ImagePreview-Item {
position: relative;
border: var(--theia-border-width) solid var(--theia-dropdown-border);
border-radius: 4px;
overflow: hidden;
height: 100px;
width: 120px;
}

/* Full-width flex row that horizontally centers its content, with a 4px
   gap above it. */
.theia-ChatInput-ChatContext-ImageRow {
margin-top: 4px;
width: 100%;
display: flex;
justify-content: center;
}

/* Image context elements get a minimum width of 150px. */
.theia-ChatInput-ImageContext-Element {
min-width: 150px;
}

/* The image fills the preview frame; object-fit: contain scales it to fit
   while keeping its aspect ratio. */
.theia-ChatInput-ImagePreview-Item img {
width: 100%;
height: 100%;
object-fit: contain;
}

.theia-ChatInputOptions {
width: 100%;
height: 25px;
Expand Down Expand Up @@ -706,7 +738,7 @@ div:last-child > .theia-ChatNode {
background: var(--theia-menu-background);
border: 1px solid var(--theia-menu-border);
border-radius: 4px;
box-shadow: 0 2px 8px rgba(0,0,0,0.15);
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
margin: 0;
padding: 0;
list-style: none;
Expand Down Expand Up @@ -900,6 +932,7 @@ details[open].collapsible-arguments .collapsible-arguments-summary {
margin-bottom: 10px;
height: calc(100% - 50px);
}

.monaco-session-settings-dialog {
flex: 1;
min-height: 350px;
Expand All @@ -909,6 +942,7 @@ details[open].collapsible-arguments .collapsible-arguments-summary {
border: 1px solid var(--theia-editorWidget-border);
margin-bottom: 10px;
}

.session-settings-error {
color: var(--theia-errorForeground);
min-height: 1em;
Expand Down
Loading
Loading