Skip to content

Commit e33e936

Browse files
Handle invalid uris better and move out internal tool name (#244049)
Follow up to @connor4312's comment here #243922 (comment)
1 parent 59e5c3e commit e33e936

File tree

2 files changed

+54
-19
lines changed

2 files changed

+54
-19
lines changed

src/vs/workbench/contrib/chat/common/tools/tools.ts

+2
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,5 @@ export class BuiltinToolsContribution extends Disposable implements IWorkbenchCo
2828
export interface IToolInputProcessor {
2929
processInput(input: any): any;
3030
}
31+
32+
export const InternalFetchWebPageToolId = 'vscode_fetchWebPage_internal';

src/vs/workbench/contrib/chat/electron-sandbox/tools/fetchPageTool.ts

+52-19
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ import { IWebContentExtractorService } from '../../../../../platform/webContentE
1010
import { ITrustedDomainService } from '../../../url/browser/trustedDomainService.js';
1111
import { CountTokensCallback, IPreparedToolInvocation, IToolData, IToolImpl, IToolInvocation, IToolResult, IToolResultTextPart } from '../../common/languageModelToolsService.js';
1212
import { MarkdownString } from '../../../../../base/common/htmlContent.js';
13+
import { InternalFetchWebPageToolId } from '../../common/tools/tools.js';
1314

14-
export const InternalFetchWebPageToolId = 'vscode_fetchWebPage_internal';
1515
export const FetchWebPageToolData: IToolData = {
1616
id: InternalFetchWebPageToolId,
1717
displayName: 'Fetch Web Page',
@@ -41,26 +41,50 @@ export class FetchWebPageTool implements IToolImpl {
4141
) { }
4242

4343
async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _token: CancellationToken): Promise<IToolResult> {
44-
const { valid } = this._parseUris((invocation.parameters as { urls?: string[] }).urls);
45-
if (!valid.length) {
44+
const parsedUriResults = this._parseUris((invocation.parameters as { urls?: string[] }).urls);
45+
const validUris = Array.from(parsedUriResults.values()).filter((uri): uri is URI => !!uri);
46+
if (!validUris.length) {
4647
return {
4748
content: [{ kind: 'text', value: localize('fetchWebPage.noValidUrls', 'No valid URLs provided.') }]
4849
};
4950
}
5051

51-
for (const uri of valid) {
52+
// We approved these via confirmation, so mark them as "approved" in this session
53+
// if they are not approved via the trusted domain service.
54+
for (const uri of validUris) {
5255
if (!this._trustedDomainService.isValid(uri)) {
5356
this._alreadyApprovedDomains.add(uri.toString(true));
5457
}
5558
}
5659

57-
const contents = await this._readerModeService.extract(valid);
60+
const contents = await this._readerModeService.extract(validUris);
61+
// Make an array that conatains either the content or undefined for invalid URLs
62+
const contentsWithUndefined = new Map<string, string | undefined>();
63+
let indexInContents = 0;
64+
parsedUriResults.forEach((uri, url) => {
65+
if (uri) {
66+
contentsWithUndefined.set(url, contents[indexInContents]);
67+
indexInContents++;
68+
} else {
69+
contentsWithUndefined.set(url, undefined);
70+
}
71+
});
72+
5873
// TODO: Should we return a content for invalid URLs so that the indexes are aligned?
59-
return { content: contents.map((content, index) => this._getPromptPartForWebPageContents(content, valid[index])) };
74+
return { content: this._getPromptPartsForResults(contentsWithUndefined) };
6075
}
6176

6277
async prepareToolInvocation(parameters: any, token: CancellationToken): Promise<IPreparedToolInvocation | undefined> {
63-
const { invalid, valid } = this._parseUris(parameters.urls);
78+
const map = this._parseUris(parameters.urls);
79+
const invalid = new Array<string>();
80+
const valid = new Array<URI>();
81+
map.forEach((uri, url) => {
82+
if (!uri) {
83+
invalid.push(url);
84+
} else {
85+
valid.push(uri);
86+
}
87+
});
6488
const urlsNeedingConfirmation = valid.filter(url => !this._trustedDomainService.isValid(url) && !this._alreadyApprovedDomains.has(url.toString(true)));
6589

6690
const pastTenseMessage = invalid.length
@@ -118,25 +142,34 @@ export class FetchWebPageTool implements IToolImpl {
118142
return result;
119143
}
120144

121-
private _parseUris(urls?: string[]): { invalid: string[]; valid: URI[] } {
122-
const invalidUrls: string[] = [];
123-
const validUrls: URI[] = [];
145+
private _parseUris(urls?: string[]): Map<string, URI | undefined> {
146+
const results = new Map<string, URI | undefined>();
124147
urls?.forEach(uri => {
125148
try {
126149
const uriObj = URI.parse(uri);
127-
validUrls.push(uriObj);
150+
results.set(uri, uriObj);
128151
} catch (e) {
129-
invalidUrls.push(uri);
152+
results.set(uri, undefined);
130153
}
131154
});
132-
133-
return { invalid: invalidUrls, valid: validUrls };
155+
return results;
134156
}
135157

136-
private _getPromptPartForWebPageContents(webPageContents: string, uri: URI): IToolResultTextPart {
137-
return {
138-
kind: 'text',
139-
value: `<!-- ${uri.toString(true)} -->\n\n` + webPageContents
140-
};
158+
private _getPromptPartsForResults(results: Map<string, string | undefined>): IToolResultTextPart[] {
159+
const arr = new Array<IToolResultTextPart>();
160+
for (const [url, content] of results.entries()) {
161+
if (content) {
162+
arr.push({
163+
kind: 'text',
164+
value: `<!-- ${url} -->\n\n` + content
165+
});
166+
} else {
167+
arr.push({
168+
kind: 'text',
169+
value: `<!-- ${url} -->\n\n` + localize('fetchWebPage.invalidUrl', 'Invalid URL')
170+
});
171+
}
172+
}
173+
return arr;
141174
}
142175
}

0 commit comments

Comments
 (0)