@@ -10,8 +10,8 @@ import { IWebContentExtractorService } from '../../../../../platform/webContentE
10
10
import { ITrustedDomainService } from '../../../url/browser/trustedDomainService.js' ;
11
11
import { CountTokensCallback , IPreparedToolInvocation , IToolData , IToolImpl , IToolInvocation , IToolResult , IToolResultTextPart } from '../../common/languageModelToolsService.js' ;
12
12
import { MarkdownString } from '../../../../../base/common/htmlContent.js' ;
13
+ import { InternalFetchWebPageToolId } from '../../common/tools/tools.js' ;
13
14
14
- export const InternalFetchWebPageToolId = 'vscode_fetchWebPage_internal' ;
15
15
export const FetchWebPageToolData : IToolData = {
16
16
id : InternalFetchWebPageToolId ,
17
17
displayName : 'Fetch Web Page' ,
@@ -41,26 +41,50 @@ export class FetchWebPageTool implements IToolImpl {
41
41
) { }
42
42
43
43
/**
 * Fetches the web pages named in `invocation.parameters.urls` and returns one
 * text part per requested URL, in the order produced by `_parseUris`.
 *
 * URLs that could not be parsed are still represented in the result (with an
 * "Invalid URL" placeholder produced by `_getPromptPartsForResults`). If no
 * URL parses at all, a single "No valid URLs provided." text part is returned
 * and nothing is fetched.
 */
async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _token: CancellationToken): Promise<IToolResult> {
	const requestedUrls = (invocation.parameters as { urls?: string[] }).urls;
	const parsedByUrl = this._parseUris(requestedUrls);

	// Collect only the entries that parsed successfully; these are what we fetch.
	const urisToFetch: URI[] = [];
	for (const parsed of parsedByUrl.values()) {
		if (parsed) {
			urisToFetch.push(parsed);
		}
	}

	if (urisToFetch.length === 0) {
		return {
			content: [{ kind: 'text', value: localize('fetchWebPage.noValidUrls', 'No valid URLs provided.') }]
		};
	}

	// These URLs were approved via confirmation, so remember them as "approved"
	// for this session unless the trusted domain service already accepts them.
	for (const uri of urisToFetch) {
		if (this._trustedDomainService.isValid(uri)) {
			continue;
		}
		this._alreadyApprovedDomains.add(uri.toString(true));
	}

	const fetched = await this._readerModeService.extract(urisToFetch);

	// Build a map from each requested URL to its extracted content, or to
	// undefined when the URL was invalid. `fetched` is consumed positionally:
	// it lines up with `urisToFetch`, which was built in the same iteration order.
	const contentByUrl = new Map<string, string | undefined>();
	let nextFetched = 0;
	for (const [url, parsed] of parsedByUrl) {
		contentByUrl.set(url, parsed ? fetched[nextFetched++] : undefined);
	}

	return { content: this._getPromptPartsForResults(contentByUrl) };
}
61
76
62
77
async prepareToolInvocation ( parameters : any , token : CancellationToken ) : Promise < IPreparedToolInvocation | undefined > {
63
- const { invalid, valid } = this . _parseUris ( parameters . urls ) ;
78
+ const map = this . _parseUris ( parameters . urls ) ;
79
+ const invalid = new Array < string > ( ) ;
80
+ const valid = new Array < URI > ( ) ;
81
+ map . forEach ( ( uri , url ) => {
82
+ if ( ! uri ) {
83
+ invalid . push ( url ) ;
84
+ } else {
85
+ valid . push ( uri ) ;
86
+ }
87
+ } ) ;
64
88
const urlsNeedingConfirmation = valid . filter ( url => ! this . _trustedDomainService . isValid ( url ) && ! this . _alreadyApprovedDomains . has ( url . toString ( true ) ) ) ;
65
89
66
90
const pastTenseMessage = invalid . length
@@ -118,25 +142,34 @@ export class FetchWebPageTool implements IToolImpl {
118
142
return result ;
119
143
}
120
144
121
/**
 * Parses each raw URL string with `URI.parse`.
 *
 * @param urls Raw URL strings; may be omitted.
 * @returns A map keyed by the raw URL string. The value is the parsed `URI`,
 * or `undefined` when `URI.parse` threw for that string. Because the map is
 * keyed by the raw string, a URL that appears more than once yields one entry.
 */
private _parseUris(urls?: string[]): Map<string, URI | undefined> {
	const parsedByUrl = new Map<string, URI | undefined>();
	if (urls == null) {
		return parsedByUrl;
	}

	urls.forEach(rawUrl => {
		let parsed: URI | undefined;
		try {
			parsed = URI.parse(rawUrl);
		} catch {
			// Unparseable input is recorded as undefined rather than dropped.
			parsed = undefined;
		}
		parsedByUrl.set(rawUrl, parsed);
	});

	return parsedByUrl;
}
135
157
136
/**
 * Converts per-URL fetch results into tool-result text parts.
 *
 * @param results Map from the requested URL string to its extracted content,
 * or to `undefined` when no content is available for that URL.
 * @returns One text part per map entry, in the map's iteration order. Each
 * part starts with an `<!-- url -->` header followed by a blank line, then
 * either the content or a localized "Invalid URL" placeholder.
 */
private _getPromptPartsForResults(results: Map<string, string | undefined>): IToolResultTextPart[] {
	const parts: IToolResultTextPart[] = [];
	for (const [url, content] of results) {
		parts.push({
			kind: 'text',
			// Only a missing result (undefined) means "invalid". An empty string is
			// legitimate extracted content and must not be reported as an invalid
			// URL, so use `??` rather than a truthiness test.
			value: `<!-- ${url} -->\n\n` + (content ?? localize('fetchWebPage.invalidUrl', 'Invalid URL'))
		});
	}
	return parts;
}
142
175
}
0 commit comments