@@ -110,6 +110,11 @@ Note: if you already provided your final reply, you will need to provide it again
 Or did you want to call some other tools, but did not actually do that?
 `
 
+const llmAnswerNow = `
+Provide a best-effort answer to the original question with all of the information
+you have so far without calling any more tools!
+`
+
 type llmOutputs struct {
 	tool           Tool
 	provideOutputs func(*verifyContext, string, bool)
@@ -178,22 +183,43 @@ func (a *LLMAgent) executeOne(ctx *Context, candidate int) (string, map[string]a
 func (a *LLMAgent) chat(ctx *Context, cfg *genai.GenerateContentConfig, tools map[string]Tool,
 	prompt string, candidate int) (string, map[string]any, error) {
 	var outputs map[string]any
+	answerNow := false
 	req := []*genai.Content{genai.NewContentFromText(prompt, genai.RoleUser)}
 	for {
-		reqSpan := &trajectory.Span{
+		span := &trajectory.Span{
 			Type:  trajectory.SpanLLM,
 			Name:  a.Name,
 			Model: ctx.modelName(a.Model),
 		}
-		if err := ctx.startSpan(reqSpan); err != nil {
+		if err := ctx.startSpan(span); err != nil {
 			return "", nil, err
 		}
-		resp, err := a.generateContent(ctx, cfg, req, candidate)
-		if err != nil {
-			return "", nil, ctx.finishSpan(reqSpan, err)
+		resp, respErr := a.generateContent(ctx, cfg, req, candidate)
+		if respErr != nil {
+			span.Error = respErr.Error()
+			if err := ctx.finishSpan(span, nil); err != nil {
+				return "", nil, err
+			}
+			// The input overflowed the maximum token count.
+			// If this is an LLMTool, replace the last tool reply
+			// with an instruction to answer right now.
+			if isTokenOverflowError(respErr) &&
+				a.Reply == llmToolReply &&
+				len(req) >= 3 &&
+				!answerNow {
+				answerNow = true
+				cfg.ToolConfig = &genai.ToolConfig{
+					FunctionCallingConfig: &genai.FunctionCallingConfig{
+						Mode: genai.FunctionCallingConfigModeNone,
+					},
+				}
+				req[len(req)-1] = genai.NewContentFromText(llmAnswerNow, genai.RoleUser)
+				continue
+			}
+			return "", nil, respErr
 		}
-		reply, calls, respErr := a.parseResponse(resp, reqSpan)
-		if err := ctx.finishSpan(reqSpan, respErr); err != nil {
+		reply, calls, respErr := a.parseResponse(resp, span)
+		if err := ctx.finishSpan(span, respErr); err != nil {
 			return "", nil, err
 		}
 		req = append(req, resp.Candidates[0].Content)
@@ -361,13 +387,15 @@ func (a *LLMAgent) generateContent(ctx *Context, cfg *genai.GenerateContentConfi
 	for try := 0; ; try++ {
 		resp, err := a.generateContentCached(ctx, cfg, req, candidate)
 		var apiErr genai.APIError
-		if err != nil && try < 100 && errors.As(err, &apiErr) &&
-			apiErr.Code == http.StatusServiceUnavailable {
+		if err == nil || !errors.As(err, &apiErr) {
+			return resp, err
+		}
+		if try < 100 && apiErr.Code == http.StatusServiceUnavailable {
 			time.Sleep(backoff)
 			backoff = min(backoff+time.Second, 10*time.Second)
 			continue
 		}
-		if err != nil && errors.As(err, &apiErr) && apiErr.Code == http.StatusTooManyRequests &&
+		if apiErr.Code == http.StatusTooManyRequests &&
 			strings.Contains(apiErr.Message, "Quota exceeded for metric") {
 			if match := rePleaseRetry.FindStringSubmatch(apiErr.Message); match != nil {
 				sec, _ := strconv.Atoi(match[1])
@@ -378,6 +406,10 @@ func (a *LLMAgent) generateContent(ctx *Context, cfg *genai.GenerateContentConfi
 				return resp, &modelQuotaError{ctx.modelName(a.Model)}
 			}
 		}
+		if apiErr.Code == http.StatusBadRequest &&
+			strings.Contains(apiErr.Message, "The input token count exceeds the maximum") {
+			return resp, &tokenOverflowError{err}
+		}
 		return resp, err
 	}
 }
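
The hunks above call isTokenOverflowError and construct &tokenOverflowError{err}, but neither helper is defined in the visible diff. A minimal sketch consistent with those call sites; the package name and exact definitions are assumptions, not the repository's actual code:

// Sketch only: the real definitions may differ.
package agent // hypothetical package name

import "errors"

// tokenOverflowError wraps the underlying API error so callers can
// detect the input-too-long condition by type while keeping the
// provider's original message.
type tokenOverflowError struct {
	err error
}

func (e *tokenOverflowError) Error() string {
	return "input token count exceeds the model maximum: " + e.err.Error()
}

// Unwrap lets errors.Is and errors.As reach the wrapped API error.
func (e *tokenOverflowError) Unwrap() error { return e.err }

// isTokenOverflowError reports whether err is, or wraps, a tokenOverflowError.
func isTokenOverflowError(err error) bool {
	var overflow *tokenOverflowError
	return errors.As(err, &overflow)
}

Wrapping the API error instead of replacing it keeps the full provider message available in the span's Error field, while chat only needs the type check to trigger its answer-now fallback.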