import axios, { AxiosInstance, AxiosResponse, AxiosRequestConfig } from 'axios';
import { PromptFunctions, PromptMemory, PromptSection, Tokenizer } from "promptrix";
-import { PromptCompletionModel, PromptResponse, ChatCompletionFunction, PromptResponseDetails } from "./types";
+import { PromptCompletionModel, PromptResponse, ChatCompletionFunction, PromptResponseDetails, JsonSchema, ChatCompletionTool } from "./types";
import { ChatCompletionRequestMessage, CreateChatCompletionRequest, CreateChatCompletionResponse, CreateCompletionRequest, CreateCompletionResponse, OpenAICreateChatCompletionRequest, OpenAICreateCompletionRequest } from "./internals";
import { Colorize } from "./internals";

@@ -111,11 +111,13 @@ export interface BaseOpenAIModelOptions {
    requestConfig?: AxiosRequestConfig;

    /**
+     * @deprecated
     * Optional. A list of functions the model may generate JSON inputs for.
     */
    functions?: ChatCompletionFunction[];

    /**
+     * @deprecated
     * Optional. Controls how the model responds to function calls.
     * @remarks
     * `"none"` means the model does not call a function, and responds to the end-user.
@@ -131,14 +133,33 @@ export interface BaseOpenAIModelOptions {
     * @remarks
     * Only available on select models but lets you guarantee that the model will output a JSON object.
     */
-    response_format?: { type: 'json_object'; };
+    response_format?: { type: 'json_object' | 'json_schema'; json_schema?: JsonSchema; };

    /**
     * Optional. Specifies the seed the model should use when generating its response.
     * @remarks
     * Only available on select models but can be used to improve the model's determinism in its responses.
     */
    seed?: number;
+
+    /**
+     * Optional. A list of tools the model may generate JSON inputs for.
+     */
+    tools?: ChatCompletionTool[];
+
+    /**
+     * Optional. Controls how the model responds to tool calls.
+     * @remarks
+     * Defaults to `auto`.
+     */
+    tool_choice?: 'auto' | 'none' | 'required' | ChatCompletionTool;
+
+    /**
+     * Optional. Whether to support calling tools in parallel.
+     * @remarks
+     * Defaults to `true`.
+     */
+    parallel_tool_calls?: boolean;
}

/**
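For context on how the added options compose, here is a minimal sketch of constructing the model with `tools`, `tool_choice`, `parallel_tool_calls`, and the extended `response_format`. The `apiKey`/`model`/`completion_type` fields and the `{ type: 'function', function: {...} }` tool shape are assumed from the existing OpenAIModelOptions and ChatCompletionTool definitions and may differ slightly from the actual types; the weather tool is purely illustrative.

// Sketch only: assumes OpenAIModelOptions exposes apiKey/model/completion_type as today,
// and that ChatCompletionTool mirrors OpenAI's { type: 'function', function: {...} } shape.
const model = new OpenAIModel({
    apiKey: process.env.OPENAI_API_KEY!,
    completion_type: 'chat',
    model: 'gpt-4o',
    tools: [{
        type: 'function',
        function: {
            name: 'get_weather',                     // hypothetical tool for illustration
            description: 'Returns the current weather for a city.',
            parameters: {
                type: 'object',
                properties: { city: { type: 'string' } },
                required: ['city']
            }
        }
    }],
    tool_choice: 'auto',                             // default per the docs above
    parallel_tool_calls: true,
    response_format: { type: 'json_object' }         // or { type: 'json_schema', json_schema: {...} }
});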
@@ -302,133 +323,84 @@ export class OpenAIModel implements PromptCompletionModel {
        const startTime = Date.now();
        const max_input_tokens = this.options.max_input_tokens ?? 1024;
        if (this.options.completion_type == 'text') {
-            // Render prompt
-            const result = await prompt.renderAsText(memory, functions, tokenizer, max_input_tokens);
-            if (result.tooLong) {
-                return {
-                    status: 'too_long',
-                    prompt: result.output,
-                    error: `The generated text completion prompt had a length of ${result.length} tokens which exceeded the max_input_tokens of ${max_input_tokens}.`,
-                };
-            }
-            if (this.options.logRequests) {
-                console.log(Colorize.title('PROMPT:'));
-                console.log(Colorize.output(result.output));
+            throw new Error('Text completions are no longer supported by OpenAI.');
+        }
+
+        // Render prompt
+        const result = await prompt.renderAsMessages(memory, functions, tokenizer, max_input_tokens);
+        if (result.tooLong) {
+            return {
+                status: 'too_long',
+                prompt: result.output,
+                error: `The generated chat completion prompt had a length of ${result.length} tokens which exceeded the max_input_tokens of ${max_input_tokens}.`
+            };
+        }
+        if (this.options.logRequests) {
+            console.log(Colorize.title('CHAT PROMPT:'));
+            console.log(Colorize.output(result.output));
+            if (Array.isArray(this.options.tools) && this.options.tools.length > 0) {
+                console.log(Colorize.title('TOOLS:'));
+                console.log(Colorize.output(this.options.tools));
            }
+        }

-            // Call text completion API
-            const request: CreateCompletionRequest = this.copyOptionsToRequest<CreateCompletionRequest>({
-                prompt: result.output,
-            }, this.options, ['max_tokens', 'temperature', 'top_p', 'n', 'stream', 'logprobs', 'echo', 'stop', 'presence_penalty', 'frequency_penalty', 'best_of', 'logit_bias', 'user']);
-            const response = await this.createCompletion(request);
-            const request_duration = Date.now() - startTime;;
-            if (this.options.logRequests) {
-                console.log(Colorize.title('RESPONSE:'));
-                console.log(Colorize.value('status', response.status));
-                console.log(Colorize.value('duration', request_duration, 'ms'));
-                console.log(Colorize.output(response.data));
-            }
+        // Call chat completion API
+        const request: CreateChatCompletionRequest = this.patchBreakingChanges(this.copyOptionsToRequest<CreateChatCompletionRequest>({
+            messages: result.output as ChatCompletionRequestMessage[],
+        }, this.options, [
+            'max_tokens', 'temperature', 'top_p', 'n', 'stream', 'logprobs', 'echo', 'stop', 'presence_penalty',
+            'frequency_penalty', 'best_of', 'logit_bias', 'user', 'functions', 'function_call', 'response_format',
+            'seed', 'tools', 'tool_choice', 'parallel_tool_calls'
+        ]));
+        const response = await this.createChatCompletion(request);
+        const request_duration = Date.now() - startTime;
+        if (this.options.logRequests) {
+            console.log(Colorize.title('CHAT RESPONSE:'));
+            console.log(Colorize.value('status', response.status));
+            console.log(Colorize.value('duration', request_duration, 'ms'));
+            console.log(Colorize.output(response.data));
+        }

-            // Process response
-            if (response.status < 300) {
-                const completion = response.data.choices[0];
-                const usage = response.data.usage;
-                const details: PromptResponseDetails = {
-                    finish_reason: completion.finish_reason as any,
-                    completion_tokens: usage?.completion_tokens ?? -1,
-                    prompt_tokens: usage?.prompt_tokens ?? -1,
-                    total_tokens: usage?.total_tokens ?? -1,
-                    request_duration,
-                };
-                return {
-                    status: 'success',
-                    prompt: result.output,
-                    message: { role: 'assistant', content: completion.text ?? '' },
-                    details
-                };
-            } else if (response.status == 429) {
-                if (this.options.logRequests) {
-                    console.log(Colorize.title('HEADERS:'));
-                    console.log(Colorize.output(response.headers));
-                }
-                return {
-                    status: 'rate_limited',
-                    prompt: result.output,
-                    error: `The text completion API returned a rate limit error.`
-                }
-            } else {
-                return {
-                    status: 'error',
-                    prompt: result.output,
-                    error: `The text completion API returned an error status of ${response.status}: ${response.statusText}`
-                };
-            }
-        } else {
-            // Render prompt
-            const result = await prompt.renderAsMessages(memory, functions, tokenizer, max_input_tokens);
-            if (result.tooLong) {
-                return {
-                    status: 'too_long',
-                    prompt: result.output,
-                    error: `The generated chat completion prompt had a length of ${result.length} tokens which exceeded the max_input_tokens of ${max_input_tokens}.`
-                };
-            }
-            if (this.options.logRequests) {
-                console.log(Colorize.title('CHAT PROMPT:'));
-                console.log(Colorize.output(result.output));
-                if (Array.isArray(this.options.functions) && this.options.functions.length > 0) {
-                    console.log(Colorize.title('FUNCTIONS:'));
-                    console.log(Colorize.output(this.options.functions));
-                }
+        // Process response
+        if (response.status < 300) {
+            const completion = response.data.choices[0];
+            const usage = response.data.usage;
+            const details: PromptResponseDetails = {
+                finish_reason: completion.finish_reason as any,
+                completion_tokens: usage?.completion_tokens ?? -1,
+                prompt_tokens: usage?.prompt_tokens ?? -1,
+                total_tokens: usage?.total_tokens ?? -1,
+                request_duration,
+            };
+
+            // Ensure message content is text
+            const message = completion.message ?? { role: 'assistant', content: '' };
+            if (typeof message.content == 'object') {
+                message.content = JSON.stringify(message.content);
            }

-            // Call chat completion API
-            const request: CreateChatCompletionRequest = this.copyOptionsToRequest<CreateChatCompletionRequest>({
-                messages: result.output as ChatCompletionRequestMessage[],
-            }, this.options, ['max_tokens', 'temperature', 'top_p', 'n', 'stream', 'logprobs', 'echo', 'stop', 'presence_penalty', 'frequency_penalty', 'best_of', 'logit_bias', 'user', 'functions', 'function_call', 'response_format', 'seed']);
-            const response = await this.createChatCompletion(request);
-            const request_duration = Date.now() - startTime;
+            return {
+                status: 'success',
+                prompt: result.output,
+                message,
+                details
+            };
+        } else if (response.status == 429 && !response.statusText.includes('quota')) {
            if (this.options.logRequests) {
-                console.log(Colorize.title('CHAT RESPONSE:'));
-                console.log(Colorize.value('status', response.status));
-                console.log(Colorize.value('duration', request_duration, 'ms'));
-                console.log(Colorize.output(response.data));
+                console.log(Colorize.title('HEADERS:'));
+                console.log(Colorize.output(response.headers));
            }
-
-            // Process response
-            if (response.status < 300) {
-                const completion = response.data.choices[0];
-                const usage = response.data.usage;
-                const details: PromptResponseDetails = {
-                    finish_reason: completion.finish_reason as any,
-                    completion_tokens: usage?.completion_tokens ?? -1,
-                    prompt_tokens: usage?.prompt_tokens ?? -1,
-                    total_tokens: usage?.total_tokens ?? -1,
-                    request_duration,
-                };
-                return {
-                    status: 'success',
-                    prompt: result.output,
-                    message: completion.message ?? { role: 'assistant', content: '' },
-                    details
-                };
-            } else if (response.status == 429) {
-                if (this.options.logRequests) {
-                    console.log(Colorize.title('HEADERS:'));
-                    console.log(Colorize.output(response.headers));
-                }
-                return {
-                    status: 'rate_limited',
-                    prompt: result.output,
-                    error: `The chat completion API returned a rate limit error.`
-                }
-            } else {
-                return {
-                    status: 'error',
-                    prompt: result.output,
-                    error: `The chat completion API returned an error status of ${response.status}: ${response.statusText}`
-                };
+            return {
+                status: 'rate_limited',
+                prompt: result.output,
+                error: `The chat completion API returned a rate limit error.`
            }
+        } else {
+            return {
+                status: 'error',
+                prompt: result.output,
+                error: `The chat completion API returned an error status of ${response.status}: ${response.statusText}`
+            };
        }
    }

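Since the rewritten method always returns one of the same four statuses, callers can branch on them uniformly. A rough sketch follows; the completePrompt(memory, functions, tokenizer, prompt) parameter order is an assumption based on the Promptrix imports above, and `model` is an OpenAIModel instance set up by the caller.

// Sketch only: memory, functions, tokenizer, and prompt are Promptrix objects
// prepared by the caller; the completePrompt signature is assumed, not confirmed.
const response = await model.completePrompt(memory, functions, tokenizer, prompt);
switch (response.status) {
    case 'success':
        console.log(response.message?.content);      // message.content is always a string now
        break;
    case 'too_long':
        // Prompt exceeded max_input_tokens; trim history and retry.
        break;
    case 'rate_limited':
        // 429 without a quota error; back off and retry later.
        break;
    default:
        console.error(response.error);
}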
@@ -445,6 +417,39 @@ export class OpenAIModel implements PromptCompletionModel {
        return target as TRequest;
    }

+    protected patchBreakingChanges(request: CreateChatCompletionRequest): CreateChatCompletionRequest {
+        if (this._clientType == ClientType.OpenAI) {
+            const options = this.options as OpenAIModelOptions;
+            if (options.model.startsWith('o1-')) {
+                if (request.max_tokens !== undefined) {
+                    (request as any).max_completion_tokens = request.max_tokens;
+                    delete request.max_tokens;
+                }
+                if (request.temperature !== undefined) {
+                    delete request.temperature;
+                }
+                if (request.top_p !== undefined) {
+                    delete request.top_p;
+                }
+                if (request.frequency_penalty !== undefined) {
+                    delete request.frequency_penalty;
+                }
+                if (request.messages[0].role == 'system') {
+                    if (request.messages.length > 1 && request.messages[1].role == 'user') {
+                        // Merge 'system' message with 'user' message
+                        request.messages[1].content = `${request.messages[0].content}\n\n${request.messages[1].content}`;
+                        request.messages.shift();
+                    } else {
+                        // Convert 'system' message to 'user' message
+                        request.messages[0].role = 'user';
+                    }
+                }
+            }
+        }
+
+        return request;
+    }
+
    /**
     * @private
     */
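To make the o1 handling concrete, here is a sketch of how the transformation could be observed from a test double; the constructor options, the 'o1-mini' model name, and the request literal are illustrative assumptions rather than code from this repository.

// Sketch only: a tiny subclass that exposes the protected method so the
// o1 request patching can be inspected directly.
class TestableModel extends OpenAIModel {
    public patchForTest(request: CreateChatCompletionRequest): CreateChatCompletionRequest {
        return this.patchBreakingChanges(request);
    }
}

const patched = new TestableModel({ apiKey: '...', completion_type: 'chat', model: 'o1-mini' }).patchForTest({
    max_tokens: 500,
    temperature: 0.7,
    messages: [
        { role: 'system', content: 'You are terse.' },
        { role: 'user', content: 'Summarize the release notes.' }
    ]
} as CreateChatCompletionRequest);

// patched now carries max_completion_tokens: 500, no temperature, and a single
// 'user' message: 'You are terse.\n\nSummarize the release notes.'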