Skip to content

Commit a42f519

Browse files
authored
[New_Features] Adds recently added Assistant cost saving parameters (#710)
* add cost saving parameters * add periods at the end of comments * shorten comment * further lower comment length * fix type
1 parent 2446f08 commit a42f519

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

run.go

+44
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ type Run struct {
2828
Metadata map[string]any `json:"metadata"`
2929
Usage Usage `json:"usage,omitempty"`
3030

31+
Temperature *float32 `json:"temperature,omitempty"`
32+
// The maximum number of prompt tokens that may be used over the course of the run.
33+
// If the run exceeds the number of prompt tokens specified, the run will end with status 'complete'.
34+
MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
35+
// The maximum number of completion tokens that may be used over the course of the run.
36+
// If the run exceeds the number of completion tokens specified, the run will end with status 'complete'.
37+
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
38+
// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
39+
TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
40+
3141
httpHeader
3242
}
3343

@@ -78,8 +88,42 @@ type RunRequest struct {
7888
AdditionalInstructions string `json:"additional_instructions,omitempty"`
7989
Tools []Tool `json:"tools,omitempty"`
8090
Metadata map[string]any `json:"metadata,omitempty"`
91+
92+
// Sampling temperature between 0 and 2. Higher values like 0.8 are more random.
93+
// lower values are more focused and deterministic.
94+
Temperature *float32 `json:"temperature,omitempty"`
95+
96+
// The maximum number of prompt tokens that may be used over the course of the run.
97+
// If the run exceeds the number of prompt tokens specified, the run will end with status 'complete'.
98+
MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
99+
100+
// The maximum number of completion tokens that may be used over the course of the run.
101+
// If the run exceeds the number of completion tokens specified, the run will end with status 'complete'.
102+
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
103+
104+
// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
105+
TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
81106
}
82107

108+
// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy.
type ThreadTruncationStrategy struct {
	// Type is the truncation strategy to apply; the API defaults to 'auto'.
	Type TruncationStrategy `json:"type,omitempty"`
	// LastMessages must be set when Type is TruncationStrategyLastMessages;
	// it is the number of most recent messages to keep in the thread.
	LastMessages *int `json:"last_messages,omitempty"`
}

// TruncationStrategy identifies the truncation strategies available for
// thread management in an assistant.
type TruncationStrategy string

const (
	// TruncationStrategyAuto drops messages in the middle of the thread to
	// fit the context length of the model.
	TruncationStrategyAuto = TruncationStrategy("auto")
	// TruncationStrategyLastMessages truncates the thread to the n most
	// recent messages in the thread.
	TruncationStrategyLastMessages = TruncationStrategy("last_messages")
)
126+
83127
// RunModifyRequest is the request body for modifying a run. Only the
// metadata attached to the run is carried; omitted when nil.
type RunModifyRequest struct {
	Metadata map[string]any `json:"metadata,omitempty"`
}

0 commit comments

Comments
 (0)