|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | | -from typing import Any, AsyncGenerator, Dict, Iterator, List |
| 3 | +from typing import Any, AsyncGenerator, Dict, Iterator, List, Literal |
4 | 4 |
|
5 | 5 | from together.abstract import api_requestor |
6 | 6 | from together.together_response import TogetherResponse |
@@ -32,6 +32,7 @@ def create( |
32 | 32 | frequency_penalty: float | None = None, |
33 | 33 | min_p: float | None = None, |
34 | 34 | logit_bias: Dict[str, float] | None = None, |
| 35 | + context_length_exceeded_behavior: Literal["truncate", "error"] | None = None, |
35 | 36 | seed: int | None = None, |
36 | 37 | stream: bool = False, |
37 | 38 | logprobs: int | None = None, |
@@ -80,6 +81,9 @@ def create( |
80 | 81 | logit_bias (Dict[str, float], optional): A dictionary of tokens and their bias values that modify the |
81 | 82 | likelihood of specific tokens being sampled. Bias values must be in the range [-100, 100]. |
82 | 83 | Defaults to None. |
| 84 | + context_length_exceeded_behavior ("truncate" | "error", optional): Behavior when max_tokens exceeds the |
| 85 | + model context length. "error" returns a 400, while "truncate" overrides max_tokens with the model's |
| 86 | + maximum context length. Defaults to None. |
83 | 87 | seed (int, optional): A seed value to use for reproducibility. |
84 | 88 | stream (bool, optional): Flag indicating whether to stream the generated completions. |
85 | 89 | Defaults to False. |
@@ -126,6 +130,7 @@ def create( |
126 | 130 | frequency_penalty=frequency_penalty, |
127 | 131 | min_p=min_p, |
128 | 132 | logit_bias=logit_bias, |
| 133 | + context_length_exceeded_behavior=context_length_exceeded_behavior, |
129 | 134 | seed=seed, |
130 | 135 | stream=stream, |
131 | 136 | logprobs=logprobs, |
@@ -174,6 +179,7 @@ async def create( |
174 | 179 | frequency_penalty: float | None = None, |
175 | 180 | min_p: float | None = None, |
176 | 181 | logit_bias: Dict[str, float] | None = None, |
| 182 | + context_length_exceeded_behavior: Literal["truncate", "error"] | None = None, |
177 | 183 | seed: int | None = None, |
178 | 184 | stream: bool = False, |
179 | 185 | logprobs: int | None = None, |
@@ -222,6 +228,9 @@ async def create( |
222 | 228 | logit_bias (Dict[str, float], optional): A dictionary of tokens and their bias values that modify the |
223 | 229 | likelihood of specific tokens being sampled. Bias values must be in the range [-100, 100]. |
224 | 230 | Defaults to None. |
| 231 | + context_length_exceeded_behavior ("truncate" | "error", optional): Behavior when max_tokens exceeds the |
| 232 | + model context length. "error" returns a 400, while "truncate" overrides max_tokens with the model's |
| 233 | + maximum context length. Defaults to None. |
225 | 234 | seed (int, optional): A seed value to use for reproducibility. |
226 | 235 | stream (bool, optional): Flag indicating whether to stream the generated completions. |
227 | 236 | Defaults to False. |
@@ -268,6 +277,7 @@ async def create( |
268 | 277 | frequency_penalty=frequency_penalty, |
269 | 278 | min_p=min_p, |
270 | 279 | logit_bias=logit_bias, |
| 280 | + context_length_exceeded_behavior=context_length_exceeded_behavior, |
271 | 281 | seed=seed, |
272 | 282 | stream=stream, |
273 | 283 | logprobs=logprobs, |
|
0 commit comments