@@ -1,18 +1,18 @@
 from __future__ import annotations
 
-from typing import Dict, List, Literal, Optional, Union
+from typing import Dict, List, Optional, Union
 
 from pydantic import BaseModel, Field
-from typing_extensions import TypedDict
+from typing_extensions import Literal, TypedDict
 
 import llama_cpp
 
 model_field = Field(
-    description="The model to use for generating completions.", default=None,
+    description="The model to use for generating completions.", default=None
 )
 
 max_tokens_field = Field(
-    default=16, ge=1, description="The maximum number of tokens to generate.",
+    default=16, ge=1, description="The maximum number of tokens to generate."
 )
 
 min_tokens_field = Field(
@@ -96,7 +96,7 @@
 )
 
 mirostat_eta_field = Field(
-    default=0.1, ge=0.001, le=1.0, description="Mirostat learning rate",
+    default=0.1, ge=0.001, le=1.0, description="Mirostat learning rate"
 )
 
 grammar = Field(
@@ -106,15 +106,15 @@
 )
 
 class CreateCompletionRequest(BaseModel):
-    prompt: str | list[str] = Field(
-        default="", description="The prompt to generate completions for.",
+    prompt: Union[str, List[str]] = Field(
+        default="", description="The prompt to generate completions for."
     )
-    suffix: str | None = Field(
+    suffix: Optional[str] = Field(
         default=None,
         description="A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots.",
     )
-    max_tokens: int | None = Field(
-        default=16, ge=0, description="The maximum number of tokens to generate.",
+    max_tokens: Optional[int] = Field(
+        default=16, ge=0, description="The maximum number of tokens to generate."
     )
     min_tokens: int = min_tokens_field
     temperature: float = temperature_field
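The substitutions in this change are syntax-only: Optional[...], Union[...], List[...], and Dict[...] from typing, plus Literal from typing_extensions, name exactly the same types as the X | None and builtin-generic spellings they replace, but they also evaluate on older interpreters, where pydantic resolves the annotations at class-definition time (the | form requires Python 3.10 and list[str] requires 3.9). A minimal equivalence sketch, not part of the diff:

from typing import List, Optional, Union

# Interchangeable for type checkers and for pydantic; the subscripted forms
# also work on Python 3.8, where str | None and list[str] raise TypeError
# once the annotation is evaluated.
Suffix = Optional[str]          # same type as: str | None
Prompt = Union[str, List[str]]  # same type as: str | list[str]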
@@ -124,172 +124,172 @@ class CreateCompletionRequest(BaseModel):
         default=False,
         description="Whether to echo the prompt in the generated text. Useful for chatbots.",
     )
-    stop: str | list[str] | None = stop_field
+    stop: Optional[Union[str, List[str]]] = stop_field
     stream: bool = stream_field
-    logprobs: int | None = Field(
+    logprobs: Optional[int] = Field(
         default=None,
         ge=0,
         description="The number of logprobs to generate. If None, no logprobs are generated.",
     )
-    presence_penalty: float | None = presence_penalty_field
-    frequency_penalty: float | None = frequency_penalty_field
-    logit_bias: dict[str, float] | None = Field(None)
-    seed: int | None = Field(None)
+    presence_penalty: Optional[float] = presence_penalty_field
+    frequency_penalty: Optional[float] = frequency_penalty_field
+    logit_bias: Optional[Dict[str, float]] = Field(None)
+    seed: Optional[int] = Field(None)
 
     # ignored or currently unsupported
-    model: str | None = model_field
-    n: int | None = 1
-    best_of: int | None = 1
-    user: str | None = Field(default=None)
+    model: Optional[str] = model_field
+    n: Optional[int] = 1
+    best_of: Optional[int] = 1
+    user: Optional[str] = Field(default=None)
 
     # llama.cpp specific parameters
     top_k: int = top_k_field
     repeat_penalty: float = repeat_penalty_field
-    logit_bias_type: Literal["input_ids", "tokens"] | None = Field(None)
+    logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None)
     mirostat_mode: int = mirostat_mode_field
     mirostat_tau: float = mirostat_tau_field
     mirostat_eta: float = mirostat_eta_field
-    grammar: str | None = None
+    grammar: Optional[str] = None
 
     model_config = {
         "json_schema_extra": {
             "examples": [
                 {
                     "prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n",
                     "stop": ["\n", "###"],
-                },
-            ],
-        },
+                }
+            ]
+        }
     }
 
 
 class CreateEmbeddingRequest(BaseModel):
-    model: str | None = model_field
-    input: str | list[str] = Field(description="The input to embed.")
-    user: str | None = Field(default=None)
+    model: Optional[str] = model_field
+    input: Union[str, List[str]] = Field(description="The input to embed.")
+    user: Optional[str] = Field(default=None)
 
     model_config = {
         "json_schema_extra": {
             "examples": [
                 {
                     "input": "The food was delicious and the waiter...",
-                },
-            ],
-        },
+                }
+            ]
+        }
     }
 
 
 class ChatCompletionRequestMessage(BaseModel):
     role: Literal["system", "user", "assistant", "function"] = Field(
-        default="user", description="The role of the message.",
+        default="user", description="The role of the message."
     )
-    content: str | None = Field(
-        default="", description="The content of the message.",
+    content: Optional[str] = Field(
+        default="", description="The content of the message."
     )
 
 
 class CreateChatCompletionRequest(BaseModel):
-    messages: list[llama_cpp.ChatCompletionRequestMessage] = Field(
-        default=[], description="A list of messages to generate completions for.",
+    messages: List[llama_cpp.ChatCompletionRequestMessage] = Field(
+        default=[], description="A list of messages to generate completions for."
     )
-    functions: list[llama_cpp.ChatCompletionFunction] | None = Field(
+    functions: Optional[List[llama_cpp.ChatCompletionFunction]] = Field(
         default=None,
         description="A list of functions to apply to the generated completions.",
     )
-    function_call: llama_cpp.ChatCompletionRequestFunctionCall | None = Field(
+    function_call: Optional[llama_cpp.ChatCompletionRequestFunctionCall] = Field(
         default=None,
         description="A function to apply to the generated completions.",
     )
-    tools: list[llama_cpp.ChatCompletionTool] | None = Field(
+    tools: Optional[List[llama_cpp.ChatCompletionTool]] = Field(
         default=None,
         description="A list of tools to apply to the generated completions.",
     )
-    tool_choice: llama_cpp.ChatCompletionToolChoiceOption | None = Field(
+    tool_choice: Optional[llama_cpp.ChatCompletionToolChoiceOption] = Field(
         default=None,
         description="A tool to apply to the generated completions.",
     )  # TODO: verify
-    max_tokens: int | None = Field(
+    max_tokens: Optional[int] = Field(
         default=None,
         description="The maximum number of tokens to generate. Defaults to inf",
     )
     min_tokens: int = min_tokens_field
-    logprobs: bool | None = Field(
+    logprobs: Optional[bool] = Field(
         default=False,
         description="Whether to output the logprobs or not. Default is True",
     )
-    top_logprobs: int | None = Field(
+    top_logprobs: Optional[int] = Field(
         default=None,
         ge=0,
         description="The number of logprobs to generate. If None, no logprobs are generated. logprobs need to set to True.",
     )
     temperature: float = temperature_field
     top_p: float = top_p_field
     min_p: float = min_p_field
-    stop: str | list[str] | None = stop_field
+    stop: Optional[Union[str, List[str]]] = stop_field
     stream: bool = stream_field
-    presence_penalty: float | None = presence_penalty_field
-    frequency_penalty: float | None = frequency_penalty_field
-    logit_bias: dict[str, float] | None = Field(None)
-    seed: int | None = Field(None)
-    response_format: llama_cpp.ChatCompletionRequestResponseFormat | None = Field(
+    presence_penalty: Optional[float] = presence_penalty_field
+    frequency_penalty: Optional[float] = frequency_penalty_field
+    logit_bias: Optional[Dict[str, float]] = Field(None)
+    seed: Optional[int] = Field(None)
+    response_format: Optional[llama_cpp.ChatCompletionRequestResponseFormat] = Field(
         default=None,
     )
 
     # ignored or currently unsupported
-    model: str | None = model_field
-    n: int | None = 1
-    user: str | None = Field(None)
+    model: Optional[str] = model_field
+    n: Optional[int] = 1
+    user: Optional[str] = Field(None)
 
     # llama.cpp specific parameters
     top_k: int = top_k_field
     repeat_penalty: float = repeat_penalty_field
-    logit_bias_type: Literal["input_ids", "tokens"] | None = Field(None)
+    logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None)
     mirostat_mode: int = mirostat_mode_field
     mirostat_tau: float = mirostat_tau_field
     mirostat_eta: float = mirostat_eta_field
-    grammar: str | None = None
+    grammar: Optional[str] = None
 
     model_config = {
         "json_schema_extra": {
             "examples": [
                 {
                     "messages": [
                         ChatCompletionRequestMessage(
-                            role="system", content="You are a helpful assistant.",
+                            role="system", content="You are a helpful assistant."
                         ).model_dump(),
                         ChatCompletionRequestMessage(
-                            role="user", content="What is the capital of France?",
+                            role="user", content="What is the capital of France?"
                         ).model_dump(),
-                    ],
-                },
-            ],
-        },
+                    ]
+                }
+            ]
+        }
     }
 
 
 class ModelData(TypedDict):
     id: str
     object: Literal["model"]
     owned_by: str
-    permissions: list[str]
+    permissions: List[str]
 
 
 class ModelList(TypedDict):
     object: Literal["list"]
-    data: list[ModelData]
+    data: List[ModelData]
 
 
 class TokenizeInputRequest(BaseModel):
-    model: str | None = model_field
+    model: Optional[str] = model_field
     input: str = Field(description="The input to tokenize.")
 
     model_config = {
-        "json_schema_extra": {"examples": [{"input": "How many tokens in this query?"}]},
+        "json_schema_extra": {"examples": [{"input": "How many tokens in this query?"}]}
     }
 
 
 class TokenizeInputResponse(BaseModel):
-    tokens: list[int] = Field(description="A list of tokens.")
+    tokens: List[int] = Field(description="A list of tokens.")
 
     model_config = {"json_schema_extra": {"example": {"tokens": [123, 321, 222]}}}
 
@@ -301,8 +301,8 @@ class TokenizeInputCountResponse(BaseModel):
 
 
 class DetokenizeInputRequest(BaseModel):
-    model: str | None = model_field
-    tokens: list[int] = Field(description="A list of toekns to detokenize.")
+    model: Optional[str] = model_field
+    tokens: List[int] = Field(description="A list of toekns to detokenize.")
 
     model_config = {"json_schema_extra": {"example": [{"tokens": [123, 321, 222]}]}}
 
@@ -311,5 +311,5 @@ class DetokenizeInputResponse(BaseModel):
     text: str = Field(description="The detokenized text.")
 
     model_config = {
-        "json_schema_extra": {"example": {"text": "How many tokens in this query?"}},
+        "json_schema_extra": {"example": {"text": "How many tokens in this query?"}}
     }
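A minimal usage sketch, not part of the diff, assuming the classes above are in scope: the Optional/Union annotations validate and serialize exactly as the previous X | None spellings did, so existing request payloads are unaffected.

# Mirrors the example embedded in CreateCompletionRequest.model_config above.
request = CreateCompletionRequest(
    prompt="\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n",
    stop=["\n", "###"],
    max_tokens=16,
)
print(request.model_dump(exclude_none=True))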