Skip to content

Commit 4494458

Browse files
authored
Update settings.py
1 parent 5af3b53 commit 4494458

File tree

1 file changed

+21
-21
lines changed

1 file changed

+21
-21
lines changed

llama_cpp/server/settings.py

+21-21
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@
22

33
import multiprocessing
44
from typing import Dict, List, Literal, Optional, Union, cast
5-
from typing_extensions import Self
65

76
from pydantic import Field, model_validator
87
from pydantic_settings import BaseSettings
8+
from typing_extensions import Self
99

1010
import llama_cpp
1111

@@ -19,7 +19,7 @@ class ModelSettings(BaseSettings):
1919
model: str = Field(
2020
description="The path to the model to use for generating completions.",
2121
)
22-
model_alias: str | None = Field(
22+
model_alias: Optional[str] = Field(
2323
default=None,
2424
description="The alias of the model to use for generating completions.",
2525
)
@@ -38,7 +38,7 @@ class ModelSettings(BaseSettings):
3838
ge=0,
3939
description="Main GPU to use.",
4040
)
41-
tensor_split: list[float] | None = Field(
41+
tensor_split: Optional[List[float]] = Field(
4242
default=None,
4343
description="Split layers across multiple GPUs in proportion.",
4444
)
@@ -53,11 +53,11 @@ class ModelSettings(BaseSettings):
5353
default=llama_cpp.llama_supports_mlock(),
5454
description="Use mlock.",
5555
)
56-
kv_overrides: list[str] | None = Field(
56+
kv_overrides: Optional[List[str]] = Field(
5757
default=None,
5858
description="List of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.",
5959
)
60-
rpc_servers: str | None = Field(
60+
rpc_servers: Optional[str] = Field(
6161
default=None,
6262
description="comma seperated list of rpc servers for offloading",
6363
)
@@ -109,25 +109,25 @@ class ModelSettings(BaseSettings):
109109
description="Last n tokens to keep for repeat penalty calculation.",
110110
)
111111
# LoRA Params
112-
lora_base: str | None = Field(
112+
lora_base: Optional[str] = Field(
113113
default=None,
114114
description="Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.",
115115
)
116-
lora_path: str | None = Field(
116+
lora_path: Optional[str] = Field(
117117
default=None,
118118
description="Path to a LoRA file to apply to the model.",
119119
)
120120
# Backend Params
121-
numa: bool | int = Field(
121+
numa: Union[bool, int] = Field(
122122
default=False,
123123
description="Enable NUMA support.",
124124
)
125125
# Chat Format Params
126-
chat_format: str | None = Field(
126+
chat_format: Optional[str] = Field(
127127
default=None,
128128
description="Chat format to use.",
129129
)
130-
clip_model_path: str | None = Field(
130+
clip_model_path: Optional[str] = Field(
131131
default=None,
132132
description="Path to a CLIP model to use for multi-modal chat completion.",
133133
)
@@ -145,21 +145,21 @@ class ModelSettings(BaseSettings):
145145
description="The size of the cache in bytes. Only used if cache is True.",
146146
)
147147
# Tokenizer Options
148-
hf_tokenizer_config_path: str | None = Field(
148+
hf_tokenizer_config_path: Optional[str] = Field(
149149
default=None,
150150
description="The path to a HuggingFace tokenizer_config.json file.",
151151
)
152-
hf_pretrained_model_name_or_path: str | None = Field(
152+
hf_pretrained_model_name_or_path: Optional[str] = Field(
153153
default=None,
154154
description="The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().",
155155
)
156156
# Loading from HuggingFace Model Hub
157-
hf_model_repo_id: str | None = Field(
157+
hf_model_repo_id: Optional[str] = Field(
158158
default=None,
159159
description="The model repo id to use for the HuggingFace tokenizer model.",
160160
)
161161
# Speculative Decoding
162-
draft_model: str | None = Field(
162+
draft_model: Optional[str] = Field(
163163
default=None,
164164
description="Method to use for speculative decoding. One of (prompt-lookup-decoding).",
165165
)
@@ -168,11 +168,11 @@ class ModelSettings(BaseSettings):
168168
description="Number of tokens to predict using the draft model.",
169169
)
170170
# KV Cache Quantization
171-
type_k: int | None = Field(
171+
type_k: Optional[int] = Field(
172172
default=None,
173173
description="Type of the key cache quantization.",
174174
)
175-
type_v: int | None = Field(
175+
type_v: Optional[int] = Field(
176176
default=None,
177177
description="Type of the value cache quantization.",
178178
)
@@ -187,7 +187,7 @@ class ModelSettings(BaseSettings):
187187
def set_dynamic_defaults(self) -> Self:
188188
# If n_threads or n_threads_batch is -1, set it to multiprocessing.cpu_count()
189189
cpu_count = multiprocessing.cpu_count()
190-
values = cast(dict[str, int], self)
190+
values = cast(Dict[str, int], self)
191191
if values.get("n_threads", 0) == -1:
192192
values["n_threads"] = cpu_count
193193
if values.get("n_threads_batch", 0) == -1:
@@ -201,14 +201,14 @@ class ServerSettings(BaseSettings):
201201
# Uvicorn Settings
202202
host: str = Field(default="localhost", description="Listen address")
203203
port: int = Field(default=8000, description="Listen port")
204-
ssl_keyfile: str | None = Field(
204+
ssl_keyfile: Optional[str] = Field(
205205
default=None, description="SSL key file for HTTPS",
206206
)
207-
ssl_certfile: str | None = Field(
207+
ssl_certfile: Optional[str] = Field(
208208
default=None, description="SSL certificate file for HTTPS",
209209
)
210210
# FastAPI Settings
211-
api_key: str | None = Field(
211+
api_key: Optional[str] = Field(
212212
default=None,
213213
description="API key for authentication. If set all requests need to be authenticated.",
214214
)
@@ -233,4 +233,4 @@ class Settings(ServerSettings, ModelSettings):
233233
class ConfigFileSettings(ServerSettings):
234234
"""Configuration file format settings."""
235235

236-
models: list[ModelSettings] = Field(default=[], description="Model configs")
236+
models: List[ModelSettings] = Field(default=[], description="Model configs")

0 commit comments

Comments (0)