Commit 5af3b53

Update model.py
1 parent e133736 commit 5af3b53

1 file changed: llama_cpp/server/model.py (+18 -18)
@@ -9,7 +9,7 @@
 
 
 class LlamaProxy:
-    def __init__(self, models: list[ModelSettings]) -> None:
+    def __init__(self, models: List[ModelSettings]) -> None:
         assert len(models) > 0, "No models provided!"
 
         self._model_settings_dict: dict[str, ModelSettings] = {}
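
Note: this hunk, and those below, swap PEP 585 builtin generics such as list[ModelSettings] for their typing-module equivalents. The commit message does not state the motivation, but the usual reason is compatibility with Python versions older than 3.9, where subscripting builtin types fails at runtime unless "from __future__ import annotations" defers evaluation. A minimal standalone sketch of the failure mode (not code from this repo):

    # Python 3.8 raises "TypeError: 'type' object is not subscriptable"
    # while merely defining this function, because the builtin list type
    # does not support subscripting until Python 3.9:
    #
    #     def load(models: list[str]) -> None: ...
    #
    # The typing-module spelling is portable back to much older versions:
    from typing import List

    def load(models: List[str]) -> None:
        pass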
@@ -18,19 +18,19 @@ def __init__(self, models: list[ModelSettings]) -> None:
                 model.model_alias = model.model
             self._model_settings_dict[model.model_alias] = model
 
-        self._current_model: llama_cpp.Llama | None = None
-        self._current_model_alias: str | None = None
+        self._current_model: Optional[llama_cpp.Llama] = None
+        self._current_model_alias: Optional[str] = None
 
         self._default_model_settings: ModelSettings = models[0]
         self._default_model_alias: str = self._default_model_settings.model_alias  # type: ignore
 
         # Load default model
         self._current_model = self.load_llama_from_model_settings(
-            self._default_model_settings,
+            self._default_model_settings
         )
         self._current_model_alias = self._default_model_alias
 
-    def __call__(self, model: str | None = None) -> llama_cpp.Llama:
+    def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama:
         if model is None:
             model = self._default_model_alias
 
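Here Optional[X] replaces the PEP 604 union X | None, which is only valid at runtime on Python 3.10+. A quick standalone illustration (the Llama class below is a stand-in for llama_cpp.Llama, used purely so the sketch runs on its own):

    from typing import Optional

    class Llama:  # stand-in for llama_cpp.Llama, for illustration only
        pass

    # On Python < 3.10, evaluating "Llama | None" raises
    # "TypeError: unsupported operand type(s) for |: ...".
    # The typing spelling below is portable:
    current_model: Optional[Llama] = None
    current_model_alias: Optional[str] = None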
@@ -53,7 +53,7 @@ def __call__(self, model: str | None = None) -> llama_cpp.Llama:
     def __getitem__(self, model: str):
         return self._model_settings_dict[model].model_dump()
 
-    def __setitem__(self, model: str, settings: ModelSettings | str | bytes):
+    def __setitem__(self, model: str, settings: Union[ModelSettings, str, bytes]):
         if isinstance(settings, (bytes, str)):
             settings = ModelSettings.model_validate_json(settings)
         self._model_settings_dict[model] = settings
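
Multi-member unions get the same treatment: Union[ModelSettings, str, bytes] replaces ModelSettings | str | bytes. Because the setter validates str/bytes input with Pydantic, callers can assign either a settings object or raw JSON. A reduced sketch of that pattern, assuming Pydantic v2 (the ModelSettings class here is a one-field stand-in for the server's settings class):

    from typing import Union
    from pydantic import BaseModel

    class ModelSettings(BaseModel):  # reduced stand-in, one field only
        model: str

    def coerce(settings: Union[ModelSettings, str, bytes]) -> ModelSettings:
        # Mirrors the __setitem__ body: JSON input is parsed and validated
        # into the settings model before being stored.
        if isinstance(settings, (bytes, str)):
            settings = ModelSettings.model_validate_json(settings)
        return settings

    print(coerce('{"model": "./models/example.gguf"}'))  # model='./models/example.gguf'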
@@ -82,7 +82,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "obsidian":
             assert settings.clip_model_path is not None, "clip model not found"
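
This hunk and the four that follow (obsidian, llava-1-6, moondream, nanollava, llama-3-vision-alpha) each make the same one-character change: dropping a trailing comma inside a call. Python treats both forms identically, so the change is purely stylistic, as this trivial check shows:

    def handler(clip_model_path, verbose):
        return (clip_model_path, verbose)

    # The trailing comma has no semantic effect:
    assert handler(clip_model_path="mmproj.bin", verbose=True,) == \
           handler(clip_model_path="mmproj.bin", verbose=True)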
@@ -96,7 +96,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.ObsidianChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "llava-1-6":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -110,7 +110,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.Llava16ChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "moondream":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -124,7 +124,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.MoondreamChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "nanollava":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -138,7 +138,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.NanoLlavaChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "llama-3-vision-alpha":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -152,38 +152,38 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.Llama3VisionAlpha(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "hf-autotokenizer":
             assert (
                 settings.hf_pretrained_model_name_or_path is not None
             ), "hf_pretrained_model_name_or_path must be set for hf-autotokenizer"
             chat_handler = (
                 llama_cpp.llama_chat_format.hf_autotokenizer_to_chat_completion_handler(
-                    settings.hf_pretrained_model_name_or_path,
+                    settings.hf_pretrained_model_name_or_path
                 )
             )
         elif settings.chat_format == "hf-tokenizer-config":
             assert (
                 settings.hf_tokenizer_config_path is not None
             ), "hf_tokenizer_config_path must be set for hf-tokenizer-config"
             chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler(
-                json.load(open(settings.hf_tokenizer_config_path)),
+                json.load(open(settings.hf_tokenizer_config_path))
             )
 
-        tokenizer: llama_cpp.BaseLlamaTokenizer | None = None
+        tokenizer: Optional[llama_cpp.BaseLlamaTokenizer] = None
         if settings.hf_pretrained_model_name_or_path is not None:
             tokenizer = llama_tokenizer.LlamaHFTokenizer.from_pretrained(
-                settings.hf_pretrained_model_name_or_path,
+                settings.hf_pretrained_model_name_or_path
             )
 
         draft_model = None
         if settings.draft_model is not None:
             draft_model = llama_speculative.LlamaPromptLookupDecoding(
-                num_pred_tokens=settings.draft_model_num_pred_tokens,
+                num_pred_tokens=settings.draft_model_num_pred_tokens
             )
 
-        kv_overrides: dict[str, bool | int | float | str] | None = None
+        kv_overrides: Optional[Dict[str, Union[bool, int, float, str]]] = None
         if settings.kv_overrides is not None:
             assert isinstance(settings.kv_overrides, list)
             kv_overrides = {}
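
The last annotation change combines both backports: Optional[Dict[str, Union[bool, int, float, str]]] for what was dict[str, bool | int | float | str] | None. A hedged sketch of building such a mapping from "key=value" strings; the parsing scheme is invented here to exercise the annotation and is not necessarily the override format the server accepts:

    from typing import Dict, List, Optional, Union

    def parse_overrides(items: List[str]) -> Optional[Dict[str, Union[bool, int, float, str]]]:
        # Hypothetical "key=value" format, used only for illustration.
        if not items:
            return None
        out: Dict[str, Union[bool, int, float, str]] = {}
        for item in items:
            key, _, raw = item.partition("=")
            if raw in ("true", "false"):
                out[key] = raw == "true"
            else:
                try:
                    out[key] = int(raw)
                except ValueError:
                    try:
                        out[key] = float(raw)
                    except ValueError:
                        out[key] = raw
        return out

    print(parse_overrides(["tokenizer.add_bos_token=false", "scale=0.5"]))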
