 class LlamaProxy:
-    def __init__(self, models: list[ModelSettings]) -> None:
+    def __init__(self, models: List[ModelSettings]) -> None:
         assert len(models) > 0, "No models provided!"

         self._model_settings_dict: dict[str, ModelSettings] = {}
@@ -18,19 +18,19 @@ def __init__(self, models: list[ModelSettings]) -> None:
                 model.model_alias = model.model
             self._model_settings_dict[model.model_alias] = model

-        self._current_model: llama_cpp.Llama | None = None
-        self._current_model_alias: str | None = None
+        self._current_model: Optional[llama_cpp.Llama] = None
+        self._current_model_alias: Optional[str] = None

         self._default_model_settings: ModelSettings = models[0]
         self._default_model_alias: str = self._default_model_settings.model_alias  # type: ignore

         # Load default model
         self._current_model = self.load_llama_from_model_settings(
-            self._default_model_settings,
+            self._default_model_settings
         )
         self._current_model_alias = self._default_model_alias

-    def __call__(self, model: str | None = None) -> llama_cpp.Llama:
+    def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama:
         if model is None:
             model = self._default_model_alias

@@ -53,7 +53,7 @@ def __call__(self, model: str | None = None) -> llama_cpp.Llama:
     def __getitem__(self, model: str):
         return self._model_settings_dict[model].model_dump()

-    def __setitem__(self, model: str, settings: ModelSettings | str | bytes):
+    def __setitem__(self, model: str, settings: Union[ModelSettings, str, bytes]):
         if isinstance(settings, (bytes, str)):
             settings = ModelSettings.model_validate_json(settings)
         self._model_settings_dict[model] = settings
@@ -82,7 +82,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "obsidian":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -96,7 +96,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.ObsidianChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "llava-1-6":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -110,7 +110,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.Llava16ChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "moondream":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -124,7 +124,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.MoondreamChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "nanollava":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -138,7 +138,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.NanoLlavaChatHandler(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "llama-3-vision-alpha":
             assert settings.clip_model_path is not None, "clip model not found"
@@ -152,38 +152,38 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 )
             else:
                 chat_handler = llama_cpp.llama_chat_format.Llama3VisionAlpha(
-                    clip_model_path=settings.clip_model_path, verbose=settings.verbose,
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
         elif settings.chat_format == "hf-autotokenizer":
             assert (
                 settings.hf_pretrained_model_name_or_path is not None
             ), "hf_pretrained_model_name_or_path must be set for hf-autotokenizer"
             chat_handler = (
                 llama_cpp.llama_chat_format.hf_autotokenizer_to_chat_completion_handler(
-                    settings.hf_pretrained_model_name_or_path,
+                    settings.hf_pretrained_model_name_or_path
                 )
             )
         elif settings.chat_format == "hf-tokenizer-config":
             assert (
                 settings.hf_tokenizer_config_path is not None
             ), "hf_tokenizer_config_path must be set for hf-tokenizer-config"
             chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler(
-                json.load(open(settings.hf_tokenizer_config_path)),
+                json.load(open(settings.hf_tokenizer_config_path))
             )

-        tokenizer: llama_cpp.BaseLlamaTokenizer | None = None
+        tokenizer: Optional[llama_cpp.BaseLlamaTokenizer] = None
         if settings.hf_pretrained_model_name_or_path is not None:
             tokenizer = llama_tokenizer.LlamaHFTokenizer.from_pretrained(
-                settings.hf_pretrained_model_name_or_path,
+                settings.hf_pretrained_model_name_or_path
             )

         draft_model = None
         if settings.draft_model is not None:
             draft_model = llama_speculative.LlamaPromptLookupDecoding(
-                num_pred_tokens=settings.draft_model_num_pred_tokens,
+                num_pred_tokens=settings.draft_model_num_pred_tokens
             )

-        kv_overrides: dict[str, bool | int | float | str] | None = None
+        kv_overrides: Optional[Dict[str, Union[bool, int, float, str]]] = None
         if settings.kv_overrides is not None:
             assert isinstance(settings.kv_overrides, list)
             kv_overrides = {}
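
The rewritten annotations above use List, Optional, Union, and Dict, but the hunks shown here do not include the matching import change; a minimal sketch of the typing import the new annotations would presumably rely on (an assumption, not part of this diff):

# Assumed import for the rewritten annotations; all four names come from the
# standard-library typing module, so no new dependency is introduced.
from typing import Dict, List, Optional, Union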
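For context, a hypothetical usage sketch of the LlamaProxy surface touched by these hunks; the import paths and model file path are assumptions, and only the __call__, __getitem__, and __setitem__ behaviour visible above is relied on:

# Hypothetical sketch, not part of the diff.
from llama_cpp.server.model import LlamaProxy          # module path assumed
from llama_cpp.server.settings import ModelSettings    # module path assumed

settings = ModelSettings(model="./models/example.Q4_K_M.gguf")  # hypothetical file
proxy = LlamaProxy(models=[settings])  # __init__ eagerly loads the default (first) model

# Calling the proxy with no alias falls back to the default model alias.
llama = proxy()

# When model_alias is unset it is set to the model path (see the __init__ hunk),
# so the path doubles as the lookup key.
llama = proxy("./models/example.Q4_K_M.gguf")

# __getitem__ returns the stored ModelSettings as a plain dict.
print(proxy["./models/example.Q4_K_M.gguf"])

# __setitem__ accepts ModelSettings or its JSON form (str or bytes), matching the
# Union[ModelSettings, str, bytes] annotation introduced above.
proxy["./models/example.Q4_K_M.gguf"] = settings.model_dump_json()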