@@ -68,6 +68,8 @@ class EngineArgs:
6868 lengths_via_tokenize : bool = MANAGER .lengths_via_tokenize [0 ]
6969 embedding_dtype : EmbeddingDtype = EmbeddingDtype [MANAGER .embedding_dtype [0 ]]
7070 served_model_name : str = MANAGER .served_model_name [0 ]
71+ onnx_disable_optimize : bool = MANAGER .onnx_disable_optimize [0 ]
72+ onnx_do_not_prefer_quantized : bool = MANAGER .onnx_do_not_prefer_quantized [0 ]
7173
7274 _loading_strategy : Optional [LoadingStrategy ] = None
7375
@@ -160,8 +162,10 @@ def from_env(cls) -> list["EngineArgs"]:
160162 lengths_via_tokenize = lengths_via_tokenize ,
161163 embedding_dtype = embedding_dtype ,
162164 served_model_name = served_model_name ,
165+ onnx_disable_optimize = onnx_disable_optimize ,
166+ onnx_do_not_prefer_quantized = onnx_do_not_prefer_quantized
163167 )
164- for model_name_or_path , batch_size , revision , trust_remote_code , engine , model_warmup , device , compile , bettertransformer , dtype , pooling_method , lengths_via_tokenize , embedding_dtype , served_model_name in zip_longest (
168+ for model_name_or_path , batch_size , revision , trust_remote_code , engine , model_warmup , device , compile , bettertransformer , dtype , pooling_method , lengths_via_tokenize , embedding_dtype , served_model_name , onnx_disable_optimize , onnx_do_not_prefer_quantized in zip_longest (
165169 MANAGER .model_id ,
166170 MANAGER .batch_size ,
167171 MANAGER .revision ,
@@ -176,5 +180,7 @@ def from_env(cls) -> list["EngineArgs"]:
176180 MANAGER .lengths_via_tokenize ,
177181 MANAGER .embedding_dtype ,
178182 MANAGER .served_model_name ,
183+ MANAGER .onnx_disable_optimize ,
184+ MANAGER .onnx_do_not_prefer_quantized
179185 )
180186 ]
0 commit comments