2 files changed, +3 -3 lines changed

@@ -944,7 +944,7 @@ class llama_context_params(ctypes.Structure):
 # int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
 # enum llama_ftype ftype; // quantize to this llama_ftype
 # enum ggml_type output_tensor_type; // output tensor type
-# enum ggml_type token_embedding_type; // itoken embeddings tensor type
+# enum ggml_type token_embedding_type; // token embeddings tensor type
 # bool allow_requantize; // allow quantizing non-f32/f16 tensors
 # bool quantize_output_tensor; // quantize output.weight
 # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
@@ -960,7 +960,7 @@ class llama_model_quantize_params(ctypes.Structure):
 nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
 ftype (int): quantize to this llama_ftype
 output_tensor_type (int): output tensor type
-token_embedding_type (int): itoken embeddings tensor type
+token_embedding_type (int): token embeddings tensor type
 allow_requantize (bool): allow quantizing non-f32/f16 tensors
 quantize_output_tensor (bool): quantize output.weight
 only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
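
For reference, the fields documented in this docstring mirror the C-level llama_model_quantize_params struct. The sketch below shows one way they might be set from Python through the low-level llama_cpp bindings; it is a minimal sketch, not part of this change, and assumes llama_model_quantize_default_params() and llama_model_quantize() are exposed as in the upstream llama.cpp C API, that constants named LLAMA_FTYPE_MOSTLY_Q4_K_M and GGML_TYPE_F16 are available, and that the file paths are placeholders.

# Minimal sketch (not part of this change): quantize a GGUF model using the
# low-level llama_cpp bindings. Constant names and paths are illustrative.
import ctypes
import llama_cpp

# Start from the library defaults, then override the documented fields.
params = llama_cpp.llama_model_quantize_default_params()
params.nthread = 0                                       # <= 0: use std::thread::hardware_concurrency()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_K_M       # quantize to this llama_ftype
params.token_embedding_type = llama_cpp.GGML_TYPE_F16    # token embeddings tensor type
params.quantize_output_tensor = True                     # quantize output.weight
params.allow_requantize = False                          # do not requantize already-quantized tensors
params.only_copy = False                                 # actually quantize, do not just copy tensors

# llama_model_quantize returns 0 on success.
ret = llama_cpp.llama_model_quantize(
    b"model-f16.gguf",        # placeholder input path
    b"model-q4_k_m.gguf",     # placeholder output path
    ctypes.byref(params),
)
assert ret == 0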