
Commit 18f58fe

feat: Update llama.cpp

1 parent dff186c commit 18f58fe

File tree

2 files changed: +3 -3 lines changed


Diff for: llama_cpp/llama_cpp.py (+2 -2)
@@ -944,7 +944,7 @@ class llama_context_params(ctypes.Structure):
 # int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
 # enum llama_ftype ftype; // quantize to this llama_ftype
 # enum ggml_type output_tensor_type; // output tensor type
-# enum ggml_type token_embedding_type; // itoken embeddings tensor type
+# enum ggml_type token_embedding_type; // token embeddings tensor type
 # bool allow_requantize; // allow quantizing non-f32/f16 tensors
 # bool quantize_output_tensor; // quantize output.weight
 # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
@@ -960,7 +960,7 @@ class llama_model_quantize_params(ctypes.Structure):
     nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
     ftype (int): quantize to this llama_ftype
     output_tensor_type (int): output tensor type
-    token_embedding_type (int): itoken embeddings tensor type
+    token_embedding_type (int): token embeddings tensor type
     allow_requantize (bool): allow quantizing non-f32/f16 tensors
     quantize_output_tensor (bool): quantize output.weight
    only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored

Diff for: vendor/llama.cpp (+1 -1, submodule commit pointer updated)
