Skip to content

Commit 57e70bb

Browse files
committed
feat: Update llama.cpp
1 parent 01c7607 commit 57e70bb

File tree

2 files changed

+13 additions, -5 deletions (lines changed)

2 files changed

+13
-5
lines changed

Diff for: llama_cpp/llama_cpp.py

+12 -4
Original file line number | Diff line number | Diff line change
@@ -220,6 +220,7 @@
220220
# LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
221221
# LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
222222
# LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
223+
# LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
223224
# };
224225
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
225226
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -247,6 +248,7 @@
247248
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23
248249
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24
249250
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25
251+
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26
250252

251253

252254
# // note: these values should be synchronized with ggml_rope
@@ -404,12 +406,14 @@
404406
# LLAMA_POOLING_TYPE_MEAN = 1,
405407
# LLAMA_POOLING_TYPE_CLS = 2,
406408
# LLAMA_POOLING_TYPE_LAST = 3,
409+
# LLAMA_POOLING_TYPE_RANK = 4, // used by reranking models to attach the classification head to the graph
407410
# };
408411
LLAMA_POOLING_TYPE_UNSPECIFIED = -1
409412
LLAMA_POOLING_TYPE_NONE = 0
410413
LLAMA_POOLING_TYPE_MEAN = 1
411414
LLAMA_POOLING_TYPE_CLS = 2
412415
LLAMA_POOLING_TYPE_LAST = 3
416+
LLAMA_POOLING_TYPE_RANK = 4
413417

414418
# enum llama_attention_type {
415419
# LLAMA_ATTENTION_TYPE_UNSPECIFIED = -1,
@@ -420,10 +424,11 @@
420424
LLAMA_ATTENTION_TYPE_CAUSAL = 0
421425
LLAMA_ATTENTION_TYPE_NON_CAUSAL = 1
422426

427+
423428
# enum llama_split_mode {
424-
# LLAMA_SPLIT_MODE_NONE = 0, // single GPU
425-
# LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
426-
# LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
429+
# LLAMA_SPLIT_MODE_NONE = 0, // single GPU
430+
# LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
431+
# LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
427432
# };
428433
LLAMA_SPLIT_MODE_NONE = 0
429434
LLAMA_SPLIT_MODE_LAYER = 1
@@ -2520,7 +2525,8 @@ def llama_get_embeddings_ith(
25202525

25212526
# // Get the embeddings for a sequence id
25222527
# // Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
2523-
# // shape: [n_embd] (1-dimensional)
2528+
# // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
2529+
# // otherwise: float[n_embd] (1-dimensional)
25242530
# LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
25252531
@ctypes_function(
25262532
"llama_get_embeddings_seq",
@@ -2672,6 +2678,8 @@ def llama_token_eot(model: llama_model_p, /) -> int:
26722678
# //
26732679
# // Tokenization
26742680
# //
2681+
# // The API is thread-safe.
2682+
# //
26752683

26762684

26772685
# /// @details Convert the provided text into tokens.

Diff for: vendor/llama.cpp

0 commit comments

Comments
 (0)