Skip to content

Commit 73979bb

Browse files
committed
feat: wire MTP draft-mtp speculative decoding + restore mtp.test.js.
Signed-off-by: Marcus Edel <marcus.edel@collabora.com>
1 parent 50b4fd6 commit 73979bb

7 files changed

Lines changed: 458 additions & 24 deletions

File tree

packages/llm-llamacpp/addon/src/model-interface/LlamaModel.cpp

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
#include <common/chat.h>
2020
#include <common/common.h>
2121
#include <common/log.h>
22+
#include <common/speculative.h>
2223
#include <inference-addon-cpp/Errors.hpp>
2324
#include <llama.h>
2425
#ifdef __APPLE__
@@ -1008,6 +1009,8 @@ LlamaModel::singleRuntimeStatsLocked() const {
10081009
{"contextSlides",
10091010
static_cast<int64_t>(state_->llmContext_->getNSlides())},
10101011
{"avgConcurrentSeq", 1.0},
1012+
{"draftAccepted", state_->llmContext_->getDraftAccepted()},
1013+
{"draftTotal", state_->llmContext_->getDraftTotal()},
10111014
{"backendDevice", runtimeBackendDevice_}};
10121015
}
10131016

@@ -1308,15 +1311,11 @@ void LlamaModel::commonParamsParse(
13081311

13091312
for (const std::string& key : {"spec-type", "spec_type"}) {
13101313
if (auto iter = configFilemap.find(key); iter != configFilemap.end()) {
1311-
const auto requested = split(iter->second, ',');
1312-
if (std::find(requested.begin(), requested.end(), "draft-mtp") !=
1313-
requested.end()) {
1314-
throw qvac_errors::StatusError(
1315-
ADDON_ID,
1316-
qvac_errors::general_error::toString(
1317-
qvac_errors::general_error::InvalidArgument),
1318-
"spec-type=draft-mtp is not supported");
1319-
}
1314+
auto types =
1315+
common_speculative_types_from_names(split(iter->second, ','));
1316+
params.speculative.types.insert(
1317+
params.speculative.types.end(), types.begin(), types.end());
1318+
configFilemap.erase(iter);
13201319
}
13211320
}
13221321

packages/llm-llamacpp/addon/src/model-interface/LlmContext.hpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -281,6 +281,14 @@ class LlmContext { // NOLINT(cppcoreguidelines-special-member-functions)
281281
*/
282282
virtual void resetNSlides() = 0;
283283

284+
/**
285+
* Speculative-decoding counters for the most recent generation. Default 0
286+
* for contexts without speculative decoding; the single-prompt MTP path
287+
* overrides these to surface draft acceptance via RuntimeStats.
288+
*/
289+
[[nodiscard]] virtual int64_t getDraftAccepted() const { return 0; }
290+
[[nodiscard]] virtual int64_t getDraftTotal() const { return 0; }
291+
284292
/**
285293
* The load media method. It loads the media from memory buffer.
286294
* Default implementation does nothing (for text-only contexts).

0 commit comments

Comments
 (0)