|
19 | 19 | #include <common/chat.h> |
20 | 20 | #include <common/common.h> |
21 | 21 | #include <common/log.h> |
| 22 | +#include <common/speculative.h> |
22 | 23 | #include <inference-addon-cpp/Errors.hpp> |
23 | 24 | #include <llama.h> |
24 | 25 | #ifdef __APPLE__ |
@@ -1008,6 +1009,8 @@ LlamaModel::singleRuntimeStatsLocked() const { |
1008 | 1009 | {"contextSlides", |
1009 | 1010 | static_cast<int64_t>(state_->llmContext_->getNSlides())}, |
1010 | 1011 | {"avgConcurrentSeq", 1.0}, |
| 1012 | + {"draftAccepted", state_->llmContext_->getDraftAccepted()}, |
| 1013 | + {"draftTotal", state_->llmContext_->getDraftTotal()}, |
1011 | 1014 | {"backendDevice", runtimeBackendDevice_}}; |
1012 | 1015 | } |
1013 | 1016 |
|
@@ -1308,15 +1311,11 @@ void LlamaModel::commonParamsParse( |
1308 | 1311 |
|
1309 | 1312 | for (const std::string& key : {"spec-type", "spec_type"}) { |
1310 | 1313 | if (auto iter = configFilemap.find(key); iter != configFilemap.end()) { |
1311 | | - const auto requested = split(iter->second, ','); |
1312 | | - if (std::find(requested.begin(), requested.end(), "draft-mtp") != |
1313 | | - requested.end()) { |
1314 | | - throw qvac_errors::StatusError( |
1315 | | - ADDON_ID, |
1316 | | - qvac_errors::general_error::toString( |
1317 | | - qvac_errors::general_error::InvalidArgument), |
1318 | | - "spec-type=draft-mtp is not supported"); |
1319 | | - } |
| 1314 | + auto types = |
| 1315 | + common_speculative_types_from_names(split(iter->second, ',')); |
| 1316 | + params.speculative.types.insert( |
| 1317 | + params.speculative.types.end(), types.begin(), types.end()); |
| 1318 | + configFilemap.erase(iter); |
1320 | 1319 | } |
1321 | 1320 | } |
1322 | 1321 |
|
|
0 commit comments