Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/cpp/resources/backend_versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
"rocm-nightly": "b1274",
"cuda": "b9436",
"metal": "b9253",
"cpu": "b9253"
"cpu": "b9253",
"openvino": "b9488"
},
"openvino": {
"runtime_version": "2026.0"
Comment on lines +12 to +13
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the runtime need to be installed somehow?

},
"whispercpp": {
"cpu": "v1.8.4",
Expand Down
4 changes: 3 additions & 1 deletion src/cpp/resources/defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
"rocm_args": "",
"vulkan_args": "",
"cpu_args": "",
"openvino_args": "",
"prefer_system": true,
"rocm_bin": "builtin",
"vulkan_bin": "builtin",
"cuda_bin": "builtin",
"cpu_bin": "builtin"
"cpu_bin": "builtin",
"openvino_bin": "builtin"
},
"whispercpp": {
"backend": "auto",
Expand Down
32 changes: 26 additions & 6 deletions src/cpp/server/backends/llamacpp_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ static bool is_llamacpp_cuda_backend(const std::string& backend) {
return backend == "cuda";
}

/// Reports whether `backend` names the OpenVINO llama.cpp backend.
/// The match is exact and case-sensitive against "openvino".
static bool is_llamacpp_openvino_backend(const std::string& backend) {
    const int diff = backend.compare("openvino");
    return diff == 0;
}

static std::string trim_version_prefix(const std::string& version) {
if (!version.empty() && version[0] == 'v') {
return version.substr(1);
Expand All @@ -143,6 +147,15 @@ static std::string get_therock_version() {
return trim_to_major_minor(config["therock"]["version"].get<std::string>());
}

/// Looks up the "openvino.runtime_version" string in resources/backend_versions.json.
///
/// @return The value stored under "openvino" -> "runtime_version".
/// @throws std::runtime_error if "openvino" is absent or not an object, or if
///         "runtime_version" is absent or not a string.
static std::string get_openvino_runtime_version() {
    auto versions = JsonUtils::load_from_file(utils::get_resource_path("resources/backend_versions.json"));

    // Validate from the outside in; each check short-circuits so a nested key
    // is only touched once its parent is known to exist with the right type.
    const bool has_section = versions.contains("openvino") && versions["openvino"].is_object();
    const bool has_runtime_version = has_section &&
                                     versions["openvino"].contains("runtime_version") &&
                                     versions["openvino"]["runtime_version"].is_string();
    if (!has_runtime_version) {
        throw std::runtime_error("backend_versions.json is missing 'openvino.runtime_version'");
    }

    return versions["openvino"]["runtime_version"].get<std::string>();
}

InstallParams LlamaCppServer::get_install_params(const std::string& backend, const std::string& version) {
InstallParams params;

Expand Down Expand Up @@ -211,6 +224,14 @@ InstallParams LlamaCppServer::get_install_params(const std::string& backend, con
params.filename = "llama-" + version + "-bin-macos-arm64.tar.gz";
#else
throw std::runtime_error("Metal llamacpp only supported on macOS");
#endif
} else if (resolved_backend == "openvino") {
params.repo = "lemonade-sdk/llama.cpp";
#ifdef __linux__
std::string openvino_ver = get_openvino_runtime_version();
params.filename = "llama-" + version + "-bin-ubuntu-openvino-" + openvino_ver + "-x64.tar.gz";
#else
throw std::runtime_error("OpenVINO llamacpp is currently supported on Linux only");
#endif
} else if (resolved_backend == "cpu") {
params.repo = "ggml-org/llama.cpp";
Expand Down Expand Up @@ -342,9 +363,9 @@ void LlamaCppServer::load(const std::string& model_name,
}
push_reserved(reserved_flags, "--mmproj", std::vector<std::string>{"-mm", "-mmu", "--mmproj-url", "--no-mmproj", "--mmproj-auto", "--no-mmproj-auto", "--mmproj-offload", "--no-mmproj-offload"});

// Enable context shift for vulkan/rocm/cuda (not supported on Metal)
// Enable context shift for vulkan/rocm/cuda/openvino (not supported on Metal)
if (llamacpp_backend == "vulkan" || is_llamacpp_rocm_backend(llamacpp_backend) ||
is_llamacpp_cuda_backend(llamacpp_backend)) {
is_llamacpp_cuda_backend(llamacpp_backend) || is_llamacpp_openvino_backend(llamacpp_backend)) {
push_overridable_arg(args, llamacpp_args, "--context-shift");
push_overridable_arg(args, llamacpp_args, "--keep", "16");
} else {
Expand Down Expand Up @@ -431,10 +452,9 @@ void LlamaCppServer::load(const std::string& model_name,

env_vars.push_back({"LD_LIBRARY_PATH", lib_path});
LOG(DEBUG, "LlamaCpp") << "Setting LD_LIBRARY_PATH=" << lib_path << std::endl;
} else if (is_llamacpp_cuda_backend(llamacpp_backend)) {
// The llama.cpp-builds Linux tarballs ship the bundled CUDA runtime
// (libcudart.so, libcublas.so, etc.) alongside llama-server, so add the
// executable's directory to LD_LIBRARY_PATH like we do for ROCm.
} else if (is_llamacpp_cuda_backend(llamacpp_backend) || is_llamacpp_openvino_backend(llamacpp_backend)) {
// CUDA and OpenVINO tarballs both bundle their runtime libraries (.so files)
// alongside llama-server, so add the executable's directory to LD_LIBRARY_PATH.
fs::path exe_dir = fs::path(executable).parent_path();
std::string lib_path = exe_dir.string();

Expand Down
2 changes: 2 additions & 0 deletions src/cpp/server/config_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,14 @@ static const EnvMapping env_mappings[] = {
{"LEMONADE_LLAMACPP_ROCM_ARGS", "llamacpp", "rocm_args"},
{"LEMONADE_LLAMACPP_VULKAN_ARGS", "llamacpp", "vulkan_args"},
{"LEMONADE_LLAMACPP_CPU_ARGS", "llamacpp", "cpu_args"},
{"LEMONADE_LLAMACPP_OPENVINO_ARGS", "llamacpp", "openvino_args"},
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this migration code? We didn't have OpenVINO support before, so how can you migrate?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't migration-only mappings — migrate_from_env() is called on every fresh install to bootstrap config.json from env vars. All backends use the same mechanism (see LEMONADE_LLAMACPP_VULKAN_ARGS, LEMONADE_LLAMACPP_CUDA_BIN, etc.). Adding OpenVINO entries here means users who configure via env vars get them picked up on first run, consistent with the existing pattern.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could have sworn there was a discussion somewhere about axing them.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jfowers comments please

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see #2106. I'm getting rid of cruft, don't add more.

{"LEMONADE_LLAMACPP_DEVICE", "llamacpp", "device"},
{"LEMONADE_LLAMACPP_PREFER_SYSTEM", "llamacpp", "prefer_system"},
{"LEMONADE_LLAMACPP_ROCM_BIN", "llamacpp", "rocm_bin"},
{"LEMONADE_LLAMACPP_VULKAN_BIN", "llamacpp", "vulkan_bin"},
{"LEMONADE_LLAMACPP_CUDA_BIN", "llamacpp", "cuda_bin"},
{"LEMONADE_LLAMACPP_CPU_BIN", "llamacpp", "cpu_bin"},
{"LEMONADE_LLAMACPP_OPENVINO_BIN", "llamacpp", "openvino_bin"},
// whispercpp
{"LEMONADE_WHISPERCPP", "whispercpp", "backend"},
{"LEMONADE_WHISPERCPP_ARGS", "whispercpp", "args"},
Expand Down
3 changes: 3 additions & 0 deletions src/cpp/server/system_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,9 @@ static const std::vector<RecipeBackendDef> RECIPE_DEFS = {
{"cpu", {"x86_64"}},
{"amd_gpu", {}}, // all AMD GPU families
}},
{"llamacpp", "openvino", {"linux"}, {
{"cpu", {"x86_64"}},
Comment thread
superm1 marked this conversation as resolved.
}},
{"llamacpp", "rocm", {"windows", "linux"}, {
{"amd_gpu", {"gfx1150", "gfx1151", "gfx103X", "gfx110X", "gfx120X"}}, // STX iGPUs + RDNA2/3/4 dGPUs
}},
Expand Down
Loading