Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1720,3 +1720,22 @@ if(EXISTS "${_GGUF_CAPS_TEST_SRC}")
include(CTest)
add_test(NAME GgufCapabilitiesTest COMMAND test_gguf_capabilities)
endif()

# Install-atomicity unit test (staging + verified atomic swap for backend
# installs). The unit under test is header-only, so the test source is the
# only translation unit this target needs.
set(_INSTALL_ATOMICITY_TEST_SRC
    "${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/test_install_atomicity.cpp")

# Define the target only when the test source is actually present in the tree.
if(EXISTS "${_INSTALL_ATOMICITY_TEST_SRC}")
    add_executable(test_install_atomicity test/cpp/test_install_atomicity.cpp)

    # Public headers from the source tree plus any headers generated into the
    # build tree.
    target_include_directories(test_install_atomicity
        PRIVATE
            ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/include
            ${CMAKE_CURRENT_BINARY_DIR}/include)

    # Enable testing and register the executable with CTest.
    include(CTest)
    add_test(NAME InstallAtomicityTest COMMAND test_install_atomicity)
endif()
118 changes: 118 additions & 0 deletions src/cpp/include/lemon/backends/install_staging.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#pragma once

#include <filesystem>
#include <stdexcept>
#include <string>
#include <system_error>

// Header-only, dependency-light staging/atomic-swap helpers for backend
// installation. Kept free of the heavier backend_utils.cpp dependencies so the
// crash-safety invariant can be unit-tested in isolation (test/cpp/test_install_atomicity.cpp).
namespace lemon::backends {

/**
 * Recursively search `dir` for a regular file named `binary_name` (or
 * `binary_name + ".exe"` on Windows).
 *
 * The lookup is non-throwing with respect to filesystem errors: every
 * filesystem query uses the std::error_code overloads, unreadable
 * subdirectories are skipped, and any other traversal error simply ends the
 * search. This keeps the "returns \"\"" contract true for every unusable
 * `dir` (missing, not a directory, unreadable) instead of only for a missing
 * one.
 *
 * @param dir          Root directory to search.
 * @param binary_name  File name to look for (compared against the last path
 *                     component of each entry).
 * @return Full path of the first match in traversal order, or "" if nothing
 *         matched, `binary_name` is empty, or `dir` cannot be traversed.
 */
inline std::string find_executable_in_dir(const std::string& dir,
                                          const std::string& binary_name) {
    namespace fs = std::filesystem;

    // An empty name can never identify a file; bail out before touching disk.
    if (binary_name.empty()) {
        return "";
    }

    // is_directory() also rejects a missing path, and (unlike a bare exists()
    // check) a regular file, which the directory iterator would refuse to open.
    std::error_code ec;
    if (!fs::is_directory(dir, ec)) {
        return "";
    }

#ifdef _WIN32
    const std::string binary_name_exe = binary_name + ".exe";
#endif

    fs::recursive_directory_iterator it(
        dir, fs::directory_options::skip_permission_denied, ec);
    const fs::recursive_directory_iterator end;
    while (!ec && it != end) {
        std::error_code entry_ec;  // a failed type query just skips the entry
        if (it->is_regular_file(entry_ec)) {
            const fs::path fname = it->path().filename();
            if (fname == binary_name
#ifdef _WIN32
                || fname == binary_name_exe
#endif
            ) {
                return it->path().string();
            }
        }
        it.increment(ec);  // non-throwing advance; an error ends the loop
    }
    return "";
}

/**
 * Atomically promote a fully-prepared staging directory to `install_dir`.
 *
 * Verifies that `staging_dir` contains `binary_name` before touching the
 * currently-installed copy. The caller MUST create `staging_dir` as a
 * sibling of `install_dir` so the renames stay on one filesystem.
 *
 * The promotion keeps a recoverable copy of the previous install at all
 * times so a failed swap can never lose both installs:
 *   1. move the existing install aside to `install_dir + ".old"`;
 *   2. rename `staging_dir` into `install_dir`;
 *   3. only then delete the `.old` backup.
 * If step 2 fails the `.old` backup is rolled back into place, restoring the
 * previously-working install.
 *
 * A `.old` directory found while `install_dir` itself is missing is treated
 * as the survivor of a swap that was interrupted between steps 1 and 2. It is
 * the only remaining copy of the previous install, so it is kept as the
 * rollback target rather than deleted up front.
 *
 * @param staging_dir  Directory holding the freshly prepared install.
 * @param install_dir  Final install location.
 * @param binary_name  Executable that must exist somewhere under
 *                     `staging_dir` for the promotion to proceed.
 * @return Path to the installed executable on success; "" when
 *         `staging_dir` does not contain `binary_name` (nothing was
 *         promoted; `staging_dir` is removed, `install_dir` is untouched).
 * @throws std::runtime_error when the filesystem swap itself fails. The
 *         previous install is left in place or rolled back; if the rollback
 *         also fails, it remains at `install_dir + ".old"` and the message
 *         says so. `staging_dir` is left for the caller to clean up. This is
 *         distinct from the "" case so the caller can report an accurate
 *         error.
 */
inline std::string commit_staged_install(const std::string& staging_dir,
                                         const std::string& install_dir,
                                         const std::string& binary_name) {
    namespace fs = std::filesystem;

    // Verify the freshly-staged tree actually contains the backend
    // executable before we touch the currently-installed copy.
    const std::string staged_exe = find_executable_in_dir(staging_dir, binary_name);
    if (staged_exe.empty()) {
        std::error_code cleanup_ec;
        fs::remove_all(staging_dir, cleanup_ec);  // drop the bad staging tree; keep install_dir
        return "";
    }

    const std::string backup_dir = install_dir + ".old";
    std::error_code ec;

    const bool had_install = fs::exists(install_dir, ec);
    if (ec) {
        // We cannot tell whether a live install is present, so do not risk
        // deleting or overwriting anything.
        throw std::runtime_error(
            "backend install swap failed: could not inspect existing install at "
            + install_dir + " (" + ec.message() + ")");
    }

    bool have_backup = false;
    if (had_install) {
        // A live install exists, so any leftover backup is redundant: clear it
        // (best effort) to free the name, then move the live install aside.
        // If the stale backup could not be cleared, the rename below reports it.
        fs::remove_all(backup_dir, ec);
        ec.clear();
        fs::rename(install_dir, backup_dir, ec);
        if (ec) {
            // Could not move the existing install aside; leave it untouched.
            throw std::runtime_error(
                "backend install swap failed: could not back up existing install at "
                + install_dir + " (" + ec.message() + ")");
        }
        have_backup = true;
    } else {
        // No live install. A leftover backup is then the only surviving copy
        // of the previous install; keep it until the new tree is in place.
        std::error_code probe_ec;
        have_backup = fs::exists(backup_dir, probe_ec);
    }

    // Promote the staged tree into place.
    fs::rename(staging_dir, install_dir, ec);
    if (ec) {
        // Promotion failed. Roll the backup back so the previously-working
        // install is restored rather than lost.
        std::string detail = " (" + ec.message() + ")";
        if (have_backup) {
            std::error_code rollback_ec;
            fs::rename(backup_dir, install_dir, rollback_ec);
            if (rollback_ec) {
                detail += "; rollback also failed (" + rollback_ec.message()
                          + "), previous install is preserved at " + backup_dir;
            }
        }
        throw std::runtime_error(
            "backend install swap failed: could not promote staged install to "
            + install_dir + detail);
    }

    // New install is in place; drop the backup (best effort).
    fs::remove_all(backup_dir, ec);

    return find_executable_in_dir(install_dir, binary_name);
}

}  // namespace lemon::backends
129 changes: 85 additions & 44 deletions src/cpp/server/backends/backend_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "lemon/backends/backend_utils.h"
#include "lemon/backends/install_staging.h"
#include "lemon/runtime_config.h"
#include "lemon/system_info.h"
#include "lemon/backends/llamacpp_server.h"
Expand All @@ -24,6 +25,7 @@
#include <iostream>
#include <lemon/utils/aixlog.hpp>
#include <algorithm>
#include <system_error>
#include <vector>
#include <nlohmann/json.hpp>

Expand Down Expand Up @@ -257,27 +259,9 @@ namespace lemon::backends {
}

// Locate `binary_name` somewhere under `install_dir`.
// Returns whatever find_executable_in_dir() returns: the full path of the
// matching file, or "" when there is no match.
std::string BackendUtils::find_executable_in_install_dir(const std::string& install_dir, const std::string& binary_name) {
    // Delegates to the header-only helper so the executable-lookup logic has
    // a single source of truth shared with commit_staged_install(). The
    // previous inline directory scan duplicated that helper and, because it
    // returned on every path, left this delegation unreachable.
    return find_executable_in_dir(install_dir, binary_name);
}

std::string BackendUtils::get_backend_binary_path(const BackendSpec& spec, const std::string& backend) {
Expand Down Expand Up @@ -403,7 +387,11 @@ namespace lemon::backends {
LOG(INFO, spec.log_name()) << "Upgrading " << spec.binary << " from " << installed_version
<< " to " << expected_version << std::endl;
needs_install = true;
fs::remove_all(install_dir);
// NOTE: do NOT remove install_dir here. The existing working
// binary is kept in place until the replacement has been
// downloaded, extracted, and verified; the atomic swap below
// (commit_staged_install) handles removal so an interrupted
// download cannot leave the backend with no usable binary.
}
} else if (!needs_install && !expected_version.empty()) {
// If the executable exists but version.txt is missing, SystemInfo
Expand All @@ -413,19 +401,45 @@ namespace lemon::backends {
LOG(INFO, spec.log_name()) << "Installed executable is missing version.txt; reinstalling "
<< spec.binary << " version " << expected_version << std::endl;
needs_install = true;
fs::remove_all(install_dir);
// See note above: removal is deferred to the verified atomic swap.
}
}

if (needs_install) {
LOG(INFO, spec.log_name()) << "Installing " << spec.binary << " (version: "
<< expected_version << ")" << std::endl;

// Create install directory
fs::create_directories(install_dir);

std::string url = "https://github.com/" + repo + "/releases/download/" +
expected_version + "/" + filename;
// Stage the new install in a sibling directory so the currently
// installed (working) binary is left untouched until the download is
// complete, extracted, and verified. Only then is staging atomically
// swapped into place (see commit_staged_install below), so a slow or
// interrupted download never destroys a working binary.
const std::string staging_dir = install_dir + ".staging";
std::error_code staging_ec;
fs::remove_all(staging_dir, staging_ec); // clear any leftover from a prior aborted install
fs::remove_all(install_dir + ".old", staging_ec); // and any orphaned swap backup
// If a stale staging tree could not be cleared (e.g. a locked file on
// Windows), fail rather than extracting into it — a leftover binary
// could otherwise satisfy verification and get promoted as a stale or
// mixed install over the working one.
if (fs::exists(staging_dir)) {
throw std::runtime_error("Could not clear stale staging directory: " + staging_dir);
}
fs::create_directories(staging_dir);

// Remove the staging tree on any early exit (exception) before the
// swap, so a failed download/extraction never leaves a half-built
// tree behind for the next attempt to trip over.
struct StagingGuard {
const std::string& dir;
bool active = true;
~StagingGuard() {
if (active) {
std::error_code ec;
fs::remove_all(dir, ec);
}
}
} staging_guard{staging_dir};

// Download archive to cache directory.
// Preserve the actual filename (sanitised for use in a path) so that
Expand All @@ -442,6 +456,19 @@ namespace lemon::backends {

LOG(DEBUG, spec.log_name()) << "Downloading to: " << zip_path << std::endl;

// Remove the downloaded archive on ANY exit from here on — success
// OR exception, including a throw from commit_staged_install() below
// (a swap/rename failure) — so the cache archive is never leaked.
// Mirrors StagingGuard above; replaces the per-throw fs::remove(zip_path)
// calls that did not cover the commit_staged_install throw path.
struct ZipGuard {
const std::string& path;
~ZipGuard() {
std::error_code ec;
fs::remove(path, ec);
}
} zip_guard{zip_path};

const std::string base_download_url = "https://github.com/" + repo + "/releases/download/" +
expected_version + "/";

Expand Down Expand Up @@ -585,7 +612,6 @@ namespace lemon::backends {
if (!part_result.success) {
combined.close();
fs::remove(part_path);
fs::remove(zip_path);
throw std::runtime_error("Failed to download " + part_filename + " from: " + part_url +
" - " + part_result.error_message);
}
Expand All @@ -610,29 +636,45 @@ namespace lemon::backends {
LOG(DEBUG, spec.log_name()) << "Downloaded archive file size: "
<< (file_size / 1024 / 1024) << " MB" << std::endl;

// Extract
if (!extract_archive(zip_path, install_dir, spec.log_name())) {
fs::remove(zip_path);
fs::remove_all(install_dir);
// Extract into the staging directory (NOT install_dir) so a failed
// extraction cannot destroy the currently-installed binary. The
// staging guard removes the partial tree when we throw.
if (!extract_archive(zip_path, staging_dir, spec.log_name())) {
throw std::runtime_error("Failed to extract archive: " + zip_path);
}

// Verify extraction
exe_path = find_executable_in_install_dir(install_dir, spec.binary);
// Save version info into the staging tree so it travels with the
// atomic swap below. Fail cleanly on a write error rather than
// promoting a backend with no version.txt (which would make the next
// status check force an unnecessary reinstall).
{
const std::string staged_version_file = (fs::path(staging_dir) / "version.txt").string();
std::ofstream vf(staged_version_file);
vf << expected_version;
vf.flush();
if (!vf.good()) {
throw std::runtime_error("Failed to write version file: " + staged_version_file);
}
}

// Verify the staged tree contains the executable, then atomically
// swap it into place. commit_staged_install keeps a recoverable .old
// backup across the swap: it removes the staging tree and leaves
// install_dir untouched on verification failure (returns ""), and on
// a swap (rename) failure it rolls the backup back and throws — so a
// botched download/extraction/swap never destroys the working binary.
exe_path = commit_staged_install(staging_dir, install_dir, spec.binary);
if (exe_path.empty()) {
LOG(ERROR, spec.log_name()) << "Extraction completed but executable not found" << std::endl;
fs::remove(zip_path);
fs::remove_all(install_dir);
throw std::runtime_error("Extraction failed: executable not found");
Comment thread
ianbmacdonald marked this conversation as resolved.
}
// Swap succeeded: staging was consumed by the rename, so disarm the
// guard (its cleanup would now be a no-op, but disarm to make intent
// explicit and skip a pointless filesystem call).
staging_guard.active = false;

LOG(DEBUG, spec.log_name()) << "Executable verified at: " << exe_path << std::endl;

// Save version info
std::ofstream vf(version_file);
vf << expected_version;
vf.close();

#ifndef _WIN32
// Make all binaries in bin/ executable (tar may lose permissions)
{
Expand All @@ -649,8 +691,7 @@ namespace lemon::backends {
chmod(exe_path.c_str(), 0755);
#endif

// Delete ZIP file
fs::remove(zip_path);
// (The downloaded archive is removed by zip_guard on scope exit.)

// Send completion event now that installation is fully done.
// For split archives the combined on-disk size is only known after
Expand Down
Loading
Loading