Skip to content

Commit 04b3230

Browse files
committed
Simplify snapshot save: drop snapshot_result wrapper, tree_size/bytes, and unused env knobs
1 parent a4ecf1c commit 04b3230

1 file changed

Lines changed: 19 additions & 55 deletions

File tree

snapshot.cpp

Lines changed: 19 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,6 @@ extern void client_handler(lupine_socket_t connfd);
2929

3030
namespace {
3131

32-
struct snapshot_result {
33-
CUresult cuda_result = CUDA_ERROR_UNKNOWN;
34-
};
35-
3632
static const char kBootstrapMagic[] = "LUPSNAP1";
3733

3834
const char *snapshot_root() {
@@ -113,14 +109,7 @@ std::string criu_inherit_target(const std::string &target) {
113109

114110
void redirect_stdio_to_devnull() {
115111
#ifndef _WIN32
116-
const char *log_path = getenv("LUPINE_SNAPSHOT_STDIO_LOG");
117-
int fd = -1;
118-
if (log_path != nullptr && log_path[0] != '\0') {
119-
fd = open(log_path, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, 0600);
120-
}
121-
if (fd < 0) {
122-
fd = open("/dev/null", O_RDWR | O_CLOEXEC);
123-
}
112+
int fd = open("/dev/null", O_RDWR | O_CLOEXEC);
124113
if (fd < 0) {
125114
return;
126115
}
@@ -130,21 +119,6 @@ void redirect_stdio_to_devnull() {
130119
#endif
131120
}
132121

133-
static thread_local uint64_t g_tree_size = 0;
134-
135-
int tree_size_cb(const char *, const struct stat *st, int type, struct FTW *) {
136-
if (type == FTW_F) {
137-
g_tree_size += static_cast<uint64_t>(st->st_size);
138-
}
139-
return 0;
140-
}
141-
142-
uint64_t tree_size(const std::string &path) {
143-
g_tree_size = 0;
144-
nftw(path.c_str(), tree_size_cb, 32, FTW_PHYS);
145-
return g_tree_size;
146-
}
147-
148122
int remove_tree_cb(const char *path, const struct stat *, int, struct FTW *) {
149123
return remove(path);
150124
}
@@ -180,9 +154,8 @@ bool write_text_file(const std::string &path, const std::string &text) {
180154
}
181155
off += static_cast<size_t>(wrote);
182156
}
183-
if (ok && (getenv("LUPINE_SNAPSHOT_FSYNC") == nullptr ||
184-
strcmp(getenv("LUPINE_SNAPSHOT_FSYNC"), "0") != 0)) {
185-
ok = fsync(fd) == 0;
157+
if (ok) {
158+
ok = fsync(fd) == 0; // the manifest is the "ready" marker; make it durable
186159
}
187160
close(fd);
188161
return ok;
@@ -405,45 +378,38 @@ int prepare_restore_socket_placeholder(const std::string &path) {
405378
}
406379

407380
std::string staging_root_for(const std::string &root) {
408-
const char *configured = getenv("LUPINE_SNAPSHOT_STAGING_DIR");
409-
if (configured != nullptr && configured[0] != '\0') {
410-
return configured;
411-
}
412381
return join_path(root, ".staging");
413382
}
414383

415-
std::string manifest_json(const std::string &id, uint64_t bytes,
384+
std::string manifest_json(const std::string &id,
416385
const std::string &client_fd_target) {
417386
char buf[1024];
418387
snprintf(buf, sizeof(buf),
419388
"{\n"
420389
" \"id\": \"%s\",\n"
421-
" \"state\": \"READY\",\n"
422-
" \"bytes\": %llu,\n"
423390
" \"created_unix_seconds\": %lld,\n"
424391
" \"pid\": %lld,\n"
425392
" \"client_fd_target\": \"%s\"\n"
426393
"}\n",
427-
id.c_str(), static_cast<unsigned long long>(bytes),
428-
static_cast<long long>(time(nullptr)), static_cast<long long>(getpid()),
429-
client_fd_target.c_str());
394+
id.c_str(), static_cast<long long>(time(nullptr)),
395+
static_cast<long long>(getpid()), client_fd_target.c_str());
430396
return std::string(buf);
431397
}
432398

433-
snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
399+
CUresult save_snapshot_artifact(const char *id, int client_fd) {
434400
const char *root_env = snapshot_root();
435401
if (root_env == nullptr) {
436-
return {CUDA_ERROR_NOT_SUPPORTED};
402+
return CUDA_ERROR_NOT_SUPPORTED;
437403
}
438404
if (!lupine_snapshot_id_valid(id)) {
439-
return {CUDA_ERROR_INVALID_VALUE};
405+
return CUDA_ERROR_INVALID_VALUE;
440406
}
441407

442408
std::string root(root_env);
443409
std::string objects = join_path(root, "objects");
444410
std::string staging_root = staging_root_for(root);
445411
if (!mkdir_p(objects) || !mkdir_p(staging_root)) {
446-
return {CUDA_ERROR_OPERATING_SYSTEM};
412+
return CUDA_ERROR_OPERATING_SYSTEM;
447413
}
448414

449415
std::string snapshot_id = snapshot_id_dir(id);
@@ -455,7 +421,7 @@ snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
455421
remove_tree(staging);
456422
if (!mkdir_p(criu_dir) || !mkdir_p(logs_dir)) {
457423
remove_tree(staging);
458-
return {CUDA_ERROR_OPERATING_SYSTEM};
424+
return CUDA_ERROR_OPERATING_SYSTEM;
459425
}
460426

461427
int restore_fd = -1;
@@ -466,13 +432,13 @@ snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
466432
join_path(staging, "client-socket-placeholder"));
467433
if (restore_fd < 0) {
468434
remove_tree(staging);
469-
return {CUDA_ERROR_OPERATING_SYSTEM};
435+
return CUDA_ERROR_OPERATING_SYSTEM;
470436
}
471437
restore_fd_target = fd_target(restore_fd);
472438
if (restore_fd_target.empty()) {
473439
close(restore_fd);
474440
remove_tree(staging);
475-
return {CUDA_ERROR_OPERATING_SYSTEM};
441+
return CUDA_ERROR_OPERATING_SYSTEM;
476442
}
477443
client_target = criu_inherit_target(restore_fd_target);
478444
}
@@ -493,20 +459,19 @@ snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
493459
} else {
494460
LUPINE_LOG_ERROR("Keeping failed snapshot staging directory " << staging);
495461
}
496-
return {result};
462+
return result;
497463
}
498464

499-
uint64_t bytes = tree_size(staging);
500465
if (!publish_artifact(staging, final_dir)) {
501466
remove_tree(staging);
502-
return {CUDA_ERROR_OPERATING_SYSTEM};
467+
return CUDA_ERROR_OPERATING_SYSTEM;
503468
}
504469
if (!write_text_file(join_path(final_dir, "manifest.json"),
505-
manifest_json(snapshot_id, bytes, client_target))) {
470+
manifest_json(snapshot_id, client_target))) {
506471
remove_tree(final_dir);
507-
return {CUDA_ERROR_OPERATING_SYSTEM};
472+
return CUDA_ERROR_OPERATING_SYSTEM;
508473
}
509-
return {CUDA_SUCCESS};
474+
return CUDA_SUCCESS;
510475
}
511476

512477
int recv_exact(lupine_socket_t connfd, void *data, size_t size) {
@@ -678,9 +643,8 @@ int handle_lupine_snapshot_save_and_exit(conn_t *conn) {
678643
}
679644
int request_id = rpc_read_end(conn);
680645

681-
snapshot_result saved =
646+
CUresult result =
682647
save_snapshot_artifact(id.c_str(), static_cast<int>(conn->connfd));
683-
CUresult result = saved.cuda_result;
684648
LUPINE_LOG_DEBUG("Snapshot save for " << id << " completed with CUDA result "
685649
<< result);
686650
if (rpc_write_start_response(conn, request_id) < 0) {

0 commit comments

Comments
 (0)