@@ -29,10 +29,6 @@ extern void client_handler(lupine_socket_t connfd);
2929
3030namespace {
3131
32- struct snapshot_result {
33- CUresult cuda_result = CUDA_ERROR_UNKNOWN ;
34- };
35-
3632static const char kBootstrapMagic [] = " LUPSNAP1" ;
3733
3834const char *snapshot_root () {
@@ -113,14 +109,7 @@ std::string criu_inherit_target(const std::string &target) {
113109
114110void redirect_stdio_to_devnull () {
115111#ifndef _WIN32
116- const char *log_path = getenv (" LUPINE_SNAPSHOT_STDIO_LOG" );
117- int fd = -1 ;
118- if (log_path != nullptr && log_path[0 ] != ' \0 ' ) {
119- fd = open (log_path, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC , 0600 );
120- }
121- if (fd < 0 ) {
122- fd = open (" /dev/null" , O_RDWR | O_CLOEXEC );
123- }
112+ int fd = open (" /dev/null" , O_RDWR | O_CLOEXEC );
124113 if (fd < 0 ) {
125114 return ;
126115 }
@@ -130,21 +119,6 @@ void redirect_stdio_to_devnull() {
130119#endif
131120}
132121
133- static thread_local uint64_t g_tree_size = 0 ;
134-
135- int tree_size_cb (const char *, const struct stat *st, int type, struct FTW *) {
136- if (type == FTW_F ) {
137- g_tree_size += static_cast <uint64_t >(st->st_size );
138- }
139- return 0 ;
140- }
141-
142- uint64_t tree_size (const std::string &path) {
143- g_tree_size = 0 ;
144- nftw (path.c_str (), tree_size_cb, 32 , FTW_PHYS );
145- return g_tree_size;
146- }
147-
// Per-entry visitor with the nftw()-style callback signature: deletes `path`
// via remove() and hands remove()'s status back unchanged, so a failed delete
// is reported to the caller as a non-zero value. The stat, type-flag, and FTW
// arguments are accepted only to satisfy the signature and are ignored.
int remove_tree_cb(const char *path, const struct stat * /*info*/,
                   int /*type_flag*/, struct FTW * /*walk_state*/) {
  const int status = remove(path);
  return status;
}
@@ -180,9 +154,8 @@ bool write_text_file(const std::string &path, const std::string &text) {
180154 }
181155 off += static_cast <size_t >(wrote);
182156 }
183- if (ok && (getenv (" LUPINE_SNAPSHOT_FSYNC" ) == nullptr ||
184- strcmp (getenv (" LUPINE_SNAPSHOT_FSYNC" ), " 0" ) != 0 )) {
185- ok = fsync (fd) == 0 ;
157+ if (ok) {
158+ ok = fsync (fd) == 0 ; // the manifest is the "ready" marker; make it durable
186159 }
187160 close (fd);
188161 return ok;
@@ -405,45 +378,38 @@ int prepare_restore_socket_placeholder(const std::string &path) {
405378}
406379
407380std::string staging_root_for (const std::string &root) {
408- const char *configured = getenv (" LUPINE_SNAPSHOT_STAGING_DIR" );
409- if (configured != nullptr && configured[0 ] != ' \0 ' ) {
410- return configured;
411- }
412381 return join_path (root, " .staging" );
413382}
414383
// Appends `text` to `out` as the *contents* of a JSON string (the caller adds
// the surrounding quotes). Quotes, backslashes, and control characters are
// escaped so the resulting document stays parseable whatever `text` holds.
static void append_manifest_json_string(std::string &out,
                                        const std::string &text) {
  for (const unsigned char ch : text) {
    switch (ch) {
    case '"':
      out += "\\\"";
      break;
    case '\\':
      out += "\\\\";
      break;
    case '\n':
      out += "\\n";
      break;
    case '\r':
      out += "\\r";
      break;
    case '\t':
      out += "\\t";
      break;
    default:
      if (ch < 0x20) {
        // Remaining control characters have no short escape; use \u00XX.
        char esc[8];
        snprintf(esc, sizeof(esc), "\\u%04x", static_cast<unsigned>(ch));
        out += esc;
      } else {
        out += static_cast<char>(ch);
      }
      break;
    }
  }
}

// Builds the manifest.json text for a snapshot.
//
// The document records the snapshot `id`, the current wall-clock time in
// seconds (time(nullptr)), the calling process's pid (getpid()), and
// `client_fd_target`.
//
// The text is assembled in a std::string rather than a fixed-size stack
// buffer, so an arbitrarily long `client_fd_target` can no longer be silently
// truncated into a manifest missing its closing quote/brace, and both string
// fields are JSON-escaped so embedded quotes or backslashes cannot corrupt the
// document.
//
// @param id                snapshot identifier written to the "id" field.
// @param client_fd_target  value written to the "client_fd_target" field.
// @return the complete JSON document, terminated by "}\n".
std::string manifest_json(const std::string &id,
                          const std::string &client_fd_target) {
  std::string json;
  json.reserve(id.size() + client_fd_target.size() + 128);

  json += "{\n  \"id\": \"";
  append_manifest_json_string(json, id);
  json += "\",\n  \"created_unix_seconds\": ";
  json += std::to_string(static_cast<long long>(time(nullptr)));
  json += ",\n  \"pid\": ";
  json += std::to_string(static_cast<long long>(getpid()));
  json += ",\n  \"client_fd_target\": \"";
  append_manifest_json_string(json, client_fd_target);
  json += "\"\n}\n";
  return json;
}
432398
433- snapshot_result save_snapshot_artifact (const char *id, int client_fd) {
399+ CUresult save_snapshot_artifact (const char *id, int client_fd) {
434400 const char *root_env = snapshot_root ();
435401 if (root_env == nullptr ) {
436- return { CUDA_ERROR_NOT_SUPPORTED } ;
402+ return CUDA_ERROR_NOT_SUPPORTED ;
437403 }
438404 if (!lupine_snapshot_id_valid (id)) {
439- return { CUDA_ERROR_INVALID_VALUE } ;
405+ return CUDA_ERROR_INVALID_VALUE ;
440406 }
441407
442408 std::string root (root_env);
443409 std::string objects = join_path (root, " objects" );
444410 std::string staging_root = staging_root_for (root);
445411 if (!mkdir_p (objects) || !mkdir_p (staging_root)) {
446- return { CUDA_ERROR_OPERATING_SYSTEM } ;
412+ return CUDA_ERROR_OPERATING_SYSTEM ;
447413 }
448414
449415 std::string snapshot_id = snapshot_id_dir (id);
@@ -455,7 +421,7 @@ snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
455421 remove_tree (staging);
456422 if (!mkdir_p (criu_dir) || !mkdir_p (logs_dir)) {
457423 remove_tree (staging);
458- return { CUDA_ERROR_OPERATING_SYSTEM } ;
424+ return CUDA_ERROR_OPERATING_SYSTEM ;
459425 }
460426
461427 int restore_fd = -1 ;
@@ -466,13 +432,13 @@ snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
466432 join_path (staging, " client-socket-placeholder" ));
467433 if (restore_fd < 0 ) {
468434 remove_tree (staging);
469- return { CUDA_ERROR_OPERATING_SYSTEM } ;
435+ return CUDA_ERROR_OPERATING_SYSTEM ;
470436 }
471437 restore_fd_target = fd_target (restore_fd);
472438 if (restore_fd_target.empty ()) {
473439 close (restore_fd);
474440 remove_tree (staging);
475- return { CUDA_ERROR_OPERATING_SYSTEM } ;
441+ return CUDA_ERROR_OPERATING_SYSTEM ;
476442 }
477443 client_target = criu_inherit_target (restore_fd_target);
478444 }
@@ -493,20 +459,19 @@ snapshot_result save_snapshot_artifact(const char *id, int client_fd) {
493459 } else {
494460 LUPINE_LOG_ERROR (" Keeping failed snapshot staging directory " << staging);
495461 }
496- return { result} ;
462+ return result;
497463 }
498464
499- uint64_t bytes = tree_size (staging);
500465 if (!publish_artifact (staging, final_dir)) {
501466 remove_tree (staging);
502- return { CUDA_ERROR_OPERATING_SYSTEM } ;
467+ return CUDA_ERROR_OPERATING_SYSTEM ;
503468 }
504469 if (!write_text_file (join_path (final_dir, " manifest.json" ),
505- manifest_json (snapshot_id, bytes, client_target))) {
470+ manifest_json (snapshot_id, client_target))) {
506471 remove_tree (final_dir);
507- return { CUDA_ERROR_OPERATING_SYSTEM } ;
472+ return CUDA_ERROR_OPERATING_SYSTEM ;
508473 }
509- return { CUDA_SUCCESS } ;
474+ return CUDA_SUCCESS ;
510475}
511476
512477int recv_exact (lupine_socket_t connfd, void *data, size_t size) {
@@ -678,9 +643,8 @@ int handle_lupine_snapshot_save_and_exit(conn_t *conn) {
678643 }
679644 int request_id = rpc_read_end (conn);
680645
681- snapshot_result saved =
646+ CUresult result =
682647 save_snapshot_artifact (id.c_str (), static_cast <int >(conn->connfd ));
683- CUresult result = saved.cuda_result ;
684648 LUPINE_LOG_DEBUG (" Snapshot save for " << id << " completed with CUDA result "
685649 << result);
686650 if (rpc_write_start_response (conn, request_id) < 0 ) {
0 commit comments