From 8a3d00b8f7d3fbac1c9d72db38562f61aed06ac0 Mon Sep 17 00:00:00 2001
From: nidhishgajjar
Date: Mon, 16 Mar 2026 03:21:55 +0100
Subject: [PATCH] pidns: support single-level nested PID namespace dump/restore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add CLONE_NEWPID to CLONE_SUBNS to allow dumping and restoring process
trees that contain a child PID namespace. This enables sandboxing use
cases where the root process spawns children in an isolated PID
namespace (e.g., container runtimes, agent sandboxes).

Changes:
- namespaces.h: add CLONE_NEWPID to CLONE_SUBNS
- pstree.c: fix restore check to allow CLONE_SUBNS types even when root
  doesn't use them, while still rejecting sub-tasks that differ in any
  other namespace type (cflags & ~CLONE_SUBNS instead of
  cflags & ~(root_ns_mask & CLONE_SUBNS))
- New ZDTM test: pidns_nested — verifies dump/restore of a process tree
  with unshare(CLONE_NEWPID) + fork + setsid

Limitations:
- Only single-level nesting (one child pidns, not arbitrary depth)
- Host PID of the child may change after restore (namespace PID
  preserved)
- CRIU must dump from outside the child PID namespace

Signed-off-by: nidhishgajjar
---
 criu/include/namespaces.h          |   2 +-
 criu/pstree.c                      |  15 ++-
 test/zdtm/static/Makefile          |   1 +
 test/zdtm/static/pidns_nested.c    | 167 +++++++++++++++++++++++++++++
 test/zdtm/static/pidns_nested.desc |   1 +
 5 files changed, 176 insertions(+), 10 deletions(-)
 create mode 100644 test/zdtm/static/pidns_nested.c
 create mode 100644 test/zdtm/static/pidns_nested.desc

diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index cfd0b02e4d..4a7dc9b162 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -46,7 +46,7 @@
 	 CLONE_NEWTIME)
 
 /* Nested namespaces are supported only for these types */
-#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET)
+#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWPID)
 
 #define EXTRA_SIZE 20
 
diff --git a/criu/pstree.c b/criu/pstree.c
index 3b2e84cedf..9efbe90ab5 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -959,16 +959,13 @@ static int prepare_pstree_kobj_ids(void)
 		if (item == root_item) {
 			pr_info("Will restore in %lx namespaces\n", cflags);
 			root_ns_mask = cflags;
-		} else if (cflags & ~(root_ns_mask & CLONE_SUBNS)) {
+		} else if (cflags & ~CLONE_SUBNS) {
 			/*
-			 * Namespaces from CLONE_SUBNS can be nested, but in
-			 * this case nobody can't share external namespaces of
-			 * these types.
-			 *
-			 * Workaround for all other namespaces --
-			 * all tasks should be in one namespace. And
-			 * this namespace is either inherited from the
-			 * criu or is created for the init task (only)
+			 * Namespaces from CLONE_SUBNS can be nested and
+			 * can also be created by sub-tasks even if root
+			 * doesn't use them (e.g. a child creates a PID
+			 * namespace for sandboxing). All other namespace
+			 * types must be shared with root.
 			 */
 			pr_err("Can't restore sub-task in NS (cflags %lx)\n", cflags);
 			return -1;
diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
index de0f77a5a3..39b09d6d88 100644
--- a/test/zdtm/static/Makefile
+++ b/test/zdtm/static/Makefile
@@ -281,6 +281,7 @@ TST_NOFILE	:=				\
 		shmemfd-priv			\
 		time				\
 		timens_nested			\
+		pidns_nested			\
 		timens_for_kids			\
 		zombie_leader			\
 		sigtrap				\
diff --git a/test/zdtm/static/pidns_nested.c b/test/zdtm/static/pidns_nested.c
new file mode 100644
index 0000000000..79671031f8
--- /dev/null
+++ b/test/zdtm/static/pidns_nested.c
@@ -0,0 +1,167 @@
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc = "Check dump/restore of a process tree with a child PID namespace";
+const char *test_author = "Nidhish Gajjar ";
+
+int main(int argc, char **argv)
+{
+	int pipe_ready[2], pipe_go[2], pipe_result[2];
+	int status;
+	pid_t child, ret;
+	char buf;
+
+	test_init(argc, argv);
+
+	if (pipe(pipe_ready) || pipe(pipe_go) || pipe(pipe_result)) {
+		pr_perror("pipe");
+		return 1;
+	}
+
+	/*
+	 * Create a new PID namespace. The next fork()'d child
+	 * will be PID 1 inside this new namespace.
+	 */
+	if (unshare(CLONE_NEWPID)) {
+		pr_perror("unshare(CLONE_NEWPID)");
+		return 1;
+	}
+
+	child = fork();
+	if (child < 0) {
+		pr_perror("fork child");
+		return 1;
+	}
+
+	if (child == 0) {
+		/*
+		 * Child: PID 1 inside the new PID namespace.
+		 */
+		pid_t my_pid;
+		char res = '0';
+
+		close(pipe_ready[0]);
+		close(pipe_go[1]);
+		close(pipe_result[0]);
+
+		/*
+		 * Create a new session inside the child PID namespace.
+		 * Without this, getsid() returns 0 because the inherited
+		 * session leader lives in the parent namespace and is not
+		 * visible here.
+		 */
+		if (setsid() < 0) {
+			pr_perror("setsid");
+			_exit(1);
+		}
+
+		my_pid = getpid();
+		if (my_pid != 1) {
+			fprintf(stderr, "Child expected PID 1 before C/R, got %d\n", my_pid);
+			_exit(1);
+		}
+
+		/* Signal parent we're ready */
+		if (write(pipe_ready[1], "R", 1) != 1) {
+			pr_perror("write ready");
+			_exit(1);
+		}
+		close(pipe_ready[1]);
+
+		/*
+		 * Wait for parent to tell us to check PID.
+		 * Dump/restore happens while we're blocked here.
+		 */
+		if (read(pipe_go[0], &buf, 1) != 1) {
+			_exit(1);
+		}
+		close(pipe_go[0]);
+
+		/* After restore: verify PID is still 1 inside our namespace */
+		my_pid = getpid();
+		if (my_pid != 1) {
+			fprintf(stderr, "Child expected PID 1 after C/R, got %d\n", my_pid);
+			res = '1';
+		}
+
+		/* A lost result must not be reported as a clean exit */
+		if (write(pipe_result[1], &res, 1) != 1) {
+			pr_perror("write result");
+			_exit(1);
+		}
+		close(pipe_result[1]);
+		_exit(0);
+	}
+
+	/* Parent: in the original PID namespace */
+	close(pipe_ready[1]);
+	close(pipe_go[0]);
+	close(pipe_result[1]);
+
+	/* Wait for child to be ready */
+	if (read(pipe_ready[0], &buf, 1) != 1 || buf != 'R') {
+		pr_perror("child not ready");
+		kill(child, SIGKILL);
+		return 1;
+	}
+	close(pipe_ready[0]);
+
+	test_msg("Child host PID: %d (namespace PID should be 1)\n", child);
+
+	/* Checkpoint happens here */
+	test_daemon();
+	test_waitsig();
+
+	/*
+	 * After restore: tell child to verify its PID and report.
+	 * Use pipe instead of kill() since host PID may change.
+	 */
+	if (write(pipe_go[1], "G", 1) != 1) {
+		fail("Failed to wake up child: %m");
+		return 1;
+	}
+	close(pipe_go[1]);
+
+	/* Read result from child */
+	if (read(pipe_result[0], &buf, 1) != 1) {
+		fail("Failed to read result from child");
+		return 1;
+	}
+	close(pipe_result[0]);
+
+	ret = waitpid(child, &status, 0);
+	if (ret < 0 && errno == ECHILD) {
+		/*
+		 * After restore, the host PID may have changed.
+		 * Wait for any child instead.
+		 */
+		ret = waitpid(-1, &status, 0);
+	}
+
+	if (ret < 0) {
+		fail("waitpid: %m");
+		return 1;
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		fail("Child exit status: exited=%d code=%d signaled=%d sig=%d",
+		     WIFEXITED(status), WEXITSTATUS(status),
+		     WIFSIGNALED(status), WTERMSIG(status));
+		return 1;
+	}
+
+	if (buf != '0') {
+		fail("Child PID was not preserved across dump/restore");
+		return 1;
+	}
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/pidns_nested.desc b/test/zdtm/static/pidns_nested.desc
new file mode 100644
index 0000000000..fa2c82d083
--- /dev/null
+++ b/test/zdtm/static/pidns_nested.desc
@@ -0,0 +1 @@
+{'flavor': 'h', 'flags': 'suid'}