Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion criu/include/namespaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
CLONE_NEWTIME)

/* Nested namespaces are supported only for these types */
#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET)
#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWPID)

#define EXTRA_SIZE 20

Expand Down
15 changes: 6 additions & 9 deletions criu/pstree.c
Original file line number Diff line number Diff line change
Expand Up @@ -959,16 +959,13 @@ static int prepare_pstree_kobj_ids(void)
if (item == root_item) {
pr_info("Will restore in %lx namespaces\n", cflags);
root_ns_mask = cflags;
} else if (cflags & ~(root_ns_mask & CLONE_SUBNS)) {
} else if (cflags & ~(root_ns_mask | CLONE_SUBNS)) {
/*
* Namespaces from CLONE_SUBNS can be nested, but in
* this case nobody can't share external namespaces of
* these types.
*
* Workaround for all other namespaces --
* all tasks should be in one namespace. And
* this namespace is either inherited from the
* criu or is created for the init task (only)
* Namespaces from CLONE_SUBNS can be nested and
* can also be created by sub-tasks even if root
* doesn't use them (e.g. a child creates a PID
* namespace for sandboxing). All other namespace
* types must be shared with root.
*/
pr_err("Can't restore sub-task in NS (cflags %lx)\n", cflags);
return -1;
Expand Down
1 change: 1 addition & 0 deletions test/zdtm/static/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ TST_NOFILE := \
shmemfd-priv \
time \
timens_nested \
pidns_nested \
timens_for_kids \
zombie_leader \
sigtrap \
Expand Down
161 changes: 161 additions & 0 deletions test/zdtm/static/pidns_nested.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <sched.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <fcntl.h>
#include <stdio.h>

#include "zdtmtst.h"

/*
 * Test metadata strings: a one-line description of what the test checks
 * and the author contact. Presumably consumed by the zdtm harness via
 * zdtmtst.h -- confirm against the harness.
 */
const char *test_doc = "Check dump/restore of a process tree with a child PID namespace";
const char *test_author = "Nidhish Gajjar <hacker@scigic.com>";

/*
 * Fork a child that becomes PID 1 of a freshly unshared PID namespace,
 * block it on a pipe across the checkpoint/restore window, and then have
 * it verify that getpid() still returns 1.
 *
 * Three pipes are used for the handshake:
 *   pipe_ready  - child -> parent, "R" once the child has set itself up
 *   pipe_go     - parent -> child, "G" once the test may check its PID
 *   pipe_result - child -> parent, '0' if the PID was preserved, '1' if not
 *
 * Returns 0 after pass(), 1 on any setup or verification failure.
 */
int main(int argc, char **argv)
{
	int pipe_ready[2], pipe_go[2], pipe_result[2];
	int status;
	pid_t child, ret;
	char buf;

	test_init(argc, argv);

	if (pipe(pipe_ready) || pipe(pipe_go) || pipe(pipe_result)) {
		pr_perror("pipe");
		return 1;
	}

	/*
	 * Create a new PID namespace. The next fork()'d child
	 * will be PID 1 inside this new namespace.
	 */
	if (unshare(CLONE_NEWPID)) {
		pr_perror("unshare(CLONE_NEWPID)");
		return 1;
	}

	child = fork();
	if (child < 0) {
		pr_perror("fork child");
		return 1;
	}

	if (child == 0) {
		/*
		 * Child: PID 1 inside the new PID namespace.
		 */
		pid_t my_pid;
		char res = '0';

		close(pipe_ready[0]);
		close(pipe_go[1]);
		close(pipe_result[0]);

		/*
		 * Create a new session inside the child PID namespace.
		 * Without this, getsid() returns 0 because the inherited
		 * session leader lives in the parent namespace and is not
		 * visible here.
		 */
		if (setsid() < 0) {
			pr_perror("setsid");
			_exit(1);
		}

		my_pid = getpid();
		if (my_pid != 1) {
			fprintf(stderr, "Child expected PID 1 before C/R, got %d\n", my_pid);
			_exit(1);
		}

		/*
		 * Signal parent we're ready. A lost byte here would leave
		 * the parent blocked or reporting a bogus failure, so the
		 * write result is checked.
		 */
		if (write(pipe_ready[1], "R", 1) != 1) {
			pr_perror("write to ready pipe");
			_exit(1);
		}
		close(pipe_ready[1]);

		/*
		 * Wait for parent to tell us to check PID.
		 * Dump/restore happens while we're blocked here.
		 */
		if (read(pipe_go[0], &buf, 1) != 1) {
			_exit(1);
		}
		close(pipe_go[0]);

		/* After restore: verify PID is still 1 inside our namespace */
		my_pid = getpid();
		if (my_pid != 1) {
			fprintf(stderr, "Child expected PID 1 after C/R, got %d\n", my_pid);
			res = '1';
		}

		/* The verdict travels over the pipe; the exit code only says "ran to completion". */
		if (write(pipe_result[1], &res, 1) != 1) {
			pr_perror("write to result pipe");
			_exit(1);
		}
		close(pipe_result[1]);
		_exit(0);
	}

	/* Parent: in the original PID namespace */
	close(pipe_ready[1]);
	close(pipe_go[0]);
	close(pipe_result[1]);

	/* Wait for child to be ready */
	if (read(pipe_ready[0], &buf, 1) != 1 || buf != 'R') {
		pr_perror("child not ready");
		kill(child, SIGKILL);
		/* Reap the killed child so it does not linger as a zombie. */
		waitpid(child, NULL, 0);
		return 1;
	}
	close(pipe_ready[0]);

	test_msg("Child host PID: %d (namespace PID should be 1)\n", child);

	/* Checkpoint happens here */
	test_daemon();
	test_waitsig();

	/*
	 * After restore: tell child to verify its PID and report.
	 * Use pipe instead of kill() since host PID may change.
	 */
	if (write(pipe_go[1], "G", 1) != 1) {
		fail("Failed to signal child: %m");
		return 1;
	}
	close(pipe_go[1]);

	/* Read result from child */
	if (read(pipe_result[0], &buf, 1) != 1) {
		fail("Failed to read result from child");
		return 1;
	}
	close(pipe_result[0]);

	ret = waitpid(child, &status, 0);
	if (ret < 0 && errno == ECHILD) {
		/*
		 * After restore, the host PID may have changed.
		 * Wait for any child instead.
		 */
		ret = waitpid(-1, &status, 0);

		/*
		 * Reviewer note (inline on this fallback): "This is why the
		 * feature shouldn't be labeled as 'support for single-level
		 * nested PID namespace dump/restore.' It feels more like a
		 * workaround for a specific use case. At a minimum, you
		 * should introduce an option to enable this feature; it
		 * should not be active by default. We probably need to do
		 * something to be sure that we use/mount proper /proc
		 * mounts..."
		 */
	}

	if (ret < 0) {
		fail("waitpid: %m");
		return 1;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		fail("Child exit status: exited=%d code=%d signaled=%d sig=%d",
		     WIFEXITED(status), WEXITSTATUS(status),
		     WIFSIGNALED(status), WTERMSIG(status));
		return 1;
	}

	if (buf != '0') {
		fail("Child PID was not preserved across dump/restore");
		return 1;
	}

	pass();
	return 0;
}
1 change: 1 addition & 0 deletions test/zdtm/static/pidns_nested.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{'flavor': 'h', 'flags': 'suid'}