Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 107 additions & 1 deletion criu/fsnotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <sys/mman.h>
#include <sys/mount.h>
#include <aio.h>
#include <dirent.h>

#include <sys/fanotify.h>

Expand Down Expand Up @@ -108,6 +109,77 @@ enum {
ERR_GENERIC = -3
};

/*
* Recursively scan a directory tree looking for a file with the given
* @s_dev and @i_ino. Returns an allocated absolute path string on success,
* NULL if not found. Used as a fallback for overlayfs when open_by_handle_at
* cannot decode the file handle.
*/
static char *scan_dir_for_inode(int mntns_root, const char *base_path,
unsigned int s_dev, unsigned long i_ino,
int depth)
{
static int entries_scanned;
int fd;
DIR *dir;
struct dirent *de;
char *result = NULL;

if (depth <= 0)
return NULL;

fd = openat(mntns_root, base_path, O_RDONLY | O_DIRECTORY);
if (fd < 0)
return NULL;

dir = fdopendir(fd);
if (!dir) {
close(fd);
return NULL;
}

while ((de = readdir(dir)) != NULL) {
struct stat st;
char child_path[PATH_MAX];
int n;

if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
continue;

n = snprintf(child_path, PATH_MAX, "%s/%s", base_path,
de->d_name);
if (n >= PATH_MAX)
continue;

if (fstatat(mntns_root, child_path, &st, AT_SYMLINK_NOFOLLOW))
continue;

entries_scanned++;
if (entries_scanned == 10000)
pr_warn("Overlayfs inode scan: checked over 10000 entries\n");

if (st.st_dev != s_dev)
continue;

if (st.st_ino == i_ino) {
result = xsprintf("/%s", child_path);
break;
}

if (S_ISDIR(st.st_mode)) {
result = scan_dir_for_inode(mntns_root, child_path,
s_dev, i_ino, depth - 1);
if (result)
break;
}
}

closedir(dir);
return result;
}

#define OVERLAYFS_SCAN_MAX_DEPTH 64

static char *alloc_openable(unsigned int s_dev, unsigned long i_ino, FhEntry *f_handle)
{
struct mount_info *m;
Expand Down Expand Up @@ -136,6 +208,13 @@ static char *alloc_openable(unsigned int s_dev, unsigned long i_ino, FhEntry *f_
if (!mnt_is_dir(m))
continue;

/*
* Record that we found a mount with matching s_dev before
* trying open_by_handle_at. On overlayfs, handle decoding
* may fail but the mount is still correct.
*/
suitable_mount_found = 1;

mntfd = __open_mountpoint(m);
pr_debug("\t\tTrying via mntid %d root %s ns_mountpoint @%s (%d)\n", m->mnt_id, m->root,
m->ns_mountpoint, mntfd);
Expand All @@ -146,7 +225,6 @@ static char *alloc_openable(unsigned int s_dev, unsigned long i_ino, FhEntry *f_
close(mntfd);
if (fd < 0)
continue;
suitable_mount_found = 1;

if (read_fd_link(fd, buf, sizeof(buf)) < 0) {
close(fd);
Expand Down Expand Up @@ -269,6 +347,34 @@ int check_open_handle(unsigned int s_dev, unsigned long i_ino, FhEntry *f_handle
goto err;
}

/*
* For overlayfs, open_by_handle_at() often fails because overlayfs
* does not reliably support file handle decoding (depends on kernel
* version and nfs_export mount option). Resolve the path by scanning
* the overlay mount tree for the matching inode.
*/
if (mi->fstype->code == FSTYPE__OVERLAYFS) {
int mntns_root = mntns_get_root_fd(mi->nsid);
if (mntns_root >= 0) {
char *mp = mi->ns_mountpoint + 1;
if (mp[0] == '\0')
mp = ".";
path = scan_dir_for_inode(mntns_root, mp, s_dev, i_ino,
OVERLAYFS_SCAN_MAX_DEPTH);
if (path) {
pr_debug("\tResolved overlayfs path: %s\n", path);
f_handle->path = path;
if (root_ns_mask & CLONE_NEWNS) {
f_handle->has_mnt_id = true;
f_handle->mnt_id = mi->mnt_id;
}
goto out_nopath;
}
}
pr_warn("\tOverlayfs inode scan failed, trying irmap\n");
goto fault;
}

if (!opts.force_irmap)
/*
* If we're not forced to do irmap, then
Expand Down
1 change: 1 addition & 0 deletions test/zdtm/static/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ TST_DIR = \
inotify01 \
inotify02 \
inotify04 \
inotify_overlayfs \
cgroup00 \
rmdir_open \
cgroup01 \
Expand Down
129 changes: 129 additions & 0 deletions test/zdtm/static/inotify_overlayfs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <sys/inotify.h>
#include <stdlib.h>

#include "zdtmtst.h"

const char *test_doc = "Check inotify C/R on overlayfs mounts";
const char *test_author = "Ankit Mahajan <ankimaha-sys@users.noreply.github.com>";

char *dirname;
TEST_OPTION(dirname, string, "directory name", 1);

#define BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX))

#define LOWER "lower"
#define UPPER "upper"
#define WORK "work"
#define MERGED "merged"

int main(int argc, char *argv[])
{
char lower[PATH_MAX], upper[PATH_MAX], work[PATH_MAX], merged[PATH_MAX];
char test_file[PATH_MAX];
char buf[BUFF_SIZE];
char opts[PATH_MAX * 4];
int fd, wd, len, real_fd;

test_init(argc, argv);

if (strlen(dirname) > PATH_MAX - 64) {
pr_perror("dirname too long");
return 1;
}

sprintf(lower, "%s/%s", dirname, LOWER);
sprintf(upper, "%s/%s", dirname, UPPER);
sprintf(work, "%s/%s", dirname, WORK);
sprintf(merged, "%s/%s", dirname, MERGED);

if (mkdir(dirname, 0755) && errno != EEXIST) {
pr_perror("Can't create %s", dirname);
return 1;
}
if (mkdir(lower, 0755)) {
pr_perror("Can't create %s", lower);
return 1;
}
if (mkdir(upper, 0755)) {
pr_perror("Can't create %s", upper);
return 1;
}
if (mkdir(work, 0755)) {
pr_perror("Can't create %s", work);
return 1;
}
if (mkdir(merged, 0755)) {
pr_perror("Can't create %s", merged);
return 1;
}

/* Create a test file in the lower layer */
sprintf(test_file, "%s/testfile", lower);
real_fd = open(test_file, O_CREAT | O_WRONLY, 0644);
if (real_fd < 0) {
pr_perror("Can't create %s", test_file);
return 1;
}
close(real_fd);

/* Mount overlayfs */
sprintf(opts, "lowerdir=%s,upperdir=%s,workdir=%s",
lower, upper, work);
if (mount("overlay", merged, "overlay", 0, opts)) {
pr_perror("Can't mount overlayfs");
return 1;
}

/* Set up inotify watch on file inside the overlay */
sprintf(test_file, "%s/testfile", merged);

fd = inotify_init1(IN_NONBLOCK);
if (fd < 0) {
pr_perror("inotify_init1 failed");
goto umount;
}

wd = inotify_add_watch(fd, test_file, IN_MODIFY | IN_ACCESS);
if (wd < 0) {
pr_perror("inotify_add_watch failed on %s", test_file);
goto umount;
}

test_msg("Added inotify watch (wd=%d) on %s\n", wd, test_file);

test_daemon();
test_waitsig();

/*
* After restore, verify the watch still works by triggering
* an event and reading it.
*/
real_fd = open(test_file, O_RDONLY);
if (real_fd < 0) {
fail("Can't open %s after restore", test_file);
goto umount;
}
close(real_fd);

len = read(fd, buf, sizeof(buf));
if (len <= 0) {
fail("No inotify events after restore (len=%d)", len);
goto umount;
}

pass();

umount:
close(fd);
umount2(merged, MNT_DETACH);
return 0;
}
1 change: 1 addition & 0 deletions test/zdtm/static/inotify_overlayfs.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{'flags': 'suid', 'feature': 'overlayfs'}