diff --git a/.github/actions/build/action.yml b/.github/actions/build/action.yml index 80e8b688..c55ae487 100644 --- a/.github/actions/build/action.yml +++ b/.github/actions/build/action.yml @@ -22,7 +22,8 @@ runs: lib${{ inputs.fuse }}-dev \ meson \ pkg-config \ - uuid-runtime + uuid-runtime \ + libcap-dev echo "::endgroup::" diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index f1e84f78..d303d620 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -27,7 +27,7 @@ jobs: run: | sudo apt-get update -qq sudo apt-get install -qq gcc clang meson - sudo apt-get install -qq libfuse-dev uuid-runtime python3 python3-setuptools + sudo apt-get install -qq libcap-dev libfuse-dev uuid-runtime python3 python3-setuptools - name: Run coverity run: | diff --git a/lxcfs.spec.in b/lxcfs.spec.in index 90bb033b..b224508b 100644 --- a/lxcfs.spec.in +++ b/lxcfs.spec.in @@ -33,6 +33,7 @@ BuildRequires: libtool BuildRequires: docbook2X BuildRequires: doxygen BuildRequires: fuse-devel +BuildRequires: libcap-devel Requires: fuse-libs %description diff --git a/meson.build b/meson.build index 1073afb4..8819edbf 100644 --- a/meson.build +++ b/meson.build @@ -223,6 +223,8 @@ liblxcfs_sources = files( 'src/syscall_numbers.h', 'src/sysfs_fuse.c', 'src/sysfs_fuse.h', + 'src/lxcfsctl_fuse.c', + 'src/lxcfsctl_fuse.h', 'src/utils.c', 'src/utils.h') diff --git a/src/api_extensions.h b/src/api_extensions.h index ccf9bf59..b7f1e426 100644 --- a/src/api_extensions.h +++ b/src/api_extensions.h @@ -30,6 +30,7 @@ static char *api_extensions[] = { "cpuview_daemon", "loadavg_daemon", "pidfds", + "per_instance_configuration", }; static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); diff --git a/src/bindings.c b/src/bindings.c index 061a6102..8d6f9606 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -45,6 +45,11 @@ #define BASEDIR "/lxcfs/controllers" #define ROOTDIR "/lxcfs/root" +feature_t per_instance_features[63] = 
{ + { .name = "disable_uptime", }, + { } +}; + static bool can_use_pidfd; static bool can_use_swap; static bool can_use_zswap; @@ -113,35 +118,8 @@ static int pivot_root(const char *new_root, const char *put_old) extern int pivot_root(const char *new_root, const char *put_old); #endif -/* - * A table caching which pid is init for a pid namespace. - * When looking up which pid is init for $qpid, we first - * 1. Stat /proc/$qpid/ns/pid. - * 2. Check whether the ino_t is in our store. - * a. if not, fork a child in qpid's ns to send us - * ucred.pid = 1, and read the initpid. Cache - * initpid and creation time for /proc/initpid - * in a new store entry. - * b. if so, verify that /proc/initpid still matches - * what we have saved. If not, clear the store - * entry and go back to a. If so, return the - * cached initpid. - */ -struct pidns_init_store { - ino_t ino; /* inode number for /proc/$pid/ns/pid */ - pid_t initpid; /* the pid of nit in that ns */ - int init_pidfd; - int64_t ctime; /* the time at which /proc/$initpid was created */ - struct pidns_init_store *next; - int64_t lastcheck; -}; - -/* lol - look at how they are allocated in the kernel */ -#define PIDNS_HASH_SIZE 4096 -#define HASH(x) ((x) % PIDNS_HASH_SIZE) - -static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE]; -static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER; +static struct pidns_store **pidns_hash_table; +static pthread_mutex_t *pidns_store_mutex; static void mutex_lock(pthread_mutex_t *l) { @@ -165,12 +143,12 @@ static void mutex_unlock(pthread_mutex_t *l) static inline void store_lock(void) { - mutex_lock(&pidns_store_mutex); + mutex_lock(pidns_store_mutex); } static inline void store_unlock(void) { - mutex_unlock(&pidns_store_mutex); + mutex_unlock(pidns_store_mutex); } #define define_interruptible_lock(type, lockname, lockfn) \ @@ -201,7 +179,7 @@ define_interruptible_lock(pthread_rwlock_t, rwlock_wrlock, pthread_rwlock_timedw #define LXCFS_PROC_PID_LEN \ 
(STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + +1) -static int initpid_still_valid_pidfd(struct pidns_init_store *entry) +static int initpid_still_valid_pidfd(struct pidns_store *entry) { int ret; @@ -219,7 +197,7 @@ static int initpid_still_valid_pidfd(struct pidns_init_store *entry) return 1; } -static int initpid_still_valid_stat(struct pidns_init_store *entry) +static int initpid_still_valid_stat(struct pidns_store *entry) { struct stat st; char path[LXCFS_PROC_PID_LEN]; @@ -232,7 +210,7 @@ static int initpid_still_valid_stat(struct pidns_init_store *entry) } /* Must be called under store_lock */ -static bool initpid_still_valid(struct pidns_init_store *entry) +static bool initpid_still_valid(struct pidns_store *entry) { int ret; @@ -243,20 +221,28 @@ static bool initpid_still_valid(struct pidns_init_store *entry) return ret == 1; } +static void free_initpid(struct pidns_store *entry) +{ + close_prot_errno_disarm(entry->init_pidfd); + free_disarm(entry); +} + /* Must be called under store_lock */ -static void remove_initpid(struct pidns_init_store *entry) +static void remove_initpid(struct pidns_store *entry) { - struct pidns_init_store *it; + struct pidns_store *it; int ino_hash; + if (!pidns_hash_table) + return; + lxcfs_debug("Removing cached entry for pid %d from init pid cache", entry->initpid); ino_hash = HASH(entry->ino); if (pidns_hash_table[ino_hash] == entry) { pidns_hash_table[ino_hash] = entry->next; - close_prot_errno_disarm(entry->init_pidfd); - free_disarm(entry); + free_initpid(entry); return; } @@ -264,14 +250,19 @@ static void remove_initpid(struct pidns_init_store *entry) while (it) { if (it->next == entry) { it->next = entry->next; - close_prot_errno_disarm(entry->init_pidfd); - free_disarm(entry); + free_initpid(entry); return; } it = it->next; } } +static bool keep_pidns_entry(struct pidns_store *entry) +{ + return (entry->version >= 1) && entry->keep_on_reload && + initpid_still_valid(entry); +} + #define PURGE_SECS 5 /* Must be 
called under store_lock */ static void prune_initpid_store(void) @@ -279,6 +270,9 @@ static void prune_initpid_store(void) static int64_t last_prune = 0; int64_t now, threshold; + if (!pidns_hash_table) + return; + if (!last_prune) { last_prune = time(NULL); return; @@ -294,20 +288,22 @@ static void prune_initpid_store(void) threshold = now - 2 * PURGE_SECS; for (int i = 0; i < PIDNS_HASH_SIZE; i++) { - for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { - if (entry->lastcheck < threshold) { - struct pidns_init_store *cur = entry; + for (struct pidns_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { + struct pidns_store *cur = entry; - lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); + if ((entry->lastcheck < threshold) && + !keep_pidns_entry(cur)) { + lxcfs_debug("Removed cache entry for pid %d from init pid cache", cur->initpid); if (prev) prev->next = entry->next; else pidns_hash_table[i] = entry->next; entry = entry->next; - close_prot_errno_disarm(cur->init_pidfd); - free_disarm(cur); + free_initpid(cur); } else { + lxcfs_debug("Kept cache entry for pid %d in init pid cache", cur->initpid); + prev = entry; entry = entry->next; } @@ -317,32 +313,68 @@ static void prune_initpid_store(void) static void clear_initpid_store(void) { + if (!pidns_hash_table) + return; + store_lock(); for (int i = 0; i < PIDNS_HASH_SIZE; i++) { - for (struct pidns_init_store *entry = pidns_hash_table[i]; entry;) { - struct pidns_init_store *cur = entry; + for (struct pidns_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { + struct pidns_store *cur = entry; + + if (keep_pidns_entry(cur)) { + lxcfs_debug("Kept cache entry for pid %d in init pid cache", cur->initpid); + + prev = entry; + entry = entry->next; + } else { + lxcfs_debug("Removed cache entry for pid %d from init pid cache", cur->initpid); + + if (prev) + prev->next = entry->next; + else + pidns_hash_table[i] = entry->next; + entry = entry->next; 
+ free_initpid(cur); + } + } + } + store_unlock(); +} + +int iter_initpid_store(pidns_store_iter_func_t f, void *data) +{ + int ret = 0; /* stays 0 (success) when the store holds no entries and f is never called */ - lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); + if (!pidns_hash_table) + return 0; - pidns_hash_table[i] = entry->next; - entry = entry->next; - close_prot_errno_disarm(cur->init_pidfd); - free_disarm(cur); + store_lock(); + for (int i = 0; i < PIDNS_HASH_SIZE; i++) { + for (struct pidns_store *entry = pidns_hash_table[i]; entry; entry = entry->next) { + ret = f(entry, data); + if (ret) + goto out; } } + +out: store_unlock(); + return ret; } /* Must be called under store_lock */ static void save_initpid(ino_t pidns_inode, pid_t pid) { - __do_free struct pidns_init_store *entry = NULL; + __do_free struct pidns_store *entry = NULL; __do_close int pidfd = -EBADF; const struct lxcfs_opts *opts = fuse_get_context()->private_data; char path[LXCFS_PROC_PID_LEN]; struct stat st; int ino_hash; + if (!pidns_hash_table) + return; + if (opts && opts->use_pidfd && can_use_pidfd) { pidfd = pidfd_open(pid, 0); if (pidfd < 0) @@ -358,44 +390,61 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) return; ino_hash = HASH(pidns_inode); - *entry = (struct pidns_init_store){ + *entry = (struct pidns_store){ + .version = 2, .ino = pidns_inode, .initpid = pid, .ctime = st.st_ctime, .next = pidns_hash_table[ino_hash], .lastcheck = time(NULL), .init_pidfd = move_fd(pidfd), + .keep_on_reload = false, }; pidns_hash_table[ino_hash] = move_ptr(entry); lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid); } -/* - * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store - * entry for the inode number and creation time. Verify that the init pid - * is still valid. If not, remove it. Return the entry if valid, NULL - * otherwise. 
- * Must be called under store_lock - */ -static pid_t lookup_verify_initpid(ino_t pidns_inode) +static struct pidns_store *lookup_verify_pidns_entry(ino_t pidns_inode) { - struct pidns_init_store *entry = pidns_hash_table[HASH(pidns_inode)]; + struct pidns_store *entry; + + if (!pidns_hash_table) + return NULL; + + entry = pidns_hash_table[HASH(pidns_inode)]; while (entry) { if (entry->ino == pidns_inode) { if (initpid_still_valid(entry)) { entry->lastcheck = time(NULL); - return entry->initpid; + return entry; } remove_initpid(entry); - return ret_errno(ESRCH); + return NULL; } entry = entry->next; } - return ret_errno(ESRCH); + return NULL; +} + +/* + * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store + * entry for the inode number and creation time. Verify that the init pid + * is still valid. If not, remove it. Return the entry if valid, NULL + * otherwise. + * Must be called under store_lock + */ +static pid_t lookup_verify_initpid(ino_t pidns_inode) +{ + struct pidns_store *entry = lookup_verify_pidns_entry(pidns_inode); + + if (!entry) + return ret_errno(ESRCH); + + return entry->initpid; } static bool send_creds_ok(int sock_fd) @@ -563,20 +612,33 @@ static pid_t scm_init_pid(pid_t task) return pid_ret; } -pid_t lookup_initpid_in_store(pid_t pid) +static ino_t get_pidns_ino(pid_t pid) { - pid_t hashed_pid = 0; char path[LXCFS_PROC_PID_NS_LEN]; struct stat st; snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid); if (stat(path, &st)) + return 0; + + return st.st_ino; +} + +pid_t lookup_initpid_in_store(pid_t pid) +{ + pid_t hashed_pid = 0; + ino_t pidns_ino; + + pidns_ino = get_pidns_ino(pid); + if (!pidns_ino) return ret_errno(ESRCH); store_lock(); - hashed_pid = lookup_verify_initpid(st.st_ino); + hashed_pid = lookup_verify_initpid(pidns_ino); if (hashed_pid < 0) { + pid_t already_hashed_pid; + /* release the mutex as the following call is expensive */ store_unlock(); @@ -584,8 +646,20 @@ pid_t lookup_initpid_in_store(pid_t pid) 
store_lock(); - if (hashed_pid > 0) - save_initpid(st.st_ino, hashed_pid); + /* recheck that entry wasn't added while lock was released */ + already_hashed_pid = lookup_verify_initpid(pidns_ino); + + /* no existing entry found. Just add a new one. */ + if (already_hashed_pid < 0) { + if (hashed_pid > 0) + save_initpid(pidns_ino, hashed_pid); + + /* entry found it must have the same pid */ + } else if (already_hashed_pid != hashed_pid) { + lxcfs_error("Different init pids (%d, %d) for the same cache entry %lu\n", + already_hashed_pid, hashed_pid, HASH(pidns_ino)); + hashed_pid = -1; + } } /* @@ -598,6 +672,55 @@ pid_t lookup_initpid_in_store(pid_t pid) return hashed_pid; } +bool check_set_lxcfs_feature(pid_t pid, enum lxcfs_feature_op op, __u64 feature) +{ + bool ret = false; + struct pidns_store *entry; + ino_t pidns_ino; + + pidns_ino = get_pidns_ino(pid); + if (!pidns_ino) + return ret; + + store_lock(); + + entry = lookup_verify_pidns_entry(pidns_ino); + if (!entry) + goto out; + + if (entry->version < 2) + goto out; + + switch (op) { + case LXCFS_FEATURE_CHECK: + ret = entry->features & feature; + + break; + case LXCFS_FEATURE_SET: + entry->features |= feature; + + /* + * As we have enabled feature, this entry + * must be kept across lxcfs live reloads. + */ + entry->keep_on_reload = true; + + ret = true; + + break; + case LXCFS_FEATURE_CLEAR: + entry->features &= ~feature; + ret = true; + + break; + } + +out: + store_unlock(); + + return ret; +} + /* * Functions needed to setup cgroups in the __constructor__. 
*/ @@ -1020,11 +1143,23 @@ void lxcfslib_init(void) lxcfs_info("Failed to run constructor %s to reload liblxcfs", __func__); } +static bool old_daemon = false; + static void __attribute__((destructor)) lxcfs_exit(void) { lxcfs_info("Running destructor %s", __func__); clear_initpid_store(); + + if (old_daemon) { + if (pidns_store_mutex) { + pthread_mutex_destroy(pidns_store_mutex); + free(pidns_store_mutex); + } + + free(pidns_hash_table); + } + free_cpuview(); cgroup_exit(cgroup_ops); } @@ -1033,6 +1168,7 @@ void *lxcfs_fuse_init(struct fuse_conn_info *conn, void *data) { struct fuse_context *fc = fuse_get_context(); struct lxcfs_opts *opts = fc ? fc->private_data : NULL; + struct lxcfs_persistent_data *lxcfs_data = data; #if HAVE_FUSE_RETURNS_DT_TYPE can_use_sys_cpu = true; @@ -1047,5 +1183,30 @@ void *lxcfs_fuse_init(struct fuse_conn_info *conn, void *data) /* initialize the library */ lxcfslib_init(); + if (lxcfs_data) { + pidns_hash_table = lxcfs_data->pidns_hash_table; + pidns_store_mutex = &lxcfs_data->pidns_store_mutex; + } else { + lxcfs_info("Fallback way to initialize liblxcfs with old daemon binary. Please, consider full restart."); + + old_daemon = true; + + pidns_hash_table = zalloc(PIDNS_HASH_SIZE * sizeof(struct pidns_store *)); + if (!pidns_hash_table) + goto err; + + pidns_store_mutex = malloc(PIDNS_HASH_SIZE * sizeof(*pidns_store_mutex)); + if (!pidns_store_mutex) + goto err; + + if (pthread_mutex_init(pidns_store_mutex, NULL)) + goto err; + } + return opts; + +err: + lxcfs_error("liblxcfs failed to initialize. 
Turning off LXCFS virtualization.\n"); + reload_successful = 0; + return NULL; } diff --git a/src/bindings.h b/src/bindings.h index f65e42dc..c745f94b 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -77,6 +78,14 @@ enum lxcfs_virt_t { LXC_TYPE_PROC_PRESSURE_MEMORY, #define LXC_TYPE_PROC_PRESSURE_MEMORY_PATH "/proc/pressure/memory" + + LXC_TYPE_LXCFS, + LXC_TYPE_LXCFS_PIDNS_SUBDIR, + LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR, + LXC_TYPE_LXCFS_PIDNS_CURPID_SUBDIR, + LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR, + LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE, + LXC_TYPE_MAX, }; @@ -85,6 +94,7 @@ enum lxcfs_virt_t { #define LXCFS_TYPE_PROC(type) ((type >= LXC_TYPE_PROC_MEMINFO && type <= LXC_TYPE_PROC_SLABINFO) || \ (type >= LXC_TYPE_PROC && type <= LXC_TYPE_PROC_PRESSURE_MEMORY)) #define LXCFS_TYPE_SYS(type) (type >= LXC_TYPE_SYS && type <= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE) +#define LXCFS_TYPE_LXCFS(type) (type >= LXC_TYPE_LXCFS && type < LXC_TYPE_MAX) #define LXCFS_TYPE_OK(type) (type >= LXC_TYPE_CGDIR && type < LXC_TYPE_MAX) /* @@ -126,6 +136,7 @@ struct file_info { void *private_data; }; }; + int type; char *buf; /* unused */ int buflen; @@ -133,6 +144,68 @@ struct file_info { int cached; }; +typedef struct feature { + char *name; +} feature_t; + +extern feature_t per_instance_features[]; + +enum lxcfs_feature_op { + LXCFS_FEATURE_CHECK, + LXCFS_FEATURE_SET, + LXCFS_FEATURE_CLEAR, +}; + +/* + * A table caching which pid is init for a pid namespace. + * When looking up which pid is init for $qpid, we first + * 1. Stat /proc/$qpid/ns/pid. + * 2. Check whether the ino_t is in our store. + * a. if not, fork a child in qpid's ns to send us + * ucred.pid = 1, and read the initpid. Cache + * initpid and creation time for /proc/initpid + * in a new store entry. + * b. if so, verify that /proc/initpid still matches + * what we have saved. If not, clear the store + * entry and go back to a. 
If so, return the + * cached initpid. + */ +struct pidns_store { + /* increase version if the structure was changed */ + __u16 version; + + /* hash table key */ + ino_t ino; /* inode number for /proc/$pid/ns/pid */ + + /* next entry in hash table's bucket */ + struct pidns_store *next; + + pid_t initpid; /* the pid of init in that ns */ + int init_pidfd; + int64_t ctime; /* the time at which /proc/$initpid was created */ + int64_t lastcheck; + + /* Do not free on liblxcfs reload (contains useful persistent data) */ + bool keep_on_reload; + +#define LXCFS_FEATURES_DISABLE_UPTIME (1 << 0) + /* bit mask for per-instance configuration options (on/off) */ + __u64 features; +}; + +/* lol - look at how they are allocated in the kernel */ +#define PIDNS_HASH_SIZE 4096 +#define HASH(x) ((x) % PIDNS_HASH_SIZE) + +/* structure that contains data that should survive reload */ +struct lxcfs_persistent_data { + /* increase version if the structure was changed */ + __u16 version; + + struct pidns_store **pidns_hash_table; + pthread_mutex_t pidns_store_mutex; +}; + struct lxcfs_opts { bool swap_off; bool use_pidfd; @@ -157,8 +230,11 @@ typedef enum lxcfs_opt_t { LXCFS_OPTS_MAX = LXCFS_PSI_POLL_ON, } lxcfs_opt_t; +typedef int (*pidns_store_iter_func_t) (struct pidns_store *cur, void *data); +extern int iter_initpid_store(pidns_store_iter_func_t f, void *data); extern pid_t lookup_initpid_in_store(pid_t qpid); +extern bool check_set_lxcfs_feature(pid_t pid, enum lxcfs_feature_op op, __u64 feature); extern void prune_init_slice(char *cg); extern bool supports_pidfd(void); extern bool liblxcfs_functional(void); diff --git a/src/lxcfs.c b/src/lxcfs.c index af14e3bf..76241540 100644 --- a/src/lxcfs.c +++ b/src/lxcfs.c @@ -71,6 +71,40 @@ static inline void users_unlock(void) unlock_mutex(&user_count_mutex); } +static struct lxcfs_persistent_data *lxcfs_data; + +struct lxcfs_persistent_data *alloc_lxcfs_data(void) +{ + struct lxcfs_persistent_data *data; + + data = zalloc(sizeof(struct 
lxcfs_persistent_data)); + if (!data) + return NULL; + + data->version = 1; + + data->pidns_hash_table = zalloc(PIDNS_HASH_SIZE * sizeof(struct pidns_store *)); + if (!data->pidns_hash_table) + goto err; + + if (pthread_mutex_init(&data->pidns_store_mutex, NULL)) + goto err; + + return data; + +err: + free(data->pidns_hash_table); + free(data); + return NULL; +} + +void free_lxcfs_data(struct lxcfs_persistent_data *data) +{ + pthread_mutex_destroy(&data->pidns_store_mutex); + free(data->pidns_hash_table); + free(data); +} + /* Returns file info type of custom type declaration carried * in fuse_file_info */ static inline enum lxcfs_virt_t file_info_type(struct fuse_file_info *fi) @@ -151,18 +185,18 @@ static int stop_loadavg(void) static volatile sig_atomic_t need_reload; -static int do_lxcfs_fuse_init(void) +static int do_lxcfs_fuse_init(struct fuse_conn_info *conn, void *data) { char *error; - void *(*__lxcfs_fuse_init)(struct fuse_conn_info * conn, void * cfg); + void *(*__lxcfs_fuse_init)(struct fuse_conn_info *, void *); dlerror(); - __lxcfs_fuse_init = (void *(*)(struct fuse_conn_info * conn, void * cfg))dlsym(dlopen_handle, "lxcfs_fuse_init"); + __lxcfs_fuse_init = (void *(*)(struct fuse_conn_info *, void *))dlsym(dlopen_handle, "lxcfs_fuse_init"); error = dlerror(); if (error) return log_error(-1, "%s - Failed to find lxcfs_fuse_init()", error); - __lxcfs_fuse_init(NULL, NULL); + __lxcfs_fuse_init(conn, data); return 0; } @@ -209,7 +243,7 @@ static void do_reload(bool reinit) lxcfs_debug("Opened %s", lxcfs_lib_path); good: - if (reinit && do_lxcfs_fuse_init() < 0) { + if (reinit && do_lxcfs_fuse_init(NULL, lxcfs_data) < 0) { log_exit("Failed to initialize liblxcfs.so"); } @@ -266,6 +300,7 @@ static int do_##type##_##fsop(LIB_FS_##fsop##_OP_ARGS_TYPE) \ DEF_LIB_FS_OP(cg , getattr) DEF_LIB_FS_OP(proc , getattr) DEF_LIB_FS_OP(sys , getattr) +DEF_LIB_FS_OP(lxcfsctl, getattr) #define LIB_FS_read_OP_ARGS_TYPE const char *path, char *buf, size_t size, \ off_t 
offset, struct fuse_file_info *fi @@ -273,6 +308,7 @@ DEF_LIB_FS_OP(sys , getattr) DEF_LIB_FS_OP(cg , read) DEF_LIB_FS_OP(proc , read) DEF_LIB_FS_OP(sys , read) +DEF_LIB_FS_OP(lxcfsctl, read) #define LIB_FS_write_OP_ARGS_TYPE const char *path, const char *buf, size_t size, \ off_t offset, struct fuse_file_info *fi @@ -280,6 +316,7 @@ DEF_LIB_FS_OP(sys , read) DEF_LIB_FS_OP(cg , write) DEF_LIB_FS_OP(proc , write) DEF_LIB_FS_OP(sys , write) +DEF_LIB_FS_OP(lxcfsctl, write) #define LIB_FS_poll_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi, \ struct fuse_pollhandle *ph, unsigned *reventsp @@ -308,40 +345,47 @@ DEF_LIB_FS_OP(cg, chmod) DEF_LIB_FS_OP(cg , readdir) DEF_LIB_FS_OP(proc , readdir) DEF_LIB_FS_OP(sys , readdir) +DEF_LIB_FS_OP(lxcfsctl, readdir) #define LIB_FS_readlink_OP_ARGS_TYPE const char *path, char *buf, size_t size #define LIB_FS_readlink_OP_ARGS path, buf, size DEF_LIB_FS_OP(sys , readlink) +DEF_LIB_FS_OP(lxcfsctl, readlink) #define LIB_FS_open_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_open_OP_ARGS path, fi DEF_LIB_FS_OP(cg , open) DEF_LIB_FS_OP(proc , open) DEF_LIB_FS_OP(sys , open) +DEF_LIB_FS_OP(lxcfsctl, open) #define LIB_FS_access_OP_ARGS_TYPE const char *path, int mode #define LIB_FS_access_OP_ARGS path, mode DEF_LIB_FS_OP(cg , access) DEF_LIB_FS_OP(proc , access) DEF_LIB_FS_OP(sys , access) +DEF_LIB_FS_OP(lxcfsctl, access) #define LIB_FS_opendir_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_opendir_OP_ARGS path, fi DEF_LIB_FS_OP(cg , opendir) DEF_LIB_FS_OP(proc , opendir) DEF_LIB_FS_OP(sys , opendir) +DEF_LIB_FS_OP(lxcfsctl, opendir) #define LIB_FS_release_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_release_OP_ARGS path, fi DEF_LIB_FS_OP(cg , release) DEF_LIB_FS_OP(proc , release) DEF_LIB_FS_OP(sys , release) +DEF_LIB_FS_OP(lxcfsctl, release) #define LIB_FS_releasedir_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define 
LIB_FS_releasedir_OP_ARGS path, fi DEF_LIB_FS_OP(cg , releasedir) DEF_LIB_FS_OP(proc , releasedir) DEF_LIB_FS_OP(sys , releasedir) +DEF_LIB_FS_OP(lxcfsctl, releasedir) static bool cgroup_is_enabled = false; @@ -386,6 +430,13 @@ static int lxcfs_getattr(const char *path, struct stat *sb) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_getattr(path, sb); + down_users(); + return ret; + } + return -ENOENT; } @@ -417,6 +468,13 @@ static int lxcfs_opendir(const char *path, struct fuse_file_info *fi) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_opendir(path, fi); + down_users(); + return ret; + } + return -ENOENT; } @@ -436,6 +494,7 @@ static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, if (strcmp(path, "/") == 0) { if (dir_filler(filler, buf, ".", 0) != 0 || dir_filler(filler, buf, "..", 0) != 0 || + dir_filler(filler, buf, "lxcfs", 0) != 0 || dir_filler(filler, buf, "proc", 0) != 0 || dir_filler(filler, buf, "sys", 0) != 0 || (cgroup_is_enabled && dir_filler(filler, buf, "cgroup", 0) != 0)) @@ -465,6 +524,13 @@ static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_readdir(path, buf, filler, offset, fi); + down_users(); + return ret; + } + return -ENOENT; } @@ -496,6 +562,13 @@ static int lxcfs_access(const char *path, int mode) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_access(path, mode); + down_users(); + return ret; + } + return -EACCES; } @@ -526,6 +599,14 @@ static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi) down_users(); return ret; } + + if (LXCFS_TYPE_LXCFS(type)) { + up_users(); + ret = do_lxcfsctl_releasedir(path, fi); + down_users(); + return ret; + } + if (path && strcmp(path, "/") == 0) return 0; @@ -560,6 +641,13 @@ static int lxcfs_open(const char *path, struct 
fuse_file_info *fi) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_open(path, fi); + down_users(); + return ret; + } + return -EACCES; } @@ -592,6 +680,13 @@ static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset, return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_read(path, buf, size, offset, fi); + down_users(); + return ret; + } + lxcfs_error("unknown file type: path=%s, type=%d, fi->fh=%" PRIu64, path, type, fi->fh); @@ -627,6 +722,13 @@ int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset, return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_write(path, buf, size, offset, fi); + down_users(); + return ret; + } + return -EINVAL; } @@ -662,6 +764,13 @@ int lxcfs_readlink(const char *path, char *buf, size_t size) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_readlink(path, buf, size); + down_users(); + return ret; + } + return -EINVAL; } @@ -698,6 +807,13 @@ static int lxcfs_release(const char *path, struct fuse_file_info *fi) return ret; } + if (LXCFS_TYPE_LXCFS(type)) { + up_users(); + ret = do_lxcfsctl_release(path, fi); + down_users(); + return ret; + } + lxcfs_error("unknown file type: path=%s, type=%d, fi->fh=%" PRIu64, path, type, fi->fh); @@ -744,6 +860,9 @@ int lxcfs_chown(const char *path, uid_t uid, gid_t gid) if (strncmp(path, "/sys", 4) == 0) return -EPERM; + if (strncmp(path, "/lxcfs", 6) == 0) + return -EPERM; + return -ENOENT; } @@ -764,6 +883,9 @@ int lxcfs_truncate(const char *path, off_t newsize) if (strncmp(path, "/sys", 4) == 0) return 0; + if (strncmp(path, "/lxcfs", 6) == 0) + return 0; + return -EPERM; } @@ -802,6 +924,9 @@ int lxcfs_chmod(const char *path, mode_t mode) if (strncmp(path, "/sys", 4) == 0) return -EPERM; + if (strncmp(path, "/lxcfs", 6) == 0) + return -EPERM; + return -ENOENT; } @@ -847,7 +972,7 @@ static void 
*lxcfs_init(struct fuse_conn_info *conn, struct fuse_config *cfg) static void *lxcfs_init(struct fuse_conn_info *conn) #endif { - if (do_lxcfs_fuse_init() < 0) + if (do_lxcfs_fuse_init(conn, lxcfs_data) < 0) return NULL; #if HAVE_FUSE3 @@ -1041,6 +1166,12 @@ int main(int argc, char *argv[]) struct lxcfs_opts *opts; char *runtime_path_arg = NULL; + lxcfs_data = alloc_lxcfs_data(); + if (lxcfs_data == NULL) { + lxcfs_error("Error allocating memory for lxcfs persistent data"); + goto out; + } + opts = malloc(sizeof(struct lxcfs_opts)); if (opts == NULL) { lxcfs_error("Error allocating memory for options"); @@ -1262,6 +1393,7 @@ int main(int argc, char *argv[]) unlink(pidfile); free(new_fuse_opts); free(opts); + free_lxcfs_data(lxcfs_data); close_prot_errno_disarm(pidfile_fd); exit(ret); } diff --git a/src/lxcfsctl_fuse.c b/src/lxcfsctl_fuse.c new file mode 100644 index 00000000..36833651 --- /dev/null +++ b/src/lxcfsctl_fuse.c @@ -0,0 +1,474 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sysfs_fuse.h" + +#include "bindings.h" +#include "memory_utils.h" +#include "cgroups/cgroup.h" +#include "lxcfs_fuse_compat.h" +#include "utils.h" + +typedef struct lxcfsctl_dentry_data { + int type; + pid_t initpid; + int feature; +} lxcfsctl_dentry_data_t; + +static int lxcfsctl_get_dentry_type(const char *path, lxcfsctl_dentry_data_t *data) +{ + struct fuse_context *fc = fuse_get_context(); + + memset(data, 0, sizeof(*data)); + + if (strcmp(path, "/lxcfs") == 0) { + data->type = LXC_TYPE_LXCFS; + + return 0; + } else if (strcmp(path, "/lxcfs/pidns") == 0) { + data->type = LXC_TYPE_LXCFS_PIDNS_SUBDIR; + + return 0; + } else if (strncmp(path, "/lxcfs/pidns/", 
STRLITERALLEN("/lxcfs/pidns/")) == 0) { + unsigned int pid; + char subpathbuf[101] = { 0 }; + const char *subpath; + + if (strncmp(path, "/lxcfs/pidns/current", STRLITERALLEN("/lxcfs/pidns/current")) == 0) { + pid = fc->pid; + subpath = path + STRLITERALLEN("/lxcfs/pidns/current"); + } else { + int i; + + /* this path is only allowed for host admin */ + if (!proc_has_capability_in(getpid(), fc->pid, CAP_SYS_ADMIN)) + return -ENOENT; + + i = sscanf(path, "/lxcfs/pidns/%u%100s", &pid, subpathbuf); + if (i < 1) + return -ENOENT; + subpathbuf[sizeof(subpathbuf)-1] = '\0'; + subpath = subpathbuf; + } + + if (lookup_initpid_in_store(pid) < 0) + return -ENOENT; + + data->initpid = pid; + + if (strlen(subpath) == 0) { + data->type = LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR; + + return 0; + } else if (strcmp(subpath, "/features") == 0) { + data->type = LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR; + + return 0; + } else if (strncmp(subpath, "/features/", STRLITERALLEN("/features/")) == 0) { + const char *feature = subpath + STRLITERALLEN("/features/"); + + for (int i = 0; per_instance_features[i].name; i++) { + if (strcmp(per_instance_features[i].name, feature) == 0) { + data->type = LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE; + data->feature = i; + return 0; + } + } + + return -ENOENT; + } + } + + return -ENOENT; +} + +__lxcfs_fuse_ops int lxcfsctl_getattr(const char *path, struct stat *sb) +{ + lxcfsctl_dentry_data_t d_data; + struct timespec now; + + memset(sb, 0, sizeof(struct stat)); + if (clock_gettime(CLOCK_REALTIME, &now) < 0) + return -EINVAL; + + sb->st_uid = sb->st_gid = 0; + sb->st_atim = sb->st_mtim = sb->st_ctim = now; + + if (lxcfsctl_get_dentry_type(path, &d_data)) + return -ENOENT; + + switch (d_data.type) { + case LXC_TYPE_LXCFS: + case LXC_TYPE_LXCFS_PIDNS_SUBDIR: + case LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR: + case LXC_TYPE_LXCFS_PIDNS_CURPID_SUBDIR: + case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR: + sb->st_mode = S_IFDIR | 00555; + sb->st_nlink = 2; + + return 0; + case 
LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE:
+		sb->st_size = 1;
+
+		sb->st_mode = S_IFREG | 00444;
+		sb->st_nlink = 1;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Emit one directory entry per element of per_instance_features, stopping at
+ * the first element whose name is NULL.
+ *
+ * Returns 0 on success, -ENOENT if dir_filler() reports a failure.
+ */
+static int lxcfs_features_dir_filler(fuse_fill_dir_t filler, void *buf)
+{
+	for (int i = 0; per_instance_features[i].name; i++) {
+		if (dir_filler(filler, buf, per_instance_features[i].name, 0) != 0)
+			return -ENOENT;
+	}
+
+	return 0;
+}
+
+/* Arguments forwarded to features_dir_filler() through iter_initpid_store(). */
+struct pidns_iter_filler_args {
+	fuse_fill_dir_t filler;
+	void *buf;
+};
+
+/*
+ * iter_initpid_store() callback: emit a directory entry named after the
+ * decimal value of cur->initpid.
+ *
+ * @data must point to a struct pidns_iter_filler_args.
+ *
+ * Returns 0 on success, -ENOENT if dir_filler() reports a failure.
+ */
+int features_dir_filler(struct pidns_store *cur, void *data)
+{
+	struct pidns_iter_filler_args *args = data;
+	char dname[INTTYPE_TO_STRLEN(typeof(cur->initpid))];
+
+	snprintf(dname, sizeof(dname), "%d", cur->initpid);
+
+	if (dir_filler(args->filler, args->buf, dname, 0) != 0)
+		return -ENOENT;
+
+	return 0;
+}
+
+/*
+ * Run features_dir_filler() over the initpid store with the given filler and
+ * buffer, and return whatever iter_initpid_store() returns.
+ */
+static int lxcfs_pidns_dir_filler(fuse_fill_dir_t filler, void *buf)
+{
+	struct pidns_iter_filler_args args = {
+		.filler = filler,
+		.buf = buf,
+	};
+
+	return iter_initpid_store(features_dir_filler, &args);
+}
+
+/*
+ * FUSE readdir handler for the lxcfsctl tree.
+ *
+ * The listing depends on the type stored in the handle (fi->fh); "." and ".."
+ * are always emitted first:
+ *   LXC_TYPE_LXCFS:                     "pidns"
+ *   LXC_TYPE_LXCFS_PIDNS_SUBDIR:        "current", plus one entry per stored
+ *                                       pid namespace when the caller passes
+ *                                       the CAP_SYS_ADMIN check against the
+ *                                       lxcfs process itself
+ *   ..._PID_SUBDIR, ..._CURPID_SUBDIR:  "features"
+ *   ..._PID_FEATURES_SUBDIR:            one entry per feature
+ *
+ * Returns 0 on success, -EIO if liblxcfs is not functional or the handle is
+ * missing, -ENOENT if an entry could not be added and -EINVAL for any other
+ * handle type. @path and @offset are not used.
+ */
+__lxcfs_fuse_ops int lxcfsctl_readdir(const char *path, void *buf,
+				      fuse_fill_dir_t filler, off_t offset,
+				      struct fuse_file_info *fi)
+{
+	struct fuse_context *fc = fuse_get_context();
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	switch (f->type) {
+	case LXC_TYPE_LXCFS:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0 ||
+		    dir_filler(filler, buf, "pidns", 0) != 0)
+			return -ENOENT;
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_SUBDIR:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0 ||
+		    dir_filler(filler, buf, "current", 0) != 0)
+			return -ENOENT;
+
+		/* show all pid namespaces for the host admin */
+		if (proc_has_capability_in(getpid(), fc->pid, CAP_SYS_ADMIN) &&
+		    lxcfs_pidns_dir_filler(filler, buf))
+			return -ENOENT;
+
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR:
+	case LXC_TYPE_LXCFS_PIDNS_CURPID_SUBDIR:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0 ||
+		    dir_filler(filler, buf, "features", 0) != 0)
+			return -ENOENT;
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0)
+			return -ENOENT;
+		return lxcfs_features_dir_filler(filler, buf);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Per-open-file state attached to file_info->private_data by lxcfsctl_open()
+ * and released by lxcfsctl_release().
+ */
+typedef struct lxcfsctl_file_data {
+	/* increase version if the structure was changed */
+	__u16 version;
+
+	pid_t initpid;
+	int feature;
+} lxcfsctl_file_data_t;
+
+/*
+ * FUSE open handler for the lxcfsctl tree.
+ *
+ * Resolves @path with lxcfsctl_get_dentry_type(), then requires the caller
+ * (fc->pid) to pass the CAP_SYS_ADMIN check either against the resolved
+ * initpid or against the lxcfs process itself. On success a file_info with a
+ * zeroed BUF_RESERVE_SIZE buffer and a lxcfsctl_file_data_t describing the
+ * target (initpid, feature) is stored in fi->fh.
+ *
+ * Returns 0 on success, -EIO if liblxcfs is not functional, -ENOENT if the
+ * path cannot be resolved, -EACCES if the capability check fails and -ENOMEM
+ * if an allocation fails.
+ */
+__lxcfs_fuse_ops int lxcfsctl_open(const char *path, struct fuse_file_info *fi)
+{
+	struct fuse_context *fc = fuse_get_context();
+	__do_free struct file_info *info = NULL;
+	int type = -1;
+	__do_free lxcfsctl_file_data_t *private_data = NULL;
+	lxcfsctl_dentry_data_t d_data;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (lxcfsctl_get_dentry_type(path, &d_data))
+		return -ENOENT;
+
+	if (!proc_has_capability_in(d_data.initpid, fc->pid, CAP_SYS_ADMIN) &&
+	    !proc_has_capability_in(getpid(), fc->pid, CAP_SYS_ADMIN))
+		return -EACCES;
+
+	type = d_data.type;
+	if (type == -1)
+		return -ENOENT;
+
+	private_data = zalloc(sizeof(*private_data));
+	if (!private_data)
+		return -ENOMEM;
+
+	private_data->version = 1;
+	private_data->initpid = d_data.initpid;
+	private_data->feature = d_data.feature;
+
+	info = zalloc(sizeof(*info));
+	if (!info)
+		return -ENOMEM;
+
+	info->type = type;
+	info->buflen = BUF_RESERVE_SIZE;
+
+	info->buf = malloc(info->buflen);
+	if (!info->buf)
+		return -ENOMEM;
+
+	memset(info->buf, 0, info->buflen);
+	/* set actual size to buffer size */
+	info->size = info->buflen;
+
+	info->private_data = move_ptr(private_data);
+
+	fi->fh = PTR_TO_UINT64(move_ptr(info));
+	return 0;
+}
+
+/*
+ * FUSE release handler: frees the lxcfsctl_file_data_t attached by
+ * lxcfsctl_open() and then releases the file_info itself. Always returns 0.
+ */
+__lxcfs_fuse_ops int lxcfsctl_release(const char *path, struct fuse_file_info *fi)
+{
+	struct file_info *f;
+
+	f = INTTYPE_TO_PTR(fi->fh);
+	if (!f)
+		return 0;
+
+	/* free lxcfsctl_file_data_t */
+	free_disarm(f->private_data);
+
+	do_release_file_info(fi);
+	return 0;
+}
+
+/*
+ * FUSE opendir handler for the lxcfsctl tree.
+ *
+ * Resolves @path with lxcfsctl_get_dentry_type() and stores a file_info that
+ * carries only the resolved type in fi->fh; lxcfsctl_readdir() dispatches on
+ * that type.
+ *
+ * Returns 0 on success, -EIO if liblxcfs is not functional, -ENOENT if the
+ * path cannot be resolved and -ENOMEM if the allocation fails.
+ */
+__lxcfs_fuse_ops int lxcfsctl_opendir(const char *path, struct fuse_file_info *fi)
+{
+	__do_free struct file_info *dir_info = NULL;
+	int type = -1;
+	lxcfsctl_dentry_data_t d_data;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (lxcfsctl_get_dentry_type(path, &d_data))
+		return -ENOENT;
+
+	type = d_data.type;
+
+	if (type == -1)
+		return -ENOENT;
+
+	dir_info = zalloc(sizeof(*dir_info));
+	if (!dir_info)
+		return -ENOMEM;
+
+	dir_info->type = type;
+	dir_info->buf = NULL;
+	dir_info->file = NULL;
+	dir_info->buflen = 0;
+
+	fi->fh = PTR_TO_UINT64(move_ptr(dir_info));
+	return 0;
+}
+
+/* FUSE releasedir handler: releases the handle's file_info. Always returns 0. */
+__lxcfs_fuse_ops int lxcfsctl_releasedir(const char *path, struct fuse_file_info *fi)
+{
+	do_release_file_info(fi);
+	return 0;
+}
+
+/*
+ * Read the state of one feature as "0\n" or "1\n".
+ *
+ * A read at offset 0 queries check_set_lxcfs_feature(), formats the result
+ * into the handle's buffer and marks it cached. A read at a non-zero offset
+ * is served from that cached buffer (0 bytes if nothing has been cached yet).
+ *
+ * Returns the number of bytes copied into @buf, -EINVAL if @offset is past
+ * the cached size, or -EIO if the handle carries no private data or the
+ * formatted value does not fit.
+ */
+static int lxcfsctl_read_feature(char *buf, size_t size, off_t offset,
+				 struct fuse_file_info *fi)
+{
+	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
+	char *cache = d->buf;
+	ssize_t total_len = 0, ret = 0;
+	/* private_data is the lxcfsctl_file_data_t set up by lxcfsctl_open() */
+	lxcfsctl_file_data_t *d_data = d->private_data;
+	bool state;
+
+	if (offset) {
+		size_t left;
+
+		if (offset > d->size)
+			return -EINVAL;
+
+		if (!d->cached)
+			return 0;
+
+		left = d->size - offset;
+		total_len = left > size ? size : left;
+		memcpy(buf, cache + offset, total_len);
+
+		return total_len;
+	}
+
+	if (!d_data)
+		return -EIO;
+
+	/*
+	 * Build the mask in 64 bits: the feature table has room for 63
+	 * entries and shifting an int by 31 or more is undefined.
+	 */
+	state = check_set_lxcfs_feature(d_data->initpid, LXCFS_FEATURE_CHECK, (1ULL << d_data->feature));
+
+	ret = snprintf(d->buf, d->buflen, "%d\n", state);
+	if (ret < 0 || ret >= d->buflen)
+		return -EIO;
+	total_len = ret;
+
+	d->cached = 1;
+	d->size = total_len;
+	if ((size_t)total_len > size)
+		total_len = size;
+	memcpy(buf, d->buf, total_len);
+
+	return total_len;
+}
+
+/*
+ * Set or clear one feature. Only the first byte of @buf is inspected: '0'
+ * requests LXCFS_FEATURE_CLEAR, '1' requests LXCFS_FEATURE_SET. @offset is
+ * not used.
+ *
+ * Returns @size on success, -EINVAL for an empty write, a handle of the wrong
+ * type or any other first byte, and -EIO if liblxcfs is not functional, the
+ * handle or its private data is missing, or check_set_lxcfs_feature()
+ * returns false.
+ */
+static int lxcfsctl_write_feature(const char *buf, size_t size,
+				  off_t offset, struct fuse_file_info *fi)
+{
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+	/* private_data is the lxcfsctl_file_data_t set up by lxcfsctl_open() */
+	lxcfsctl_file_data_t *d_data;
+
+	if (!size)
+		return -EINVAL;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	if (f->type != LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE)
+		return -EINVAL;
+
+	d_data = f->private_data;
+	if (!d_data)
+		return -EIO;
+
+	/* 64-bit mask: an int shift by 31 or more would be undefined */
+	if (buf[0] == '0') {
+		if (!check_set_lxcfs_feature(d_data->initpid, LXCFS_FEATURE_CLEAR, (1ULL << d_data->feature)))
+			return -EIO;
+
+		return size;
+	} else if (buf[0] == '1') {
+		if (!check_set_lxcfs_feature(d_data->initpid, LXCFS_FEATURE_SET, (1ULL << d_data->feature)))
+			return -EIO;
+
+		return size;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * FUSE write handler for the lxcfsctl tree. Only feature files are writable;
+ * they are handled by lxcfsctl_write_feature().
+ *
+ * @buf is const to match the declaration in lxcfsctl_fuse.h.
+ *
+ * Returns the result of lxcfsctl_write_feature() for feature files, -EINVAL
+ * for an empty write, -EIO if liblxcfs is not functional or the handle is
+ * missing, and -EOPNOTSUPP for every other handle type.
+ */
+__lxcfs_fuse_ops int lxcfsctl_write(const char *path, const char *buf, size_t size,
+				    off_t offset, struct fuse_file_info *fi)
+{
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+
+	if (!size)
+		return -EINVAL;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	switch (f->type) {
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE:
+		return lxcfsctl_write_feature(buf, size, offset, fi);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * FUSE read handler for the lxcfsctl tree. Only feature files are readable;
+ * they are handled by lxcfsctl_read_feature().
+ *
+ * Returns the result of lxcfsctl_read_feature() for feature files, -EIO if
+ * liblxcfs is not functional or the handle is missing, and -EOPNOTSUPP for
+ * every other handle type.
+ */
+__lxcfs_fuse_ops int lxcfsctl_read(const char *path, char *buf, size_t size,
+				   off_t offset, struct fuse_file_info *fi)
+{
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	switch (f->type) {
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE:
+		return lxcfsctl_read_feature(buf, size, offset, fi);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/* access() is not implemented for the lxcfsctl tree; always -EOPNOTSUPP. */
+__lxcfs_fuse_ops int lxcfsctl_access(const char *path, int mask)
+{
+	return -EOPNOTSUPP;
+}
+
+/* readlink() is not implemented for the lxcfsctl tree; always -EOPNOTSUPP. */
+__lxcfs_fuse_ops int lxcfsctl_readlink(const char *path, char *buf, size_t size)
+{
+	return -EOPNOTSUPP;
+}
diff --git a/src/lxcfsctl_fuse.h b/src/lxcfsctl_fuse.h
new file mode 100644
index 00000000..bb3d6d41
--- /dev/null
+++ b/src/lxcfsctl_fuse.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#ifndef __LXCFSCTL_FUSE_H
+#define __LXCFSCTL_FUSE_H
+
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "lxcfs_fuse.h"
+
+#include "macro.h"
+
+__visible extern int lxcfsctl_getattr(const char *path, struct stat *sb);
+__visible extern int lxcfsctl_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_readlink(const char *path, char *buf, size_t size);
+__visible extern int lxcfsctl_release(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_releasedir(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_open(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_opendir(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_access(const char *path, int mask);
+
+#endif /* __LXCFSCTL_FUSE_H */
diff --git a/src/proc_fuse.c b/src/proc_fuse.c
index 8ed3be29..be45a3a9 100644
--- a/src/proc_fuse.c
+++ b/src/proc_fuse.c
@@ -1017,6 +1017,9 @@ static int proc_uptime_read(char *buf, size_t size, off_t offset,
 		return total_len;
 	}
 
+	if (check_set_lxcfs_feature(fc->pid, LXCFS_FEATURE_CHECK, LXCFS_FEATURES_DISABLE_UPTIME))
+		return read_file_fuse("/proc/uptime", buf, size, d);
+
 	reaperage = get_reaper_age(fc->pid);
 	/*
 	 * To understand why this is done, please read the comment to the
diff --git a/src/utils.c b/src/utils.c
index c1592951..db4ad58f 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -740,3 +741,71 @@ size_t strlcat(char *d, const char *s, size_t n)
 	return l + strlcpy(d + l, s, n - l);
 }
 #endif
+
+/*
+ * Check whether every capability bit set in @caps is present in the effective
+ * capability set of process @pid.
+ *
+ * inspired by the Linux kernel's selftests/bpf :-)
+ *
+ * Returns true only if capget() succeeds and all requested bits are set.
+ */
+bool proc_has_capability(pid_t pid, __u64 caps)
+{
+	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+	struct __user_cap_header_struct hdr = {
+		.version = _LINUX_CAPABILITY_VERSION_3,
+		/* a zero pid would make capget() report the calling thread */
+		.pid = pid,
+	};
+	__u32 cap0 = caps;
+	__u32 cap1 = caps >> 32;
+	int err;
+
+	err = capget(&hdr, data);
+	if (err)
+		return false;
+
+	return ((data[0].effective & cap0) == cap0 &&
+		(data[1].effective & cap1) == cap1);
+}
+
+#define LXCFS_PROC_USER_NS_LEN \
+	(STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + \
+	 STRLITERALLEN("/ns/user") + 1)
+
+/*
+ * Return the inode number of /proc/PID/ns/user, or 0 if stat() fails.
+ */
+static ino_t get_userns_ino(pid_t pid)
+{
+	char path[LXCFS_PROC_USER_NS_LEN];
+	struct stat st;
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+	if (stat(path, &st))
+		return 0;
+
+	return st.st_ino;
+}
+
+/*
+ * Check whether @pid is in the same user namespace as @nspid and has @cap in
+ * its effective capability set.
+ *
+ * Returns false if either user namespace cannot be determined.
+ */
+bool proc_has_capability_in(pid_t nspid, pid_t pid, cap_value_t cap)
+{
+	ino_t nspid_userns_ino, pid_userns_ino;
+
+	nspid_userns_ino = get_userns_ino(nspid);
+	pid_userns_ino = get_userns_ino(pid);
+
+	/* 0 means the lookup failed; two failed lookups must not match */
+	if (!nspid_userns_ino || !pid_userns_ino)
+		return false;
+
+	return (nspid_userns_ino == pid_userns_ino) &&
+	       proc_has_capability(pid, 1ULL << cap);
+}
diff --git a/src/utils.h b/src/utils.h
index 32b52395..c078156b 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -7,6 +7,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -90,4 +91,7 @@ extern size_t strlcpy(char *, const char *, size_t);
 extern size_t strlcat(char *d, const char *s, size_t n);
 #endif
 
+extern bool proc_has_capability(pid_t pid, __u64 caps);
+extern bool proc_has_capability_in(pid_t nspid, pid_t pid, cap_value_t cap);
+
 #endif /* __LXCFS_UTILS_H */