Skip to content

Commit a717ed5

Browse files
committed
[LibOS] Add support for timerfd system calls
This commit adds support for system calls that create and operate on a timer that delivers timer expiration notifications via a file descriptor, specifically: `timerfd_create()`, `timerfd_settime()` and `timerfd_gettime()`. The timerfd object is associated with a dummy eventfd created on the host to trigger notifications (e.g., in epoll). The object is created inside Gramine, with all its operations resolved entirely inside Gramine (note that the time source in Gramine SGX is still untrusted). The emulation is currently implemented at the level of a single process. All timerfds created in the parent process are marked as invalid in child processes. In multi-process applications, Gramine does not exit immediately after fork; it only exits if the application attempts to use timerfds in the child. Therefore, inter-process timing signals via timerfds are not allowed. LibOS regression tests are also added. Signed-off-by: Kailun Qin <[email protected]>
1 parent aef14f1 commit a717ed5

27 files changed

+1158
-52
lines changed

Documentation/devel/features.md

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,7 +1036,7 @@ The below list is generated from the [syscall table of Linux
10361036
-`signalfd()`
10371037
<sup>[7](#signals-and-process-state-changes)</sup>
10381038

1039-
- `timerfd_create()`
1039+
- `timerfd_create()`
10401040
<sup>[20](#sleeps-timers-and-alarms)</sup>
10411041

10421042
-`eventfd()`
@@ -1045,10 +1045,10 @@ The below list is generated from the [syscall table of Linux
10451045
-`fallocate()`
10461046
<sup>[9a](#file-system-operations)</sup>
10471047

1048-
- `timerfd_settime()`
1048+
- `timerfd_settime()`
10491049
<sup>[20](#sleeps-timers-and-alarms)</sup>
10501050

1051-
- `timerfd_gettime()`
1051+
- `timerfd_gettime()`
10521052
<sup>[20](#sleeps-timers-and-alarms)</sup>
10531053

10541054
-`accept4()`
@@ -2891,9 +2891,23 @@ Gramine implements getting and setting the interval timer: `getitimer()` and `se
28912891

28922892
Gramine implements alarm clocks via `alarm()`.
28932893

2894+
Gramine implements timers that notify via file descriptors: `timerfd_create()`, `timerfd_settime()`
2895+
and `timerfd_gettime()`. The timerfd object is created inside Gramine, and all operations are
2896+
resolved entirely inside Gramine (note that the time source in Gramine SGX is still untrusted). Each
2897+
timerfd object is associated with a dummy eventfd created on the host. This is purely for triggering
2898+
read notifications (e.g., in epoll); timerfd data is verified inside Gramine and is never exposed to
2899+
the host. Since the host is used purely for notifications, a malicious host can only induce Denial
2900+
of Service (DoS) attacks. `TFD_TIMER_CANCEL_ON_SET` is silently ignored because there are no
2901+
"discontinuous changes of time" in Gramine (via e.g., `settimeofday()`). `TFD_IOC_SET_TICKS` is not
2902+
supported.
2903+
2904+
The emulation is currently implemented at the level of a single process. All timerfds created in the
2905+
parent process are marked as invalid in child processes. In multi-process applications, Gramine does
2906+
not exit immediately after fork; it only exits if the application attempts to use timerfds in the
2907+
child. Therefore, inter-process timing signals via timerfds are not allowed.
2908+
28942909
Gramine does *not* currently implement the POSIX per-process timer: `timer_create()`, etc. Gramine
2895-
also does not currently implement timers that notify via file descriptors. Gramine could implement
2896-
these timers in the future, if need arises.
2910+
could implement it in the future, if the need arises.
28972911

28982912
<details><summary>Related system calls</summary>
28992913

@@ -2909,9 +2923,9 @@ these timers in the future, if need arises.
29092923
-`timer_getoverrun()`: may be implemented in the future
29102924
-`timer_delete()`: may be implemented in the future
29112925

2912-
- `timerfd_create()`: may be implemented in the future
2913-
- `timerfd_settime()`: may be implemented in the future
2914-
- `timerfd_gettime()`: may be implemented in the future
2926+
- `timerfd_create()`: see the notes above
2927+
- `timerfd_settime()`: see the notes above
2928+
- `timerfd_gettime()`: see the notes above
29152929

29162930
</details><br />
29172931

libos/include/libos_fs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ struct libos_fs_ops {
190190
int (*poll)(struct libos_handle* hdl, int in_events, int* out_events);
191191

192192
/* Verify a single handle after poll. Must update `pal_ret_events` in-place with only allowed
193-
* ones. Used in e.g. secure eventfd FS to verify if the host is not lying to us. */
193+
* ones. Used in e.g. secure eventfd and timerfd FS to verify that the host is not lying to us. */
194194
void (*post_poll)(struct libos_handle* hdl, pal_wait_flags_t* pal_ret_events);
195195

196196
/* checkpoint/migrate the file system */
@@ -948,6 +948,7 @@ extern struct libos_fs eventfd_builtin_fs;
948948
extern struct libos_fs synthetic_builtin_fs;
949949
extern struct libos_fs path_builtin_fs;
950950
extern struct libos_fs shm_builtin_fs;
951+
extern struct libos_fs timerfd_builtin_fs;
951952

952953
struct libos_fs* find_fs(const char* name);
953954

libos/include/libos_handle.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ enum libos_handle_type {
4646
/* Special handles: */
4747
TYPE_EPOLL, /* epoll handles, see `libos_epoll.c` */
4848
TYPE_EVENTFD, /* eventfd handles, used by `eventfd` filesystem */
49+
TYPE_TIMERFD, /* timerfd handles, used by `timerfd` filesystem */
4950
};
5051

5152
struct libos_pipe_handle {
@@ -142,6 +143,18 @@ struct libos_eventfd_handle {
142143
uint64_t dummy_host_val;
143144
};
144145

146+
/* Per-handle state of a timerfd (TYPE_TIMERFD handles, used by the `timerfd` filesystem). */
struct libos_timerfd_handle {
147+
/* set by the timerfd FS `checkin` callback; read, post-poll and close on such a handle fail */
bool broken_in_child;
148+
149+
spinlock_t expiration_lock; /* protecting below fields */
150+
/* expirations not yet consumed; the FS read callback copies it to the caller and zeroes it */
uint64_t num_expirations;
151+
/* when non-zero, the FS read callback performs a read on the host object and zeroes this field */
uint64_t dummy_host_val;
152+
153+
spinlock_t timer_lock; /* protecting below fields */
154+
uint64_t timeout; /* always an absolute time */
155+
/* NOTE(review): presumably the re-arm interval of a periodic timer -- confirm in the syscall code */
uint64_t reset;
156+
};
157+
145158
struct libos_handle {
146159
enum libos_handle_type type;
147160
bool is_dir;
@@ -217,6 +230,8 @@ struct libos_handle {
217230

218231
struct libos_epoll_handle epoll; /* TYPE_EPOLL */
219232
struct libos_eventfd_handle eventfd; /* TYPE_EVENTFD */
233+
234+
struct libos_timerfd_handle timerfd; /* TYPE_TIMERFD */
220235
} info;
221236

222237
struct libos_dir_handle dir_info;
@@ -232,7 +247,7 @@ struct libos_handle {
232247
* `read`, `seek` but not `pread`). This lock should be taken *before* `libos_handle.lock` and
233248
* `libos_inode.lock`. Must be used *only* via maybe_lock_pos_handle() and
234249
* maybe_unlock_pos_handle(); these functions make sure that the lock is acquired only on those
235-
* handle types that are seekable (e.g. not on eventfds or pipes). */
250+
* handle types that are seekable (e.g. not on eventfds, timerfds or pipes). */
236251
struct libos_lock pos_lock;
237252
};
238253

libos/include/libos_table.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,7 @@ long libos_syscall_getrandom(char* buf, size_t count, unsigned int flags);
220220
long libos_syscall_mlock2(unsigned long start, size_t len, int flags);
221221
long libos_syscall_sysinfo(struct sysinfo* info);
222222
long libos_syscall_close_range(unsigned int first, unsigned int last, unsigned int flags);
223+
long libos_syscall_timerfd_create(int clockid, int flags);
224+
long libos_syscall_timerfd_settime(int fd, int flags, const struct __kernel_itimerspec* value,
225+
struct __kernel_itimerspec* ovalue);
226+
long libos_syscall_timerfd_gettime(int fd, struct __kernel_itimerspec* value);

libos/include/libos_utils.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,14 @@ void clean_link_map_list(void);
5252
int create_pipe(char* name, char* uri, size_t size, PAL_HANDLE* hdl, bool use_vmid_for_name);
5353

5454
/* Asynchronous event support */
55+
/* Kind of asynchronous event; passed as the first argument of install_async_event(). */
enum async_event_type {
56+
/* NOTE(review): presumably events tied to I/O on the PAL `object` -- confirm in the async worker */
ASYNC_EVENT_TYPE_IO = 1,
57+
/* used e.g. by the timerfd FS close callback to cancel a pending timer */
ASYNC_EVENT_TYPE_ALARM_TIMER = 2,
58+
};
59+
5560
int init_async_worker(void);
56-
int64_t install_async_event(PAL_HANDLE object, unsigned long time,
61+
int64_t install_async_event(enum async_event_type type, PAL_HANDLE object,
62+
unsigned long time_us, bool absolute_time,
5763
void (*callback)(IDTYPE caller, void* arg), void* arg);
5864
void terminate_async_worker(void);
5965

libos/include/linux_abi/time.h

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
/* These need to be binary-identical with the ones used by Linux. */
1010

1111
// TODO: remove all of these includes and make this header libc-independent.
12-
#include <linux/times.h>
13-
#include <linux/timex.h>
14-
#include <linux/utime.h>
1512
#include <linux/version.h>
1613

14+
typedef long __kernel_suseconds_t;
15+
typedef long __kernel_time_t;
16+
1717
typedef __kernel_time_t time_t;
1818

1919
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
@@ -37,3 +37,28 @@ struct __kernel_timezone {
3737
int tz_minuteswest; /* minutes west of Greenwich */
3838
int tz_dsttime; /* type of dst correction */
3939
};
40+
41+
/* The IDs of the various system clocks (for POSIX.1b interval timers). */
42+
#define CLOCK_REALTIME 0
43+
#define CLOCK_MONOTONIC 1
44+
#define CLOCK_PROCESS_CPUTIME_ID 2
45+
#define CLOCK_THREAD_CPUTIME_ID 3
46+
#define CLOCK_MONOTONIC_RAW 4
47+
#define CLOCK_REALTIME_COARSE 5
48+
#define CLOCK_MONOTONIC_COARSE 6
49+
#define CLOCK_BOOTTIME 7
50+
#define CLOCK_REALTIME_ALARM 8
51+
#define CLOCK_BOOTTIME_ALARM 9
52+
53+
#define MAX_CLOCKS 16
54+
55+
#define TFD_TIMER_ABSTIME (1 << 0)
56+
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
57+
#define TFD_CLOEXEC O_CLOEXEC
58+
#define TFD_NONBLOCK O_NONBLOCK
59+
60+
#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
61+
/* Flags for timerfd_create. */
62+
#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
63+
/* Flags for timerfd_settime. */
64+
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)

libos/src/arch/x86_64/libos_table.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,11 +297,11 @@ libos_syscall_t libos_syscall_table[LIBOS_SYSCALL_BOUND] = {
297297
[__NR_utimensat] = (libos_syscall_t)0, // libos_syscall_utimensat
298298
[__NR_epoll_pwait] = (libos_syscall_t)libos_syscall_epoll_pwait,
299299
[__NR_signalfd] = (libos_syscall_t)0, // libos_syscall_signalfd
300-
[__NR_timerfd_create] = (libos_syscall_t)0, // libos_syscall_timerfd_create
300+
[__NR_timerfd_create] = (libos_syscall_t)libos_syscall_timerfd_create,
301301
[__NR_eventfd] = (libos_syscall_t)libos_syscall_eventfd,
302302
[__NR_fallocate] = (libos_syscall_t)libos_syscall_fallocate,
303-
[__NR_timerfd_settime] = (libos_syscall_t)0, // libos_syscall_timerfd_settime
304-
[__NR_timerfd_gettime] = (libos_syscall_t)0, // libos_syscall_timerfd_gettime
303+
[__NR_timerfd_settime] = (libos_syscall_t)libos_syscall_timerfd_settime,
304+
[__NR_timerfd_gettime] = (libos_syscall_t)libos_syscall_timerfd_gettime,
305305
[__NR_accept4] = (libos_syscall_t)libos_syscall_accept4,
306306
[__NR_signalfd4] = (libos_syscall_t)0, // libos_syscall_signalfd4
307307
[__NR_eventfd2] = (libos_syscall_t)libos_syscall_eventfd2,

libos/src/fs/libos_fs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static struct libos_fs* g_builtin_fs[] = {
3333
&synthetic_builtin_fs,
3434
&path_builtin_fs,
3535
&shm_builtin_fs,
36+
&timerfd_builtin_fs,
3637
};
3738

3839
static struct libos_lock g_mount_mgr_lock;

libos/src/fs/proc/thread.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ static char* describe_handle(struct libos_handle* hdl) {
287287
case TYPE_EPOLL: str = "epoll:[?]"; break;
288288
case TYPE_EVENTFD: str = "eventfd:[?]"; break;
289289
case TYPE_SHM: str = "shm:[?]"; break;
290+
case TYPE_TIMERFD: str = "timerfd:[?]"; break;
290291
default: str = "unknown:[?]"; break;
291292
}
292293
return strdup(str);

libos/src/fs/timerfd/fs.c

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/* SPDX-License-Identifier: LGPL-3.0-or-later */
2+
/* Copyright (C) 2024 Intel Corporation
3+
* Kailun Qin <[email protected]>
4+
*/
5+
6+
/*
7+
* This file contains code for implementation of "timerfd" filesystem. For more information, see
8+
* `libos/src/sys/libos_timerfd.c`.
9+
*/
10+
11+
#include "libos_fs.h"
12+
#include "libos_handle.h"
13+
#include "libos_internal.h"
14+
#include "libos_lock.h"
15+
#include "linux_abi/errors.h"
16+
#include "pal.h"
17+
18+
/* Enforce a restriction that all timerfds created in the parent process are marked as invalid in
19+
* child processes, i.e. inter-process timing signals via timerfds are not allowed. This restriction
20+
* is because LibOS doesn't yet implement sync between timerfd objects. */
21+
static int timerfd_checkin(struct libos_handle* hdl) {
22+
assert(hdl->type == TYPE_TIMERFD);
23+
hdl->info.timerfd.broken_in_child = true;
24+
return 0;
25+
}
26+
27+
/* This implementation is the same as `eventfd_dummy_host_read()` in "fs/eventfd/fs.c". */
28+
static void timerfd_dummy_host_read(struct libos_handle* hdl) {
29+
int ret;
30+
uint64_t buf_dummy_host_val = 0;
31+
size_t dummy_host_val_count = sizeof(buf_dummy_host_val);
32+
do {
33+
ret = PalStreamRead(hdl->pal_handle, /*offset=*/0, &dummy_host_val_count,
34+
&buf_dummy_host_val);
35+
} while (ret == PAL_ERROR_INTERRUPTED);
36+
if (ret < 0 || dummy_host_val_count != sizeof(buf_dummy_host_val)) {
37+
/* must not happen in benign case, consider it an attack and panic */
38+
BUG();
39+
}
40+
}
41+
42+
/* This implementation is the same as `eventfd_dummy_host_wait()` in "fs/eventfd/fs.c". */
43+
static void timerfd_dummy_host_wait(struct libos_handle* hdl) {
44+
pal_wait_flags_t wait_for_events = PAL_WAIT_READ;
45+
pal_wait_flags_t ret_events = 0;
46+
int ret = PalStreamsWaitEvents(1, &hdl->pal_handle, &wait_for_events, &ret_events, NULL);
47+
if (ret < 0 && ret != PAL_ERROR_INTERRUPTED) {
48+
BUG();
49+
}
50+
(void)ret_events; /* we don't care what events the host returned, we can't trust them anyway */
51+
}
52+
53+
/* `read` callback of the timerfd FS: hands the number of timer expirations to the caller.
 *
 * Copies the 8-byte expiration counter into `buf` and resets the counter to zero. While the
 * counter is zero, an O_NONBLOCK handle fails with -EAGAIN; otherwise the call waits on the host
 * object backing the handle and re-checks the counter afterwards.
 *
 * `count` is the size of `buf` and must be at least sizeof(uint64_t); `pos` is unused.
 *
 * Returns sizeof(uint64_t) -- the number of bytes actually stored in `buf`, same as Linux --
 * on success, -EINVAL if `count` is too small, -EIO if the handle is marked as broken in this
 * (child) process, -EAGAIN as described above. */
static ssize_t timerfd_read(struct libos_handle* hdl, void* buf, size_t count, file_off_t* pos) {
    __UNUSED(pos);
    assert(hdl->type == TYPE_TIMERFD);

    if (count < sizeof(uint64_t))
        return -EINVAL;

    if (hdl->info.timerfd.broken_in_child) {
        log_warning("Child process tried to access timerfd created by parent process. This is "
                    "disallowed in Gramine.");
        return -EIO;
    }

    int ret;
    spinlock_lock(&hdl->info.timerfd.expiration_lock);

    while (!hdl->info.timerfd.num_expirations) {
        if (hdl->flags & O_NONBLOCK) {
            ret = -EAGAIN;
            goto out;
        }
        /* the lock is dropped while waiting and re-taken before the counter is re-checked */
        spinlock_unlock(&hdl->info.timerfd.expiration_lock);
        timerfd_dummy_host_wait(hdl);
        spinlock_lock(&hdl->info.timerfd.expiration_lock);
    }

    memcpy(buf, &hdl->info.timerfd.num_expirations, sizeof(uint64_t));
    hdl->info.timerfd.num_expirations = 0;

    /* perform a read (not supposed to block) to clear the event from polling threads */
    if (hdl->info.timerfd.dummy_host_val) {
        timerfd_dummy_host_read(hdl);
        hdl->info.timerfd.dummy_host_val = 0;
    }

    /* exactly one 8-byte counter was stored, no matter how large the caller's buffer is; reporting
     * `count` here would claim bytes that were never written (and could overflow `ret`) */
    ret = (int)sizeof(uint64_t);
out:
    spinlock_unlock(&hdl->info.timerfd.expiration_lock);
    maybe_epoll_et_trigger(hdl, ret, /*in=*/true, /*unused was_partial=*/false);
    return ret;
}
94+
95+
/* `post_poll` callback of the timerfd FS: filters the events reported for `hdl` after a poll.
 *
 * `pal_ret_events` is updated in place:
 *   - on a handle marked as broken in this (child) process it is overwritten with PAL_WAIT_ERROR;
 *   - ERROR, HANG_UP or WRITE reported for a healthy handle panics;
 *   - READ is kept only if the expiration counter is non-zero. */
static void timerfd_post_poll(struct libos_handle* hdl, pal_wait_flags_t* pal_ret_events) {
    assert(hdl->type == TYPE_TIMERFD);

    if (hdl->info.timerfd.broken_in_child) {
        log_warning("Child process tried to access timerfd created by parent process. This is "
                    "disallowed in Gramine.");
        *pal_ret_events = PAL_WAIT_ERROR;
        return;
    }

    /* impossible: we control timerfd inside the LibOS, and we never raise such conditions */
    const pal_wait_flags_t never_raised = PAL_WAIT_ERROR | PAL_WAIT_HANG_UP | PAL_WAIT_WRITE;
    if (*pal_ret_events & never_raised) {
        BUG();
    }

    spinlock_lock(&hdl->info.timerfd.expiration_lock);
    if ((*pal_ret_events & PAL_WAIT_READ) && hdl->info.timerfd.num_expirations == 0) {
        /* READ was reported although no expiration is pending: a spurious or malicious
         * notification. It can legitimately happen if another thread consumed the event between
         * this thread's poll wakeup and this callback. The READ event is dropped rather than
         * reporting readiness that cannot be confirmed inside the LibOS. */
        *pal_ret_events &= ~PAL_WAIT_READ;
    }
    spinlock_unlock(&hdl->info.timerfd.expiration_lock);
}
122+
123+
static int timerfd_close(struct libos_handle* hdl) {
124+
if (hdl->info.timerfd.broken_in_child) {
125+
log_warning("Child process tried to access timerfd created by parent process. This is "
126+
"disallowed in Gramine.");
127+
return -EIO;
128+
}
129+
130+
/* cancel the pending timerfd object */
131+
return install_async_event(ASYNC_EVENT_TYPE_ALARM_TIMER, hdl->pal_handle,
132+
/*time_us=*/0, /*absolute_time=*/false, /*callback=*/NULL,
133+
/*arg=*/NULL);
134+
}
135+
136+
struct libos_fs_ops timerfd_fs_ops = {
137+
.checkin = &timerfd_checkin,
138+
.read = &timerfd_read,
139+
.close = &timerfd_close,
140+
.post_poll = &timerfd_post_poll,
141+
};
142+
143+
struct libos_fs timerfd_builtin_fs = {
144+
.name = "timerfd",
145+
.fs_ops = &timerfd_fs_ops,
146+
};

0 commit comments

Comments
 (0)