-
Notifications
You must be signed in to change notification settings - Fork 155
Expand file tree
/
Copy pathhvt_core.c
More file actions
304 lines (263 loc) · 8.62 KB
/
hvt_core.c
File metadata and controls
304 lines (263 loc) · 8.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
/*
* Copyright (c) 2015-2019 Contributors as noted in the AUTHORS file
*
* This file is part of Solo5, a sandboxed execution environment.
*
* Permission to use, copy, modify, and/or distribute this software
* for any purpose with or without fee is hereby granted, provided
* that the above copyright notice and this permission notice appear
* in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
* AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* hvt_core.c: Core functionality.
*
* Maintains tables of modules, hypercall handlers and vmexit handlers.
* Implements core hypercall functionality which is always present.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#if defined(__linux__)
#include <sys/epoll.h>
#include <sys/timerfd.h>
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#else
#error Unsupported target
#endif
#include "hvt.h"
/* Table of registered hypercall handlers, indexed by hypercall number. */
hvt_hypercall_fn_t hvt_core_hypercalls[HVT_HYPERCALL_MAX] = { 0 };

/*
 * Register (fn) as the handler for hypercall (nr).
 *
 * Returns 0 on success, -1 if (nr) is out of range or a handler is already
 * registered for it.
 */
int hvt_core_register_hypercall(int nr, hvt_hypercall_fn_t fn)
{
    /*
     * (nr) is signed; reject negative values as well as those beyond the
     * table, otherwise a bad caller would index out of bounds.
     */
    if (nr < 0 || nr >= HVT_HYPERCALL_MAX)
        return -1;
    if (hvt_core_hypercalls[nr] != NULL)
        return -1;

    hvt_core_hypercalls[nr] = fn;
    return 0;
}
#define HVT_HALT_HOOKS_MAX 8
/* Hooks to be invoked, in registration order, when the guest halts. */
hvt_halt_fn_t hvt_core_halt_hooks[HVT_HALT_HOOKS_MAX] = {0};
static int nr_halt_hooks;

/*
 * Register (fn) to be called when the guest halts.
 *
 * Returns 0 on success, -1 if the hook table is full.
 */
int hvt_core_register_halt_hook(hvt_halt_fn_t fn)
{
    if (nr_halt_hooks == HVT_HALT_HOOKS_MAX)
        return -1;

    hvt_core_halt_hooks[nr_halt_hooks++] = fn;
    return 0;
}
/*
 * Core handler for the HALT hypercall: runs all registered halt hooks and
 * returns the guest-supplied exit status.
 */
int hvt_core_hypercall_halt(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_halt *t =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_halt));
    void *cookie = NULL;

    /*
     * A non-zero guest cookie is validated (the memory it points to must be
     * guest-accessible) and then handed down to any halt hooks.
     */
    if (t->cookie != 0)
        cookie = HVT_CHECKED_GPA_P(hvt, t->cookie, HVT_HALT_COOKIE_MAX);

    for (int i = 0; i < nr_halt_hooks; i++) {
        hvt_halt_fn_t hook = hvt_core_halt_hooks[i];
        assert(hook != NULL);
        hook(hvt, t->exit_status, cookie);
    }

    return t->exit_status;
}
#define NUM_MODULES 8
/* NULL-terminated table of registered vmexit handlers. */
hvt_vmexit_fn_t hvt_core_vmexits[NUM_MODULES + 1] = { 0 };
static int nvmexits = 0;

/*
 * Register (fn) to be consulted on unhandled vmexits.
 *
 * Returns 0 on success, -1 if the handler table is full.
 */
int hvt_core_register_vmexit(hvt_vmexit_fn_t fn)
{
    if (nvmexits == NUM_MODULES)
        return -1;

    hvt_core_vmexits[nvmexits++] = fn;
    return 0;
}
/*
 * WALLTIME hypercall: returns the host's CLOCK_REALTIME to the guest, in
 * nanoseconds since the epoch.
 */
static void hypercall_walltime(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_walltime *t =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_walltime));
    struct timespec now;

    int rc = clock_gettime(CLOCK_REALTIME, &now);
    assert(rc == 0);
    t->nsecs = (now.tv_sec * 1000000000ULL) + now.tv_nsec;
}
/*
 * PUTS hypercall: writes (p->len) bytes of guest-supplied data to the
 * tender's stdout.
 *
 * write(2) may write fewer bytes than requested or be interrupted by a
 * signal; loop until the whole buffer is written so guest console output is
 * never silently truncated.
 */
static void hypercall_puts(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_puts *p =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_puts));
    /* Validate the whole guest buffer up front. */
    const char *data = HVT_CHECKED_GPA_P(hvt, p->data, p->len);
    size_t off = 0;

    while (off < p->len) {
        ssize_t nwritten = write(1, data + off, p->len - off);
        if (nwritten == -1 && errno == EINTR)
            continue;
        assert(nwritten >= 0);
        off += (size_t)nwritten;
    }
}
/* Wait set (epoll or kqueue) file descriptor; -1 until lazily created. */
static int waitsetfd = -1;
/* Number of fds registered in the wait set via hvt_core_register_pollfd(). */
static int npollfds;
#if defined(__linux__)
/* timerfd used to implement nanosecond poll timeouts on Linux; -1 until
 * setup_waitset() creates it. */
static int timerfd = -1;
/* Sentinel stored in epoll event data, distinguishing the internal timerfd
 * from guest-registered handles. */
#define INTERNAL_TIMERFD (~1U)
#endif
static void setup_waitset(void)
{
#if defined(__linux__)
waitsetfd = epoll_create1(0);
if (waitsetfd == -1)
err(1, "Could not create wait set");
timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
if (timerfd == -1)
err(1, "Could not create wait set timerfd");
struct epoll_event ev;
ev.events = EPOLLIN;
ev.data.u64 = INTERNAL_TIMERFD;
if (epoll_ctl(waitsetfd, EPOLL_CTL_ADD, timerfd, &ev) == -1)
err(1, "epoll_ctl(EPOLL_CTL_ADD) failed");
#else /* kqueue */
waitsetfd = kqueue();
if (waitsetfd == -1)
err(1, "Could not create wait set");
#endif
}
int hvt_core_register_pollfd(int fd, uintptr_t waitset_data)
{
if (waitsetfd == -1)
setup_waitset();
#if defined(__linux__)
struct epoll_event ev;
ev.events = EPOLLIN;
/*
* waitset_data is a solo5_handle_t, and will be returned by epoll() as
* part of any received event.
*/
ev.data.u64 = waitset_data;
if (epoll_ctl(waitsetfd, EPOLL_CTL_ADD, fd, &ev) == -1)
err(1, "epoll_ctl(EPOLL_CTL_ADD) failed");
#else /* kqueue */
struct kevent ev;
/*
* waitset_data is a solo5_handle_t, and will be returned by kevent() as
* part of any received event.
*/
EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, (void *)waitset_data);
if (kevent(waitsetfd, &ev, 1, NULL, 0, NULL) == -1)
err(1, "kevent(EV_ADD) failed");
#endif
npollfds++;
return 0;
}
/*
 * POLL hypercall: block until at least one registered handle is ready to
 * read, or until t->timeout_nsecs elapses. On return, t->ready_set is a
 * bitmap of ready handles and t->ret is the number of ready handles (0 on
 * timeout).
 */
static void hypercall_poll(struct hvt *hvt, hvt_gpa_t gpa)
{
    struct hvt_hc_poll *t =
        HVT_CHECKED_GPA_P(hvt, gpa, sizeof (struct hvt_hc_poll));
#if defined(__linux__)
    /*
     * On Linux, in order to support nanosecond timeouts, as defined by the
     * Solo5 API, we use a timerfd internally in the waitset. Account for this
     * in the number of requested events.
     */
    int nevents = npollfds ? (npollfds + 1) : 1;
    int nrevents;
    uint64_t ready_set = 0;
    struct epoll_event revents[nevents];
    /* One-shot timer: fires once after the requested timeout, no interval. */
    struct itimerspec it = {
        .it_interval = { 0 },
        .it_value = {
            .tv_sec = t->timeout_nsecs / 1000000000ULL,
            .tv_nsec = t->timeout_nsecs % 1000000000ULL
        }
    };
    if (timerfd_settime(timerfd, 0, &it, NULL) == -1)
        err(1, "timerfd_settime() failed");
    /*
     * We can always safely restart this call on EINTR, since the internal
     * timerfd is independent of its invocation.
     */
    do {
        /* Infinite epoll timeout (-1): the timerfd provides the deadline. */
        nrevents = epoll_pwait(waitsetfd, revents, nevents, -1, NULL);
    } while (nrevents == -1 && errno == EINTR);
    if (nrevents > 0) {
        int orig_nrevents = nrevents;
        for (int i = 0; i < orig_nrevents; i++)
            if (revents[i].data.u64 == INTERNAL_TIMERFD)
                nrevents -= 1; /* Disregard in total reported events */
            else
                ready_set |= (1ULL << revents[i].data.u64);
    }
    assert(nrevents >= 0);
#else /* kqueue */
    /*
     * At least one event must be requested in kevent(), otherwise the call
     * will just return or error.
     */
    int nevents = npollfds ? npollfds : 1;
    int nrevents;
    uint64_t ready_set = 0;
    struct kevent revents[nevents];
    struct timespec ts;
    ts.tv_sec = t->timeout_nsecs / 1000000000ULL;
    ts.tv_nsec = t->timeout_nsecs % 1000000000ULL;
    nrevents = kevent(waitsetfd, NULL, 0, revents, nevents, &ts);
    /*
     * Unlike the epoll() implementation, we can't easily restart the kqueue()
     * call on EINTR, due to not having a straightforward way to recalculate
     * the timeout. While we could use EVFILT_TIMER similarly to the Linux
     * timerfd, this has system-wide limits on the number of active timers.
     *
     * However: We don't handle any signals, other than by terminating the
     * tender. Therefore, we should never see EINTR in practice here. If this
     * turns out not to be the case, prominently warn the user about it and
     * pretend we woke up early with no events, which is better than just
     * asserting/aborting.
     */
    if (nrevents == -1 && errno == EINTR) {
        warnx("hypercall_poll(): kqueue() returned EINTR");
        warnx("hypercall_poll(): This should not happen, please report a bug");
        nrevents = 0;
    }
    assert(nrevents >= 0);
    if (nrevents > 0) {
        for (int i = 0; i < nrevents; i++)
            ready_set |= (1ULL << (uintptr_t)revents[i].udata);
    }
#endif
    t->ready_set = ready_set;
    t->ret = nrevents;
}
static int setup(struct hvt *hvt, struct mft *mft)
{
if (waitsetfd == -1)
setup_waitset();
assert(hvt_core_register_hypercall(HVT_HYPERCALL_WALLTIME,
hypercall_walltime) == 0);
assert(hvt_core_register_hypercall(HVT_HYPERCALL_PUTS,
hypercall_puts) == 0);
assert(hvt_core_register_hypercall(HVT_HYPERCALL_POLL,
hypercall_poll) == 0);
return 0;
}
DECLARE_MODULE(core,
.setup = setup
)