Skip to content

Commit bfb75b9

Browse files
committed
use landlock to prevent writing outside of /tmp
1 parent a553dd2 commit bfb75b9

File tree

5 files changed

+136
-11
lines changed

5 files changed

+136
-11
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ nanobind_add_module(_pygpubench
2525
csrc/manager.cpp
2626
csrc/clear_l2.cu
2727
csrc/check.cu
28+
csrc/landlock.cpp
2829
)
2930
target_link_libraries(_pygpubench PUBLIC Python::Module CUDA::cudart)
3031
# set a bunch of hardening options to make it harder to tamper with the executable

csrc/landlock.cpp

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Copyright (c) 2026 Erik Schultheis
2+
// All rights reserved.
3+
//
4+
5+
#include <cstdint>
6+
#include <fcntl.h>
7+
#include <stddef.h>
8+
#include <stdio.h>
9+
#include <stdlib.h>
10+
#include <sys/prctl.h>
11+
#include <sys/syscall.h>
12+
#include <unistd.h>
13+
#include <linux/landlock.h>
14+
#include <system_error>
15+
#include <utility>
16+
17+
/// Owning wrapper around a raw POSIX file descriptor.
///
/// The descriptor is closed when the wrapper is destroyed. Ownership can be
/// handed over by move construction, after which the moved-from wrapper holds
/// -1 and its destructor does nothing. Copying is disabled.
class Fd {

public:
    /// Takes ownership of `fd`.
    explicit Fd(int fd) : mFD(fd) {}

    /// Closes the descriptor if this object still holds a valid one.
    ~Fd() {
        // A moved-from (or otherwise invalid) wrapper holds a negative value.
        // Calling close() on it would fail with EBADF and overwrite errno,
        // so skip the call entirely in that case.
        if (mFD >= 0)
            close(mFD);
    }

    /// Returns the raw descriptor; ownership stays with this object.
    int fd() const { return mFD; }

    // non-copyable, movable
    Fd(const Fd&) = delete;
    Fd& operator=(const Fd&) = delete;
    Fd(Fd&& o) noexcept : mFD(std::exchange(o.mFD, -1)) {}

private:
    int mFD;
};
33+
34+
struct LandlockFd : Fd {
35+
explicit LandlockFd(int fd) : Fd(fd) {}
36+
};
37+
38+
/// Invokes the landlock_create_ruleset syscall.
///
/// @param attr   ruleset attributes passed through to the kernel
/// @param size   size in bytes of the structure behind `attr`
/// @param flags  syscall flags
/// @return owning handle for the descriptor returned by the kernel
/// @throws std::system_error carrying errno if the syscall reports failure
static LandlockFd landlock_create_ruleset(
        const struct landlock_ruleset_attr *attr, size_t size, uint32_t flags) {
    const int ruleset = syscall(__NR_landlock_create_ruleset, attr, size, flags);
    if (ruleset >= 0)
        return LandlockFd{ruleset};

    throw std::system_error(errno, std::system_category(),
                            "landlock_create_ruleset");
}
46+
47+
/// Invokes the landlock_add_rule syscall on `ruleset`.
///
/// @param ruleset    ruleset the rule is added to
/// @param rule_type  kind of rule described by `rule_attr`
/// @param rule_attr  pointer to the rule attribute structure
/// @param flags      syscall flags
/// @throws std::system_error carrying errno if the syscall reports failure
static void landlock_add_rule(
        LandlockFd& ruleset, enum landlock_rule_type rule_type,
        const void *rule_attr, uint32_t flags) {
    const long rc = syscall(__NR_landlock_add_rule, ruleset.fd(), rule_type, rule_attr, flags);
    if (rc >= 0)
        return;

    throw std::system_error(errno, std::system_category(),
                            "landlock_add_rule");
}
54+
55+
/// Invokes the landlock_restrict_self syscall with `ruleset`.
///
/// @param ruleset  ruleset to enforce on the calling thread
/// @param flags    syscall flags
/// @throws std::system_error carrying errno if the syscall reports failure
static void landlock_restrict_self(LandlockFd& ruleset, uint32_t flags) {
    const long rc = syscall(__NR_landlock_restrict_self, ruleset.fd(), flags);
    if (rc >= 0)
        return;

    throw std::system_error(errno, std::system_category(),
                            "landlock_restrict_self");
}
60+
61+
/// Adds a path-beneath rule granting `access` for `path` to `ruleset`.
///
/// A path that does not exist (ENOENT) is skipped silently; any other failure
/// to open it is reported.
///
/// @param ruleset  ruleset the rule is added to
/// @param path     filesystem path the rule applies to
/// @param access   bitmask of access rights to allow
/// @throws std::system_error if opening `path` fails for a reason other than
///         ENOENT, or if adding the rule fails
static void allow_path(LandlockFd& ruleset, const char *path, uint64_t access) {
    const int handle = open(path, O_PATH | O_CLOEXEC);
    if (handle < 0 && errno == ENOENT)
        return;
    if (handle < 0)
        throw std::system_error(errno, std::system_category(), path);

    // Keeps the descriptor open until the rule has been added, then closes it.
    Fd anchor(handle);

    struct landlock_path_beneath_attr rule = {};
    rule.allowed_access = access;
    rule.parent_fd = anchor.fd();

    landlock_add_rule(ruleset, LANDLOCK_RULE_PATH_BENEATH, &rule, 0);
}
75+
76+
void install_landlock() {
77+
const std::uint64_t RO = LANDLOCK_ACCESS_FS_READ_FILE |
78+
LANDLOCK_ACCESS_FS_READ_DIR;
79+
80+
const std::uint64_t RW = RO |
81+
LANDLOCK_ACCESS_FS_WRITE_FILE |
82+
LANDLOCK_ACCESS_FS_REMOVE_FILE |
83+
LANDLOCK_ACCESS_FS_REMOVE_DIR |
84+
LANDLOCK_ACCESS_FS_MAKE_REG |
85+
LANDLOCK_ACCESS_FS_MAKE_DIR |
86+
LANDLOCK_ACCESS_FS_MAKE_SYM |
87+
#ifdef LANDLOCK_ACCESS_FS_TRUNCATE
88+
LANDLOCK_ACCESS_FS_TRUNCATE |
89+
#endif
90+
0;
91+
92+
struct landlock_ruleset_attr ruleset_attr = {
93+
.handled_access_fs = RW, // everything we handle; unlisted = unrestricted
94+
};
95+
96+
LandlockFd ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
97+
98+
// Read-only: entire filesystem
99+
allow_path(ruleset_fd, "/", RO);
100+
101+
// Read-write: /tmp and /dev only
102+
allow_path(ruleset_fd, "/tmp", RW);
103+
allow_path(ruleset_fd, "/dev", RW); // needed for /dev/null etc, used e.g., by triton
104+
105+
// Prevent ptrace and /proc/self/mem tampering
106+
prctl(PR_SET_DUMPABLE, 0);
107+
// Prevent gaining privileges (if attacker tries setuid exploits)
108+
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) <0) {
109+
throw std::system_error(errno, std::system_category(), "prctl(PR_SET_NO_NEW_PRIVS)");
110+
};
111+
// no new executable code pages
112+
// note: this also prevents thread creating, which breaks torch.compile
113+
// workaround: run torch.compile once from trusted python code, then the thread already
114+
// exists at this point. does not seem reliable, so disabled for now
115+
// prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0);
116+
117+
landlock_restrict_self(ruleset_fd, 0);
118+
}

csrc/manager.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121
static constexpr std::size_t ArenaSize = 2 * 1024 * 1024;
2222

23-
void clear_cache(void* dummy_memory, int size, bool discard, cudaStream_t stream);
23+
extern void clear_cache(void* dummy_memory, int size, bool discard, cudaStream_t stream);
24+
extern void install_landlock();
2425

2526
static void check_check_approx_match_dispatch(unsigned* result, void* expected_data, nb::dlpack::dtype expected_type,
2627
const nb_cuda_array& received, float r_tol, float a_tol, unsigned seed, std::size_t n_bytes, cudaStream_t stream) {
@@ -287,15 +288,7 @@ void BenchmarkManager::do_bench_py(const std::string& kernel_qualname, const std
287288
// clean up as much python state as we can
288289
trigger_gc();
289290

290-
// Prevent ptrace and /proc/self/mem tampering
291-
prctl(PR_SET_DUMPABLE, 0);
292-
// Prevent gaining privileges (if attacker tries setuid exploits)
293-
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
294-
// no new executable code pages
295-
// note: this also prevents thread creating, which breaks torch.compile
296-
// workaround: run torch.compile once from trusted python code, then the thread already
297-
// exists at this point. does not seem reliable, so disabled for now
298-
// prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0);
291+
install_landlock();
299292

300293
// at this point, we call user code as we import the kernel (executing arbitrary top-level code)
301294
// after this, we cannot trust python anymore

exploits/run_all.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ def main():
6767
("Warmup detection", "submission_warmup", "Detect warmup, manipulate timing", False), # computes correctly
6868
("Error counter zero", "submission_error_counter", "Zero error counter via ctypes/cudaMemset",True),
6969
("Thread + FD", "submission_thread_fd", "Background thread + fd overwrite", True),
70-
("Combined GC+warmup", "submission_combined", "GC NaN + warmup detection", True),
70+
("Combined GC+warmup", "submission_combined", "GC NaN + warmup detection", True),
71+
("Replace torch", "submission_replace_torch", "Attempt to place torch.py in cwd", True),
7172
]
7273

7374
# Allow running a single test
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""
2+
EXPLOIT: Attempts to place a torch.py file in the current working directory at import time.
3+
"""
4+
import torch
5+
6+
# Three float32 weights, applied by `kernel` along the last axis of its input.
_weights = torch.tensor(
    [0.2989, 0.5870, 0.1140],
    dtype=torch.float32,
    device="cuda",
)

# Exploit attempt: write a file named torch.py into the current working directory.
with open("torch.py", mode="w") as f:
    f.write("# POWNED\n")
10+
11+
def kernel(output, inp):
    """Store the `_weights`-weighted sum over the last axis of `inp` in `output`."""
    weighted = inp * _weights
    torch.sum(weighted, dim=-1, out=output)

0 commit comments

Comments
 (0)