Skip to content

Commit ed242b3

Browse files
andrey-utkin, vvolkl
authored and committed
Implement FUSE passthrough (cvmfs#4006)
Passthrough is a recent FUSE feature (kernel 6.17.0+, libfuse 3.17+) enabling bypass of userspace for all read requests. When the FUSE daemon provides a "backing file descriptor" to FUSE on an "open" request, the kernel reads directly from it without calling into the FUSE daemon. This is a win for performance. The supplied test demonstrates a measurable speedup: ``` + dd status=progress bs=512 if=/cvmfs/test.repo.dumbdumb/big.bin of=/dev/null 10485760 bytes (10 MB, 10 MiB) copied, 3.35429 s, 3.1 MB/s + dd status=progress bs=512 if=/cvmfs/test.repo.passthru/big.bin of=/dev/null 10485760 bytes (10 MB, 10 MiB) copied, 0.073326 s, 143 MB/s ``` ``` + pushd /cvmfs/test.repo.dumbdumb + parallel -N0 'sha1sum --check --status CHECKSUMS' ::: {1..100} real 0m3.440s user 0m4.782s sys 0m3.345s + pushd /cvmfs/test.repo.passthru + parallel -N0 'sha1sum --check --status CHECKSUMS' ::: {1..100} real 0m1.221s user 0m3.904s sys 0m1.709s ``` Enabling the new functionality requires * building with a sufficiently recent libfuse to have FUSE_CAP_PASSTHROUGH * running with a similarly recent libfuse * running on Linux kernel 6.17.0 or newer * specifying the mount option fuse_passthrough (or fuse_passthru) Related documentation update: cvmfs/doc-cvmfs#254 --------- Co-authored-by: Valentin Volkl <valentin.volkl@cern.ch>
1 parent 9286d23 commit ed242b3

5 files changed

Lines changed: 267 additions & 1 deletion

File tree

cvmfs/cvmfs.cc

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ FuseRemounter *fuse_remounter_ = NULL;
130130
InodeGenerationInfo inode_generation_info_;
131131
#endif // __TEST_CVMFS_MOCKFUSE
132132

133+
#ifdef FUSE_CAP_PASSTHROUGH
134+
/**
 * Per-inode bookkeeping for FUSE passthrough.
 *
 * One entry exists per inode that currently has at least one passthrough
 * open; the entry is shared by all open file handles of that inode.
 * Both members default to zero so that a freshly constructed entry is in a
 * well-defined "no backing id, no users" state regardless of how it was
 * created.
 */
typedef struct fuse_passthru_ctx {
  /** Backing id obtained from fuse_passthrough_open(); 0 means "none". */
  int backing_id = 0;
  /** Number of open file handles currently sharing backing_id. */
  int refcount = 0;
} fuse_passthru_ctx_t;
138+
static std::unordered_map<fuse_ino_t, fuse_passthru_ctx_t> *fuse_passthru_tracker = NULL;
139+
pthread_mutex_t fuse_passthru_tracker_lock = PTHREAD_MUTEX_INITIALIZER;
140+
#endif
133141

134142
/**
135143
* For cvmfs_opendir / cvmfs_readdir
@@ -1317,6 +1325,44 @@ static void cvmfs_open(fuse_req_t req, fuse_ino_t ino,
13171325
fd);
13181326
fi->fh = fd;
13191327
FillOpenFlags(open_directives, fi);
1328+
#ifdef FUSE_CAP_PASSTHROUGH
1329+
if (loader_exports_->fuse_passthrough) {
1330+
if(!dirent.IsChunkedFile()) {
1331+
/* "Currently there should be only one backing id per node / backing file."
1332+
* So says libfuse documentation on fuse_passthrough_open().
1333+
* So we reuse and refcount backing id based on inode.
1334+
* Passthrough can be used with libfuse methods open, opendir, create,
1335+
* but since CVMFS is read-only and synthesizes its directories,
1336+
* we only need to handle it in `open`. */
1337+
int backing_id;
1338+
pthread_mutex_lock(&fuse_passthru_tracker_lock);
1339+
auto iter = fuse_passthru_tracker->find(ino);
1340+
if (iter == fuse_passthru_tracker->end()) {
1341+
auto pair_with_iterator = fuse_passthru_tracker->emplace(ino, fuse_passthru_ctx_t());
1342+
assert(pair_with_iterator.second == true);
1343+
iter = pair_with_iterator.first;
1344+
fuse_passthru_ctx_t &entry = iter->second;
1345+
1346+
backing_id = fuse_passthrough_open(req, fd);
1347+
assert(backing_id != 0);
1348+
entry.backing_id = backing_id;
1349+
entry.refcount++;
1350+
} else {
1351+
fuse_passthru_ctx_t &entry = iter->second;
1352+
assert(entry.refcount > 0);
1353+
backing_id = entry.backing_id;
1354+
entry.refcount++;
1355+
}
1356+
pthread_mutex_unlock(&fuse_passthru_tracker_lock);
1357+
1358+
fi->backing_id = backing_id;
1359+
1360+
/* according to libfuse example/passthrough_hp.cc:
1361+
* "open in passthrough mode must drop old page cache" */
1362+
fi->keep_cache = false;
1363+
}
1364+
}
1365+
#endif
13201366
fuse_reply_open(req, fi);
13211367
return;
13221368
} else {
@@ -1615,6 +1661,30 @@ static void cvmfs_release(fuse_req_t req, fuse_ino_t ino,
16151661
if (file_system_->cache_mgr()->Close(abs_fd) == 0) {
16161662
perf::Dec(file_system_->no_open_files());
16171663
}
1664+
#ifdef FUSE_CAP_PASSTHROUGH
1665+
if (loader_exports_->fuse_passthrough) {
1666+
1667+
if (fi->backing_id != 0) {
1668+
int ret;
1669+
pthread_mutex_lock(&fuse_passthru_tracker_lock);
1670+
auto iter = fuse_passthru_tracker->find(ino);
1671+
assert(iter != fuse_passthru_tracker->end());
1672+
fuse_passthru_ctx_t &entry = iter->second;
1673+
assert(entry.refcount > 0);
1674+
assert(entry.backing_id == fi->backing_id);
1675+
entry.refcount--;
1676+
if (entry.refcount == 0) {
1677+
ret = fuse_passthrough_close(req, fi->backing_id);
1678+
if (ret < 0) {
1679+
LogCvmfs(kLogCvmfs, kLogDebug, "fuse_passthrough_close(fd=%ld) failed: %d", fd, ret);
1680+
assert(false);
1681+
}
1682+
fuse_passthru_tracker->erase(iter);
1683+
}
1684+
pthread_mutex_unlock(&fuse_passthru_tracker_lock);
1685+
}
1686+
}
1687+
#endif
16181688
}
16191689
fuse_reply_err(req, 0);
16201690
}
@@ -2088,11 +2158,46 @@ static void cvmfs_init(void *userdata, struct fuse_conn_info *conn) {
20882158
FUSE_VERSION);
20892159
}
20902160
#endif
2161+
2162+
#ifdef FUSE_CAP_PASSTHROUGH
2163+
if (conn->capable & FUSE_CAP_PASSTHROUGH) {
2164+
if (loader_exports_->fuse_passthrough) {
2165+
conn->want |= FUSE_CAP_PASSTHROUGH;
2166+
/* "Passthrough and writeback cache are conflicting modes"
2167+
* libfuse example/passthrough_hp.cc says,
2168+
* but we don't use writeback cache mode in CVMFS. */
2169+
pthread_mutex_lock(&fuse_passthru_tracker_lock);
2170+
assert(!fuse_passthru_tracker);
2171+
fuse_passthru_tracker = new std::unordered_map<fuse_ino_t,
2172+
fuse_passthru_ctx_t>();
2173+
pthread_mutex_unlock(&fuse_passthru_tracker_lock);
2174+
LogCvmfs(kLogCvmfs, kLogDebug | kLogSyslogWarn,
2175+
"FUSE: Passthrough enabled.");
2176+
} else {
2177+
LogCvmfs(kLogCvmfs, kLogDebug,
2178+
"FUSE: Passthrough enabled in build, available at runtime, but "
2179+
"not enabled by the config option.");
2180+
}
2181+
} else {
2182+
LogCvmfs(kLogCvmfs, kLogDebug | kLogSyslogWarn,
2183+
"FUSE: Passthrough enabled in build but unavailable at runtime.");
2184+
}
2185+
#else
2186+
LogCvmfs(kLogCvmfs, kLogDebug | kLogSyslogWarn,
2187+
"FUSE: Passthrough disabled in this build.");
2188+
#endif
20912189
}
20922190

20932191
static void cvmfs_destroy(void *unused __attribute__((unused))) {
20942192
// The debug log is already closed at this point
20952193
LogCvmfs(kLogCvmfs, kLogDebug, "cvmfs_destroy");
2194+
#ifdef FUSE_CAP_PASSTHROUGH
2195+
pthread_mutex_lock(&fuse_passthru_tracker_lock);
2196+
assert(fuse_passthru_tracker);
2197+
delete fuse_passthru_tracker;
2198+
fuse_passthru_tracker = NULL;
2199+
pthread_mutex_unlock(&fuse_passthru_tracker_lock);
2200+
#endif
20962201
}
20972202

20982203
/**

cvmfs/loader.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ struct CvmfsOptions {
6565
int simple_options_parsing;
6666
int foreground;
6767
int fuse_debug;
68+
int fuse_passthrough;
6869

6970
// Ignored options
7071
int ign_netdev;
@@ -98,6 +99,8 @@ static struct fuse_opt cvmfs_array_opts[] = {
9899
CVMFS_SWITCH("simple_options_parsing", simple_options_parsing),
99100
CVMFS_SWITCH("foreground", foreground),
100101
CVMFS_SWITCH("fuse_debug", fuse_debug),
102+
CVMFS_SWITCH("fuse_passthrough", fuse_passthrough),
103+
CVMFS_SWITCH("fuse_passthru", fuse_passthrough),
101104

102105
// Ignore these options
103106
CVMFS_SWITCH("_netdev", ign_netdev),
@@ -142,6 +145,7 @@ bool premounted_ = false;
142145
bool premount_fuse_ = true;
143146
bool disable_watchdog_ = false;
144147
bool simple_options_parsing_ = false;
148+
bool fuse_passthrough_ = false;
145149
void *library_handle_;
146150
Fence *fence_reload_;
147151
CvmfsExports *cvmfs_exports_;
@@ -170,8 +174,11 @@ static void Usage(const string &exename) {
170174
"before mounting (required for autofs)\n"
171175
" -o parse Parse and print cvmfs parameters\n"
172176
" -o cvmfs_suid Enable suid mode\n"
177+
" -o debug Enable debug to CVMFS_DEBUGLOG\n"
173178
" -o disable_watchdog Do not spawn a post mortem crash handler\n"
174179
" -o foreground Run in foreground\n"
180+
" -o fuse_passthrough Enables FUSE passthrough (read requests bypass userspace, improves performance)\n"
181+
" -o fuse_passthru Alias for fuse_passthrough\n"
175182
" -o libfuse=[2,3] Enforce a certain libfuse version\n"
176183
"Fuse mount options:\n"
177184
" -o allow_other allow access to other users\n"
@@ -420,6 +427,7 @@ static fuse_args *ParseCmdLine(int argc, char *argv[]) {
420427
if (cvmfs_options.fuse_debug) {
421428
fuse_opt_add_arg(mount_options, "-d");
422429
}
430+
fuse_passthrough_ = cvmfs_options.fuse_passthrough;
423431

424432
return mount_options;
425433
}
@@ -793,6 +801,12 @@ int FuseMain(int argc, char *argv[]) {
793801
loader_exports_->device_id = "0:0"; // initially unknown, set after mount
794802
loader_exports_->disable_watchdog = disable_watchdog_;
795803
loader_exports_->simple_options_parsing = simple_options_parsing_;
804+
loader_exports_->fuse_passthrough = fuse_passthrough_;
805+
if (options_manager->GetValue("CVMFS_FUSE_PASSTHROUGH", &parameter)) {
806+
// CVMFS_FUSE_PASSTHROUGH set to on in configs enables the feature.
807+
// Presence of mount option can also enable the feature (but not disable it).
808+
loader_exports_->fuse_passthrough |= options_manager->IsOn(parameter);
809+
}
796810
if (config_files_)
797811
loader_exports_->config_files = *config_files_;
798812
else

cvmfs/loader.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,9 @@ struct LoaderExports {
162162
, foreground(false)
163163
, disable_watchdog(false)
164164
, simple_options_parsing(false)
165-
, fuse_channel_or_session(NULL) { }
165+
, fuse_channel_or_session(NULL)
166+
, fuse_passthrough(false)
167+
{ }
166168

167169
~LoaderExports() {
168170
for (unsigned i = 0; i < history.size(); ++i)
@@ -194,6 +196,8 @@ struct LoaderExports {
194196
// in order to work with both libfuse2 and libfuse3
195197
void **fuse_channel_or_session;
196198

199+
bool fuse_passthrough;
200+
197201
// Linux only, stores the major:minor internal mountpoint identifier
198202
// The identifier is read just after mount from /proc/self/mountinfo
199203
// If it cannot be determined (e.g. on macOS), device_id is "0:0".

test/src/108-fuse-passthru/main

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/sh
2+
cvmfs_test_name="fuse-passthru"
3+
cvmfs_test_autofs_on_startup=false
4+
cvmfs_test_suites=""
5+
6+
cleanup() {
7+
true
8+
}
9+
10+
cvmfs_run_test() {
11+
this_script_dir=$TEST_ROOT/src/108-fuse-passthru
12+
# We want strict mode here, to catch any failure.
13+
# But turning on errexit (set -e) inside a function has no effect.
14+
# So we exec our script to keep its error handling mode independent from test/run.sh.
15+
exe="$this_script_dir"/run_test
16+
echo Launching "$exe"
17+
exec "$exe" "$@"
18+
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#!/bin/bash
2+
set -euo pipefail
3+
set -x
4+
5+
this_script_dir=$(dirname "$(readlink -f "$0")")
6+
if ! [[ -v TEST_ROOT ]]; then
7+
TEST_ROOT=$(readlink -f "$this_script_dir/../..")
8+
fi
9+
build_dir=$(dirname "$TEST_ROOT")
10+
11+
mkdir -p /var/{spool,run}/cvmfs/
12+
[[ -d /etc/cvmfs/default.d ]]
13+
cat > /etc/cvmfs/default.d/99-test.conf <<EOF
14+
CVMFS_USER=$(whoami)
15+
CVMFS_HTTP_PROXY=DIRECT
16+
CVMFS_AUTO_UPDATE=yes
17+
CVMFS_USE_FILE_CHUNKING=false
18+
CVMFS_QUOTA_LIMIT=-1
19+
EOF
20+
export CVMFS_TEST_USER=$(whoami)
21+
22+
rsyslogd &
23+
systemctl enable --now httpd
24+
25+
rm -rf /var/spool/cvmfs/* # just in case
26+
cp -a /etc/fstab{,.orig}
27+
> /etc/fstab
28+
29+
create_repo() {
30+
MOUNT_OPTS=$1
31+
EXTRA_CONFIG=$2
32+
export CVMFS_USE_FILE_CHUNKING=false
33+
mv /etc/fstab{,.bkp}
34+
cvmfs_server mkfs -o "$CVMFS_TEST_USER" "$CVMFS_TEST_REPO"
35+
export CVMFS_TEST_MOUNTPOINT=/cvmfs/$CVMFS_TEST_REPO
36+
cat "$CVMFS_TEST_MOUNTPOINT"/new_repository
37+
38+
pgrep cvmfs2
39+
umount /cvmfs/"$CVMFS_TEST_REPO"
40+
umount /var/spool/cvmfs/"$CVMFS_TEST_REPO"/rdonly
41+
sed -i /etc/fstab -e "s/,noauto /,simple_options_parsing,disable_watchdog,noauto,debug,$MOUNT_OPTS /"
42+
mv /etc/fstab{,.new}
43+
cat /etc/fstab.new /etc/fstab.bkp > /etc/fstab
44+
systemctl daemon-reload
45+
cat >> /etc/cvmfs/repositories.d/"$CVMFS_TEST_REPO"/client.conf <<-EOF
46+
CVMFS_HTTP_PROXY=DIRECT
47+
CVMFS_KCACHE_TIMEOUT=0
48+
CVMFS_MAX_TTL_SECS=1
49+
CVMFS_AUTO_UPDATE=yes
50+
CVMFS_DEBUGLOG=/var/log/$CVMFS_TEST_REPO.cvmfs-debug.log
51+
CVMFS_USYSLOG=/var/log/$CVMFS_TEST_REPO.cvmfs-usyslog.log
52+
$EXTRA_CONFIG
53+
EOF
54+
cat >> /etc/cvmfs/repositories.d/"$CVMFS_TEST_REPO"/server.conf <<-EOF
55+
CVMFS_USE_FILE_CHUNKING=false
56+
EOF
57+
58+
mount /var/spool/cvmfs/"$CVMFS_TEST_REPO"/rdonly
59+
mount /cvmfs/"$CVMFS_TEST_REPO"
60+
61+
cvmfs_server transaction "$CVMFS_TEST_REPO"
62+
echo HELLO > "$CVMFS_TEST_MOUNTPOINT"/hello.txt
63+
time dd status=progress bs=1M count=10 if=/dev/urandom of="$CVMFS_TEST_MOUNTPOINT"/big.bin
64+
pushd "$CVMFS_TEST_MOUNTPOINT"
65+
sha1sum * > CHECKSUMS
66+
popd
67+
68+
time cvmfs_server publish "$CVMFS_TEST_REPO"
69+
mount | grep "$CVMFS_TEST_REPO"
70+
cat "$CVMFS_TEST_MOUNTPOINT"/hello.txt
71+
grep HELLO "$CVMFS_TEST_MOUNTPOINT"/hello.txt
72+
73+
if ! ; then
74+
umount /cvmfs/"$CVMFS_TEST_REPO"
75+
umount /var/spool/cvmfs/"$CVMFS_TEST_REPO"/rdonly
76+
nohup valgrind \
77+
--tool=memcheck \
78+
--log-file=/var/log/"$CVMFS_TEST_REPO".valgrind.log \
79+
--enable-debuginfod=yes \
80+
--leak-check=full \
81+
--show-leak-kinds=all \
82+
--track-origins=yes \
83+
--trace-children=yes \
84+
--track-fds=yes \
85+
--show-error-list=all \
86+
--read-var-info=yes \
87+
--gen-suppressions=all \
88+
--suppressions=<(grep -v '^[=-]' "$build_dir"/valgrind.supp) \
89+
\
90+
/usr/bin/cvmfs2 -f -o \
91+
allow_other,fsname="$CVMFS_TEST_REPO",config=/etc/cvmfs/repositories.d/"$CVMFS_TEST_REPO"/client.conf:/var/spool/cvmfs/"$CVMFS_TEST_REPO"/client.local,disable_watchdog,simple_options_parsing,grab_mountpoint,uid=0,gid=0"$MOUNT_OPTS" \
92+
"$CVMFS_TEST_REPO" /var/spool/cvmfs/"$CVMFS_TEST_REPO"/rdonly \
93+
&
94+
while ! grep /var/spool/cvmfs/"$CVMFS_TEST_REPO"/rdonly /proc/mounts; do
95+
sleep 1
96+
done
97+
mount /cvmfs/"$CVMFS_TEST_REPO"
98+
fi
99+
100+
touch /tmp/"$CVMFS_TEST_REPO".big.bin.read.begin
101+
time dd status=progress bs=512 if="$CVMFS_TEST_MOUNTPOINT"/big.bin of=/dev/null
102+
touch /tmp/"$CVMFS_TEST_REPO".big.bin.read.end
103+
DURATION=$(echo -e "scale=10\n $(date +%s.%N --reference=/tmp/"$CVMFS_TEST_REPO".big.bin.read.end) - $(date +%s.%N --reference=/tmp/"$CVMFS_TEST_REPO".big.bin.read.begin)\n" | bc -q )
104+
echo "$DURATION" > /tmp/"$CVMFS_TEST_REPO".big.bin.read.duration
105+
106+
pushd "$CVMFS_TEST_MOUNTPOINT"
107+
time parallel -N0 "sha1sum --check --status CHECKSUMS" ::: {1..100}
108+
popd
109+
umount /cvmfs/"$CVMFS_TEST_REPO"
110+
umount /var/spool/cvmfs/"$CVMFS_TEST_REPO"/rdonly
111+
}
112+
113+
export CVMFS_TEST_REPO=test.repo.dumbdumb
114+
create_repo '' ''
115+
grep -F 'FUSE: Passthrough enabled in build, available at runtime, but not enabled by the config option.' /var/log/test.repo.dumbdumb.cvmfs-debug.log
116+
117+
export CVMFS_TEST_REPO=test.repo.passthru
118+
create_repo ,fuse_passthrough ''
119+
grep -F 'FUSE: Passthrough enabled.' /var/log/test.repo.passthru.cvmfs-debug.log
120+
121+
export CVMFS_TEST_REPO=test.repo.with-var
122+
create_repo '' 'CVMFS_FUSE_PASSTHROUGH=on'
123+
grep -F 'FUSE: Passthrough enabled.' /var/log/test.repo.with-var.cvmfs-debug.log
124+
125+
head /tmp/test.repo.{passthru,dumbdumb}.big.bin.read.duration

0 commit comments

Comments
 (0)