Skip to content

Commit b25ec71

Browse files
committed
Fix PAUSED → RUNNING not stepping dependent services
When a service resumes from PAUSED to RUNNING during reload its pid condition is reasserted using cond_set_path(), which only updates the condition file. Unlike cond_set(), it does not call cond_update(), so services further down the dependency chain are never stepped and remain stuck in PAUSED with cond:flux. Example: in a chain A → B → C → D, touching B and reloading causes C to go PAUSED → RUNNING when pid/B is reasserted by the pidfile plugin (via cond_set). But C's own reassertion of pid/C used cond_set_path(), so D was never notified and stayed PAUSED. This bug has been present since dddd45e (Finit 3.2-rc1, 2018), which added the reassertion but used the lower-level function. Fixes #476 Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
1 parent ab81272 commit b25ec71

File tree

2 files changed

+32
-16
lines changed

2 files changed

+32
-16
lines changed

src/service.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3154,7 +3154,7 @@ int service_step(svc_t *svc)
31543154

31553155
mkcond(svc, name, sizeof(name));
31563156
dbg("Reassert condition %s", name);
3157-
cond_set_path(cond_path(name), COND_ON);
3157+
cond_set(name);
31583158
}
31593159

31603160
dbg("Reassert %s ready condition", svc_ident(svc, NULL, 0));

test/dep-chain-reload.sh

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
#!/bin/sh
22
# Verify transitive dependency chain during reload and crash
33
#
4-
# Three services in a chain: A → B → C, where B depends on pid/A
5-
# and C depends on pid/B. B is placed in a sub-config file so
6-
# we can use 'initctl touch' on it. B uses <!pid/svc_a> (with
7-
# leading '!') so it does not support SIGHUP, causing a full
8-
# stop/start cycle on 'initctl touch svc_b.conf' + reload.
4+
# Four services in a chain: A → B → C → D, where B depends on
5+
# pid/A and C depends on pid/B and D depends on pid/C. B is
6+
# placed in a sub-config file so we can use 'initctl touch' on
7+
# it. B uses <!pid/svc_a> (with leading '!') so it does not
8+
# support SIGHUP, causing a full stop/start cycle on 'initctl
9+
# touch svc_b.conf' + reload.
910
#
1011
# Test 1 - touch + reload:
1112
# After 'initctl touch svc_b.conf' + 'initctl reload':
1213
# - A should be unaffected (same PID)
1314
# - B is restarted (config was touched, noreload)
14-
# - C must be restarted (transitive, depends on pid/B)
15+
# - C goes PAUSED (pid/B in flux), then PAUSED → RUNNING
16+
# once B reasserts pid/B via the pidfile plugin
17+
# - D must also resume: C's PAUSED → RUNNING handler must
18+
# call cond_set() (not just cond_set_path()) so that
19+
# cond_update() steps D out of PAUSED
1520
#
1621
# Test 2 - crash (kill -9):
1722
# When B is killed with SIGKILL the crash path (RUNNING →
@@ -20,7 +25,7 @@
2025
# IN_CLOSE_WRITE, so neither the pidfile removal (IN_DELETE)
2126
# nor a pidfile touch (IN_ATTRIB) triggers an inotify event.
2227
# Without the fix in service_cleanup(), pid/B is never
23-
# invalidated and C is never restarted.
28+
# invalidated and C (and D) are never restarted.
2429

2530
set -eu
2631

@@ -42,8 +47,11 @@ test_setup()
4247
run "cat >> $FINIT_CONF" <<EOF
4348
service log:stdout notify:pid name:svc_a serv -np -i svc_a -- Chain root
4449
service log:stdout notify:pid <pid/svc_b> name:svc_c serv -np -i svc_c -- Needs B
50+
service log:stdout notify:pid <pid/svc_c> name:svc_d serv -np -i svc_d -- Needs C
51+
EOF
52+
run "cat >> $FINIT_RCSD/svc_b.conf" <<EOF
53+
service log:stdout notify:pid <!pid/svc_a> name:svc_b serv -np -i svc_b -- Needs A
4554
EOF
46-
run "echo 'service log:stdout notify:pid <!pid/svc_a> name:svc_b serv -np -i svc_b -- Needs A' > $FINIT_RCSD/svc_b.conf"
4755
}
4856

4957
# shellcheck source=/dev/null
@@ -57,7 +65,7 @@ say "Reload Finit to start all services"
5765
run "initctl reload"
5866

5967
say "Wait for full chain to start"
60-
retry 'assert_status "svc_c" "running"' 10 1
68+
retry 'assert_status "svc_d" "running"' 10 1
6169

6270
run "initctl status"
6371
run "initctl cond dump"
@@ -70,28 +78,32 @@ sep "Test 1: Touch B and global reload"
7078
pid_a=$(pidof svc_a)
7179
pid_b=$(pidof svc_b)
7280
pid_c=$(pidof svc_c)
73-
say "PIDs before: A=$pid_a B=$pid_b C=$pid_c"
81+
pid_d=$(pidof svc_d)
82+
say "PIDs before: A=$pid_a B=$pid_b C=$pid_c D=$pid_d"
7483

7584
run "initctl touch svc_b.conf"
7685
run "initctl reload"
7786

7887
say "Wait for chain to settle"
79-
retry 'assert_status "svc_c" "running"' 15 1
88+
retry 'assert_status "svc_d" "running"' 15 1
8089

8190
run "initctl status"
8291
run "initctl cond dump"
8392

8493
new_pid_a=$(pidof svc_a)
8594
new_pid_b=$(pidof svc_b)
8695
new_pid_c=$(pidof svc_c)
87-
say "PIDs after: A=$new_pid_a B=$new_pid_b C=$new_pid_c"
96+
new_pid_d=$(pidof svc_d)
97+
say "PIDs after: A=$new_pid_a B=$new_pid_b C=$new_pid_c D=$new_pid_d"
8898

8999
# shellcheck disable=SC2086
90100
assert "A was not restarted" $new_pid_a -eq $pid_a
91101
# shellcheck disable=SC2086
92102
assert "B was restarted (touched)" $new_pid_b -ne $pid_b
93103
# shellcheck disable=SC2086
94104
assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c
105+
# shellcheck disable=SC2086
106+
assert "D was restarted (transitive dep)" $new_pid_d -ne $pid_d
95107

96108
# ――――――――――――――――――――――――――――――――――――――――――――――――――――――
97109
# Test 2: crash (kill -9), bypasses STOPPING
@@ -100,23 +112,27 @@ sep "Test 2: Kill B with SIGKILL (bypasses STOPPING)"
100112

101113
pid_b=$(pidof svc_b)
102114
pid_c=$(pidof svc_c)
103-
say "PIDs before: B=$pid_b C=$pid_c"
115+
pid_d=$(pidof svc_d)
116+
say "PIDs before: B=$pid_b C=$pid_c D=$pid_d"
104117

105118
run "kill -9 $pid_b"
106119

107120
say "Wait for B to respawn and chain to settle"
108-
retry 'assert_status "svc_c" "running"' 15 1
121+
retry 'assert_status "svc_d" "running"' 15 1
109122

110123
run "initctl status"
111124
run "initctl cond dump"
112125

113126
new_pid_b=$(pidof svc_b)
114127
new_pid_c=$(pidof svc_c)
115-
say "PIDs after: B=$new_pid_b C=$new_pid_c"
128+
new_pid_d=$(pidof svc_d)
129+
say "PIDs after: B=$new_pid_b C=$new_pid_c D=$new_pid_d"
116130

117131
# shellcheck disable=SC2086
118132
assert "B was restarted (crashed+respawn)" $new_pid_b -ne $pid_b
119133
# shellcheck disable=SC2086
120134
assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c
135+
# shellcheck disable=SC2086
136+
assert "D was restarted (transitive dep)" $new_pid_d -ne $pid_d
121137

122138
return 0

0 commit comments

Comments
 (0)