Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions cmd/svcinit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"os/signal"
"strconv"
"strings"
"sync"
"syscall"
"text/tabwriter"
"time"
Expand Down Expand Up @@ -140,6 +141,13 @@ func main() {
r, err := runner.New(ctx, serviceSpecs)
must(err)

mustStopAllForExit := sync.OnceValue(func() map[string]*os.ProcessState {
states, err := r.StopAll()
cancelFunc()
must(err)
return states
})

servicesErrCh := make(chan error, len(unversionedSpecs))

go func() {
Expand Down Expand Up @@ -167,10 +175,11 @@ func main() {
}()

criticalPath, err := r.StartAll(servicesErrCh)
if errors.Is(err, context.Canceled) {
_, err := r.StopAll()
must(err)
return
if err != nil {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: combine with the above? like

if err != nil {
    mustStopAll()
    if errors.Is(err, context.Canceled) {
        return
    }
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated! much cleaner

mustStopAllForExit()
if errors.Is(err, context.Canceled) {
return
}
}
must(err)

Expand Down Expand Up @@ -261,8 +270,7 @@ func main() {
select {
case <-ctx.Done():
log.Println("Shutting down services.")
_, err := r.StopAll()
must(err)
mustStopAllForExit()
log.Println("Cleaning up.")
return
case ibazelCmd := <-interactiveCh:
Expand Down Expand Up @@ -302,20 +310,21 @@ func main() {
if testErr != nil {
log.Printf("Encountered error during test run: %s\n", testErr)
if isOneShot {
mustStopAllForExit()
os.Exit(1)
}
}
case serviceErr := <-servicesErrCh:
log.Print(serviceErr)
if isOneShot {
mustStopAllForExit()
log.Fatal("Service exited uncleanly, marking test as failed.\n\n")
}
}

if isOneShot {
buf.WriteString("Target\tUser Time\tSystem Time\n")
states, err := r.StopAll()
must(err)
states := mustStopAllForExit()
for label, state := range states {
buf.WriteString(fmt.Sprintf("%s\t%s\t%s\n",
label, state.UserTime(), state.SystemTime()))
Expand Down Expand Up @@ -637,4 +646,3 @@ func buildTestEnv(ports svclib.Ports) ([]string, error) {

return baseEnv, nil
}

46 changes: 46 additions & 0 deletions tests/startup_failure/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@
load("@rules_itest//:itest.bzl", "itest_service", "service_test")
load("@rules_shell//shell:sh_binary.bzl", "sh_binary")
load("//:must_fail.bzl", "must_fail")
load(":cleanup_failure_test.bzl", "cleanup_failure_test")

package(default_visibility = ["//visibility:public"])

NOT_WINDOWS = select({
"@platforms//os:windows": ["@platforms//:incompatible"],
"//conditions:default": [],
})

sh_binary(
name = "crash_immediately",
srcs = ["crash_immediately.sh"],
)

sh_binary(
name = "cleanup_on_sigterm",
srcs = ["cleanup_on_sigterm.sh"],
)

cleanup_failure_test(
name = "cleanup_on_startup_failure_test",
target_compatible_with = NOT_WINDOWS,

@bmcdonnel bmcdonnel May 26, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

marked this test as NOT_WINDOWS due to .sh script needed for testing failure cleanup behavior. but apparently, runner/pgroup_windows.go uses cmd.Process.Kill() which (i'm told) isn't really trappable anyway? So maybe this test isn't really doable on Windows ...

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah i think that's ok, windows is very best-effort

test = ":dependent_startup_failure_test",
timeout = "short",
)

itest_service(
name = "immediate_exit_service",
autoassign_port = True,
Expand All @@ -26,3 +44,31 @@ must_fail(
test = "immediate_exit_service_hygiene_test",
timeout = "short",
)

itest_service(
name = "cleanup_service",
exe = ":cleanup_on_sigterm",
shutdown_timeout = "5s",
tags = ["manual"],
)

itest_service(
name = "dependent_immediate_exit_service",
autoassign_port = True,
deps = [":cleanup_service"],
env = {
"PORT": "$${PORT}",
},
exe = ":crash_immediately",
health_check_interval = "100ms",
http_health_check_address = "http://127.0.0.1:$${PORT}/health",
tags = ["manual"],
)

service_test(
name = "dependent_startup_failure_test",
services = [":dependent_immediate_exit_service"],
tags = ["manual"],
test = "@rules_itest//:exit0",
timeout = "short",
)
72 changes: 72 additions & 0 deletions tests/startup_failure/cleanup_failure_test.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
def _shell_quote(value):
return "'" + value.replace("'", "'\"'\"'") + "'"

def _cleanup_failure_test_impl(ctx):
test_env = ctx.attr.test[RunEnvironmentInfo].environment
test_path = ctx.executable.test.short_path

env_lines = [
"export {}={}".format(key, _shell_quote(value))
for key, value in sorted(test_env.items())
]

script = ctx.actions.declare_file(ctx.label.name + ".sh")
ctx.actions.write(
output = script,
is_executable = True,
content = """#!/usr/bin/env bash
set -euo pipefail

cleanup_marker="${{TEST_TMPDIR}}/startup_failure_cleanup_marker"
log_file="${{TEST_TMPDIR}}/dependent_startup_failure.log"
test_path="${{TEST_SRCDIR}}/${{TEST_WORKSPACE}}/{test_path}"

rm -f "${{cleanup_marker}}"

{env}

set +e
"${{test_path}}" >"${{log_file}}" 2>&1
status=$?
set -e

cat "${{log_file}}"

if [[ "${{status}}" -eq 0 ]]; then
echo "expected dependent startup failure test to fail" >&2
exit 1
fi

if [[ ! -f "${{cleanup_marker}}" ]]; then
echo "expected already-started service to receive shutdown before svcinit exited" >&2
exit 1
fi

echo "cleanup marker found: $(cat "${{cleanup_marker}}")"
""".format(
env = "\n".join(env_lines),
test_path = test_path,
),
)

runfiles = ctx.runfiles(files = [ctx.executable.test])
runfiles = runfiles.merge(ctx.attr.test.default_runfiles)

return [
DefaultInfo(
executable = script,
runfiles = runfiles,
),
]

cleanup_failure_test = rule(
implementation = _cleanup_failure_test_impl,
attrs = {
"test": attr.label(
executable = True,
cfg = "target",
providers = [RunEnvironmentInfo],
),
},
test = True,
)
16 changes: 16 additions & 0 deletions tests/startup_failure/cleanup_on_sigterm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -euo pipefail

cleanup_marker="${TEST_TMPDIR}/startup_failure_cleanup_marker"

cleanup() {
echo "cleanup service received shutdown"
echo "cleanup ran" >"${cleanup_marker}"
exit 0
}

trap cleanup TERM INT

while true; do
sleep 1
done
Loading