diff --git a/cmd/svcinit/main.go b/cmd/svcinit/main.go index c611fee..cc40ec1 100644 --- a/cmd/svcinit/main.go +++ b/cmd/svcinit/main.go @@ -16,6 +16,7 @@ import ( "os/signal" "strconv" "strings" + "sync" "syscall" "text/tabwriter" "time" @@ -140,6 +141,13 @@ func main() { r, err := runner.New(ctx, serviceSpecs) must(err) + mustStopAllForExit := sync.OnceValue(func() map[string]*os.ProcessState { + states, err := r.StopAll() + cancelFunc() + must(err) + return states + }) + servicesErrCh := make(chan error, len(unversionedSpecs)) go func() { @@ -167,10 +175,11 @@ func main() { }() criticalPath, err := r.StartAll(servicesErrCh) - if errors.Is(err, context.Canceled) { - _, err := r.StopAll() - must(err) - return + if err != nil { + mustStopAllForExit() + if errors.Is(err, context.Canceled) { + return + } } must(err) @@ -261,8 +270,7 @@ func main() { select { case <-ctx.Done(): log.Println("Shutting down services.") - _, err := r.StopAll() - must(err) + mustStopAllForExit() log.Println("Cleaning up.") return case ibazelCmd := <-interactiveCh: @@ -302,20 +310,21 @@ func main() { if testErr != nil { log.Printf("Encountered error during test run: %s\n", testErr) if isOneShot { + mustStopAllForExit() os.Exit(1) } } case serviceErr := <-servicesErrCh: log.Print(serviceErr) if isOneShot { + mustStopAllForExit() log.Fatal("Service exited uncleanly, marking test as failed.\n\n") } } if isOneShot { buf.WriteString("Target\tUser Time\tSystem Time\n") - states, err := r.StopAll() - must(err) + states := mustStopAllForExit() for label, state := range states { buf.WriteString(fmt.Sprintf("%s\t%s\t%s\n", label, state.UserTime(), state.SystemTime())) @@ -637,4 +646,3 @@ func buildTestEnv(ports svclib.Ports) ([]string, error) { return baseEnv, nil } - diff --git a/tests/startup_failure/BUILD.bazel b/tests/startup_failure/BUILD.bazel index 33093ae..02eb519 100644 --- a/tests/startup_failure/BUILD.bazel +++ b/tests/startup_failure/BUILD.bazel @@ -1,14 +1,32 @@ load("@rules_itest//:itest.bzl", "itest_service", "service_test") load("@rules_shell//shell:sh_binary.bzl", "sh_binary") load("//:must_fail.bzl", "must_fail") +load(":cleanup_failure_test.bzl", "cleanup_failure_test") package(default_visibility = ["//visibility:public"]) +NOT_WINDOWS = select({ + "@platforms//os:windows": ["@platforms//:incompatible"], + "//conditions:default": [], +}) + sh_binary( name = "crash_immediately", srcs = ["crash_immediately.sh"], ) +sh_binary( + name = "cleanup_on_sigterm", + srcs = ["cleanup_on_sigterm.sh"], +) + +cleanup_failure_test( + name = "cleanup_on_startup_failure_test", + target_compatible_with = NOT_WINDOWS, + test = ":dependent_startup_failure_test", + timeout = "short", +) + itest_service( name = "immediate_exit_service", autoassign_port = True, @@ -26,3 +44,31 @@ must_fail( test = "immediate_exit_service_hygiene_test", timeout = "short", ) + +itest_service( + name = "cleanup_service", + exe = ":cleanup_on_sigterm", + shutdown_timeout = "5s", + tags = ["manual"], +) + +itest_service( + name = "dependent_immediate_exit_service", + autoassign_port = True, + deps = [":cleanup_service"], + env = { + "PORT": "$${PORT}", + }, + exe = ":crash_immediately", + health_check_interval = "100ms", + http_health_check_address = "http://127.0.0.1:$${PORT}/health", + tags = ["manual"], +) + +service_test( + name = "dependent_startup_failure_test", + services = [":dependent_immediate_exit_service"], + tags = ["manual"], + test = "@rules_itest//:exit0", + timeout = "short", +) diff --git a/tests/startup_failure/cleanup_failure_test.bzl b/tests/startup_failure/cleanup_failure_test.bzl new file mode 100644 index 0000000..dab32b1 --- /dev/null +++ b/tests/startup_failure/cleanup_failure_test.bzl @@ -0,0 +1,72 @@ +def _shell_quote(value): + return "'" + value.replace("'", "'\"'\"'") + "'" + +def _cleanup_failure_test_impl(ctx): + test_env = ctx.attr.test[RunEnvironmentInfo].environment + test_path = ctx.executable.test.short_path + + env_lines = [ + "export {}={}".format(key, _shell_quote(value)) + for key, value in sorted(test_env.items()) + ] + + script = ctx.actions.declare_file(ctx.label.name + ".sh") + ctx.actions.write( + output = script, + is_executable = True, + content = """#!/usr/bin/env bash +set -euo pipefail + +cleanup_marker="${{TEST_TMPDIR}}/startup_failure_cleanup_marker" +log_file="${{TEST_TMPDIR}}/dependent_startup_failure.log" +test_path="${{TEST_SRCDIR}}/${{TEST_WORKSPACE}}/{test_path}" + +rm -f "${{cleanup_marker}}" + +{env} + +set +e +"${{test_path}}" >"${{log_file}}" 2>&1 +status=$? +set -e + +cat "${{log_file}}" + +if [[ "${{status}}" -eq 0 ]]; then + echo "expected dependent startup failure test to fail" >&2 + exit 1 +fi + +if [[ ! -f "${{cleanup_marker}}" ]]; then + echo "expected already-started service to receive shutdown before svcinit exited" >&2 + exit 1 +fi + +echo "cleanup marker found: $(cat "${{cleanup_marker}}")" +""".format( + env = "\n".join(env_lines), + test_path = test_path, + ), + ) + + runfiles = ctx.runfiles(files = [ctx.executable.test]) + runfiles = runfiles.merge(ctx.attr.test.default_runfiles) + + return [ + DefaultInfo( + executable = script, + runfiles = runfiles, + ), + ] + +cleanup_failure_test = rule( + implementation = _cleanup_failure_test_impl, + attrs = { + "test": attr.label( + executable = True, + cfg = "target", + providers = [RunEnvironmentInfo], + ), + }, + test = True, +) diff --git a/tests/startup_failure/cleanup_on_sigterm.sh b/tests/startup_failure/cleanup_on_sigterm.sh new file mode 100755 index 0000000..7e89465 --- /dev/null +++ b/tests/startup_failure/cleanup_on_sigterm.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -euo pipefail + +cleanup_marker="${TEST_TMPDIR}/startup_failure_cleanup_marker" + +cleanup() { + echo "cleanup service received shutdown" + echo "cleanup ran" >"${cleanup_marker}" + exit 0 +} + +trap cleanup TERM INT + +while true; do + sleep 1 +done