diff --git a/pkg/api/socket_windows_test.go b/pkg/api/socket_windows_test.go index b364b5d99a..2756f517c3 100644 --- a/pkg/api/socket_windows_test.go +++ b/pkg/api/socket_windows_test.go @@ -116,3 +116,24 @@ func TestCleanupUnixSocket_NamedPipe_NoOp(t *testing.T) { // cleanly. cleanupUnixSocket(`\\.\pipe\thv-api-cleanup-noop`) } + +// TestSetupUnixSocket_NamedPipe_FirstInstanceWins pins winio's first-wins +// semantics: ListenPipe sets FILE_FLAG_FIRST_PIPE_INSTANCE, so a second +// listener targeting the same name must fail. This is the safety net the +// discovery layer relies on to detect a stale-PID conflict; if a future winio +// bump silently relaxed it, two thv processes could bind the same pipe and +// quietly race for traffic. +func TestSetupUnixSocket_NamedPipe_FirstInstanceWins(t *testing.T) { + t.Parallel() + pipePath := uniqueTestPipe() + + first, err := setupUnixSocket(pipePath) + require.NoError(t, err) + t.Cleanup(func() { _ = first.Close() }) + + // setupUnixSocket returns (nil, err) on the named-pipe failure path, so + // no defensive Close is needed for the second listener. + _, err = setupUnixSocket(pipePath) + require.Error(t, err, "second ListenPipe on the same name must fail") + assert.Contains(t, err.Error(), "failed to create named pipe listener") +} diff --git a/pkg/server/discovery/discovery_test.go b/pkg/server/discovery/discovery_test.go index 1ca39a0ada..fadf0ec46a 100644 --- a/pkg/server/discovery/discovery_test.go +++ b/pkg/server/discovery/discovery_test.go @@ -54,6 +54,31 @@ func TestWriteReadServerInfo_UnixSocket(t *testing.T) { assert.Equal(t, info.Nonce, got.Nonce) } +// TestWriteReadServerInfo_NamedPipe pins the producer/consumer contract for +// npipe:// discovery URLs end to end. The individual pieces (socketURL emit, +// HTTPClientForURL dispatch, ParseNamedPipeURL parse) are covered in their +// own tests; this test asserts that an npipe URL survives the discovery +// file's JSON serialization round-trip without being mangled. +func TestWriteReadServerInfo_NamedPipe(t *testing.T) { + t.Parallel() + dir := t.TempDir() + + info := &ServerInfo{ + URL: "npipe://thv-api", + PID: 99999, + Nonce: "test-nonce-pipe", + StartedAt: time.Date(2026, 5, 7, 14, 0, 0, 0, time.UTC), + } + + require.NoError(t, writeServerInfoTo(dir, info)) + + got, err := readServerInfoFrom(dir) + require.NoError(t, err) + assert.Equal(t, info.URL, got.URL) + assert.Equal(t, info.PID, got.PID) + assert.Equal(t, info.Nonce, got.Nonce) +} + func TestReadServerInfo_NotFound(t *testing.T) { t.Parallel() dir := t.TempDir() diff --git a/pkg/server/discovery/health_windows_test.go b/pkg/server/discovery/health_windows_test.go index bc7c1edc5b..552fe18208 100644 --- a/pkg/server/discovery/health_windows_test.go +++ b/pkg/server/discovery/health_windows_test.go @@ -8,9 +8,12 @@ package discovery import ( "context" "fmt" + "net" "net/http" + "sync" "sync/atomic" "testing" + "time" "github.com/Microsoft/go-winio" "github.com/stretchr/testify/assert" @@ -51,3 +54,65 @@ func TestCheckHealth_NamedPipe_NotFound(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "health check failed") } + +// TestCheckHealth_NamedPipe_HungServerCancelsOnContext pins that a peer which +// accepts the connection but never responds does not wedge CheckHealth: when +// the caller's context expires the dial / read returns and CheckHealth surfaces +// a wrapped error. This is the discovery StateUnhealthy path; without this +// guarantee a hung peer would block the previous-instance probe forever. +func TestCheckHealth_NamedPipe_HungServerCancelsOnContext(t *testing.T) { + t.Parallel() + + pipeName := fmt.Sprintf("thv-test-hung-%d", pipeNameSeq.Add(1)) + pipePath := `\\.\pipe\` + pipeName + + listener, err := winio.ListenPipe(pipePath, &winio.PipeConfig{}) + require.NoError(t, err) + t.Cleanup(func() { _ = listener.Close() }) + + // Collect accepted conns under a mutex so the background goroutine never + // touches t after the test body returns. t.Cleanup must be registered + // from the test goroutine, not from the accept loop, otherwise a late + // Accept could race with test teardown ("Log in goroutine after Test + // has completed"). + var ( + connsMu sync.Mutex + conns []net.Conn + ) + t.Cleanup(func() { + connsMu.Lock() + defer connsMu.Unlock() + for _, c := range conns { + _ = c.Close() + } + }) + + // Drain accepts so the dial succeeds, but never write anything back. The + // goroutine exits when the listener is closed via t.Cleanup above. + go func() { + for { + conn, acceptErr := listener.Accept() + if acceptErr != nil { + return + } + // Hold the connection open without responding so CheckHealth's + // HTTP read blocks until the context deadline fires. + connsMu.Lock() + conns = append(conns, conn) + connsMu.Unlock() + } + }() + + ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond) + defer cancel() + + start := time.Now() + err = CheckHealth(ctx, "npipe://"+pipeName, "") + elapsed := time.Since(start) + + require.Error(t, err) + // The CheckHealth call must return promptly after the context expires + // rather than blocking on the hung peer indefinitely. healthTimeout is + // 5 s, so anything within ~2 s of the 250 ms ctx is the context path. + assert.Less(t, elapsed, 2*time.Second, "CheckHealth wedged on a hung peer") +}