Skip to content

Commit 53b1425

Browse files
authored
Freeport 2.0 (temporalio#6966)
## What changed? <!-- Describe what has changed in this PR --> New freeport implementation; taken from battle-tested Temporal projects. ## Why? <!-- Tell your future self why have you made these changes --> Follow-up to temporalio#6915; making it simpler and more robust. ## How did you test it? <!-- How have you verified this change? Tested locally? Added a unit test? Checked in staging env? --> - [x] works on Linux (ie CI) - [x] works on Mac (ie my computer) [First CI run](https://github.com/temporalio/temporal/actions/runs/12269035797/attempts/1?pr=6966), `TestNexusCallbackReplicated` failed with `Error while dialing: dial tcp 127.0.0.1:33279: connect: connection refused`. Looks like there was an unrelated data race (that I've reported to Slack). [Second CI run](https://github.com/temporalio/temporal/actions/runs/12269035797/attempts/2?pr=6966) passed. [Third CI run](https://github.com/temporalio/temporal/actions/runs/12269035797?pr=6966), Versioning suite seems to have [timed out](https://github.com/temporalio/temporal/actions/runs/12269035797/job/34233452135?pr=6966#step:8:966). Unrelated. So it seems to be fine from a random port perspective. ## Potential risks <!-- Assuming the worst case, what can be broken when deploying this change to production? --> ## Documentation <!-- Have you made sure this change doesn't falsify anything currently stated in `docs/`? If significant new behavior is added, have you described that in `docs/`? --> ## Is hotfix candidate? <!-- Is this PR a hotfix candidate or does it require a notification to be sent to the broader community? (Yes/No) -->
1 parent 09a61bf commit 53b1425

File tree

10 files changed

+135
-140
lines changed

10 files changed

+135
-140
lines changed

common/nexus/nexustest/server.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,11 @@ import (
3232

3333
"github.com/nexus-rpc/sdk-go/nexus"
3434
"github.com/stretchr/testify/require"
35-
"go.temporal.io/server/internal/temporalite"
35+
"go.temporal.io/server/internal/freeport"
3636
)
3737

38-
func AllocListenAddress(t *testing.T) string {
39-
pp := temporalite.NewPortProvider()
40-
listenAddr := fmt.Sprintf("localhost:%d", pp.MustGetFreePort())
41-
require.NoError(t, pp.Close())
42-
return listenAddr
38+
func AllocListenAddress() string {
39+
return fmt.Sprintf("localhost:%d", freeport.MustGetFreePort())
4340
}
4441

4542
func NewNexusServer(t *testing.T, listenAddr string, handler nexus.Handler) {

components/nexusoperations/executors_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ func TestProcessInvocationTask(t *testing.T) {
417417
t.Run(tc.name, func(t *testing.T) {
418418
t.Parallel()
419419
ctrl := gomock.NewController(t)
420-
listenAddr := nexustest.AllocListenAddress(t)
420+
listenAddr := nexustest.AllocListenAddress()
421421
h := handler{}
422422
h.OnStartOperation = func(
423423
ctx context.Context,
@@ -716,7 +716,7 @@ func TestProcessCancelationTask(t *testing.T) {
716716
t.Run(tc.name, func(t *testing.T) {
717717
t.Parallel()
718718
ctrl := gomock.NewController(t)
719-
listenAddr := nexustest.AllocListenAddress(t)
719+
listenAddr := nexustest.AllocListenAddress()
720720
h := handler{}
721721
h.OnCancelOperation = tc.onCancelOperation
722722
nexustest.NewNexusServer(t, listenAddr, h)

internal/freeport/freeport.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// The MIT License
2+
//
3+
// Copyright (c) 2024 Temporal Technologies Inc. All rights reserved.
4+
//
5+
// Permission is hereby granted, free of charge, to any person obtaining a copy
6+
// of this software and associated documentation files (the "Software"), to deal
7+
// in the Software without restriction, including without limitation the rights
8+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
// copies of the Software, and to permit persons to whom the Software is
10+
// furnished to do so, subject to the following conditions:
11+
//
12+
// The above copyright notice and this permission notice shall be included in
13+
// all copies or substantial portions of the Software.
14+
//
15+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
// THE SOFTWARE.
22+
23+
package freeport
24+
25+
import (
26+
"fmt"
27+
"net"
28+
"runtime"
29+
)
30+
31+
// MustGetFreePort returns a TCP port that is available to listen on,
32+
// for the given (local) host.
33+
//
34+
// This works by binding a new TCP socket on port 0, which requests the OS to
35+
// allocate a free port. There is no strict guarantee that the port will remain
36+
// available after this function returns, but it should be safe to assume that
37+
// a given port will not be allocated again to any process on this machine
38+
// within a few seconds.
39+
//
40+
// On Unix-based systems, binding to the port returned by this function requires
41+
// setting the `SO_REUSEADDR` socket option (Go already does that by default,
42+
// but other languages may not); otherwise, the OS may fail with a message such
43+
// as "address already in use". Windows default behavior is already appropriate
44+
// in this regard; on that platform, `SO_REUSEADDR` has a different meaning and
45+
// should not be set (setting it may have unpredictable consequences).
46+
func MustGetFreePort() int {
47+
port, err := getFreePort("127.0.0.1")
48+
if err != nil {
49+
// try ipv6
50+
port, err = getFreePort("[::1]")
51+
if err != nil {
52+
panic(fmt.Errorf("failed assigning ephemeral port: %w", err))
53+
}
54+
}
55+
return port
56+
}
57+
58+
func getFreePort(host string) (int, error) {
59+
l, err := net.Listen("tcp", host+":0")
60+
if err != nil {
61+
return 0, fmt.Errorf("failed to assign a free port: %v", err)
62+
}
63+
defer l.Close()
64+
port := l.Addr().(*net.TCPAddr).Port
65+
66+
// On Linux and some BSD variants, ephemeral ports are randomized, and may
67+
// consequently repeat within a short time frame after the listening end
68+
// has been closed. To avoid this, we make a connection to the port, then
69+
// close that connection from the server's side (this is very important),
70+
// which puts the connection in TIME_WAIT state for some time (by default,
71+
// 60s on Linux). While it remains in that state, the OS will not reallocate
72+
// that port number for bind(:0) syscalls, yet we are not prevented from
73+
// explicitly binding to it (thanks to SO_REUSEADDR).
74+
//
75+
// On macOS and Windows, the above technique is not necessary, as the OS
76+
// allocates ephemeral ports sequentially, meaning a port number will only
77+
// be reused after the entire range has been exhausted. Quite the opposite,
78+
// given that these OSes use a significantly smaller range for ephemeral
79+
// ports, making an extra connection just to reserve a port might actually
80+
// be harmful (by hastening ephemeral port exhaustion).
81+
if runtime.GOOS != "darwin" && runtime.GOOS != "windows" {
82+
r, err := net.DialTCP("tcp", nil, l.Addr().(*net.TCPAddr))
83+
if err != nil {
84+
return 0, fmt.Errorf("failed to assign a free port: %v", err)
85+
}
86+
c, err := l.Accept()
87+
if err != nil {
88+
return 0, fmt.Errorf("failed to assign a free port: %v", err)
89+
}
90+
// Closing the socket from the server side
91+
_ = c.Close()
92+
defer r.Close()
93+
}
94+
95+
return port, nil
96+
}

internal/temporalite/freeport.go

Lines changed: 0 additions & 75 deletions
This file was deleted.

internal/temporalite/lite_server.go

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
"go.temporal.io/server/common/log"
4949
"go.temporal.io/server/common/metrics"
5050
sqliteplugin "go.temporal.io/server/common/persistence/sql/sqlplugin/sqlite"
51+
"go.temporal.io/server/internal/freeport"
5152
"go.temporal.io/server/schema/sqlite"
5253
"go.temporal.io/server/temporal"
5354
expmaps "golang.org/x/exp/maps"
@@ -96,7 +97,7 @@ type LiteServerConfig struct {
9697
SearchAttributes map[string]enums.IndexedValueType
9798
}
9899

99-
func (cfg *LiteServerConfig) apply(serverConfig *config.Config, provider *PortProvider) {
100+
func (cfg *LiteServerConfig) apply(serverConfig *config.Config) {
100101
sqliteConfig := config.SQL{
101102
PluginName: sqliteplugin.PluginName,
102103
ConnectAttributes: make(map[string]string),
@@ -117,12 +118,12 @@ func (cfg *LiteServerConfig) apply(serverConfig *config.Config, provider *PortPr
117118
}
118119

119120
if cfg.FrontendPort == 0 {
120-
cfg.FrontendPort = provider.MustGetFreePort()
121+
cfg.FrontendPort = freeport.MustGetFreePort()
121122
}
122123
if cfg.MetricsPort == 0 {
123-
cfg.MetricsPort = provider.MustGetFreePort()
124+
cfg.MetricsPort = freeport.MustGetFreePort()
124125
}
125-
pprofPort := provider.MustGetFreePort()
126+
pprofPort := freeport.MustGetFreePort()
126127

127128
serverConfig.Global.Membership = config.Membership{
128129
MaxJoinDuration: 30 * time.Second,
@@ -160,10 +161,10 @@ func (cfg *LiteServerConfig) apply(serverConfig *config.Config, provider *PortPr
160161
Policy: "noop",
161162
}
162163
serverConfig.Services = map[string]config.Service{
163-
"frontend": cfg.mustGetService(0, provider),
164-
"history": cfg.mustGetService(1, provider),
165-
"matching": cfg.mustGetService(2, provider),
166-
"worker": cfg.mustGetService(3, provider),
164+
"frontend": cfg.mustGetService(0),
165+
"history": cfg.mustGetService(1),
166+
"matching": cfg.mustGetService(2),
167+
"worker": cfg.mustGetService(3),
167168
}
168169
serverConfig.Archival = config.Archival{
169170
History: config.HistoryArchival{
@@ -244,11 +245,7 @@ func NewLiteServer(liteConfig *LiteServerConfig, opts ...temporal.ServerOption)
244245
return nil, err
245246
}
246247

247-
p := NewPortProvider()
248-
liteConfig.apply(liteConfig.BaseConfig, p)
249-
if err := p.Close(); err != nil {
250-
return nil, err
251-
}
248+
liteConfig.apply(liteConfig.BaseConfig)
252249

253250
sqlConfig := liteConfig.BaseConfig.Persistence.DataStores[sqliteplugin.PluginName].SQL
254251

@@ -379,19 +376,19 @@ func getAllowedPragmas() []string {
379376
return allowedPragmaList
380377
}
381378

382-
func (cfg *LiteServerConfig) mustGetService(frontendPortOffset int, provider *PortProvider) config.Service {
379+
func (cfg *LiteServerConfig) mustGetService(frontendPortOffset int) config.Service {
383380
svc := config.Service{
384381
RPC: config.RPC{
385382
GRPCPort: cfg.FrontendPort + frontendPortOffset,
386-
MembershipPort: provider.MustGetFreePort(),
383+
MembershipPort: freeport.MustGetFreePort(),
387384
BindOnLocalHost: true,
388385
BindOnIP: "",
389386
},
390387
}
391388

392389
// Assign any open port when configured to use dynamic ports
393390
if frontendPortOffset != 0 {
394-
svc.RPC.GRPCPort = provider.MustGetFreePort()
391+
svc.RPC.GRPCPort = freeport.MustGetFreePort()
395392
}
396393

397394
// Optionally bind frontend to IPv4 address

tests/callbacks_test.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ import (
4646
"go.temporal.io/sdk/workflow"
4747
"go.temporal.io/server/common/dynamicconfig"
4848
"go.temporal.io/server/components/callbacks"
49-
"go.temporal.io/server/internal/temporalite"
49+
"go.temporal.io/server/internal/freeport"
5050
"go.temporal.io/server/tests/testcore"
5151
"google.golang.org/protobuf/types/known/durationpb"
5252
)
@@ -250,7 +250,6 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() {
250250
Namespace: s.Namespace(),
251251
})
252252
s.NoError(err)
253-
pp := temporalite.NewPortProvider()
254253

255254
taskQueue := testcore.RandomizeStr(s.T().Name())
256255
workflowType := "test"
@@ -259,8 +258,7 @@ func (s *CallbacksSuite) TestWorkflowNexusCallbacks_CarriedOver() {
259258
requestCh: make(chan *nexus.CompletionRequest, 1),
260259
requestCompleteCh: make(chan error, 1),
261260
}
262-
callbackAddress := fmt.Sprintf("localhost:%d", pp.MustGetFreePort())
263-
s.NoError(pp.Close())
261+
callbackAddress := fmt.Sprintf("localhost:%d", freeport.MustGetFreePort())
264262
shutdownServer := s.runNexusCompletionHTTPServer(ch, callbackAddress)
265263
t.Cleanup(func() {
266264
require.NoError(t, shutdownServer())
@@ -349,16 +347,14 @@ func (s *CallbacksSuite) TestNexusResetWorkflowWithCallback() {
349347
Namespace: s.Namespace(),
350348
})
351349
s.NoError(err)
352-
pp := temporalite.NewPortProvider()
353350

354351
taskQueue := testcore.RandomizeStr(s.T().Name())
355352

356353
ch := &completionHandler{
357354
requestCh: make(chan *nexus.CompletionRequest, 1),
358355
requestCompleteCh: make(chan error, 1),
359356
}
360-
callbackAddress := fmt.Sprintf("localhost:%d", pp.MustGetFreePort())
361-
s.NoError(pp.Close())
357+
callbackAddress := fmt.Sprintf("localhost:%d", freeport.MustGetFreePort())
362358
shutdownServer := s.runNexusCompletionHTTPServer(ch, callbackAddress)
363359
s.T().Cleanup(func() {
364360
require.NoError(s.T(), shutdownServer())

0 commit comments

Comments
 (0)