Skip to content

Commit 1394a9e

Browse files
Ben Iofeldavissp14
Ben Iofel
authored andcommitted
Ensure implementation is migration friendly
1 parent 31c3a48 commit 1394a9e

File tree

15 files changed

+246
-64
lines changed

15 files changed

+246
-64
lines changed

.github/workflows/push.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818

1919
- uses: actions/setup-go@v3
2020
with:
21-
go-version: '1.20'
21+
go-version: '1.23'
2222
go-version-file: 'go.mod'
2323
cache: true
2424

@@ -36,7 +36,7 @@ jobs:
3636

3737
- uses: actions/setup-go@v3
3838
with:
39-
go-version: '1.20'
39+
go-version: '1.23'
4040
go-version-file: 'go.mod'
4141
cache: true
4242

@@ -52,7 +52,7 @@ jobs:
5252

5353
- uses: actions/setup-go@v3
5454
with:
55-
go-version: '1.20'
55+
go-version: '1.23'
5656
go-version-file: 'go.mod'
5757
cache: true
5858

@@ -68,7 +68,7 @@ jobs:
6868

6969
- uses: actions/setup-go@v3
7070
with:
71-
go-version: '1.20'
71+
go-version: '1.23'
7272
go-version-file: 'go.mod'
7373
cache: true
7474

bin/restart-repmgrd

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
11
#!/bin/bash
22

3-
kill `cat /tmp/repmgrd.pid`
3+
if [ -f /tmp/repmgrd.pid ]; then
4+
PID=$(cat /tmp/repmgrd.pid)
5+
6+
# Check if the process is running
7+
if ps -p $PID > /dev/null 2>&1; then
8+
kill $PID
9+
fi
10+
fi

cmd/pg_unregister/main.go

+11-9
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ func main() {
2727

2828
func processUnregistration(ctx context.Context) error {
2929
encodedArg := os.Args[1]
30-
hostnameBytes, err := base64.StdEncoding.DecodeString(encodedArg)
30+
machineBytes, err := base64.StdEncoding.DecodeString(encodedArg)
3131
if err != nil {
32-
return fmt.Errorf("failed to decode hostname: %v", err)
32+
return fmt.Errorf("failed to decode machine: %v", err)
3333
}
3434

3535
node, err := flypg.NewNode()
@@ -43,21 +43,23 @@ func processUnregistration(ctx context.Context) error {
4343
}
4444
defer func() { _ = conn.Close(ctx) }()
4545

46-
member, err := node.RepMgr.MemberByHostname(ctx, conn, string(hostnameBytes))
46+
machineID := string(machineBytes)
47+
48+
if len(machineID) != 14 {
49+
return fmt.Errorf("invalid machine id: %s. (expected length 16, got %d)", machineID, len(machineBytes))
50+
}
51+
52+
member, err := node.RepMgr.MemberByNodeName(ctx, conn, machineID)
4753
if err != nil {
48-
return fmt.Errorf("failed to resolve member: %s", err)
54+
return fmt.Errorf("failed to resolve member using %s: %s", machineID, err)
4955
}
5056

5157
if err := node.RepMgr.UnregisterMember(*member); err != nil {
5258
return fmt.Errorf("failed to unregister member: %v", err)
5359
}
5460

5561
slotName := fmt.Sprintf("repmgr_slot_%d", member.ID)
56-
if err := removeReplicationSlot(ctx, conn, slotName); err != nil {
57-
return err
58-
}
59-
60-
return nil
62+
return removeReplicationSlot(ctx, conn, slotName)
6163
}
6264

6365
func removeReplicationSlot(ctx context.Context, conn *pgx.Conn, slotName string) error {

go.mod

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
module github.com/fly-apps/postgres-flex
22

3-
go 1.20
3+
go 1.23
44

55
require (
66
github.com/go-chi/chi/v5 v5.0.8
77
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
88
github.com/hashicorp/consul/api v1.18.0
99
github.com/jackc/pgconn v1.14.3
1010
github.com/jackc/pgx/v5 v5.5.4
11+
github.com/olekukonko/tablewriter v0.0.5
1112
github.com/pkg/errors v0.9.1
1213
github.com/pkg/term v1.1.0
14+
github.com/spf13/cobra v1.8.1
1315
github.com/superfly/fly-checks v0.0.0-20230510154016-d189351293f2
1416
golang.org/x/exp v0.0.0-20230105202349-8879d0199aa3
1517
golang.org/x/sync v0.1.0
@@ -36,8 +38,6 @@ require (
3638
github.com/mattn/go-runewidth v0.0.9 // indirect
3739
github.com/mitchellh/go-homedir v1.1.0 // indirect
3840
github.com/mitchellh/mapstructure v1.4.1 // indirect
39-
github.com/olekukonko/tablewriter v0.0.5 // indirect
40-
github.com/spf13/cobra v1.8.1 // indirect
4141
github.com/spf13/pflag v1.0.5 // indirect
4242
github.com/stretchr/objx v0.5.0 // indirect
4343
golang.org/x/crypto v0.20.0 // indirect

go.sum

+4
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ github.com/jackc/pgconn v1.14.3/go.mod h1:RZbme4uasqzybK2RK5c65VsHxoyaml09lx3tXO
6969
github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE=
7070
github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8=
7171
github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65 h1:DadwsjnMwFjfWc9y5Wi/+Zz7xoE5ALHsRQlOctkOiHc=
72+
github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak=
7273
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
7374
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
7475
github.com/jackc/pgproto3/v2 v2.3.3 h1:1HLSx5H+tXR9pW3in3zaztoEwQYRC9SQaYUHjTSUOag=
@@ -78,6 +79,7 @@ github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZ
7879
github.com/jackc/pgx/v5 v5.5.4 h1:Xp2aQS8uXButQdnCMWNmvx6UysWQQC+u1EoizjguY+8=
7980
github.com/jackc/pgx/v5 v5.5.4/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
8081
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
82+
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
8183
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
8284
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
8385
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
@@ -139,6 +141,7 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
139141
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
140142
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
141143
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
144+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
142145
github.com/superfly/fly-checks v0.0.0-20230510154016-d189351293f2 h1:mZNiJSrmbQA/3+Vy8GLL/Q9qdHxPzjcxKv+E14GZLFs=
143146
github.com/superfly/fly-checks v0.0.0-20230510154016-d189351293f2/go.mod h1:BbqpB4y6Z/cijQqKWJ3i8LMsAoC29gzX6vsSD3Qq7uw=
144147
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@@ -154,6 +157,7 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
154157
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
155158
golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
156159
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
160+
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
157161
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
158162
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
159163
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=

internal/flypg/node.go

+65-3
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@ import (
1616
"github.com/fly-apps/postgres-flex/internal/privnet"
1717
"github.com/fly-apps/postgres-flex/internal/utils"
1818
"github.com/jackc/pgx/v5"
19+
"golang.org/x/exp/slices"
1920
)
2021

2122
type Node struct {
2223
AppName string
24+
MachineID string
2325
PrivateIP string
2426
PrimaryRegion string
2527
DataDir string
@@ -52,6 +54,8 @@ func NewNode() (*Node, error) {
5254

5355
node.PrivateIP = ipv6.String()
5456

57+
node.MachineID = os.Getenv("FLY_MACHINE_ID")
58+
5559
node.PrimaryRegion = os.Getenv("PRIMARY_REGION")
5660
if node.PrimaryRegion == "" {
5761
return nil, fmt.Errorf("PRIMARY_REGION environment variable must be set")
@@ -88,7 +92,9 @@ func NewNode() (*Node, error) {
8892
UserConfigPath: "/data/repmgr.user.conf",
8993
PasswordConfigPath: "/data/.pgpass",
9094
DataDir: node.DataDir,
95+
HostName: node.Hostname(),
9196
PrivateIP: node.PrivateIP,
97+
MachineID: node.MachineID,
9298
Port: 5433,
9399
DatabaseName: "repmgr",
94100
Credentials: node.ReplCredentials,
@@ -182,7 +188,7 @@ func (n *Node) Init(ctx context.Context) error {
182188
}
183189
} else {
184190
log.Println("Provisioning standby")
185-
cloneTarget, err := n.RepMgr.ResolveMemberOverDNS(ctx)
191+
cloneTarget, err := n.RepMgr.ResolvePrimaryOverDNS(ctx)
186192
if err != nil {
187193
return fmt.Errorf("failed to resolve member over dns: %s", err)
188194
}
@@ -265,7 +271,7 @@ func (n *Node) PostInit(ctx context.Context) error {
265271
return fmt.Errorf("failed to resolve member role: %s", err)
266272
}
267273

268-
// Restart repmgrd in the event the IP changes for an already registered node.
274+
// Restart repmgrd in the event the machine ID changes for an already registered node.
269275
// This can happen if the underlying volume is moved to a different node.
270276
daemonRestartRequired := n.RepMgr.daemonRestartRequired(member)
271277

@@ -311,6 +317,10 @@ func (n *Node) PostInit(ctx context.Context) error {
311317
}
312318
}
313319
case StandbyRoleName:
320+
if err := n.migrateNodeNameIfNeeded(ctx, repConn); err != nil {
321+
return fmt.Errorf("failed to migrate node name: %s", err)
322+
}
323+
314324
// Register existing standby to apply any configuration changes.
315325
if err := n.RepMgr.registerStandby(daemonRestartRequired); err != nil {
316326
return fmt.Errorf("failed to register existing standby: %s", err)
@@ -399,7 +409,7 @@ func (n *Node) PostInit(ctx context.Context) error {
399409
return fmt.Errorf("failed to enable repmgr: %s", err)
400410
}
401411

402-
primary, err := n.RepMgr.ResolveMemberOverDNS(ctx)
412+
primary, err := n.RepMgr.ResolvePrimaryOverDNS(ctx)
403413
if err != nil {
404414
return fmt.Errorf("failed to resolve primary member: %s", err)
405415
}
@@ -527,3 +537,55 @@ func (n *Node) handleRemoteRestore(ctx context.Context, store *state.Store) erro
527537

528538
return nil
529539
}
540+
541+
// migrate node name from 6pn to machine ID if needed
542+
func (n *Node) migrateNodeNameIfNeeded(ctx context.Context, repConn *pgx.Conn) error {
543+
primary, err := n.RepMgr.PrimaryMember(ctx, repConn)
544+
if err != nil {
545+
return fmt.Errorf("failed to resolve primary member when updating standby: %s", err)
546+
}
547+
548+
primaryConn, err := n.RepMgr.NewRemoteConnection(ctx, primary.Hostname)
549+
if err != nil {
550+
return fmt.Errorf("failed to establish connection to primary: %s", err)
551+
}
552+
defer func() { _ = primaryConn.Close(ctx) }()
553+
554+
rows, err := primaryConn.Query(ctx, "select application_name from pg_stat_replication")
555+
if err != nil {
556+
return fmt.Errorf("failed to query pg_stat_replication: %s", err)
557+
}
558+
defer rows.Close()
559+
560+
var applicationNames []string
561+
for rows.Next() {
562+
var applicationName string
563+
if err := rows.Scan(&applicationName); err != nil {
564+
return fmt.Errorf("failed to scan application_name: %s", err)
565+
}
566+
applicationNames = append(applicationNames, applicationName)
567+
}
568+
if err := rows.Err(); err != nil {
569+
return fmt.Errorf("failed to iterate over rows: %s", err)
570+
}
571+
572+
// if we find our 6pn as application_name, we need to regenerate postgresql.auto.conf and reload postgresql
573+
if slices.Contains(applicationNames, n.PrivateIP) {
574+
log.Printf("pg_stat_replication on the primary has our ipv6 address as application_name, converting to machine ID...")
575+
576+
if err := n.RepMgr.regenReplicationConf(ctx); err != nil {
577+
return fmt.Errorf("failed to clone standby: %s", err)
578+
}
579+
580+
if err := admin.ReloadPostgresConfig(ctx, repConn); err != nil {
581+
return fmt.Errorf("failed to reload postgresql: %s", err)
582+
}
583+
}
584+
585+
return nil
586+
}
587+
588+
// Hostname returns the hostname of the node.
589+
func (n *Node) Hostname() string {
590+
return fmt.Sprintf("%s.vm.%s.internal", n.MachineID, n.AppName)
591+
}

internal/flypg/readonly.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func BroadcastReadonlyChange(ctx context.Context, n *Node, enabled bool) error {
7070

7171
for _, member := range members {
7272
if member.Role == PrimaryRoleName {
73-
endpoint := fmt.Sprintf("http://[%s]:5500/%s", member.Hostname, target)
73+
endpoint := fmt.Sprintf("http://%s:5500/%s", member.Hostname, target)
7474
resp, err := http.Get(endpoint)
7575
if err != nil {
7676
log.Printf("[WARN] Failed to broadcast readonly state change to member %s: %s", member.Hostname, err)
@@ -85,7 +85,7 @@ func BroadcastReadonlyChange(ctx context.Context, n *Node, enabled bool) error {
8585
}
8686

8787
for _, member := range members {
88-
endpoint := fmt.Sprintf("http://[%s]:5500/%s", member.Hostname, RestartHaproxyEndpoint)
88+
endpoint := fmt.Sprintf("http://%s:5500/%s", member.Hostname, RestartHaproxyEndpoint)
8989
resp, err := http.Get(endpoint)
9090
if err != nil {
9191
log.Printf("[WARN] Failed to restart haproxy on member %s: %s", member.Hostname, err)

0 commit comments

Comments
 (0)