Skip to content

Commit ca76922

Browse files
committed
PMM-14267 Fix persistence of changed listen ports
1 parent ae3203a commit ca76922

File tree

15 files changed

+419
-9
lines changed

15 files changed

+419
-9
lines changed

.mockery.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
disable-version-string: True
33
with-expecter: False
44
resolve-type-alias: False
5+
issue-845-fix: True
56
inpackage: True
67
dir: "{{.InterfaceDir}}"
78
filename: "mock_{{ .InterfaceName | snakecase }}_test.go"

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ replace golang.org/x/crypto => github.com/Percona-Lab/crypto v0.0.0-202512191250
1414
require (
1515
github.com/AlekSi/pointer v1.2.0
1616
github.com/ClickHouse/clickhouse-go/v2 v2.43.0
17-
github.com/DATA-DOG/go-sqlmock v1.5.0
17+
github.com/DATA-DOG/go-sqlmock v1.5.2
1818
github.com/alecthomas/kingpin/v2 v2.4.0
1919
github.com/alecthomas/kong v1.13.0
2020
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b

go.sum

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ github.com/ClickHouse/clickhouse-go v1.4.3 h1:iAFMa2UrQdR5bHJ2/yaSLffZkxpcOYQMCU
3535
github.com/ClickHouse/clickhouse-go v1.4.3/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI=
3636
github.com/ClickHouse/clickhouse-go/v2 v2.43.0 h1:fUR05TrF1GyvLDa/mAQjkx7KbgwdLRffs2n9O3WobtE=
3737
github.com/ClickHouse/clickhouse-go/v2 v2.43.0/go.mod h1:o6jf7JM/zveWC/PP277BLxjHy5KjnGX/jfljhM4s34g=
38-
github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60=
39-
github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
38+
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
39+
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
4040
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
4141
github.com/HdrHistogram/hdrhistogram-go v1.2.0 h1:XMJkDWuz6bM9Fzy7zORuVFKH7ZJY41G2q8KWhVGkNiY=
4242
github.com/HdrHistogram/hdrhistogram-go v1.2.0/go.mod h1:CiIeGiHSd06zjX+FypuEJ5EQ07KKtxZ+8J6hszwVQig=
@@ -373,6 +373,7 @@ github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXw
373373
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
374374
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
375375
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
376+
github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE=
376377
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
377378
github.com/klauspost/compress v1.18.3 h1:9PJRvfbmTabkOX8moIpXPbMMbYN60bWImDDU7L+/6zw=
378379
github.com/klauspost/compress v1.18.3/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=

managed/AGENT.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ db.InTransactionContext(ctx, nil, func(tx *reform.TX) error {
3636
- `*_helpers.go` files contain CRUD operations
3737
- Always use `reform.Querier` parameter, not concrete types
3838
- Check for `reform.ErrNoRows` explicitly
39+
- Use models.Find...() to access data, not q.Reload() or q.Select() directly
3940

4041
### Service Architecture
4142

managed/services/agents/deps.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ import (
3232
// FIXME Rename to victoriaMetrics.Service, update tests.
3333
type prometheusService interface {
3434
RequestConfigurationUpdate()
35+
// ForceConfigurationUpdate triggers immediate synchronous configuration update,
36+
// bypassing the batch delay. Use this for critical updates like port changes.
37+
ForceConfigurationUpdate(ctx context.Context) error
3538
BuildScrapeConfigForVMAgent(ctx context.Context, pmmAgentID string) ([]byte, error)
3639
}
3740

managed/services/agents/handler.go

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package agents
1717

1818
import (
1919
"context"
20+
"fmt"
2021
"runtime/pprof"
2122
"strings"
2223
"time"
@@ -173,8 +174,10 @@ func (h *Handler) Run(stream agentv1.AgentService_ConnectServer) error {
173174

174175
func (h *Handler) stateChanged(ctx context.Context, req *agentv1.StateChangedRequest) error {
175176
var PMMAgentID string
177+
var portsChanged bool
178+
l := logger.Get(ctx)
176179

177-
errTX := h.db.InTransaction(func(tx *reform.TX) error {
180+
errTX := h.db.InTransactionContext(ctx, nil, func(tx *reform.TX) error {
178181
var agentIDs []string
179182
var err error
180183
req.AgentId = strings.TrimPrefix(req.AgentId, "/agent_id/")
@@ -184,6 +187,13 @@ func (h *Handler) stateChanged(ctx context.Context, req *agentv1.StateChangedReq
184187
}
185188

186189
for _, agentID := range agentIDs {
190+
// Check if port changed before updating
191+
if checkPortChanged(tx.Querier, agentID, req.ListenPort) {
192+
portsChanged = true
193+
// TODO: change to debug after PMM-14267 is resolved, but for now we want to have clear logs about port changes
194+
l.Infof("Port changed for agent %s, will trigger immediate VM config update", agentID)
195+
}
196+
187197
err := updateAgentStatus(
188198
ctx,
189199
tx.Querier,
@@ -202,7 +212,18 @@ func (h *Handler) stateChanged(ctx context.Context, req *agentv1.StateChangedReq
202212
return errTX
203213
}
204214

205-
h.vmdb.RequestConfigurationUpdate()
215+
// For port changes, force immediate synchronous config update to prevent
216+
// VictoriaMetrics from scraping stale ports (PMM-14267)
217+
if portsChanged {
218+
l.Warn("Listen port changed, forcing immediate VictoriaMetrics configuration update")
219+
if err := h.vmdb.ForceConfigurationUpdate(ctx); err != nil {
220+
return fmt.Errorf("failed to force configuration update: %w", err)
221+
}
222+
} else {
223+
// Normal async update
224+
h.vmdb.RequestConfigurationUpdate()
225+
}
226+
206227
agent, err := models.FindAgentByID(h.db.Querier, PMMAgentID)
207228
if err != nil {
208229
return err
@@ -215,6 +236,19 @@ func (h *Handler) stateChanged(ctx context.Context, req *agentv1.StateChangedReq
215236
return nil
216237
}
217238

239+
// checkPortChanged checks if the agent's listen port is changing.
240+
func checkPortChanged(q *reform.Querier, agentID string, newPort uint32) bool {
241+
agent, err := models.FindAgentByID(q, agentID)
242+
if err != nil {
243+
// Can't determine, assume no change
244+
return false
245+
}
246+
oldPort := pointer.GetUint16(agent.ListenPort)
247+
newPort16 := uint16(newPort) //nolint:gosec
248+
// Port changed if old port exists and is different from new port
249+
return oldPort != 0 && oldPort != newPort16
250+
}
251+
218252
func updateAgentStatus(
219253
ctx context.Context,
220254
q *reform.Querier,
@@ -227,8 +261,7 @@ func updateAgentStatus(
227261
l := logger.Get(ctx)
228262
l.Debugf("updateAgentStatus: %s %s %d", agentID, status, listenPort)
229263

230-
agent := &models.Agent{AgentID: agentID}
231-
err := q.Reload(agent)
264+
agent, err := models.FindAgentByID(q, agentID)
232265

233266
// agent can be already deleted, but we still can receive status message from pmm-agent.
234267
if errors.Is(err, reform.ErrNoRows) {

0 commit comments

Comments
 (0)