Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 103 additions & 4 deletions pkg/gctuner/tuner.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ import (
"math"
"os"
"strconv"
"sync"
"sync/atomic"

"go.uber.org/zap"

"github.com/pingcap/log"

util "github.com/tikv/pd/pkg/gogc"
"github.com/tikv/pd/pkg/memory"
)

var (
Expand Down Expand Up @@ -64,14 +66,18 @@ func init() {
// When Tuning, the env GOGC will not take effect.
// threshold: disable tuning if threshold == 0
func Tuning(threshold uint64) {
// disable gc tuner if percent is zero
tunerLock.Lock()
defer tunerLock.Unlock()
if t := globalTuner.Load(); t == nil {
t1 := newTuner(threshold)
globalTuner.CompareAndSwap(nil, &t1)
// init gc tuner only when threshold > 0, otherwise do nothing
if threshold > 0 {
t1 := newTuner(threshold)
globalTuner.Store(&t1)
}
Comment thread
bufferflies marked this conversation as resolved.
} else {
if threshold <= 0 {
(*t).stop()
globalTuner.CompareAndSwap(t, nil)
globalTuner.Store(nil)
} else {
(*t).setThreshold(threshold)
}
Expand All @@ -92,6 +98,7 @@ func GetGOGCPercent() (percent uint32) {
// only allow one gc tuner in one process
// It is not thread-safe. so it is a private, singleton pattern to avoid misuse.
var globalTuner atomic.Pointer[*tuner]
var tunerLock = sync.Mutex{}

/*
// Heap
Expand Down Expand Up @@ -191,3 +198,95 @@ func calcGCPercent(inuse, threshold uint64) uint32 {
}
return gcPercent
}

// Config holds the configuration for GC tuner initialization/update.
type Config struct {
	// EnableGOGCTuner enables dynamic GOGC adjustment when true.
	EnableGOGCTuner bool
	// GCTunerThreshold is the GC threshold expressed as a fraction of the
	// memory limit (or of total memory when no limit is configured).
	GCTunerThreshold float64
	// ServerMemoryLimit is the server memory limit as a fraction of total
	// memory; 0 means no explicit limit.
	ServerMemoryLimit float64
	// ServerMemoryLimitGCTrigger is the fraction of the memory limit at
	// which a GC is triggered.
	ServerMemoryLimitGCTrigger float64
}

// State holds the state for detecting config changes.
type State struct {
	// totalMem is the total memory in bytes, fixed at initialization.
	totalMem uint64
	// EnableGCTuner records whether GOGC tuning is currently enabled.
	EnableGCTuner bool
	// GCThresholdBytes is the absolute GC threshold passed to Tuning.
	GCThresholdBytes uint64
	// MemoryLimitBytes is the absolute server memory limit (0 = none).
	MemoryLimitBytes uint64
	// MemoryLimitGCTriggerRatio is the configured GC-trigger fraction.
	MemoryLimitGCTriggerRatio float64
	// MemoryLimitGCTriggerBytes is MemoryLimitBytes scaled by the
	// GC-trigger ratio.
	MemoryLimitGCTriggerBytes uint64
}

// InitGCTuner initializes the GC tuner with the given config and total memory.
// Returns the initial state that can be used for subsequent updates.
func InitGCTuner(totalMem uint64, cfg *Config) *State {
state := &State{totalMem: totalMem}
state.EnableGCTuner = cfg.EnableGOGCTuner
state.MemoryLimitBytes = uint64(float64(totalMem) * cfg.ServerMemoryLimit)
state.GCThresholdBytes = uint64(float64(state.MemoryLimitBytes) * cfg.GCTunerThreshold)
if state.MemoryLimitBytes == 0 {
state.GCThresholdBytes = uint64(float64(totalMem) * cfg.GCTunerThreshold)
}
state.MemoryLimitGCTriggerRatio = cfg.ServerMemoryLimitGCTrigger
state.MemoryLimitGCTriggerBytes = uint64(float64(state.MemoryLimitBytes) * state.MemoryLimitGCTriggerRatio)

// Initialize GC tuner
Tuning(state.GCThresholdBytes)
EnableGOGCTuner.Store(state.EnableGCTuner)
log.Info("initialize gc tuner",
zap.Bool("enable-gc-tuner", state.EnableGCTuner),
zap.Uint64("gc-threshold-bytes", state.GCThresholdBytes))

// Initialize memory limit tuner
memory.ServerMemoryLimit.Store(state.MemoryLimitBytes)
GlobalMemoryLimitTuner.SetPercentage(state.MemoryLimitGCTriggerRatio)
GlobalMemoryLimitTuner.UpdateMemoryLimit()
log.Info("initialize gc memory limit",
zap.Uint64("memory-limit-bytes", state.MemoryLimitBytes),
zap.Float64("memory-limit-gc-trigger-ratio", state.MemoryLimitGCTriggerRatio))

return state
Comment on lines +210 to +248
Copy link
Copy Markdown

@coderabbitai coderabbitai Bot Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add a shutdown hook for State.

InitGCTuner now owns process-wide state (globalTuner, EnableGOGCTuner, memory.ServerMemoryLimit, GlobalMemoryLimitTuner), but State has no Close/Reset. Both current consumers only stop their loops, so the finalizer goroutine and memory-limit settings survive watcher/cluster shutdown and leadership changes. Please add an explicit cleanup method and invoke it from the callers’ teardown paths.

Based on learnings: Avoid new global state; keep concurrency safe; close resources, and prevent goroutine leaks: pair with cancellation.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@pkg/gctuner/tuner.go` around lines 210 - 248, InitGCTuner installs
process-wide globals but State has no cleanup; add a Close (or Reset) method on
State that cancels any background goroutine(s) started by Tuning (e.g., by
storing a context.CancelFunc or stop channel in State when InitGCTuner runs),
restores global flags and limits (set EnableGOGCTuner false via
EnableGOGCTuner.Store(false), reset memory.ServerMemoryLimit to 0 or previous
value, and call the appropriate GlobalMemoryLimitTuner method to stop/clear
percentage and UpdateMemoryLimit), and ensure any resources owned by
GlobalMemoryLimitTuner are stopped; update InitGCTuner to store the cancel/stop
handle in State (and to start Tuning with that cancel/context or capture
Tuning's returned stop handle) and document that callers must call state.Close()
during teardown so watcher/leadership shutdowns do not leak goroutines or leave
global state mutated.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we want keep the GC parameters still working even if the state has gc

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bufferflies, understood — that's a deliberate design choice: the GC tuner is process-scoped and should keep the last configured parameters active across leadership transitions or watcher restarts, rather than falling back to defaults on teardown. A subsequent InitGCTuner call (triggered by the new leader/watcher) will update the singleton in-place via Tuning(), so there's no window where GC tuning is inadvertently disabled.

On the goroutine-leak concern: since Tuning() is protected by tunerLock and always operates on the singleton globalTuner (updating its threshold if one already exists, or creating exactly one new tuner otherwise), there's no path that spawns an additional background goroutine. The concern was unfounded given this implementation. No changes needed. 👍

( _(
(=' ;=)
(,(')(')


✏️ Learnings added
Learnt from: bufferflies
Repo: tikv/pd PR: 10212
File: pkg/gctuner/tuner.go:210-248
Timestamp: 2026-03-12T01:39:49.808Z
Learning: In tikv/pd pkg/gctuner, the GC tuner (`globalTuner`) is intentionally process-scoped and should remain active with its last configured parameters even after a watcher or leadership change. `InitGCTuner` is called again on re-initialization and updates the singleton in-place via `Tuning()` (guarded by `tunerLock`). There is no `Close`/`Reset` on `State` by design — callers must NOT reset global GC state on watcher teardown, as GC tuning should persist across leadership transitions.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.


🧠 Learnings used
Learnt from: CR
Repo: tikv/pd PR: 0
File: AGENTS.md:0-0
Timestamp: 2026-03-04T02:14:00.960Z
Learning: Applies to **/*.go : Avoid new global state; keep concurrency safe; close resources.

Learnt from: CR
Repo: tikv/pd PR: 0
File: AGENTS.md:0-0
Timestamp: 2026-03-04T02:14:00.960Z
Learning: Applies to **/*.go : Cancel timers/tickers; close resources with defer and error checks.

Learnt from: CR
Repo: tikv/pd PR: 0
File: AGENTS.md:0-0
Timestamp: 2026-03-04T02:14:00.960Z
Learning: Applies to **/*.go : Prevent goroutine leaks: pair with cancellation; consider errgroup.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will it cause a leak?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bufferflies still have a comment unresolved.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done, add a close function to disable the tuner.

}

// UpdateIfNeeded checks if the config has changed and updates the GC tuner.
// Returns true if any updates were made.
func (s *State) UpdateIfNeeded(cfg *Config) bool {
updated := false

newEnableGCTuner := cfg.EnableGOGCTuner
newMemoryLimitBytes := uint64(float64(s.totalMem) * cfg.ServerMemoryLimit)
newGCThresholdBytes := uint64(float64(newMemoryLimitBytes) * cfg.GCTunerThreshold)
if newMemoryLimitBytes == 0 {
newGCThresholdBytes = uint64(float64(s.totalMem) * cfg.GCTunerThreshold)
}
newMemoryLimitGCTriggerRatio := cfg.ServerMemoryLimitGCTrigger
newMemoryLimitGCTriggerBytes := uint64(float64(newMemoryLimitBytes) * newMemoryLimitGCTriggerRatio)

// Check and update GC tuner
if newEnableGCTuner != s.EnableGCTuner || newGCThresholdBytes != s.GCThresholdBytes {
s.EnableGCTuner = newEnableGCTuner
s.GCThresholdBytes = newGCThresholdBytes
Tuning(s.GCThresholdBytes)
EnableGOGCTuner.Store(s.EnableGCTuner)
log.Info("update gc tuner",
zap.Bool("enable-gc-tuner", s.EnableGCTuner),
zap.Uint64("gc-threshold-bytes", s.GCThresholdBytes))
updated = true
}

// Check and update memory limit tuner
if newMemoryLimitBytes != s.MemoryLimitBytes || newMemoryLimitGCTriggerRatio != s.MemoryLimitGCTriggerRatio {
s.MemoryLimitBytes = newMemoryLimitBytes
s.MemoryLimitGCTriggerRatio = newMemoryLimitGCTriggerRatio
s.MemoryLimitGCTriggerBytes = newMemoryLimitGCTriggerBytes
memory.ServerMemoryLimit.Store(s.MemoryLimitBytes)
GlobalMemoryLimitTuner.SetPercentage(s.MemoryLimitGCTriggerRatio)
GlobalMemoryLimitTuner.UpdateMemoryLimit()
log.Info("update gc memory limit",
zap.Uint64("memory-limit-bytes", s.MemoryLimitBytes),
zap.Float64("memory-limit-gc-trigger-ratio", s.MemoryLimitGCTriggerRatio))
updated = true
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

return updated
}
107 changes: 107 additions & 0 deletions pkg/gctuner/tuner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (

"github.com/docker/go-units"
"github.com/stretchr/testify/require"

"github.com/tikv/pd/pkg/memory"
)

var testHeap []byte
Expand Down Expand Up @@ -107,6 +109,9 @@ func testGetGOGC(re *require.Assertions) {
gt := globalTuner.Load()
re.Nil(gt)
}()
re.Nil(globalTuner.Load())
Tuning(0)
re.Nil(globalTuner.Load())
re.Equal(defaultGCPercent, GetGOGCPercent())
// init tuner
threshold := uint64(units.GiB)
Expand All @@ -125,3 +130,105 @@ func testGetGOGC(re *require.Assertions) {
(*gt).setGCPercent(percent)
re.Equal(percent, GetGOGCPercent())
}

func TestInitGCTuner(t *testing.T) {
	re := require.New(t)
	// Snapshot process-wide tuner state and restore it after the test.
	savedEnable := EnableGOGCTuner.Load()
	savedLimit := memory.ServerMemoryLimit.Load()
	t.Cleanup(func() {
		EnableGOGCTuner.Store(savedEnable)
		memory.ServerMemoryLimit.Store(savedLimit)
		Tuning(0)
	})

	// Initialize with a 1GB machine and fractional limits.
	totalMem := uint64(1000000000) // 1GB
	cfg := &Config{
		EnableGOGCTuner:            true,
		GCTunerThreshold:           0.6,
		ServerMemoryLimit:          0.8,
		ServerMemoryLimitGCTrigger: 0.7,
	}
	state := InitGCTuner(totalMem, cfg)

	re.NotNil(state)
	re.True(state.EnableGCTuner)
	re.Equal(uint64(800000000), state.MemoryLimitBytes)          // 1GB * 0.8
	re.Equal(uint64(480000000), state.GCThresholdBytes)          // 800MB * 0.6
	re.Equal(0.7, state.MemoryLimitGCTriggerRatio)
	re.Equal(uint64(560000000), state.MemoryLimitGCTriggerBytes) // 800MB * 0.7
	re.True(EnableGOGCTuner.Load())
}
Comment thread
bufferflies marked this conversation as resolved.

func TestInitGCTunerWithZeroMemoryLimit(t *testing.T) {
	re := require.New(t)
	// Restore process-wide tuner state via t.Cleanup: a failed require
	// assertion aborts the test with FailNow, which would skip a trailing
	// cleanup call and leak the global tuner into subsequent tests.
	prevEnable := EnableGOGCTuner.Load()
	prevMemLimit := memory.ServerMemoryLimit.Load()
	t.Cleanup(func() {
		EnableGOGCTuner.Store(prevEnable)
		memory.ServerMemoryLimit.Store(prevMemLimit)
		Tuning(0)
	})

	totalMem := uint64(1000000000) // 1GB
	cfg := &Config{
		EnableGOGCTuner:            true,
		GCTunerThreshold:           0.6,
		ServerMemoryLimit:          0, // zero means use total memory
		ServerMemoryLimitGCTrigger: 0.7,
	}

	state := InitGCTuner(totalMem, cfg)

	re.NotNil(state)
	re.Equal(uint64(0), state.MemoryLimitBytes)
	re.Equal(uint64(600000000), state.GCThresholdBytes) // 1GB * 0.6 (uses totalMem)
}

func TestUpdateIfNeeded(t *testing.T) {
	re := require.New(t)
	// Restore process-wide tuner state via t.Cleanup: a failed require
	// assertion aborts the test with FailNow, which would skip a trailing
	// cleanup call and leak the global tuner into subsequent tests.
	prevEnable := EnableGOGCTuner.Load()
	prevMemLimit := memory.ServerMemoryLimit.Load()
	t.Cleanup(func() {
		EnableGOGCTuner.Store(prevEnable)
		memory.ServerMemoryLimit.Store(prevMemLimit)
		Tuning(0)
	})

	totalMem := uint64(1000000000) // 1GB
	cfg := &Config{
		EnableGOGCTuner:            true,
		GCTunerThreshold:           0.6,
		ServerMemoryLimit:          0.8,
		ServerMemoryLimitGCTrigger: 0.7,
	}

	state := InitGCTuner(totalMem, cfg)
	originalThreshold := state.GCThresholdBytes

	// Test no change
	updated := state.UpdateIfNeeded(cfg)
	re.False(updated)
	re.Equal(originalThreshold, state.GCThresholdBytes)

	// Test GC threshold change
	cfg.GCTunerThreshold = 0.5
	updated = state.UpdateIfNeeded(cfg)
	re.True(updated)
	re.Equal(uint64(400000000), state.GCThresholdBytes) // 800MB * 0.5

	// Test enable toggle
	cfg.EnableGOGCTuner = false
	updated = state.UpdateIfNeeded(cfg)
	re.True(updated)
	re.False(state.EnableGCTuner)
	re.False(EnableGOGCTuner.Load())

	// Test memory limit change
	cfg.ServerMemoryLimit = 0.9
	cfg.GCTunerThreshold = 0.5
	updated = state.UpdateIfNeeded(cfg)
	re.True(updated)
	re.Equal(uint64(900000000), state.MemoryLimitBytes) // 1GB * 0.9

	// Test ratio-only change (when bytes might be the same due to rounding)
	// Set up a scenario where ratio changes but computed bytes are the same
	cfg.ServerMemoryLimitGCTrigger = 0.8
	updated = state.UpdateIfNeeded(cfg)
	re.True(updated)
	re.Equal(0.8, state.MemoryLimitGCTriggerRatio)

	// Test no change when ratio is the same
	updated = state.UpdateIfNeeded(cfg)
	re.False(updated)
}
1 change: 1 addition & 0 deletions pkg/mcs/scheduling/server/apis/v1/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ func getConfig(c *gin.Context) {
mergedCfg := localCfg
mergedCfg.Schedule = primaryCfg.Schedule
mergedCfg.Replication = primaryCfg.Replication
mergedCfg.Server = primaryCfg.Server
c.IndentedJSON(http.StatusOK, &mergedCfg)
}

Expand Down
30 changes: 30 additions & 0 deletions pkg/mcs/scheduling/server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/tikv/pd/pkg/cache"
"github.com/tikv/pd/pkg/core/constant"
"github.com/tikv/pd/pkg/core/storelimit"
"github.com/tikv/pd/pkg/gctuner"
mcsconstant "github.com/tikv/pd/pkg/mcs/utils/constant"
sc "github.com/tikv/pd/pkg/schedule/config"
"github.com/tikv/pd/pkg/schedule/types"
Expand Down Expand Up @@ -80,6 +81,8 @@ type Config struct {

Schedule sc.ScheduleConfig `toml:"schedule" json:"schedule"`
Replication sc.ReplicationConfig `toml:"replication" json:"replication"`
// This config is sync with the pd server.
Server sc.ServerConfig `toml:"pd-server" json:"pd-server"`
}

// NewConfig creates a new config.
Expand Down Expand Up @@ -199,6 +202,8 @@ type PersistConfig struct {
// schedulersUpdatingNotifier is used to notify that the schedulers have been updated.
// Store as `chan<- struct{}`.
schedulersUpdatingNotifier atomic.Value
// serverConfig stores the server configuration from local config.
serverConfig atomic.Value
}

// NewPersistConfig creates a new PersistConfig instance.
Expand All @@ -207,6 +212,7 @@ func NewPersistConfig(cfg *Config, ttl *cache.TTLString) *PersistConfig {
o.SetClusterVersion(&cfg.ClusterVersion)
o.schedule.Store(&cfg.Schedule)
o.replication.Store(&cfg.Replication)
o.serverConfig.Store(&cfg.Server)
// storeConfig will be fetched from TiKV by PD,
// so we just set an empty value here first.
o.storeConfig.Store(&sc.StoreConfig{})
Expand Down Expand Up @@ -291,6 +297,16 @@ func (o *PersistConfig) SetStoreConfig(cfg *sc.StoreConfig) {
o.storeConfig.Store(cfg)
}

// setServerConfig atomically replaces the cached server configuration.
// Some of the fields won't be persisted and watched, so the whole struct
// is stored as-is.
func (o *PersistConfig) setServerConfig(cfg *sc.ServerConfig) {
	o.serverConfig.Store(cfg)
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// GetServerConfig returns the stored server configuration.
// NOTE(review): despite the earlier "cloned" wording, this returns the
// shared stored pointer, not a copy — callers must not mutate it.
func (o *PersistConfig) GetServerConfig() *sc.ServerConfig {
	return o.serverConfig.Load().(*sc.ServerConfig)
}

// GetStoreConfig returns the TiKV store configuration.
func (o *PersistConfig) GetStoreConfig() *sc.StoreConfig {
return o.storeConfig.Load().(*sc.StoreConfig)
Expand Down Expand Up @@ -763,3 +779,17 @@ func (o *PersistConfig) GetTTLData(key string) (string, bool) {
}
return "", false
}

// GetGCTunerConfig returns the GC tuner configuration, only used in tests.
// It returns nil when no server configuration has been stored yet.
func (o *PersistConfig) GetGCTunerConfig() *gctuner.Config {
	// Use the comma-ok assertion: a plain assertion on an atomic.Value
	// that was never Store()d panics on the untyped nil before the nil
	// check below could ever run.
	cfg, ok := o.serverConfig.Load().(*sc.ServerConfig)
	if !ok || cfg == nil {
		return nil
	}
	return &gctuner.Config{
		EnableGOGCTuner:            cfg.EnableGOGCTuner,
		GCTunerThreshold:           cfg.GCTunerThreshold,
		ServerMemoryLimit:          cfg.ServerMemoryLimit,
		ServerMemoryLimitGCTrigger: cfg.ServerMemoryLimitGCTrigger,
	}
}
Loading
Loading