From ba6af92e2a8e8dc8c859dfdd4f076c70f5dc7bbb Mon Sep 17 00:00:00 2001 From: Jacob Salway Date: Sun, 2 Mar 2025 00:01:34 +1100 Subject: [PATCH] Allow metrics cache expiry to be configured --- README.md | 3 ++ cmd/root.go | 5 ++- internal/client/client.go | 40 ++++++++++++++++--- internal/client/metrics.go | 38 +++++------------- internal/client/metrics_test.go | 17 +++++--- internal/client/options.go | 12 ++++++ internal/client/types.go | 3 ++ internal/config/config_test.go | 2 + internal/config/json/schemas/k9s.json | 1 + internal/config/k9s.go | 26 +++++++----- internal/config/k9s_test.go | 3 ++ internal/config/mock/test_helpers.go | 3 ++ internal/config/testdata/configs/default.yaml | 1 + .../config/testdata/configs/expected.yaml | 1 + internal/config/testdata/configs/k9s.yaml | 1 + internal/config/types.go | 5 ++- internal/dao/container.go | 2 +- internal/dao/container_test.go | 1 + internal/dao/node.go | 4 +- internal/dao/pod.go | 4 +- internal/model/cluster.go | 2 +- internal/model/pulse_health.go | 2 +- 22 files changed, 117 insertions(+), 59 deletions(-) create mode 100644 internal/client/options.go diff --git a/README.md b/README.md index c7c02a21d6..fbdde03845 100644 --- a/README.md +++ b/README.md @@ -396,6 +396,8 @@ You can now override the context portForward default address configuration by se screenDumpDir: /tmp/dumps # Represents ui poll intervals. Default 2secs refreshRate: 2 + # How often metrics data will be cached for e.g. CPU and memory usage. Default 60s + metricsCacheExpiry: 60 # Number of retries once the connection to the api-server is lost. Default 15. maxConnRetry: 5 # Indicates whether modification commands like delete/kill/edit are disabled. Default is false @@ -966,6 +968,7 @@ k9s: liveViewAutoRefresh: false screenDumpDir: /tmp/dumps refreshRate: 2 + metricsCacheExpiry: 60 maxConnRetry: 5 readOnly: false noExitOnCtrlC: false diff --git a/cmd/root.go b/cmd/root.go index 610c7c7efb..2075ba12ff 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -138,7 +138,8 @@ func loadConfiguration() (*config.Config, error) { errs = errors.Join(errs, err) } - conn, err := client.InitConnection(k8sCfg) + options := client.NewOptions(k9sCfg.K9s.MetricsCacheExpiry) + conn, err := client.InitConnection(k8sCfg, options) if err != nil { errs = errors.Join(errs, err) @@ -376,7 +377,7 @@ func initK8sFlagCompletion() { _ = rootCmd.RegisterFlagCompletionFunc("namespace", func(cmd *cobra.Command, args []string, s string) ([]string, cobra.ShellCompDirective) { conn := client.NewConfig(k8sFlags) - if c, err := client.InitConnection(conn); err == nil { + if c, err := client.InitConnection(conn, client.Options{}); err == nil { if nss, err := c.ValidNamespaceNames(); err == nil { return filterFlagCompletions(nss, s) } diff --git a/internal/client/client.go b/internal/client/client.go index 60ee189243..a5e6e139a4 100644 --- a/internal/client/client.go +++ b/internal/client/client.go @@ -46,10 +46,13 @@ type APIClient struct { nsClient dynamic.NamespaceableResourceInterface mxsClient *versioned.Clientset cachedClient *disk.CachedDiscoveryClient + metricsServer *MetricsServer config *Config mx sync.RWMutex cache *cache.LRUExpireCache connOK bool + + options Options } // NewTestAPIClient for testing ONLY!! @@ -62,11 +65,12 @@ func NewTestAPIClient() *APIClient { // InitConnection initialize connection from command line args. // Checks for connectivity with the api server. -func InitConnection(config *Config) (*APIClient, error) { +func InitConnection(config *Config, options Options) (*APIClient, error) { a := APIClient{ - config: config, - cache: cache.NewLRUExpireCache(cacheSize), - connOK: true, + config: config, + cache: cache.NewLRUExpireCache(cacheSize), + connOK: true, + options: options, } err := a.supportsMetricsResources() if err != nil { @@ -394,6 +398,20 @@ func (a *APIClient) getLogClient() kubernetes.Interface { return a.logClient } +func (a *APIClient) getMetricsServer() *MetricsServer { + a.mx.RLock() + defer a.mx.RUnlock() + + return a.metricsServer +} + +func (a *APIClient) setMetricsServer(ms *MetricsServer) { + a.mx.Lock() + defer a.mx.Unlock() + + a.metricsServer = ms +} + func (a *APIClient) setClient(k kubernetes.Interface) { a.mx.Lock() defer a.mx.Unlock() @@ -453,6 +471,18 @@ func (a *APIClient) Dial() (kubernetes.Interface, error) { return a.getClient(), nil } +func (a *APIClient) DialMetrics() *MetricsServer { + if ms := a.getMetricsServer(); ms != nil { + return ms + } + + cacheExpiry := time.Duration(a.options.MetricsCacheExpiry) * time.Second + ms := NewMetricsServer(a, cacheExpiry) + a.setMetricsServer(ms) + + return a.getMetricsServer() +} + // RestConfig returns a rest api client. func (a *APIClient) RestConfig() (*restclient.Config, error) { return a.config.RESTConfig() @@ -552,7 +582,6 @@ func (a *APIClient) SwitchContext(name string) error { return err } a.reset() - ResetMetrics() // Need reload to pick up any kubeconfig changes. a.config = NewConfig(a.config.flags) @@ -570,6 +599,7 @@ func (a *APIClient) reset() { a.setCachedClient(nil) a.setClient(nil) a.setLogClient(nil) + a.setMetricsServer(nil) a.setConnOK(true) } diff --git a/internal/client/metrics.go b/internal/client/metrics.go index f79aeac4d8..87fafc0fb6 100644 --- a/internal/client/metrics.go +++ b/internal/client/metrics.go @@ -18,41 +18,25 @@ import ( ) const ( - mxCacheSize = 100 - mxCacheExpiry = 1 * time.Minute - podMXGVR = "metrics.k8s.io/v1beta1/pods" - nodeMXGVR = "metrics.k8s.io/v1beta1/nodes" + mxCacheSize = 100 + podMXGVR = "metrics.k8s.io/v1beta1/pods" + nodeMXGVR = "metrics.k8s.io/v1beta1/nodes" ) -// MetricsDial tracks global metric server handle. -var MetricsDial *MetricsServer - -// DialMetrics dials the metrics server. -func DialMetrics(c Connection) *MetricsServer { - if MetricsDial == nil { - MetricsDial = NewMetricsServer(c) - } - - return MetricsDial -} - -// ResetMetrics resets the metric server handle. -func ResetMetrics() { - MetricsDial = nil -} - // MetricsServer serves cluster metrics for nodes and pods. type MetricsServer struct { Connection - cache *cache.LRUExpireCache + cache *cache.LRUExpireCache + cacheExpiry time.Duration } // NewMetricsServer return a metric server instance. -func NewMetricsServer(c Connection) *MetricsServer { +func NewMetricsServer(c Connection, cacheExpiry time.Duration) *MetricsServer { return &MetricsServer{ - Connection: c, - cache: cache.NewLRUExpireCache(mxCacheSize), + Connection: c, + cache: cache.NewLRUExpireCache(mxCacheSize), + cacheExpiry: cacheExpiry, } } @@ -172,7 +156,7 @@ func (m *MetricsServer) FetchNodesMetrics(ctx context.Context) (*mv1beta1.NodeMe if err != nil { return mx, err } - m.cache.Add(key, mxList, mxCacheExpiry) + m.cache.Add(key, mxList, m.cacheExpiry) return mxList, nil } @@ -243,7 +227,7 @@ func (m *MetricsServer) FetchPodsMetrics(ctx context.Context, ns string) (*mv1be if err != nil { return mx, err } - m.cache.Add(key, mxList, mxCacheExpiry) + m.cache.Add(key, mxList, m.cacheExpiry) return mxList, err } diff --git a/internal/client/metrics_test.go b/internal/client/metrics_test.go index cb1a357295..767955b070 100644 --- a/internal/client/metrics_test.go +++ b/internal/client/metrics_test.go @@ -5,6 +5,7 @@ package client_test import ( "testing" + "time" "github.com/derailed/k9s/internal/client" "github.com/stretchr/testify/assert" @@ -72,7 +73,7 @@ func TestPodsMetrics(t *testing.T) { }, } - m := client.NewMetricsServer(nil) + m := makeMetricsServer() for k := range uu { u := uu[k] t.Run(k, func(t *testing.T) { @@ -91,7 +92,7 @@ func TestPodsMetrics(t *testing.T) { } func BenchmarkPodsMetrics(b *testing.B) { - m := client.NewMetricsServer(nil) + m := makeMetricsServer() metrics := v1beta1.PodMetricsList{ Items: []v1beta1.PodMetrics{ @@ -168,7 +169,7 @@ func TestNodesMetrics(t *testing.T) { }, } - m := client.NewMetricsServer(nil) + m := makeMetricsServer() for k := range uu { u := uu[k] t.Run(k, func(t *testing.T) { @@ -201,7 +202,7 @@ func BenchmarkNodesMetrics(b *testing.B) { }, } - m := client.NewMetricsServer(nil) + m := makeMetricsServer() mmx := make(client.NodesMetrics) b.ResetTimer() @@ -261,7 +262,7 @@ func TestClusterLoad(t *testing.T) { }, } - m := client.NewMetricsServer(nil) + m := makeMetricsServer() for k := range uu { u := uu[k] t.Run(k, func(t *testing.T) { @@ -287,7 +288,7 @@ func BenchmarkClusterLoad(b *testing.B) { }, } - m := client.NewMetricsServer(nil) + m := makeMetricsServer() var mx client.ClusterMetrics b.ResetTimer() b.ReportAllocs() @@ -299,6 +300,10 @@ func BenchmarkClusterLoad(b *testing.B) { // ---------------------------------------------------------------------------- // Helpers... +func makeMetricsServer() *client.MetricsServer { + return client.NewMetricsServer(nil, 1*time.Minute) +} + func makeMxPod(name, cpu, mem string) *v1beta1.PodMetrics { return &v1beta1.PodMetrics{ ObjectMeta: metav1.ObjectMeta{ diff --git a/internal/client/options.go b/internal/client/options.go new file mode 100644 index 0000000000..9f1c131323 --- /dev/null +++ b/internal/client/options.go @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of K9s + +package client + +type Options struct { + MetricsCacheExpiry int +} + +func NewOptions(metricsCacheExpiry int) Options { + return Options{MetricsCacheExpiry: metricsCacheExpiry} +} diff --git a/internal/client/types.go b/internal/client/types.go index b5a7d3cf76..78b5c24781 100644 --- a/internal/client/types.go +++ b/internal/client/types.go @@ -102,6 +102,9 @@ type Connection interface { // DialLogs connects to api server for logs. DialLogs() (kubernetes.Interface, error) + // DialMetrics connects to metrics server. + DialMetrics() *MetricsServer + // SwitchContext switches cluster based on context. SwitchContext(ctx string) error diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 3910eb5c74..1463becf36 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -537,6 +537,7 @@ func TestConfigLoad(t *testing.T) { assert.Nil(t, cfg.Load("testdata/configs/k9s.yaml", true)) assert.Equal(t, 2, cfg.K9s.RefreshRate) + assert.Equal(t, 60, cfg.K9s.MetricsCacheExpiry) assert.Equal(t, int64(200), cfg.K9s.Logger.TailCount) assert.Equal(t, 2000, cfg.K9s.Logger.BufferSize) } @@ -553,6 +554,7 @@ func TestConfigSaveFile(t *testing.T) { assert.Nil(t, cfg.Load("testdata/configs/k9s.yaml", true)) cfg.K9s.RefreshRate = 100 + cfg.K9s.MetricsCacheExpiry = 100 cfg.K9s.ReadOnly = true cfg.K9s.Logger.TailCount = 500 cfg.K9s.Logger.BufferSize = 800 diff --git a/internal/config/json/schemas/k9s.json b/internal/config/json/schemas/k9s.json index e6fc4d83cf..23d2aa0da3 100644 --- a/internal/config/json/schemas/k9s.json +++ b/internal/config/json/schemas/k9s.json @@ -10,6 +10,7 @@ "liveViewAutoRefresh": { "type": "boolean" }, "screenDumpDir": {"type": "string"}, "refreshRate": { "type": "integer" }, + "metricsCacheExpiry": { "type": "integer" }, "maxConnRetry": { "type": "integer" }, "readOnly": { "type": "boolean" }, "noExitOnCtrlC": { "type": "boolean" }, diff --git a/internal/config/k9s.go b/internal/config/k9s.go index 5cc2ec411b..1dd19d23a9 100644 --- a/internal/config/k9s.go +++ b/internal/config/k9s.go @@ -23,6 +23,7 @@ type K9s struct { LiveViewAutoRefresh bool `json:"liveViewAutoRefresh" yaml:"liveViewAutoRefresh"` ScreenDumpDir string `json:"screenDumpDir" yaml:"screenDumpDir,omitempty"` RefreshRate int `json:"refreshRate" yaml:"refreshRate"` + MetricsCacheExpiry int `json:"metricsCacheExpiry" yaml:"metricsCacheExpiry"` MaxConnRetry int `json:"maxConnRetry" yaml:"maxConnRetry"` ReadOnly bool `json:"readOnly" yaml:"readOnly"` NoExitOnCtrlC bool `json:"noExitOnCtrlC" yaml:"noExitOnCtrlC"` @@ -51,16 +52,17 @@ type K9s struct { // NewK9s create a new K9s configuration. func NewK9s(conn client.Connection, ks data.KubeSettings) *K9s { return &K9s{ - RefreshRate: defaultRefreshRate, - MaxConnRetry: defaultMaxConnRetry, - ScreenDumpDir: AppDumpsDir, - Logger: NewLogger(), - Thresholds: NewThreshold(), - ShellPod: NewShellPod(), - ImageScans: NewImageScans(), - dir: data.NewDir(AppContextsDir), - conn: conn, - ks: ks, + RefreshRate: defaultRefreshRate, + MetricsCacheExpiry: defaultMetricsCacheExpiry, + MaxConnRetry: defaultMaxConnRetry, + ScreenDumpDir: AppDumpsDir, + Logger: NewLogger(), + Thresholds: NewThreshold(), + ShellPod: NewShellPod(), + ImageScans: NewImageScans(), + dir: data.NewDir(AppContextsDir), + conn: conn, + ks: ks, } } @@ -98,6 +100,7 @@ func (k *K9s) Merge(k1 *K9s) { k.LiveViewAutoRefresh = k1.LiveViewAutoRefresh k.ScreenDumpDir = k1.ScreenDumpDir k.RefreshRate = k1.RefreshRate + k.MetricsCacheExpiry = k1.MetricsCacheExpiry k.MaxConnRetry = k1.MaxConnRetry k.ReadOnly = k1.ReadOnly k.NoExitOnCtrlC = k1.NoExitOnCtrlC @@ -344,6 +347,9 @@ func (k *K9s) Validate(c client.Connection, ks data.KubeSettings) { if k.RefreshRate <= 0 { k.RefreshRate = defaultRefreshRate } + if k.MetricsCacheExpiry <= 0 { + k.MetricsCacheExpiry = defaultMetricsCacheExpiry + } if k.MaxConnRetry <= 0 { k.MaxConnRetry = defaultMaxConnRetry } diff --git a/internal/config/k9s_test.go b/internal/config/k9s_test.go index d74c59c39d..359e38aa1c 100644 --- a/internal/config/k9s_test.go +++ b/internal/config/k9s_test.go @@ -88,6 +88,7 @@ func TestK9sMerge(t *testing.T) { LiveViewAutoRefresh: false, ScreenDumpDir: "", RefreshRate: 0, + MetricsCacheExpiry: 0, MaxConnRetry: 0, ReadOnly: false, NoExitOnCtrlC: false, @@ -102,12 +103,14 @@ func TestK9sMerge(t *testing.T) { k2: &config.K9s{ LiveViewAutoRefresh: true, MaxConnRetry: 100, + MetricsCacheExpiry: 100, ShellPod: config.NewShellPod(), }, ek: &config.K9s{ LiveViewAutoRefresh: true, ScreenDumpDir: "", RefreshRate: 0, + MetricsCacheExpiry: 100, MaxConnRetry: 100, ReadOnly: false, NoExitOnCtrlC: false, diff --git a/internal/config/mock/test_helpers.go b/internal/config/mock/test_helpers.go index c8f954fb77..3347f6083f 100644 --- a/internal/config/mock/test_helpers.go +++ b/internal/config/mock/test_helpers.go @@ -136,6 +136,9 @@ func (m mockConnection) Dial() (kubernetes.Interface, error) { func (m mockConnection) DialLogs() (kubernetes.Interface, error) { return nil, nil } +func (m mockConnection) DialMetrics() *client.MetricsServer { + return nil +} func (m mockConnection) SwitchContext(ctx string) error { return nil } diff --git a/internal/config/testdata/configs/default.yaml b/internal/config/testdata/configs/default.yaml index 937972f09c..0cece8fdcd 100644 --- a/internal/config/testdata/configs/default.yaml +++ b/internal/config/testdata/configs/default.yaml @@ -2,6 +2,7 @@ k9s: liveViewAutoRefresh: false screenDumpDir: /tmp/k9s-test/screen-dumps refreshRate: 2 + metricsCacheExpiry: 60 maxConnRetry: 5 readOnly: false noExitOnCtrlC: false diff --git a/internal/config/testdata/configs/expected.yaml b/internal/config/testdata/configs/expected.yaml index e7c2091127..0a5a6eee2d 100644 --- a/internal/config/testdata/configs/expected.yaml +++ b/internal/config/testdata/configs/expected.yaml @@ -2,6 +2,7 @@ k9s: liveViewAutoRefresh: true screenDumpDir: /tmp/k9s-test/screen-dumps refreshRate: 100 + metricsCacheExpiry: 100 maxConnRetry: 5 readOnly: true noExitOnCtrlC: false diff --git a/internal/config/testdata/configs/k9s.yaml b/internal/config/testdata/configs/k9s.yaml index c33ed9f613..173e303f7e 100644 --- a/internal/config/testdata/configs/k9s.yaml +++ b/internal/config/testdata/configs/k9s.yaml @@ -2,6 +2,7 @@ k9s: liveViewAutoRefresh: true screenDumpDir: /tmp/k9s-test/screen-dumps refreshRate: 2 + metricsCacheExpiry: 60 maxConnRetry: 5 readOnly: false noExitOnCtrlC: false diff --git a/internal/config/types.go b/internal/config/types.go index 6938e55585..877130153d 100644 --- a/internal/config/types.go +++ b/internal/config/types.go @@ -4,8 +4,9 @@ package config const ( - defaultRefreshRate = 2 - defaultMaxConnRetry = 5 + defaultRefreshRate = 2 + defaultMetricsCacheExpiry = 60 + defaultMaxConnRetry = 5 ) // UI tracks ui specific configs. diff --git a/internal/dao/container.go b/internal/dao/container.go index 14b2034409..067fcad102 100644 --- a/internal/dao/container.go +++ b/internal/dao/container.go @@ -46,7 +46,7 @@ func (c *Container) List(ctx context.Context, _ string) ([]runtime.Object, error err error ) if withMx, ok := ctx.Value(internal.KeyWithMetrics).(bool); ok && withMx { - cmx, _ = client.DialMetrics(c.Client()).FetchContainersMetrics(ctx, fqn) + cmx, _ = c.Client().DialMetrics().FetchContainersMetrics(ctx, fqn) } po, err := c.fetchPod(fqn) diff --git a/internal/dao/container_test.go b/internal/dao/container_test.go index f53c82959d..03417ae116 100644 --- a/internal/dao/container_test.go +++ b/internal/dao/container_test.go @@ -48,6 +48,7 @@ func makeConn() *conn { func (c *conn) Config() *client.Config { return nil } func (c *conn) Dial() (kubernetes.Interface, error) { return nil, nil } func (c *conn) DialLogs() (kubernetes.Interface, error) { return nil, nil } +func (c *conn) DialMetrics() *client.MetricsServer { return nil } func (c *conn) ConnectionOK() bool { return true } func (c *conn) SwitchContext(ctx string) error { return nil } func (c *conn) CachedDiscovery() (*disk.CachedDiscoveryClient, error) { return nil, nil } diff --git a/internal/dao/node.go b/internal/dao/node.go index d348255b5b..36b30d1ae0 100644 --- a/internal/dao/node.go +++ b/internal/dao/node.go @@ -141,7 +141,7 @@ func (n *Node) Get(ctx context.Context, path string) (runtime.Object, error) { var nmx *mv1beta1.NodeMetrics if withMx, ok := ctx.Value(internal.KeyWithMetrics).(bool); ok && withMx { - nmx, _ = client.DialMetrics(n.Client()).FetchNodeMetrics(ctx, path) + nmx, _ = n.Client().DialMetrics().FetchNodeMetrics(ctx, path) } return &render.NodeWithMetrics{Raw: raw, MX: nmx}, nil @@ -156,7 +156,7 @@ func (n *Node) List(ctx context.Context, ns string) ([]runtime.Object, error) { var nmx client.NodesMetricsMap if withMx, ok := ctx.Value(internal.KeyWithMetrics).(bool); withMx || !ok { - nmx, _ = client.DialMetrics(n.Client()).FetchNodesMetricsMap(ctx) + nmx, _ = n.Client().DialMetrics().FetchNodesMetricsMap(ctx) } shouldCountPods, _ := ctx.Value(internal.KeyPodCounting).(bool) diff --git a/internal/dao/pod.go b/internal/dao/pod.go index f4ee453dbe..149a397a48 100644 --- a/internal/dao/pod.go +++ b/internal/dao/pod.go @@ -61,7 +61,7 @@ func (p *Pod) Get(ctx context.Context, path string) (runtime.Object, error) { var pmx *mv1beta1.PodMetrics if withMx, ok := ctx.Value(internal.KeyWithMetrics).(bool); ok && withMx { - pmx, _ = client.DialMetrics(p.Client()).FetchPodMetrics(ctx, path) + pmx, _ = p.Client().DialMetrics().FetchPodMetrics(ctx, path) } return &render.PodWithMetrics{Raw: u, MX: pmx}, nil @@ -86,7 +86,7 @@ func (p *Pod) List(ctx context.Context, ns string) ([]runtime.Object, error) { var pmx client.PodsMetricsMap if withMx, ok := ctx.Value(internal.KeyWithMetrics).(bool); ok && withMx { - pmx, _ = client.DialMetrics(p.Client()).FetchPodsMetricsMap(ctx, ns) + pmx, _ = p.Client().DialMetrics().FetchPodsMetricsMap(ctx, ns) } sel, _ := ctx.Value(internal.KeyFields).(string) fsel, err := labels.ConvertSelectorToLabelsMap(sel) diff --git a/internal/model/cluster.go b/internal/model/cluster.go index e598217e92..65d99c6751 100644 --- a/internal/model/cluster.go +++ b/internal/model/cluster.go @@ -50,7 +50,7 @@ type ( func NewCluster(f dao.Factory) *Cluster { return &Cluster{ factory: f, - mx: client.DialMetrics(f.Client()), + mx: f.Client().DialMetrics(), cache: cache.NewLRUExpireCache(clusterCacheSize), } } diff --git a/internal/model/pulse_health.go b/internal/model/pulse_health.go index d2be69c9bb..cf198b09af 100644 --- a/internal/model/pulse_health.go +++ b/internal/model/pulse_health.go @@ -63,7 +63,7 @@ func (h *PulseHealth) List(ctx context.Context, ns string) ([]runtime.Object, er } func (h *PulseHealth) checkMetrics(ctx context.Context) (health.Checks, error) { - dial := client.DialMetrics(h.factory.Client()) + dial := h.factory.Client().DialMetrics() nn, err := dao.FetchNodes(ctx, h.factory, "") if err != nil {