Skip to content

Commit 539dafd

Browse files
authored
Merge branch 'refactor/unified-federated-type' into fix-e2e-tests
2 parents 6d0a0a6 + 1717536 commit 539dafd

File tree

23 files changed

+432
-55
lines changed

23 files changed

+432
-55
lines changed

go.mod

-2
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,8 @@ require (
1212
github.com/onsi/gomega v1.27.8
1313
github.com/pkg/errors v0.9.1
1414
github.com/prometheus/client_golang v1.14.0
15-
github.com/sirupsen/logrus v1.9.3
1615
github.com/spf13/pflag v1.0.5
1716
github.com/stretchr/testify v1.8.4
18-
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2
1917
golang.org/x/sync v0.2.0
2018
golang.org/x/time v0.3.0
2119
k8s.io/api v0.26.6

go.sum

-6
Original file line number | Diff line number | Diff line change
@@ -270,8 +270,6 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
270270
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
271271
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
272272
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
273-
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
274-
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
275273
github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g=
276274
github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA=
277275
github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
@@ -285,7 +283,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
285283
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
286284
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
287285
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
288-
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
289286
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
290287
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
291288
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -316,8 +313,6 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
316313
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
317314
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
318315
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
319-
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI=
320-
golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
321316
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
322317
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
323318
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -433,7 +428,6 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
433428
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
434429
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
435430
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
436-
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
437431
golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
438432
golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
439433
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=

pkg/apis/core/v1alpha1/types_schedulingprofile.go

+12
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,11 @@ const (
108108
WebhookPlugin PluginType = "Webhook"
109109
)
110110

111+
const (
112+
// DefaultSchedulerName is the name of the default scheduler. ProfileName
// returns it when called on a nil SchedulingProfile.
113+
DefaultSchedulerName = "default-scheduler"
114+
)
115+
111116
// PluginConfig specifies arguments that should be passed to a plugin at the time of initialization.
112117
// A plugin that is invoked at multiple extension points is initialized once. Args can have arbitrary structure.
113118
// It is up to the plugin to process these Args.
@@ -118,3 +123,10 @@ type PluginConfig struct {
118123
// +optional
119124
Args apiextensionsv1.JSON `json:"args"`
120125
}
126+
127+
func (s *SchedulingProfile) ProfileName() string {
128+
if s == nil {
129+
return DefaultSchedulerName
130+
}
131+
return s.Name
132+
}

pkg/controllers/federatedcluster/clusterjoin.go

+18
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ import (
3939

4040
fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1"
4141
"github.com/kubewharf/kubeadmiral/pkg/controllers/common"
42+
"github.com/kubewharf/kubeadmiral/pkg/stats"
4243
"github.com/kubewharf/kubeadmiral/pkg/util/logging"
4344
)
4445

@@ -72,6 +73,11 @@ const (
7273
EventReasonClusterUnjoinable = "ClusterUnjoinable"
7374
)
7475

76+
// Values used for the "result" tag of the cluster_joined_duration metric
// emitted by handleNotJoinedCluster.
const (
77+
joinSuccess = "success"
78+
joinFailure = "failed"
79+
)
80+
7581
// Processes a cluster that has not joined.
7682
// If either condition or joinPerformed returned is non-nil, the caller should merge them into
7783
// the cluster status and update the cluster.
@@ -93,6 +99,10 @@ func (c *FederatedClusterController) handleNotJoinedCluster(
9399
time.Since(joinedCondition.LastTransitionTime.Time) > c.clusterJoinTimeout {
94100
// Join timed out
95101
logger.Error(nil, "Cluster join timed out")
102+
c.metrics.Duration("cluster_joined_duration", cluster.CreationTimestamp.Time,
103+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
104+
stats.Tag{Name: "result", Value: joinFailure},
105+
stats.Tag{Name: "reason", Value: EventReasonJoinClusterTimeoutExceeded})
96106
c.eventRecorder.Eventf(
97107
cluster,
98108
corev1.EventTypeWarning,
@@ -152,6 +162,10 @@ func (c *FederatedClusterController) handleNotJoinedCluster(
152162
// Namespace exists and is not created by us - the cluster is managed by another control plane.
153163
msg := "Cluster is unjoinable (check if cluster is already joined to another federation)"
154164
logger.Error(nil, msg, "UID", memberFedNamespace.Annotations[FederatedClusterUID], "clusterUID", string(cluster.UID))
165+
c.metrics.Duration("cluster_joined_duration", cluster.CreationTimestamp.Time,
166+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
167+
stats.Tag{Name: "result", Value: joinFailure},
168+
stats.Tag{Name: "reason", Value: EventReasonClusterUnjoinable})
155169
c.eventRecorder.Eventf(
156170
cluster,
157171
corev1.EventTypeWarning,
@@ -223,6 +237,10 @@ func (c *FederatedClusterController) handleNotJoinedCluster(
223237
// 5. Cluster is joined, update condition
224238

225239
logger.V(2).Info("Cluster joined successfully")
240+
c.metrics.Duration("cluster_joined_duration", cluster.CreationTimestamp.Time,
241+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
242+
stats.Tag{Name: "result", Value: joinSuccess},
243+
stats.Tag{Name: "reason", Value: EventReasonJoinClusterSuccess})
226244
c.eventRecorder.Eventf(
227245
cluster,
228246
corev1.EventTypeNormal,

pkg/controllers/federatedcluster/clusterstatus.go

+60
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ import (
3838
"k8s.io/klog/v2"
3939

4040
fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1"
41+
"github.com/kubewharf/kubeadmiral/pkg/stats"
42+
clusterutil "github.com/kubewharf/kubeadmiral/pkg/util/cluster"
4143
)
4244

4345
const (
@@ -63,6 +65,10 @@ func (c *FederatedClusterController) collectIndividualClusterStatus(
6365
ctx context.Context,
6466
cluster *fedcorev1a1.FederatedCluster,
6567
) (retryAfter time.Duration, err error) {
68+
startTime := time.Now()
69+
defer func() {
70+
c.recordClusterStatus(cluster, startTime)
71+
}()
6672
logger := klog.FromContext(ctx)
6773

6874
clusterKubeClient, exists := c.federatedInformerManager.GetClusterKubeClient(cluster.Name)
@@ -308,3 +314,57 @@ func shouldCollectClusterStatus(cluster *fedcorev1a1.FederatedCluster, collectIn
308314
nextCollectTime := readyCond.LastProbeTime.Time.Add(collectInterval)
309315
return time.Now().After(nextCollectTime)
310316
}
317+
318+
func (c *FederatedClusterController) recordClusterStatus(cluster *fedcorev1a1.FederatedCluster, startTime time.Time) {
319+
if clusterutil.IsClusterReady(&cluster.Status) {
320+
c.metrics.Store("cluster_ready_state",
321+
1,
322+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
323+
} else {
324+
c.metrics.Store("cluster_ready_state",
325+
0,
326+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
327+
}
328+
if clusterutil.IsClusterOffline(&cluster.Status) {
329+
c.metrics.Store("cluster_offline_state",
330+
1,
331+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
332+
} else {
333+
c.metrics.Store("cluster_offline_state",
334+
0,
335+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
336+
}
337+
if clusterutil.IsClusterJoined(&cluster.Status) {
338+
c.metrics.Store("cluster_joined_state",
339+
1,
340+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
341+
} else {
342+
c.metrics.Store("cluster_joined_state",
343+
0,
344+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
345+
}
346+
c.metrics.Duration("cluster_sync_status_duration",
347+
startTime,
348+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
349+
if cluster.Status.Resources.Allocatable != nil {
350+
c.metrics.Store("cluster_memory_allocatable_bytes",
351+
cluster.Status.Resources.Allocatable.Memory().AsApproximateFloat64(),
352+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
353+
c.metrics.Store("cluster_cpu_allocatable_number",
354+
cluster.Status.Resources.Allocatable.Cpu().AsApproximateFloat64(),
355+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
356+
}
357+
if cluster.Status.Resources.Available != nil {
358+
c.metrics.Store("cluster_memory_available_bytes",
359+
cluster.Status.Resources.Available.Memory().AsApproximateFloat64(),
360+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
361+
c.metrics.Store("cluster_cpu_available_number",
362+
cluster.Status.Resources.Available.Cpu().AsApproximateFloat64(),
363+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
364+
}
365+
if cluster.Status.Resources.SchedulableNodes != nil {
366+
c.metrics.Store("cluster_schedulable_nodes_total",
367+
*cluster.Status.Resources.SchedulableNodes,
368+
stats.Tag{Name: "cluster_name", Value: cluster.Name})
369+
}
370+
}

pkg/controllers/federatedcluster/controller.go

+12
Original file line number | Diff line number | Diff line change
@@ -220,6 +220,12 @@ func (c *FederatedClusterController) reconcile(
220220
cluster = cluster.DeepCopy()
221221

222222
if cluster.GetDeletionTimestamp() != nil {
223+
c.metrics.Store("cluster_deletion_state", 1,
224+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
225+
stats.Tag{Name: "status", Value: "deleting"})
226+
c.metrics.Store("cluster_deletion_state", 0,
227+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
228+
stats.Tag{Name: "status", Value: "deleted"})
223229
logger.V(2).Info("Handle terminating cluster")
224230
if err := c.handleTerminatingCluster(ctx, cluster); err != nil {
225231
if apierrors.IsConflict(err) {
@@ -446,6 +452,12 @@ func (c *FederatedClusterController) handleTerminatingCluster(
446452
return fmt.Errorf("failed to update cluster for finalizer removal: %w", err)
447453
}
448454

455+
c.metrics.Store("cluster_deletion_state", 0,
456+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
457+
stats.Tag{Name: "status", Value: "deleting"})
458+
c.metrics.Store("cluster_deletion_state", 1,
459+
stats.Tag{Name: "cluster_name", Value: cluster.Name},
460+
stats.Tag{Name: "status", Value: "deleted"})
449461
return nil
450462
}
451463

pkg/controllers/follower/controller.go

+11
Original file line number | Diff line number | Diff line change
@@ -290,6 +290,12 @@ func (c *Controller) reconcileLeader(
290290
}
291291
return worker.StatusError
292292
}
293+
294+
c.metrics.Store("followers_total", len(desiredFollowers),
295+
stats.Tag{Name: "namespace", Value: key.namespace},
296+
stats.Tag{Name: "name", Value: key.sourceName},
297+
stats.Tag{Name: "group", Value: key.sourceGK.Group},
298+
stats.Tag{Name: "kind", Value: key.sourceGK.Kind})
293299
}
294300

295301
c.cacheObservedFromLeaders.update(leader, desiredFollowers)
@@ -458,6 +464,11 @@ func (c *Controller) reconcileFollower(
458464
)
459465
return worker.StatusError
460466
} else if updated {
467+
c.metrics.Store("leaders_total", len(desiredLeaders),
468+
stats.Tag{Name: "namespace", Value: key.namespace},
469+
stats.Tag{Name: "name", Value: key.sourceName},
470+
stats.Tag{Name: "group", Value: key.sourceGK.Group},
471+
stats.Tag{Name: "kind", Value: key.sourceGK.Kind})
461472
keyedLogger.V(1).Info("Updated follower to sync with leaders")
462473
}
463474

pkg/controllers/nsautoprop/controller.go

+16
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,8 @@ func (c *Controller) reconcile(ctx context.Context, qualifiedName common.Qualifi
254254
return worker.StatusAllOK
255255
}
256256

257+
c.recordNamespacePropagationFailedMetric(fedNamespace)
258+
257259
needsUpdate := false
258260

259261
// Set placement to propagate to all clusters
@@ -395,3 +397,17 @@ func (c *Controller) HasSynced() bool {
395397
c.namespaceInformer.Informer().HasSynced() &&
396398
c.informerManager.HasSynced()
397399
}
400+
401+
func (c *Controller) recordNamespacePropagationFailedMetric(fedNamespace *fedcorev1a1.ClusterFederatedObject) {
402+
errorClusterCount := 0
403+
404+
for _, clusterStatus := range fedNamespace.Status.Clusters {
405+
if clusterStatus.Status != fedcorev1a1.ClusterPropagationOK && clusterStatus.Status != fedcorev1a1.WaitingForRemoval {
406+
errorClusterCount++
407+
}
408+
}
409+
410+
if errorClusterCount != 0 {
411+
c.metrics.Store("namespace_propagate_failed_total", errorClusterCount, stats.Tag{Name: "namespace", Value: fedNamespace.Name})
412+
}
413+
}

pkg/controllers/policyrc/controller.go

+7-2
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ func NewPolicyRCController(
9595
func(ctx context.Context, qualifiedName common.QualifiedName) worker.Result {
9696
return c.reconcilePersist(
9797
ctx,
98-
"propagation-policy",
98+
"propagation_policy_reference_count",
9999
qualifiedName,
100100
c.propagationPolicyInformer.Informer().GetStore(),
101101
c.clusterPropagationPolicyInformer.Informer().GetStore(),
@@ -112,7 +112,7 @@ func NewPolicyRCController(
112112
func(ctx context.Context, qualifiedName common.QualifiedName) worker.Result {
113113
return c.reconcilePersist(
114114
ctx,
115-
"override-policy",
115+
"override_policy_reference_count",
116116
qualifiedName,
117117
c.overridePolicyInformer.Informer().GetStore(),
118118
c.clusterOverridePolicyInformer.Informer().GetStore(),
@@ -317,5 +317,10 @@ func (c *Controller) reconcilePersist(
317317
}
318318
}
319319

320+
c.metrics.Store(metricName, newRefCount, []stats.Tag{
321+
{Name: "name", Value: qualifiedName.Name},
322+
{Name: "namespace", Value: qualifiedName.Namespace},
323+
}...)
324+
320325
return worker.StatusAllOK
321326
}

pkg/controllers/scheduler/constants.go

+11
Original file line number | Diff line number | Diff line change
@@ -49,3 +49,14 @@ const (
4949
SchedulingTriggersAnnotation = common.DefaultPrefix + "scheduling-triggers"
5050
SchedulingDeferredReasonsAnnotation = common.DefaultPrefix + "scheduling-deferred-reasons"
5151
)
52+
53+
// Change-event names, one per kind of object whose change they describe.
const (
54+
// FedObjChanged is the event when a FederatedObject/ClusterFederatedObject changes.
55+
FedObjChanged = "FedObjChanged"
56+
// PolicyChanged is the event when a PropagationPolicy/ClusterPropagationPolicy changes.
57+
PolicyChanged = "PolicyChanged"
58+
// ClusterChanged is the event when a cluster changes.
59+
ClusterChanged = "ClusterChanged"
60+
// FTCChanged is the event when an FTC changes.
// NOTE(review): FTC presumably stands for FederatedTypeConfig — confirm.
61+
FTCChanged = "FTCChanged"
62+
)

pkg/controllers/scheduler/core/generic_scheduler_test.go

+3-1
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ import (
2929
fedcorev1a1 "github.com/kubewharf/kubeadmiral/pkg/apis/core/v1alpha1"
3030
"github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework"
3131
"github.com/kubewharf/kubeadmiral/pkg/controllers/scheduler/framework/runtime"
32+
"github.com/kubewharf/kubeadmiral/pkg/stats"
3233
)
3334

3435
type naiveReplicasPlugin struct{}
@@ -57,7 +58,8 @@ func getFramework() framework.Framework {
5758
DefaultRegistry := runtime.Registry{
5859
"NaiveReplicas": newNaiveReplicas,
5960
}
60-
f, _ := runtime.NewFramework(DefaultRegistry, nil, &fedcore.EnabledPlugins{ReplicasPlugins: []string{"NaiveReplicas"}})
61+
metrics := stats.NewMock("test", "kubeadmiral_controller_manager", false)
62+
f, _ := runtime.NewFramework(DefaultRegistry, nil, &fedcore.EnabledPlugins{ReplicasPlugins: []string{"NaiveReplicas"}}, "", metrics)
6163
return f
6264
}
6365

0 commit comments

Comments
 (0)