feat(topology): add member cluster workload detail drawer to topology graph#531
feat(topology): add member cluster workload detail drawer to topology graph#531SunsetB612 wants to merge 1 commit into
Conversation
|
[APPROVALNOTIFIER] This PR is NOT APPROVED This pull-request has been approved by: The full list of commands accepted by this bot can be found here. DetailsNeeds approval from an approver in each of these files:Approvers can indicate their approval by writing |
There was a problem hiding this comment.
Code Review
This pull request introduces a resource topology visualization feature to the Karmada dashboard, allowing users to trace propagation chains from control-plane workloads to member cluster resources and Pods. It includes a new backend API with custom informer indexers and a frontend graph view using React Flow. Feedback focuses on improving context propagation for request cancellation, optimizing Pod lookups with label selectors, extending health status support to more workload types, and enhancing UI robustness through better error handling and the restoration of terminal resize functionality.
| return | ||
| } | ||
|
|
||
| result, err := topology.GetResourceTopology(k8sClient, namespace, name, kind) |
There was a problem hiding this comment.
The request context should be passed to GetResourceTopology to ensure that any downstream operations (like API calls to member clusters) can be cancelled if the client disconnects.
| result, err := topology.GetResourceTopology(k8sClient, namespace, name, kind) | |
| result, err := topology.GetResourceTopology(c.Request.Context(), k8sClient, namespace, name, kind) |
| func GetResourceTopology( | ||
| k8sClient kubeclient.Interface, | ||
| namespace, name, kind string) (*TopologyResponse, error) { | ||
| return traceChain(context.TODO(), k8sClient, namespace, name, kind) |
There was a problem hiding this comment.
Avoid using context.TODO() in a function that is part of a request handling chain. The function should accept a context.Context parameter and pass it down to traceChain.
| func GetResourceTopology( | |
| k8sClient kubeclient.Interface, | |
| namespace, name, kind string) (*TopologyResponse, error) { | |
| return traceChain(context.TODO(), k8sClient, namespace, name, kind) | |
| func GetResourceTopology( | |
| ctx context.Context, | |
| k8sClient kubeclient.Interface, | |
| namespace, name, kind string) (*TopologyResponse, error) { | |
| return traceChain(ctx, k8sClient, namespace, name, kind) | |
| } |
| return nil, err | ||
| } | ||
| ownerUIDs := getPodDirectOwnerUIDs(ctx, memberClient, namespace, kind, workloadUID) | ||
| podList, err := memberClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{}) |
There was a problem hiding this comment.
Listing all pods in a namespace and filtering them in-memory is inefficient, especially in large clusters with many resources. It is better to use a label selector in the ListOptions to fetch only the relevant pods. Since the workload object is already fetched in previous steps, its selector should be used here.
| switch kind { | ||
| case "Deployment": | ||
| deploy, err := memberClient.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) | ||
| if err != nil { | ||
| klog.V(4).InfoS("Failed to get member deployment", "cluster", clusterName, "err", err) | ||
| return NodeStatusAbnormal | ||
| } | ||
| if deploy.Spec.Replicas != nil && deploy.Status.ReadyReplicas == *deploy.Spec.Replicas { | ||
| return NodeStatusHealthy | ||
| } | ||
| return NodeStatusProgressing | ||
| default: | ||
| return NodeStatusHealthy | ||
| } | ||
| } |
There was a problem hiding this comment.
The getMemberWorkloadStatus function currently only handles Deployment. It should be extended to support other workload types like StatefulSet and DaemonSet to provide accurate health status in the topology graph.
func getMemberWorkloadStatus(ctx context.Context, clusterName, namespace, name, kind string) NodeStatus {
memberClient := client.InClusterClientForMemberCluster(clusterName)
if memberClient == nil {
return NodeStatusAbnormal
}
switch kind {
case "Deployment":
deploy, err := memberClient.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
klog.V(4).InfoS("Failed to get member deployment", "cluster", clusterName, "err", err)
return NodeStatusAbnormal
}
if deploy.Spec.Replicas != nil && deploy.Status.ReadyReplicas == *deploy.Spec.Replicas {
return NodeStatusHealthy
}
return NodeStatusProgressing
case "StatefulSet":
sts, err := memberClient.AppsV1().StatefulSets(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
klog.V(4).InfoS("Failed to get member statefulset", "cluster", clusterName, "err", err)
return NodeStatusAbnormal
}
if sts.Spec.Replicas != nil && sts.Status.ReadyReplicas == *sts.Spec.Replicas {
return NodeStatusHealthy
}
return NodeStatusProgressing
case "DaemonSet":
ds, err := memberClient.AppsV1().DaemonSets(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
klog.V(4).InfoS("Failed to get member daemonset", "cluster", clusterName, "err", err)
return NodeStatusAbnormal
}
if ds.Status.NumberReady == ds.Status.DesiredNumberScheduled {
return NodeStatusHealthy
}
return NodeStatusProgressing
default:
return NodeStatusHealthy
}
}| void Promise.all([ | ||
| GetMemberClusterWorkloadDetail({ memberClusterName, namespace, name, kind }), | ||
| GetMemberClusterWorkloadEvents({ memberClusterName, namespace, name, kind }), | ||
| GetMemberClusterPods({ memberClusterName, namespace }), | ||
| ]).then(([detailResp, eventsRet, podsRet]) => { | ||
| const workloadDetail = (detailResp?.data ?? {}) as WorkloadDetail; | ||
| setViewDetail(workloadDetail); | ||
| setViewEvents(eventsRet?.data?.events || []); | ||
| const selector = workloadDetail.selector; | ||
| setViewPods( | ||
| selector | ||
| ? (podsRet?.data?.pods || []).filter((pod) => | ||
| Object.entries(selector).every( | ||
| ([k, v]) => pod.objectMeta.labels?.[k] === v, | ||
| ), | ||
| ) | ||
| : [], | ||
| ); | ||
| }).finally(() => setViewLoading(false)); |
There was a problem hiding this comment.
Using Promise.all without individual error handling means that if any of the requests (like fetching events) fails, the entire data loading process will fail, and the user won't see the workload details. Consider using Promise.allSettled or handling errors for each request individually.
Promise.allSettled([
GetMemberClusterWorkloadDetail({ memberClusterName, namespace, name, kind }),
GetMemberClusterWorkloadEvents({ memberClusterName, namespace, name, kind }),
GetMemberClusterPods({ memberClusterName, namespace }),
]).then(([detailRes, eventsRes, podsRes]) => {
if (detailRes.status === 'fulfilled') {
const workloadDetail = (detailRes.value?.data ?? {}) as WorkloadDetail;
setViewDetail(workloadDetail);
const selector = workloadDetail.selector;
if (podsRes.status === 'fulfilled') {
setViewPods(
selector
? (podsRes.value?.data?.pods || []).filter((pod) =>
Object.entries(selector).every(([k, v]) => pod.objectMeta.labels?.[k] === v)
)
: []
);
}
}
if (eventsRes.status === 'fulfilled') {
setViewEvents(eventsRes.value?.data?.events || []);
}
}).finally(() => setViewLoading(false));
| else if (frame.Op === 'toast' && frame.Data) void messageApi.info(frame.Data); | ||
| } catch { /* ignore */ } | ||
| }; | ||
| term.onData((data) => { sock.send(JSON.stringify({ Op: 'stdin', Data: data })); }); |
There was a problem hiding this comment.
The terminal resize listener was removed during refactoring. This is a regression that will cause the terminal to not adjust its dimensions when the drawer or window is resized.
| term.onData((data) => { sock.send(JSON.stringify({ Op: 'stdin', Data: data })); }); | |
| term.onData((data) => { sock.send(JSON.stringify({ Op: 'stdin', Data: data })); }); | |
| term.onResize(({ cols, rows }) => { sock.send(JSON.stringify({ Op: 'resize', Cols: cols, Rows: rows })); }); |
| <Select | ||
| placeholder="Resource Name" | ||
| className="min-w-[220px]" | ||
| showSearch | ||
| allowClear | ||
| value={name || undefined} | ||
| onChange={(v) => setName(v || '')} | ||
| options={[]} | ||
| mode={undefined} | ||
| searchValue={name} | ||
| onSearch={(v) => setName(v)} | ||
| notFoundContent={null} | ||
| /> |
There was a problem hiding this comment.
Using a Select component with empty options for the resource name is confusing for users. Since the user is expected to type the name, an Input component or an AutoComplete component would provide a better user experience.
| <Select | |
| placeholder="Resource Name" | |
| className="min-w-[220px]" | |
| showSearch | |
| allowClear | |
| value={name || undefined} | |
| onChange={(v) => setName(v || '')} | |
| options={[]} | |
| mode={undefined} | |
| searchValue={name} | |
| onSearch={(v) => setName(v)} | |
| notFoundContent={null} | |
| /> | |
| <Input | |
| placeholder="Resource Name" | |
| className="min-w-[220px]" | |
| allowClear | |
| value={name} | |
| onChange={(e) => setName(e.target.value)} | |
| /> |
d96f2d1 to
fec8cce
Compare
fec8cce to
40fa6ae
Compare
9f8bf0c to
54741c4
Compare
016e541 to
e6b7517
Compare
|
@SunsetB612 sorry for that there is still some conflict ~ I want to review this PR, feel free to resovle the conflict |
Sorry about that, I’ll resolve the conflicts tonight. Thanks for reviewing the PR~ |
… graph Signed-off-by: SunsetB612 <10235101575@stu.ecnu.edu.cn>
e6b7517 to
d456a22
Compare
@warjiang the conflicts have been resolved now. thanks again for reviewing the PR ❤️ ~ |
|
/assign |
No description provided.