Skip to content

Commit 25631d7

Browse files
andrewd-zededarene
authored and committed
eve-k: reduce idle logging, ignore api outage during failover
The 'looking for cont' log near the top of checkAppsStatus and checkAppsFailover runs while apps are running outside of any active management or app migration operation. Avoid logging at a level above Functionf in this state to minimize pillar logs.

Kubevirt operations on the Hypervisor interface which either change the state of a domain or get the state of a domain specific to a host/node should first call scheduledOnMe, which implements the vm/container handlers. Don't call replicaVmiScheduledOnMe or replicaPodScheduledOnMe directly.

During failover, multiple nodes may attempt Info on a kubevirt domain. scheduledOnMe is used to skip operations on nodes where an app is not running. In cases where scheduledOnMe is unable to determine the scheduling state of an app, the caller should determine whether the error is of a type indicating that the k3s API server may be temporarily unavailable. If the API server is temporarily unavailable, declare the app state UNKNOWN, because BROKEN signals to the controller that no further tries will be attempted.

Remove the incorrect error log in getNodeDrainRequester(). It's possible to hit that error log under normal conditions:
- zedkube startup in single-node mode
- zedkube receives edgenodeclusterconfig
- zedkube node delete
- zedkube cluster delete

Signed-off-by: Andrew Durbin <andrewd@zededa.com>
1 parent b7970e6 commit 25631d7

File tree

4 files changed

+8
-6
lines changed

4 files changed

+8
-6
lines changed

pkg/pillar/cmd/zedkube/applogs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ func (z *zedkube) checkAppsStatus() {
160160
// Both Pods will be of the pattern <appname>-<uuid prefix>-<pod uuid suffix>
161161
for _, pod := range pods.Items {
162162
contVMIName := "virt-launcher-" + contName
163-
log.Noticef("checkAppsStatus: pod %s, looking for cont %s", pod.Name, contName)
163+
log.Functionf("checkAppsStatus: pod %s, looking for cont %s", pod.Name, contName)
164164
foundVMIPod := strings.HasPrefix(pod.Name, contVMIName)
165165
if strings.HasPrefix(pod.Name, contName) || foundVMIPod {
166166
// Case 1

pkg/pillar/cmd/zedkube/failover.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func (z *zedkube) checkAppsFailover(wdFunc func()) {
7676

7777
for _, pod := range pods.Items {
7878
contVMIName := "virt-launcher-" + contName
79-
log.Noticef("checkAppsStatus: pod %s, looking for cont %s", pod.Name, contName)
79+
log.Functionf("checkAppsFailover: pod %s, looking for cont %s", pod.Name, contName)
8080
foundVMIPod := strings.HasPrefix(pod.Name, contVMIName)
8181
if strings.HasPrefix(pod.Name, contName) || foundVMIPod {
8282
// Case 1

pkg/pillar/cmd/zedkube/handlenodedrain.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ func getNodeDrainRequester(ctx *zedkube) kubeapi.DrainRequester {
3838
if len(items) == 1 {
3939
return kubeapi.UPDATE
4040
}
41-
log.Errorf("getNodeDrainRequester should never get here")
4241
return kubeapi.NONE
4342
}
4443

pkg/pillar/hypervisor/kubevirt.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ func (ctx kubevirtContext) Delete(domainName string) (result error) {
736736
return logError("delete domain %s failed to get vmlist", domainName)
737737
}
738738

739-
onMe, err := ctx.replicaVmiScheduledOnMe(vmis.name)
739+
onMe, err := ctx.scheduledOnMe(vmis.mtype, vmis.name)
740740
if err != nil {
741741
return err
742742
}
@@ -813,9 +813,12 @@ func (ctx kubevirtContext) Info(domainName string) (int, types.SwState, error) {
813813
return 0, types.HALTED, logError("info domain %s failed to get vmlist", domainName)
814814
}
815815

816-
onMe, err := ctx.replicaVmiScheduledOnMe(vmis.name)
816+
onMe, err := ctx.scheduledOnMe(vmis.mtype, vmis.name)
817817
if err != nil {
818-
return 0, types.BROKEN, logError("Failed to determine scheduled node")
818+
if isK3sUnreachable(err) {
819+
return 0, types.UNKNOWN, nil
820+
}
821+
return 0, types.BROKEN, logError("Failed to determine scheduled node: %s", err)
819822
}
820823
if !onMe {
821824
return 0, types.UNKNOWN, nil

0 commit comments

Comments
 (0)