Skip to content

Commit dab53fc

Browse files
committed
fix: ensure kubevirt migration uses current migration state
Add MigrationUID validation to prevent stale state usage during consecutive VM migrations (e.g., A→B→A). Changes: 1. Add MigrationUID check: Only use vmi.Status.MigrationState if MigrationUID matches current vmiMigration.UID 2. Remove early return on Completed: Allow MigrationSucceeded/Failed phases to execute Reset logic 3. Add unit tests covering UID validation and migration scenarios The root cause was that vmi.Status.MigrationState could contain stale info from a previous migration, causing incorrect node detection and skipping SetLogicalSwitchPortMigrateOptions. Fixes: #6220 Signed-off-by: zbb88888 <[email protected]>
1 parent 8f63d67 commit dab53fc

File tree

2 files changed

+267
-6
lines changed

2 files changed

+267
-6
lines changed

pkg/controller/kubevirt.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
110110
return nil
111111
}
112112

113-
if vmiMigration.Status.MigrationState.Completed {
114-
klog.V(3).Infof("VirtualMachineInstanceMigration %s migration state is completed, skipping", key)
115-
return nil
116-
}
113+
// NOTE: Remove the early return on Completed check here because:
114+
// 1. MigrationSucceeded/MigrationFailed phases need to execute Reset logic
115+
// 2. The Completed flag may be set before we process the final phase
116+
// 3. This was causing cleanup to be skipped in some timing scenarios
117117

118118
vmi, err := c.config.KubevirtClient.VirtualMachineInstance(namespace).Get(context.TODO(), vmiMigration.Spec.VMIName, metav1.GetOptions{})
119119
if err != nil {
@@ -122,8 +122,10 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
122122
}
123123

124124
// use VirtualMachineInstance's MigrationState because VirtualMachineInstanceMigration's MigrationState is not updated until migration finished
125+
// IMPORTANT: Only use vmi.Status.MigrationState if its MigrationUID matches the current vmiMigration.UID
126+
// Otherwise we may be using stale state from a previous migration, which causes incorrect node info
125127
var srcNodeName, targetNodeName string
126-
if vmi.Status.MigrationState != nil {
128+
if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == vmiMigration.UID {
127129
klog.Infof("current vmiMigration %s status %s, target Node %s, source Node %s, target Pod %s, source Pod %s", key,
128130
vmiMigration.Status.Phase,
129131
vmi.Status.MigrationState.TargetNode,
@@ -133,7 +135,19 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
133135
srcNodeName = vmi.Status.MigrationState.SourceNode
134136
targetNodeName = vmi.Status.MigrationState.TargetNode
135137
} else {
136-
klog.Infof("current vmiMigration %s status %s, vmi MigrationState is nil", key, vmiMigration.Status.Phase)
138+
if vmi.Status.MigrationState != nil {
139+
klog.Infof("current vmiMigration %s status %s, vmi MigrationState is stale (MigrationUID mismatch: %s != %s)",
140+
key, vmiMigration.Status.Phase, vmi.Status.MigrationState.MigrationUID, vmiMigration.UID)
141+
} else {
142+
klog.Infof("current vmiMigration %s status %s, vmi MigrationState is nil", key, vmiMigration.Status.Phase)
143+
}
144+
// If we're at a final state and the vmi migration state is stale or nil, we can't proceed
145+
// since we don't have the correct source and target nodes for resetting the migrate options
146+
if vmiMigration.IsFinal() {
147+
klog.V(3).Infof("VirtualMachineInstanceMigration %s is in final phase %s but VMI migration state is stale or nil, skipping",
148+
key, vmiMigration.Status.Phase)
149+
return nil
150+
}
137151
}
138152

139153
portName := ovs.PodNameToPortName(vmiMigration.Spec.VMIName, vmiMigration.Namespace, util.OvnProvider)

pkg/controller/kubevirt_test.go

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
package controller
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"k8s.io/apimachinery/pkg/types"
8+
kubevirtv1 "kubevirt.io/api/core/v1"
9+
)
10+
11+
func TestMigrationUIDValidation(t *testing.T) {
12+
// Helper function to simulate the MigrationUID check logic in handleAddOrUpdateVMIMigration
13+
isMigrationStateValid := func(vmiMigrationState *kubevirtv1.VirtualMachineInstanceMigrationState, vmiMigrationUID types.UID) bool {
14+
if vmiMigrationState == nil {
15+
return false
16+
}
17+
return vmiMigrationState.MigrationUID == vmiMigrationUID
18+
}
19+
20+
tests := []struct {
21+
name string
22+
vmiMigrationState *kubevirtv1.VirtualMachineInstanceMigrationState
23+
vmiMigrationUID types.UID
24+
expected bool
25+
description string
26+
}{
27+
{
28+
name: "nil MigrationState",
29+
vmiMigrationState: nil,
30+
vmiMigrationUID: "migration-uid-1",
31+
expected: false,
32+
description: "Should return false when MigrationState is nil",
33+
},
34+
{
35+
name: "MigrationUID matches",
36+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
37+
MigrationUID: "migration-uid-1",
38+
SourceNode: "node-a",
39+
TargetNode: "node-b",
40+
},
41+
vmiMigrationUID: "migration-uid-1",
42+
expected: true,
43+
description: "Should return true when MigrationUID matches current migration",
44+
},
45+
{
46+
name: "MigrationUID does not match (stale state)",
47+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
48+
MigrationUID: "old-migration-uid",
49+
SourceNode: "node-a",
50+
TargetNode: "node-b",
51+
},
52+
vmiMigrationUID: "new-migration-uid",
53+
expected: false,
54+
description: "Should return false when MigrationUID does not match (stale state from previous migration)",
55+
},
56+
{
57+
name: "Empty MigrationUID in state",
58+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
59+
MigrationUID: "",
60+
SourceNode: "node-a",
61+
TargetNode: "node-b",
62+
},
63+
vmiMigrationUID: "migration-uid-1",
64+
expected: false,
65+
description: "Should return false when MigrationUID in state is empty",
66+
},
67+
}
68+
69+
for _, tt := range tests {
70+
t.Run(tt.name, func(t *testing.T) {
71+
result := isMigrationStateValid(tt.vmiMigrationState, tt.vmiMigrationUID)
72+
assert.Equal(t, tt.expected, result, tt.description)
73+
})
74+
}
75+
}
76+
77+
func TestIsFinalPhase(t *testing.T) {
78+
// Test the logic for handling different migration phases when MigrationState is stale/nil
79+
// This mirrors the behavior of vmiMigration.IsFinal() which returns true for Succeeded/Failed
80+
isFinalPhase := func(phase kubevirtv1.VirtualMachineInstanceMigrationPhase) bool {
81+
return phase == kubevirtv1.MigrationSucceeded || phase == kubevirtv1.MigrationFailed
82+
}
83+
84+
tests := []struct {
85+
name string
86+
phase kubevirtv1.VirtualMachineInstanceMigrationPhase
87+
isFinal bool
88+
description string
89+
}{
90+
{
91+
name: "MigrationScheduling phase",
92+
phase: kubevirtv1.MigrationScheduling,
93+
isFinal: false,
94+
description: "Should NOT skip MigrationScheduling - can still try to get node info from pod",
95+
},
96+
{
97+
name: "MigrationPending phase",
98+
phase: kubevirtv1.MigrationPending,
99+
isFinal: false,
100+
description: "Should NOT skip MigrationPending - waiting for state sync",
101+
},
102+
{
103+
name: "MigrationRunning phase",
104+
phase: kubevirtv1.MigrationRunning,
105+
isFinal: false,
106+
description: "Should NOT skip MigrationRunning - migration in progress",
107+
},
108+
{
109+
name: "MigrationSucceeded phase",
110+
phase: kubevirtv1.MigrationSucceeded,
111+
isFinal: true,
112+
description: "Should skip MigrationSucceeded with stale state - need correct nodes for Reset",
113+
},
114+
{
115+
name: "MigrationFailed phase",
116+
phase: kubevirtv1.MigrationFailed,
117+
isFinal: true,
118+
description: "Should skip MigrationFailed with stale state - need correct nodes for Reset",
119+
},
120+
}
121+
122+
for _, tt := range tests {
123+
t.Run(tt.name, func(t *testing.T) {
124+
result := isFinalPhase(tt.phase)
125+
assert.Equal(t, tt.isFinal, result, tt.description)
126+
})
127+
}
128+
}
129+
130+
func TestMigrationStateScenarios(t *testing.T) {
131+
// Simulate the complete migration scenario to ensure correct behavior
132+
// This tests the combination of MigrationUID validation and IsFinal() logic
133+
type migrationScenario struct {
134+
name string
135+
vmiMigrationState *kubevirtv1.VirtualMachineInstanceMigrationState
136+
currentMigrationUID types.UID
137+
currentPhase kubevirtv1.VirtualMachineInstanceMigrationPhase
138+
expectedSrcNode string
139+
expectedTargetNode string
140+
expectedSkip bool
141+
description string
142+
}
143+
144+
// Helper to check if phase is final (mirrors vmiMigration.IsFinal())
145+
isFinal := func(phase kubevirtv1.VirtualMachineInstanceMigrationPhase) bool {
146+
return phase == kubevirtv1.MigrationSucceeded || phase == kubevirtv1.MigrationFailed
147+
}
148+
149+
scenarios := []migrationScenario{
150+
{
151+
name: "First migration - state matches",
152+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
153+
MigrationUID: "migration-1",
154+
SourceNode: "node-a",
155+
TargetNode: "node-b",
156+
},
157+
currentMigrationUID: "migration-1",
158+
currentPhase: kubevirtv1.MigrationScheduling,
159+
expectedSrcNode: "node-a",
160+
expectedTargetNode: "node-b",
161+
expectedSkip: false,
162+
description: "First migration should use MigrationState",
163+
},
164+
{
165+
name: "Consecutive migration - state is stale (A->B->A scenario)",
166+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
167+
MigrationUID: "migration-1", // Old migration
168+
SourceNode: "node-a",
169+
TargetNode: "node-b",
170+
},
171+
currentMigrationUID: "migration-2", // New migration
172+
currentPhase: kubevirtv1.MigrationScheduling,
173+
expectedSrcNode: "", // Cannot use stale state
174+
expectedTargetNode: "",
175+
expectedSkip: false, // MigrationScheduling should not skip, can try pod-based approach
176+
description: "Consecutive migration should NOT use stale state, but continue to try pod-based node detection",
177+
},
178+
{
179+
name: "Final phase (Succeeded) with stale state",
180+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
181+
MigrationUID: "migration-1",
182+
SourceNode: "node-a",
183+
TargetNode: "node-b",
184+
},
185+
currentMigrationUID: "migration-2",
186+
currentPhase: kubevirtv1.MigrationSucceeded,
187+
expectedSrcNode: "",
188+
expectedTargetNode: "",
189+
expectedSkip: true, // Should skip - can't Reset without correct nodes
190+
description: "MigrationSucceeded with stale state should skip",
191+
},
192+
{
193+
name: "Final phase (Failed) with stale state",
194+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
195+
MigrationUID: "migration-1",
196+
SourceNode: "node-a",
197+
TargetNode: "node-b",
198+
},
199+
currentMigrationUID: "migration-2",
200+
currentPhase: kubevirtv1.MigrationFailed,
201+
expectedSrcNode: "",
202+
expectedTargetNode: "",
203+
expectedSkip: true, // Should skip - can't Reset without correct nodes
204+
description: "MigrationFailed with stale state should skip",
205+
},
206+
{
207+
name: "Non-final phase with nil state",
208+
vmiMigrationState: nil,
209+
currentMigrationUID: "migration-1",
210+
currentPhase: kubevirtv1.MigrationScheduling,
211+
expectedSrcNode: "",
212+
expectedTargetNode: "",
213+
expectedSkip: false, // MigrationScheduling should continue
214+
description: "Nil MigrationState should continue for MigrationScheduling",
215+
},
216+
{
217+
name: "Final phase (Succeeded) with nil state",
218+
vmiMigrationState: nil,
219+
currentMigrationUID: "migration-1",
220+
currentPhase: kubevirtv1.MigrationSucceeded,
221+
expectedSrcNode: "",
222+
expectedTargetNode: "",
223+
expectedSkip: true, // Should skip - can't Reset without correct nodes
224+
description: "Nil MigrationState with MigrationSucceeded should skip",
225+
},
226+
}
227+
228+
for _, sc := range scenarios {
229+
t.Run(sc.name, func(t *testing.T) {
230+
var srcNode, targetNode string
231+
var shouldSkip bool
232+
233+
// Simulate the fixed logic from handleAddOrUpdateVMIMigration
234+
if sc.vmiMigrationState != nil && sc.vmiMigrationState.MigrationUID == sc.currentMigrationUID {
235+
srcNode = sc.vmiMigrationState.SourceNode
236+
targetNode = sc.vmiMigrationState.TargetNode
237+
} else if isFinal(sc.currentPhase) {
238+
// State is stale or nil - use IsFinal() to determine if we should skip
239+
shouldSkip = true
240+
}
241+
242+
assert.Equal(t, sc.expectedSrcNode, srcNode, "Source node mismatch: "+sc.description)
243+
assert.Equal(t, sc.expectedTargetNode, targetNode, "Target node mismatch: "+sc.description)
244+
assert.Equal(t, sc.expectedSkip, shouldSkip, "Skip decision mismatch: "+sc.description)
245+
})
246+
}
247+
}

0 commit comments

Comments
 (0)