Skip to content

Commit 3952d11

Browse files
committed
fix: ensure kubevirt migration uses current migration state
Add MigrationUID validation to prevent stale state usage during consecutive VM migrations (e.g., A→B→A). Changes: 1. Add MigrationUID check: Only use vmi.Status.MigrationState if MigrationUID matches current vmiMigration.UID 2. Remove early return on Completed: Allow MigrationSucceeded/Failed phases to execute Reset logic 3. Add unit tests covering UID validation and migration scenarios The root cause was that vmi.Status.MigrationState could contain stale info from a previous migration, causing incorrect node detection and skipping SetLogicalSwitchPortMigrateOptions. Fixes: #6220 Signed-off-by: zbb88888 <jmdxjsjgcxy@gmail.com>
1 parent 8f63d67 commit 3952d11

File tree

2 files changed

+252
-6
lines changed

2 files changed

+252
-6
lines changed

pkg/controller/kubevirt.go

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
110110
return nil
111111
}
112112

113-
if vmiMigration.Status.MigrationState.Completed {
114-
klog.V(3).Infof("VirtualMachineInstanceMigration %s migration state is completed, skipping", key)
115-
return nil
116-
}
113+
// NOTE: Remove the early return on Completed check here because:
114+
// 1. MigrationSucceeded/MigrationFailed phases need to execute Reset logic
115+
// 2. The Completed flag may be set before we process the final phase
116+
// 3. This was causing cleanup to be skipped in some timing scenarios
117117

118118
vmi, err := c.config.KubevirtClient.VirtualMachineInstance(namespace).Get(context.TODO(), vmiMigration.Spec.VMIName, metav1.GetOptions{})
119119
if err != nil {
@@ -122,8 +122,10 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
122122
}
123123

124124
// use VirtualMachineInstance's MigrationState because VirtualMachineInstanceMigration's MigrationState is not updated until migration finished
125+
// IMPORTANT: Only use vmi.Status.MigrationState if its MigrationUID matches the current vmiMigration.UID
126+
// Otherwise we may be using stale state from a previous migration, which causes incorrect node info
125127
var srcNodeName, targetNodeName string
126-
if vmi.Status.MigrationState != nil {
128+
if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.MigrationUID == vmiMigration.UID {
127129
klog.Infof("current vmiMigration %s status %s, target Node %s, source Node %s, target Pod %s, source Pod %s", key,
128130
vmiMigration.Status.Phase,
129131
vmi.Status.MigrationState.TargetNode,
@@ -133,7 +135,19 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
133135
srcNodeName = vmi.Status.MigrationState.SourceNode
134136
targetNodeName = vmi.Status.MigrationState.TargetNode
135137
} else {
136-
klog.Infof("current vmiMigration %s status %s, vmi MigrationState is nil", key, vmiMigration.Status.Phase)
138+
if vmi.Status.MigrationState != nil {
139+
klog.Infof("current vmiMigration %s status %s, vmi MigrationState is stale (MigrationUID mismatch: %s != %s)",
140+
key, vmiMigration.Status.Phase, vmi.Status.MigrationState.MigrationUID, vmiMigration.UID)
141+
} else {
142+
klog.Infof("current vmiMigration %s status %s, vmi MigrationState is nil", key, vmiMigration.Status.Phase)
143+
}
144+
// If we're at an end state and the vmi migration state is stale or nil, we can't proceed
145+
// since we don't have the correct source and target nodes for resetting the migrate options
146+
if vmiMigration.Status.Phase == kubevirtv1.MigrationSucceeded || vmiMigration.Status.Phase == kubevirtv1.MigrationFailed {
147+
klog.V(3).Infof("VirtualMachineInstanceMigration %s is %s but VMI migration state is stale or nil, skipping",
148+
key, vmiMigration.Status.Phase)
149+
return nil
150+
}
137151
}
138152

139153
portName := ovs.PodNameToPortName(vmiMigration.Spec.VMIName, vmiMigration.Namespace, util.OvnProvider)

pkg/controller/kubevirt_test.go

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
package controller
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"k8s.io/apimachinery/pkg/types"
8+
kubevirtv1 "kubevirt.io/api/core/v1"
9+
)
10+
11+
func TestShouldUseMigrationState(t *testing.T) {
12+
// Helper function to simulate the MigrationUID check logic
13+
shouldUseMigrationState := func(vmiMigrationState *kubevirtv1.VirtualMachineInstanceMigrationState, vmiMigrationUID types.UID) bool {
14+
if vmiMigrationState == nil {
15+
return false
16+
}
17+
return vmiMigrationState.MigrationUID == vmiMigrationUID
18+
}
19+
20+
tests := []struct {
21+
name string
22+
vmiMigrationState *kubevirtv1.VirtualMachineInstanceMigrationState
23+
vmiMigrationUID types.UID
24+
expected bool
25+
description string
26+
}{
27+
{
28+
name: "nil MigrationState",
29+
vmiMigrationState: nil,
30+
vmiMigrationUID: "migration-uid-1",
31+
expected: false,
32+
description: "Should return false when MigrationState is nil",
33+
},
34+
{
35+
name: "MigrationUID matches",
36+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
37+
MigrationUID: "migration-uid-1",
38+
SourceNode: "node-a",
39+
TargetNode: "node-b",
40+
},
41+
vmiMigrationUID: "migration-uid-1",
42+
expected: true,
43+
description: "Should return true when MigrationUID matches current migration",
44+
},
45+
{
46+
name: "MigrationUID does not match (stale state)",
47+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
48+
MigrationUID: "old-migration-uid",
49+
SourceNode: "node-a",
50+
TargetNode: "node-b",
51+
},
52+
vmiMigrationUID: "new-migration-uid",
53+
expected: false,
54+
description: "Should return false when MigrationUID does not match (stale state from previous migration)",
55+
},
56+
{
57+
name: "Empty MigrationUID in state",
58+
vmiMigrationState: &kubevirtv1.VirtualMachineInstanceMigrationState{
59+
MigrationUID: "",
60+
SourceNode: "node-a",
61+
TargetNode: "node-b",
62+
},
63+
vmiMigrationUID: "migration-uid-1",
64+
expected: false,
65+
description: "Should return false when MigrationUID in state is empty",
66+
},
67+
}
68+
69+
for _, tt := range tests {
70+
t.Run(tt.name, func(t *testing.T) {
71+
result := shouldUseMigrationState(tt.vmiMigrationState, tt.vmiMigrationUID)
72+
assert.Equal(t, tt.expected, result, tt.description)
73+
})
74+
}
75+
}
76+
77+
func TestMigrationPhaseHandling(t *testing.T) {
78+
// Test the logic for handling different migration phases when MigrationState is stale/nil
79+
shouldSkipForStaleState := func(phase kubevirtv1.VirtualMachineInstanceMigrationPhase) bool {
80+
return phase == kubevirtv1.MigrationSucceeded || phase == kubevirtv1.MigrationFailed
81+
}
82+
83+
tests := []struct {
84+
name string
85+
phase kubevirtv1.VirtualMachineInstanceMigrationPhase
86+
shouldSkip bool
87+
description string
88+
}{
89+
{
90+
name: "MigrationScheduling phase",
91+
phase: kubevirtv1.MigrationScheduling,
92+
shouldSkip: false,
93+
description: "Should NOT skip MigrationScheduling - can still try to get node info from pod",
94+
},
95+
{
96+
name: "MigrationPending phase",
97+
phase: kubevirtv1.MigrationPending,
98+
shouldSkip: false,
99+
description: "Should NOT skip MigrationPending - waiting for state sync",
100+
},
101+
{
102+
name: "MigrationRunning phase",
103+
phase: kubevirtv1.MigrationRunning,
104+
shouldSkip: false,
105+
description: "Should NOT skip MigrationRunning - migration in progress",
106+
},
107+
{
108+
name: "MigrationSucceeded phase",
109+
phase: kubevirtv1.MigrationSucceeded,
110+
shouldSkip: true,
111+
description: "Should skip MigrationSucceeded with stale state - need correct nodes for Reset",
112+
},
113+
{
114+
name: "MigrationFailed phase",
115+
phase: kubevirtv1.MigrationFailed,
116+
shouldSkip: true,
117+
description: "Should skip MigrationFailed with stale state - need correct nodes for Reset",
118+
},
119+
}
120+
121+
for _, tt := range tests {
122+
t.Run(tt.name, func(t *testing.T) {
123+
result := shouldSkipForStaleState(tt.phase)
124+
assert.Equal(t, tt.shouldSkip, result, tt.description)
125+
})
126+
}
127+
}
128+
129+
func TestMigrationScenarios(t *testing.T) {
130+
// Simulate the complete migration scenario to ensure correct behavior
131+
type migrationScenario struct {
132+
name string
133+
previousMigration *kubevirtv1.VirtualMachineInstanceMigrationState // State from previous migration
134+
currentMigrationUID types.UID
135+
currentPhase kubevirtv1.VirtualMachineInstanceMigrationPhase
136+
expectedSrcNode string
137+
expectedTargetNode string
138+
expectedSkip bool
139+
description string
140+
}
141+
142+
scenarios := []migrationScenario{
143+
{
144+
name: "First migration - state matches",
145+
previousMigration: &kubevirtv1.VirtualMachineInstanceMigrationState{
146+
MigrationUID: "migration-1",
147+
SourceNode: "node-a",
148+
TargetNode: "node-b",
149+
},
150+
currentMigrationUID: "migration-1",
151+
currentPhase: kubevirtv1.MigrationScheduling,
152+
expectedSrcNode: "node-a",
153+
expectedTargetNode: "node-b",
154+
expectedSkip: false,
155+
description: "First migration should use MigrationState",
156+
},
157+
{
158+
name: "Second migration - state is stale (A->B->A scenario)",
159+
previousMigration: &kubevirtv1.VirtualMachineInstanceMigrationState{
160+
MigrationUID: "migration-1", // Old migration
161+
SourceNode: "node-a",
162+
TargetNode: "node-b",
163+
},
164+
currentMigrationUID: "migration-2", // New migration
165+
currentPhase: kubevirtv1.MigrationScheduling,
166+
expectedSrcNode: "", // Cannot use stale state
167+
expectedTargetNode: "",
168+
expectedSkip: false, // MigrationScheduling should not skip, can try pod-based approach
169+
description: "Second migration should NOT use stale state, but continue to try pod-based node detection",
170+
},
171+
{
172+
name: "Migration succeeded with stale state",
173+
previousMigration: &kubevirtv1.VirtualMachineInstanceMigrationState{
174+
MigrationUID: "migration-1",
175+
SourceNode: "node-a",
176+
TargetNode: "node-b",
177+
},
178+
currentMigrationUID: "migration-2",
179+
currentPhase: kubevirtv1.MigrationSucceeded,
180+
expectedSrcNode: "",
181+
expectedTargetNode: "",
182+
expectedSkip: true, // Should skip - can't Reset without correct nodes
183+
description: "MigrationSucceeded with stale state should skip",
184+
},
185+
{
186+
name: "Migration failed with stale state",
187+
previousMigration: &kubevirtv1.VirtualMachineInstanceMigrationState{
188+
MigrationUID: "migration-1",
189+
SourceNode: "node-a",
190+
TargetNode: "node-b",
191+
},
192+
currentMigrationUID: "migration-2",
193+
currentPhase: kubevirtv1.MigrationFailed,
194+
expectedSrcNode: "",
195+
expectedTargetNode: "",
196+
expectedSkip: true, // Should skip - can't Reset without correct nodes
197+
description: "MigrationFailed with stale state should skip",
198+
},
199+
{
200+
name: "Migration with nil state",
201+
previousMigration: nil,
202+
currentMigrationUID: "migration-1",
203+
currentPhase: kubevirtv1.MigrationScheduling,
204+
expectedSrcNode: "",
205+
expectedTargetNode: "",
206+
expectedSkip: false, // MigrationScheduling should continue
207+
description: "Nil MigrationState should continue for MigrationScheduling",
208+
},
209+
}
210+
211+
for _, sc := range scenarios {
212+
t.Run(sc.name, func(t *testing.T) {
213+
var srcNode, targetNode string
214+
var shouldSkip bool
215+
216+
// Simulate the fixed logic
217+
if sc.previousMigration != nil && sc.previousMigration.MigrationUID == sc.currentMigrationUID {
218+
srcNode = sc.previousMigration.SourceNode
219+
targetNode = sc.previousMigration.TargetNode
220+
} else {
221+
// State is stale or nil
222+
if sc.currentPhase == kubevirtv1.MigrationSucceeded || sc.currentPhase == kubevirtv1.MigrationFailed {
223+
shouldSkip = true
224+
}
225+
}
226+
227+
assert.Equal(t, sc.expectedSrcNode, srcNode, "Source node mismatch: "+sc.description)
228+
assert.Equal(t, sc.expectedTargetNode, targetNode, "Target node mismatch: "+sc.description)
229+
assert.Equal(t, sc.expectedSkip, shouldSkip, "Skip decision mismatch: "+sc.description)
230+
})
231+
}
232+
}

0 commit comments

Comments
 (0)