@@ -18,19 +18,21 @@ import (
1818)
1919
2020func (c * Controller ) enqueueAddVMIMigration (obj any ) {
21- key := cache .MetaObjectToName (obj .(* kubevirtv1.VirtualMachineInstanceMigration )).String ()
21+ vmiMigration := obj .(* kubevirtv1.VirtualMachineInstanceMigration )
22+ key := cache .MetaObjectToName (vmiMigration ).String ()
2223 klog .Infof ("enqueue add VMI migration %s" , key )
2324 c .addOrUpdateVMIMigrationQueue .Add (key )
2425}
2526
2627func (c * Controller ) enqueueUpdateVMIMigration (oldObj , newObj any ) {
27- oldVmi := oldObj .(* kubevirtv1.VirtualMachineInstanceMigration )
28- newVmi := newObj .(* kubevirtv1.VirtualMachineInstanceMigration )
28+ oldVmiMigration := oldObj .(* kubevirtv1.VirtualMachineInstanceMigration )
29+ newVmiMigration := newObj .(* kubevirtv1.VirtualMachineInstanceMigration )
2930
30- if ! newVmi .DeletionTimestamp .IsZero () ||
31- oldVmi .Status .Phase != newVmi .Status .Phase {
32- key := cache .MetaObjectToName (newVmi ).String ()
33- klog .Infof ("enqueue update VMI migration %s" , key )
31+ if ! newVmiMigration .DeletionTimestamp .IsZero () ||
32+ oldVmiMigration .Status .Phase != newVmiMigration .Status .Phase {
33+ key := cache .MetaObjectToName (newVmiMigration ).String ()
34+ klog .Infof ("enqueue update VMI migration %s (phase: %s -> %s)" ,
35+ key , oldVmiMigration .Status .Phase , newVmiMigration .Status .Phase )
3436 c .addOrUpdateVMIMigrationQueue .Add (key )
3537 }
3638}
@@ -105,102 +107,197 @@ func (c *Controller) handleAddOrUpdateVMIMigration(key string) error {
105107 utilruntime .HandleError (fmt .Errorf ("failed to get VMI migration by key %s: %w" , key , err ))
106108 return err
107109 }
110+
111+ migrationUID := vmiMigration .UID
112+ vmiName := vmiMigration .Spec .VMIName
113+ phase := vmiMigration .Status .Phase
114+
115+ // ====== MIGRATION LIFECYCLE MARKERS ======
116+ // Log migration lifecycle events for debugging and tracking
117+ switch phase {
118+ case kubevirtv1 .MigrationPending :
119+ klog .Infof (">>> [MIGRATION START] New migration %s (UID: %s) for VMI %s/%s - Phase: %s" ,
120+ key , migrationUID , namespace , vmiName , phase )
121+ case kubevirtv1 .MigrationSucceeded :
122+ klog .Infof ("<<< [MIGRATION END] Migration %s (UID: %s) for VMI %s/%s SUCCEEDED" ,
123+ key , migrationUID , namespace , vmiName )
124+ case kubevirtv1 .MigrationFailed :
125+ klog .Infof ("<<< [MIGRATION END] Migration %s (UID: %s) for VMI %s/%s FAILED" ,
126+ key , migrationUID , namespace , vmiName )
127+ default :
128+ klog .Infof ("--- [MIGRATION PROGRESS] Migration %s (UID: %s) for VMI %s/%s - Phase: %s" ,
129+ key , migrationUID , namespace , vmiName , phase )
130+ }
131+
132+ // Skip if migration state is not yet initialized
108133 if vmiMigration .Status .MigrationState == nil {
109- klog .V (3 ).Infof ("VirtualMachineInstanceMigration %s migration state is nil, skipping" , key )
134+ klog .V (3 ).Infof ("VirtualMachineInstanceMigration %s (UID: %s) migration state is nil, waiting for KubeVirt to initialize" ,
135+ key , migrationUID )
110136 return nil
111137 }
112138
139+ // Skip completed migrations (already processed in final phase)
113140 if vmiMigration .Status .MigrationState .Completed {
114- klog .V (3 ).Infof ("VirtualMachineInstanceMigration %s migration state is completed, skipping" , key )
141+ klog .V (3 ).Infof ("VirtualMachineInstanceMigration %s (UID: %s) migration state is already completed, skipping" ,
142+ key , migrationUID )
115143 return nil
116144 }
117145
118- vmi , err := c .config .KubevirtClient .VirtualMachineInstance (namespace ).Get (context .TODO (), vmiMigration .Spec .VMIName , metav1.GetOptions {})
146+ // Get VMI to access current migration state
147+ vmi , err := c .config .KubevirtClient .VirtualMachineInstance (namespace ).Get (context .TODO (), vmiName , metav1.GetOptions {})
119148 if err != nil {
120- utilruntime .HandleError (fmt .Errorf ("failed to get VMI by name %s: %w" , vmiMigration . Spec . VMIName , err ))
149+ utilruntime .HandleError (fmt .Errorf ("failed to get VMI by name %s: %w" , vmiName , err ))
121150 return err
122151 }
123152
124- // use VirtualMachineInstance's MigrationState because VirtualMachineInstanceMigration's MigrationState is not updated until migration finished
153+ portName := ovs .PodNameToPortName (vmiName , namespace , util .OvnProvider )
154+
155+ // ====== MIGRATION UID VALIDATION ======
156+ // CRITICAL: Only use vmi.Status.MigrationState if its MigrationUID matches the current vmiMigration.UID
157+ // This prevents using stale state from a previous migration (e.g., A→B data when doing B→A migration)
125158 var srcNodeName , targetNodeName string
159+ var migrationStateValid bool
160+
126161 if vmi .Status .MigrationState != nil {
127- klog .Infof ("current vmiMigration %s status %s, target Node %s, source Node %s, target Pod %s, source Pod %s" , key ,
128- vmiMigration .Status .Phase ,
129- vmi .Status .MigrationState .TargetNode ,
130- vmi .Status .MigrationState .SourceNode ,
131- vmi .Status .MigrationState .TargetPod ,
132- vmi .Status .MigrationState .SourcePod )
133- srcNodeName = vmi .Status .MigrationState .SourceNode
134- targetNodeName = vmi .Status .MigrationState .TargetNode
162+ vmiMigrationUID := vmi .Status .MigrationState .MigrationUID
163+ if vmiMigrationUID == migrationUID {
164+ // MigrationUID matches - this state belongs to current migration
165+ migrationStateValid = true
166+ srcNodeName = vmi .Status .MigrationState .SourceNode
167+ targetNodeName = vmi .Status .MigrationState .TargetNode
168+ klog .Infof ("Migration %s (UID: %s) - VMI MigrationState is VALID (UID match), source: %s, target: %s, targetPod: %s" ,
169+ key , migrationUID , srcNodeName , targetNodeName , vmi .Status .MigrationState .TargetPod )
170+ } else {
171+ // MigrationUID mismatch - this is STALE state from a PREVIOUS migration
172+ // Clean up any residual migrate options from the previous migration
173+ klog .Warningf ("Migration %s (UID: %s) - VMI MigrationState is STALE (UID mismatch: VMI has %s), cleaning residual migrate options" ,
174+ key , migrationUID , vmiMigrationUID )
175+ if err := c .OVNNbClient .CleanLogicalSwitchPortMigrateOptions (portName ); err != nil {
176+ klog .Errorf ("failed to clean stale migrate options for lsp %s: %v" , portName , err )
177+ return err
178+ }
179+ // Don't return here - continue processing the current migration
180+ // The current migration will set new options in MigrationScheduling phase
181+ }
135182 } else {
136- klog .Infof ("current vmiMigration %s status %s, vmi MigrationState is nil" , key , vmiMigration .Status .Phase )
183+ klog .V (3 ).Infof ("Migration %s (UID: %s) - VMI MigrationState is nil, waiting for KubeVirt to populate" ,
184+ key , migrationUID )
137185 }
138186
139- portName := ovs .PodNameToPortName (vmiMigration .Spec .VMIName , vmiMigration .Namespace , util .OvnProvider )
140- switch vmiMigration .Status .Phase {
187+ switch phase {
141188 case kubevirtv1 .MigrationScheduling :
189+ // During MigrationScheduling, get target node from Pod (more reliable than vmi.Status.MigrationState)
142190 selector , err := metav1 .LabelSelectorAsSelector (& metav1.LabelSelector {
143191 MatchLabels : map [string ]string {
144- kubevirtv1 .MigrationJobLabel : string (vmiMigration . UID ),
192+ kubevirtv1 .MigrationJobLabel : string (migrationUID ),
145193 },
146194 })
147195 if err != nil {
148- err = fmt .Errorf ("failed to create label selector for migration job UID %s: %w" , vmiMigration . UID , err )
196+ err = fmt .Errorf ("failed to create label selector for migration job UID %s: %w" , migrationUID , err )
149197 klog .Error (err )
150198 return err
151199 }
152200
153- pods , err := c .podsLister .Pods (vmiMigration . Namespace ).List (selector )
201+ pods , err := c .podsLister .Pods (namespace ).List (selector )
154202 if err != nil {
155- err = fmt .Errorf ("failed to list pods with migration job UID %s: %w" , vmiMigration . UID , err )
203+ err = fmt .Errorf ("failed to list pods with migration job UID %s: %w" , migrationUID , err )
156204 klog .Error (err )
157205 return err
158206 }
159207
160- if len (pods ) > 0 {
161- targetPod := pods [0 ]
162- // During MigrationScheduling phase, use vmi.Status.NodeName if SourceNode is empty
163- // because vmi.Status.MigrationState may not be fully synchronized yet
164- sourceNode := srcNodeName
165- if sourceNode == "" {
166- sourceNode = vmi .Status .NodeName
167- }
208+ if len (pods ) == 0 {
209+ klog .V (3 ).Infof ("Migration %s (UID: %s) - Target pod not yet created, waiting for pod creation" ,
210+ key , migrationUID )
211+ return nil
212+ }
168213
169- if sourceNode == "" || targetPod .Spec .NodeName == "" || sourceNode == targetPod .Spec .NodeName {
170- klog .Warningf ("VM pod %s/%s migration setup skipped, source node: %s, target node: %s (migration job UID: %s)" ,
171- targetPod .Namespace , targetPod .Name , sourceNode , targetPod .Spec .NodeName , vmiMigration .UID )
172- return nil
173- }
214+ targetPod := pods [0 ]
215+ targetNode := targetPod .Spec .NodeName
174216
175- klog .Infof ("VM pod %s/%s is migrating from %s to %s (migration job UID: %s)" ,
176- targetPod .Namespace , targetPod .Name , sourceNode , targetPod .Spec .NodeName , vmiMigration .UID )
217+ // Use vmi.Status.NodeName as source if MigrationState is not valid or SourceNode is empty
218+ sourceNode := srcNodeName
219+ if sourceNode == "" {
220+ sourceNode = vmi .Status .NodeName
221+ klog .V (3 ).Infof ("Migration %s (UID: %s) - Using vmi.Status.NodeName as source: %s" ,
222+ key , migrationUID , sourceNode )
223+ }
177224
178- if err := c .OVNNbClient .SetLogicalSwitchPortMigrateOptions (portName , sourceNode , targetPod .Spec .NodeName ); err != nil {
179- err = fmt .Errorf ("failed to set migrate options for VM pod lsp %s: %w" , portName , err )
180- klog .Error (err )
225+ // Validate we have both source and target
226+ if sourceNode == "" || targetNode == "" {
227+ klog .Warningf ("Migration %s (UID: %s) - Cannot set LSP options: source=%q, target=%q (waiting for nodes)" ,
228+ key , migrationUID , sourceNode , targetNode )
229+ return nil
230+ }
231+
232+ // Skip if source and target are the same (shouldn't happen in normal migration)
233+ if sourceNode == targetNode {
234+ klog .Warningf ("Migration %s (UID: %s) - Source and target are same node %s, skipping LSP setup" ,
235+ key , migrationUID , sourceNode )
236+ return nil
237+ }
238+
239+ klog .Infof (">>> [MIGRATION LSP SET] Migration %s (UID: %s) - Setting LSP %s migrate options: %s -> %s" ,
240+ key , migrationUID , portName , sourceNode , targetNode )
241+
242+ if err := c .OVNNbClient .SetLogicalSwitchPortMigrateOptions (portName , sourceNode , targetNode ); err != nil {
243+ err = fmt .Errorf ("failed to set migrate options for lsp %s: %w" , portName , err )
244+ klog .Error (err )
245+ return err
246+ }
247+
248+ klog .Infof (">>> [MIGRATION LSP SET OK] Migration %s (UID: %s) - Successfully set LSP %s migrate options" ,
249+ key , migrationUID , portName )
250+
251+ case kubevirtv1 .MigrationSucceeded :
252+ // For final phases, we need valid migration state to know source/target
253+ if ! migrationStateValid {
254+ klog .Warningf ("Migration %s (UID: %s) SUCCEEDED but VMI MigrationState is invalid/stale, cannot reset LSP options properly" ,
255+ key , migrationUID )
256+ // Try to clean up migrate options anyway using CleanLogicalSwitchPortMigrateOptions
257+ if err := c .OVNNbClient .CleanLogicalSwitchPortMigrateOptions (portName ); err != nil {
258+ klog .Errorf ("failed to clean migrate options for lsp %s: %v" , portName , err )
181259 return err
182260 }
183- klog .Infof ("successfully set migrate options for lsp %s from %s to %s" , portName , sourceNode , targetPod .Spec .NodeName )
184- } else {
185- klog .Warningf ("target pod not yet created for migration job UID %s in phase %s, waiting for pod creation" ,
186- vmiMigration .UID , vmiMigration .Status .Phase )
187261 return nil
188262 }
189- case kubevirtv1 .MigrationSucceeded :
190- klog .Infof ("migrate end reset options for lsp %s from %s to %s, migrated succeed" , portName , srcNodeName , targetNodeName )
263+
264+ klog .Infof ("<<< [MIGRATION LSP RESET] Migration %s (UID: %s) - Resetting LSP %s to target %s (migration succeeded)" ,
265+ key , migrationUID , portName , targetNodeName )
266+
191267 if err := c .OVNNbClient .ResetLogicalSwitchPortMigrateOptions (portName , srcNodeName , targetNodeName , false ); err != nil {
192- err = fmt .Errorf ("failed to clean migrate options for lsp %s, %w" , portName , err )
268+ err = fmt .Errorf ("failed to reset migrate options for lsp %s: %w" , portName , err )
193269 klog .Error (err )
194270 return err
195271 }
272+
273+ klog .Infof ("<<< [MIGRATION LSP RESET OK] Migration %s (UID: %s) - Successfully reset LSP %s" ,
274+ key , migrationUID , portName )
275+
196276 case kubevirtv1 .MigrationFailed :
197- klog .Infof ("migrate end reset options for lsp %s from %s to %s, migrated fail" , portName , srcNodeName , targetNodeName )
277+ if ! migrationStateValid {
278+ klog .Warningf ("Migration %s (UID: %s) FAILED but VMI MigrationState is invalid/stale, cannot reset LSP options properly" ,
279+ key , migrationUID )
280+ // Try to clean up migrate options anyway
281+ if err := c .OVNNbClient .CleanLogicalSwitchPortMigrateOptions (portName ); err != nil {
282+ klog .Errorf ("failed to clean migrate options for lsp %s: %v" , portName , err )
283+ return err
284+ }
285+ return nil
286+ }
287+
288+ klog .Infof ("<<< [MIGRATION LSP RESET] Migration %s (UID: %s) - Resetting LSP %s to source %s (migration failed)" ,
289+ key , migrationUID , portName , srcNodeName )
290+
198291 if err := c .OVNNbClient .ResetLogicalSwitchPortMigrateOptions (portName , srcNodeName , targetNodeName , true ); err != nil {
199- err = fmt .Errorf ("failed to clean migrate options for lsp %s, %w" , portName , err )
292+ err = fmt .Errorf ("failed to reset migrate options for lsp %s: %w" , portName , err )
200293 klog .Error (err )
201294 return err
202295 }
296+
297+ klog .Infof ("<<< [MIGRATION LSP RESET OK] Migration %s (UID: %s) - Successfully reset LSP %s" ,
298+ key , migrationUID , portName )
203299 }
300+
204301 return nil
205302}
206303
0 commit comments