net, tests, stuntime: Add OVN localnet migration stuntime scenario

Anatw · Anatw · commit 76e55c4e7b1e · 2026-04-16T13:01:31.000+03:00
Implement the initial stuntime scenario to serve as a baseline for future
performance testing.

For now, the global stuntime threshold is set to a 5s placeholder. Once
we finish automating the remaining scenarios and have the baseline data
to calibrate our expectations, we’ll replace this with a more precise,
data-driven value.

Technical changes:
 - Annotate get_node_selector_dict to satisfy strict mypy checks
   (disallow_untyped_calls) for the new localnet migration fixture.
 - Stuntime scenarios require two VMs on the same node. To support this,
   anti-affinity is now configurable; it remains enabled by default to
   avoid impacting existing callers.

Signed-off-by: Anat Wax <awax@redhat.com>
Assisted by: Cursor
diff --git a/libs/vm/spec.py b/libs/vm/spec.py
@@ -31,6 +31,7 @@ class VMISpec:
     volumes: list[Volume] | None = None
     terminationGracePeriodSeconds: int | None = None  # noqa: N815
     affinity: Affinity | None = None
+    nodeSelector: dict[str, str] | None = None  # noqa: N815
 
 
 @dataclass
diff --git a/libs/vm/vm.py b/libs/vm/vm.py
@@ -114,6 +114,17 @@ def update_template_annotations(self, template_annotations: dict[str, str]) -> N
         }
         ResourceEditor(patches=patches).update()
 
+    def update_template_node_selector(self, node_selector: dict[str, str] | None) -> None:
+        """Set, replace, or clear the node selector of the VM template.
+
+        The in-memory template spec is updated first; the same value is then
+        handed to ResourceEditor as a patch keyed by this VM.
+
+        Args:
+            node_selector: Node selector dictionary to place on the VM template spec.
+                Pass None to clear the node selector.
+        """
+        self._spec.template.spec.nodeSelector = node_selector
+        template_spec_patch = {"nodeSelector": node_selector}
+        vm_patch = {"spec": {"template": {"spec": template_spec_patch}}}
+        ResourceEditor(patches={self: vm_patch}).update()
+
     @property
     def template_spec(self) -> VMISpec:
         return self._spec.template.spec
diff --git a/tests/network/localnet/liblocalnet.py b/tests/network/localnet/liblocalnet.py
@@ -1,7 +1,8 @@
 import contextlib
 import logging
 import uuid
-from typing import Final, Generator
+from collections.abc import Generator
+from typing import Final
 
 from kubernetes.client import ApiException
 from kubernetes.dynamic import DynamicClient
@@ -77,6 +78,8 @@ def localnet_vm(
     networks: list[Network],
     interfaces: list[Interface],
     network_data: cloudinit.NetworkData | None = None,
+    pod_anti_affinity: bool = True,
+    node_selector: dict[str, str] | None = None,
 ) -> BaseVirtualMachine:
     """
     Create a Fedora-based Virtual Machine connected to localnet network(s).
@@ -95,6 +98,9 @@ def localnet_vm(
             Each Interface should have a name matching a Network, and additional configuration and state.
         network_data (cloudinit.NetworkData | None): Cloud-init NetworkData object containing the network
             configuration for the VM interfaces. If None, no network configuration is applied via cloud-init.
+        pod_anti_affinity (bool): When True (default), prevent this VM from being scheduled on the same node
+            as other VMs with the localnet test label.
+        node_selector (dict[str, str] | None): Optional VMI nodeSelector (e.g. pin to a worker hostname).
 
     Returns:
         BaseVirtualMachine: The configured VM object ready for creation.
@@ -136,8 +142,12 @@ def localnet_vm(
         )
         vmi_spec = add_volume_disk(vmi_spec=vmi_spec, volume=volume, disk=disk)
 
-    vmi_spec.affinity = new_pod_anti_affinity(label=next(iter(LOCALNET_TEST_LABEL.items())))
-    vmi_spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaceSelector = {}
+    if pod_anti_affinity:
+        vmi_spec.affinity = new_pod_anti_affinity(label=next(iter(LOCALNET_TEST_LABEL.items())))
+        vmi_spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaceSelector = {}
+
+    if node_selector is not None:
+        vmi_spec.nodeSelector = node_selector
 
     return fedora_vm(namespace=namespace, name=name, client=client, spec=spec)
 
diff --git a/tests/network/localnet/migration_stuntime/conftest.py b/tests/network/localnet/migration_stuntime/conftest.py
@@ -0,0 +1,123 @@
+import logging
+from collections.abc import Generator
+
+import pytest
+from kubernetes.dynamic import DynamicClient
+from ocp_resources.namespace import Namespace
+
+from libs.net.vmspec import lookup_iface_status_ip
+from libs.vm.spec import Interface, Multus, Network
+from libs.vm.vm import BaseVirtualMachine
+from tests.network.libs import cloudinit
+from tests.network.libs import cluster_user_defined_network as libcudn
+from tests.network.localnet.liblocalnet import (
+    GUEST_1ST_IFACE_NAME,
+    LOCALNET_OVS_BRIDGE_INTERFACE,
+    ip_addresses_from_pool,
+    libnncp,
+    localnet_vm,
+    run_vms,
+)
+from tests.network.localnet.migration_stuntime import libstuntime
+from utilities.infra import get_node_selector_dict
+
+LOGGER = logging.getLogger(__name__)
+
+
+@pytest.fixture()
+def localnet_stuntime_server_vm(
+    unprivileged_client: DynamicClient,
+    nncp_localnet_on_secondary_node_nic: libnncp.NodeNetworkConfigurationPolicy,
+    cudn_localnet_ovs_bridge: libcudn.ClusterUserDefinedNetwork,
+    namespace_localnet_1: Namespace,
+    ipv4_localnet_address_pool: Generator[str],
+    ipv6_localnet_address_pool: Generator[str],
+) -> Generator[BaseVirtualMachine]:
+    """Provide a running Fedora VM on the OVS localnet that serves as the ping target.
+
+    The VM is created with pod anti-affinity disabled, so another localnet VM
+    may share its node. Guest addresses are taken from the IPv4/IPv6 pools and
+    applied to the first guest interface through cloud-init network data.
+
+    Yields:
+        BaseVirtualMachine: The server VM, after it was passed to run_vms().
+    """
+    # NOTE(review): nncp_localnet_on_secondary_node_nic is requested but not used in the body -
+    # presumably only so the node network policy exists before the VM starts; confirm.
+    localnet_network = Network(
+        name=LOCALNET_OVS_BRIDGE_INTERFACE,
+        multus=Multus(networkName=cudn_localnet_ovs_bridge.name),
+    )
+    localnet_iface = Interface(name=LOCALNET_OVS_BRIDGE_INTERFACE, bridge={})
+    guest_addresses = ip_addresses_from_pool(
+        ipv4_pool=ipv4_localnet_address_pool,
+        ipv6_pool=ipv6_localnet_address_pool,
+    )
+    guest_network_data = cloudinit.NetworkData(
+        ethernets={GUEST_1ST_IFACE_NAME: cloudinit.EthernetDevice(addresses=guest_addresses)}
+    )
+
+    with localnet_vm(
+        namespace=namespace_localnet_1.name,
+        name="localnet-stuntime-server",
+        client=unprivileged_client,
+        networks=[localnet_network],
+        interfaces=[localnet_iface],
+        network_data=guest_network_data,
+        pod_anti_affinity=False,
+    ) as server_vm:
+        run_vms(vms=(server_vm,))
+        yield server_vm
+
+
+@pytest.fixture()
+def localnet_stuntime_client_vm(
+    unprivileged_client: DynamicClient,
+    cudn_localnet_ovs_bridge: libcudn.ClusterUserDefinedNetwork,
+    namespace_localnet_1: Namespace,
+    ipv4_localnet_address_pool: Generator[str],
+    ipv6_localnet_address_pool: Generator[str],
+    localnet_stuntime_server_vm: BaseVirtualMachine,
+) -> Generator[BaseVirtualMachine]:
+    """Provide a running Fedora VM on the OVS localnet that acts as the ping client.
+
+    The VM is created with a node selector derived from the server VM's node
+    name and with pod anti-affinity disabled, so it starts next to the server.
+    Once the VM is running, the template node selector is cleared.
+
+    Yields:
+        BaseVirtualMachine: The client VM, after it was passed to run_vms().
+    """
+    server_node_name = localnet_stuntime_server_vm.vmi.node.name
+    localnet_network = Network(
+        name=LOCALNET_OVS_BRIDGE_INTERFACE,
+        multus=Multus(networkName=cudn_localnet_ovs_bridge.name),
+    )
+    localnet_iface = Interface(name=LOCALNET_OVS_BRIDGE_INTERFACE, bridge={})
+    guest_addresses = ip_addresses_from_pool(
+        ipv4_pool=ipv4_localnet_address_pool,
+        ipv6_pool=ipv6_localnet_address_pool,
+    )
+    guest_network_data = cloudinit.NetworkData(
+        ethernets={GUEST_1ST_IFACE_NAME: cloudinit.EthernetDevice(addresses=guest_addresses)}
+    )
+
+    with localnet_vm(
+        namespace=namespace_localnet_1.name,
+        name="localnet-stuntime-client",
+        client=unprivileged_client,
+        networks=[localnet_network],
+        interfaces=[localnet_iface],
+        network_data=guest_network_data,
+        pod_anti_affinity=False,
+        node_selector=get_node_selector_dict(node_selector=server_node_name),
+    ) as client_vm:
+        run_vms(vms=(client_vm,))
+        # Clear node selector to allow migration to any node
+        client_vm.update_template_node_selector(node_selector=None)
+        yield client_vm
+
+
+@pytest.fixture()
+def stuntime_active_ping(
+    request: pytest.FixtureRequest,
+    localnet_stuntime_server_vm: BaseVirtualMachine,
+    localnet_stuntime_client_vm: BaseVirtualMachine,
+) -> Generator[libstuntime.ContinuousPingController]:
+    """Active ping from client to server for stuntime measurement.
+
+    Ping starts as a precondition before test runs.
+    Test must call stop_and_get_summary() to get results.
+    Context manager guarantees cleanup even if test fails.
+
+    Requires indirect parametrization with ip_family parameter: the value of
+    request.param is forwarded as ip_family when looking up the server
+    address on the localnet interface.
+
+    Yields:
+        libstuntime.ContinuousPingController: Controller of the running ping.
+    """
+    ip_family = request.param
+    server_ip = str(
+        lookup_iface_status_ip(
+            vm=localnet_stuntime_server_vm,
+            iface_name=LOCALNET_OVS_BRIDGE_INTERFACE,
+            ip_family=ip_family,
+        )
+    )
+
+    with libstuntime.continuous_ping(source_vm=localnet_stuntime_client_vm, destination_ip=server_ip) as ping:
+        yield ping
diff --git a/tests/network/localnet/migration_stuntime/libstuntime.py b/tests/network/localnet/migration_stuntime/libstuntime.py
@@ -0,0 +1,149 @@
+"""Helpers for OVN localnet migration stuntime tests."""
+
+from __future__ import annotations
+
+import ipaddress
+import logging
+import re
+from contextlib import contextmanager
+from typing import Final, Generator
+
+from libs.vm.vm import BaseVirtualMachine
+
+LOGGER = logging.getLogger(__name__)
+
+STUNTIME_THRESHOLD_SECONDS: Final[float] = 5.0
+STUNTIME_PING_LOG_PATH: Final[str] = "/tmp/stuntime-ping.log"
+PING_INTERVAL_SECONDS: Final[float] = 0.1
+
+
+class InsufficientStuntimeDataError(ValueError):
+    """Raised when ping output lacks the data needed to compute stuntime.
+
+    compute_stuntime() raises it when the "packets transmitted, ... received"
+    summary line cannot be found in the collected ping log.
+    """
+
+
+def _get_ping_ipv6_flag(destination_ip: str) -> str:
+    """Pick the extra ping option matching the destination's IP version.
+
+    Args:
+        destination_ip: Textual IPv4 or IPv6 address.
+
+    Returns:
+        str: " -6" (with the leading space) for an IPv6 address, otherwise "".
+    """
+    if ipaddress.ip_address(address=destination_ip).version == 6:
+        return " -6"
+    return ""
+
+
+def compute_stuntime(ping_log: str) -> float:
+    """Derive stuntime from the packet counters in a ping summary.
+
+    Looks for the `<tx> packets transmitted, <rx> received` summary line and
+    turns the number of lost packets into seconds using PING_INTERVAL_SECONDS.
+
+    Args:
+        ping_log: Tail output from ping including summary lines.
+
+    Returns:
+        float: Stuntime in seconds; 0.0 when no packet was lost.
+
+    Raises:
+        InsufficientStuntimeDataError: When summary line with transmitted/received is missing.
+    """
+    summary = re.search(r"(\d+)\s+packets transmitted,\s+(\d+)\s+received", ping_log)
+    if summary is None:
+        raise InsufficientStuntimeDataError("Insufficient data to compute stuntime (missing ping summary line)")
+
+    transmitted, received = (int(count) for count in summary.groups())
+    lost_packets = transmitted - received
+    if lost_packets == 0:
+        stuntime = 0.0
+    else:
+        # One extra interval covers the gap from the last successful reply before the loss
+        # to the first successful reply after recovery.
+        stuntime = (lost_packets + 1) * PING_INTERVAL_SECONDS
+    LOGGER.info(f"Ping lost={lost_packets}, stuntime={stuntime:.1f}s")
+    return stuntime
+
+
+def _verify_ping_reaches_destination(
+    source_vm: BaseVirtualMachine,
+    destination_ip: str,
+) -> None:
+    """Run a short, quiet ping from the source VM towards the destination IP.
+
+    Three echo requests are sent through the VM console with a 30 second
+    console timeout.
+
+    Args:
+        source_vm: The virtual machine from which to initiate the ping.
+        destination_ip: The target IP address (IPv4 or IPv6) to ping.
+    """
+    # NOTE(review): how a failing ping surfaces depends on BaseVirtualMachine.console() - confirm.
+    ipv6_flag = _get_ping_ipv6_flag(destination_ip=destination_ip)
+    probe_command = f"ping{ipv6_flag} -q -c 3 {destination_ip}"
+    source_vm.console(commands=[probe_command], timeout=30)
+
+
+def _stop_continuous_ping_and_read_log(ping_source_vm: BaseVirtualMachine) -> str:
+    """Interrupt the running ping and collect the end of its log file.
+
+    Sends SIGINT to processes named exactly `ping` (a missing process is
+    tolerated via `|| true`), then reads the last three lines of
+    STUNTIME_PING_LOG_PATH. Both commands go through one console call.
+
+    Args:
+        ping_source_vm: The virtual machine running the continuous ping.
+
+    Returns:
+        str: The tail lines joined with newlines; expected to hold the ping summary.
+    """
+    stop_ping_cmd = "sudo sh -c 'pkill -SIGINT -x ping || true'"
+    read_tail_cmd = f"sudo tail -n 3 {STUNTIME_PING_LOG_PATH}"
+    console_output = ping_source_vm.console(commands=[stop_ping_cmd, read_tail_cmd], timeout=120)
+    assert console_output is not None, "Failed to stop continuous ping and read log"
+    # The console result is indexed by command string; only the tail output is of interest.
+    summary_tail = "\n".join(console_output[read_tail_cmd])
+    LOGGER.info(f"Collected ping summary tail from {ping_source_vm.name}")
+    return summary_tail
+
+
+class ContinuousPingController:
+    """Handle for a running continuous ping: stops it once and caches its summary."""
+
+    def __init__(self, source_vm: BaseVirtualMachine):
+        self._source_vm = source_vm
+        # Filled by the first stop_and_get_summary() call; None means the ping was not stopped yet.
+        self._ping_summary: str | None = None
+
+    def stop_and_get_summary(self) -> str:
+        """Stop the ping on first use and hand back its summary.
+
+        Later calls return the cached summary without touching the VM again.
+
+        Returns:
+            str: Ping summary containing packet transmission statistics.
+        """
+        if self._ping_summary is not None:
+            return self._ping_summary
+        self._ping_summary = _stop_continuous_ping_and_read_log(ping_source_vm=self._source_vm)
+        return self._ping_summary
+
+
+@contextmanager
+def continuous_ping(
+    source_vm: BaseVirtualMachine, destination_ip: str
+) -> Generator[ContinuousPingController, None, None]:
+    """Run a background ping on a VM for the duration of a `with` block.
+
+    On entry, connectivity is checked with a short ping and then a background
+    ping is launched with PING_INTERVAL_SECONDS between packets, its output
+    redirected to STUNTIME_PING_LOG_PATH. On exit the ping is stopped through
+    the controller, also when the body raised.
+
+    Args:
+        source_vm: The virtual machine from which to initiate the continuous ping.
+        destination_ip: The target IP address (IPv4 or IPv6) to ping continuously.
+
+    Yields:
+        ContinuousPingController: Controller with stop_and_get_summary() method.
+
+    Example:
+        >>> with continuous_ping(client_vm, server_ip) as ping:
+        ...     migrate_vm_and_verify(vm=client_vm)
+        ...     measured = compute_stuntime(ping_log=ping.stop_and_get_summary())
+    """
+    _verify_ping_reaches_destination(source_vm=source_vm, destination_ip=destination_ip)
+
+    ipv6_flag = _get_ping_ipv6_flag(destination_ip=destination_ip)
+    background_ping_cmd = (
+        f"ping{ipv6_flag} -O -i {PING_INTERVAL_SECONDS} {destination_ip} >{STUNTIME_PING_LOG_PATH} 2>&1 &"
+    )
+    source_vm.console(commands=[background_ping_cmd], timeout=10)
+    LOGGER.info(f"Started continuous ping from {source_vm.name} to {destination_ip} (log {STUNTIME_PING_LOG_PATH})")
+
+    ping_controller = ContinuousPingController(source_vm=source_vm)
+    try:
+        yield ping_controller
+    finally:
+        # The controller caches the summary, so this does not stop the ping a second time
+        # when the test already collected it.
+        ping_controller.stop_and_get_summary()
diff --git a/tests/network/localnet/migration_stuntime/test_migration_stuntime.py b/tests/network/localnet/migration_stuntime/test_migration_stuntime.py
diff --git a/utilities/infra.py b/utilities/infra.py