Skip to content

Commit 76e55c4

Browse files
committed
net, tests, stuntime: Add OVN localnet migration stuntime scenario
Implement the initial stuntime scenario to serve as a baseline for future performance testing.

For now, the global stuntime threshold is set to a 5s placeholder. Once we finish automating the remaining scenarios and have the baseline data to calibrate our expectations, we’ll replace this with a more precise, data-driven value.

Technical changes:
- Annotate get_node_selector_dict to satisfy strict mypy checks (disallow_untyped_calls) for the new localnet migration fixture.
- Stuntime scenarios require two VMs on the same node. To support this, anti-affinity is now configurable; it remains enabled by default to avoid impacting existing callers.

Signed-off-by: Anat Wax <awax@redhat.com>
Assisted by: Cursor
1 parent 9ec6aeb commit 76e55c4

File tree

7 files changed

+342
-14
lines changed

7 files changed

+342
-14
lines changed

libs/vm/spec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class VMISpec:
3131
volumes: list[Volume] | None = None
3232
terminationGracePeriodSeconds: int | None = None # noqa: N815
3333
affinity: Affinity | None = None
34+
nodeSelector: dict[str, str] | None = None # noqa: N815
3435

3536

3637
@dataclass

libs/vm/vm.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,17 @@ def update_template_annotations(self, template_annotations: dict[str, str]) -> N
114114
}
115115
ResourceEditor(patches=patches).update()
116116

117+
def update_template_node_selector(self, node_selector: dict[str, str] | None) -> None:
    """Set or clear the node selector of the VM template spec.

    The value is first stored on the in-memory template spec and then sent
    to the resource through a ResourceEditor patch.

    Args:
        node_selector: Node selector dictionary to apply to the VM template spec.
            Pass None to clear the node selector.
    """
    template_spec = self._spec.template.spec
    template_spec.nodeSelector = node_selector
    # Patch body mirrors the nesting of the VM resource: spec -> template -> spec.
    template_patch = {"spec": {"template": {"spec": {"nodeSelector": node_selector}}}}
    ResourceEditor(patches={self: template_patch}).update()
127+
117128
@property
118129
def template_spec(self) -> VMISpec:
119130
return self._spec.template.spec

tests/network/localnet/liblocalnet.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import contextlib
22
import logging
33
import uuid
4-
from typing import Final, Generator
4+
from collections.abc import Generator
5+
from typing import Final
56

67
from kubernetes.client import ApiException
78
from kubernetes.dynamic import DynamicClient
@@ -77,6 +78,8 @@ def localnet_vm(
7778
networks: list[Network],
7879
interfaces: list[Interface],
7980
network_data: cloudinit.NetworkData | None = None,
81+
pod_anti_affinity: bool = True,
82+
node_selector: dict[str, str] | None = None,
8083
) -> BaseVirtualMachine:
8184
"""
8285
Create a Fedora-based Virtual Machine connected to localnet network(s).
@@ -95,6 +98,9 @@ def localnet_vm(
9598
Each Interface should have a name matching a Network, and additional configuration and state.
9699
network_data (cloudinit.NetworkData | None): Cloud-init NetworkData object containing the network
97100
configuration for the VM interfaces. If None, no network configuration is applied via cloud-init.
101+
pod_anti_affinity (bool): When True (default), prevent this VM from being scheduled on the same node
102+
as other VMs with the localnet test label.
103+
node_selector (dict[str, str] | None): Optional VMI nodeSelector (e.g. pin to a worker hostname).
98104
99105
Returns:
100106
BaseVirtualMachine: The configured VM object ready for creation.
@@ -136,8 +142,12 @@ def localnet_vm(
136142
)
137143
vmi_spec = add_volume_disk(vmi_spec=vmi_spec, volume=volume, disk=disk)
138144

139-
vmi_spec.affinity = new_pod_anti_affinity(label=next(iter(LOCALNET_TEST_LABEL.items())))
140-
vmi_spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaceSelector = {}
145+
if pod_anti_affinity:
146+
vmi_spec.affinity = new_pod_anti_affinity(label=next(iter(LOCALNET_TEST_LABEL.items())))
147+
vmi_spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaceSelector = {}
148+
149+
if node_selector is not None:
150+
vmi_spec.nodeSelector = node_selector
141151

142152
return fedora_vm(namespace=namespace, name=name, client=client, spec=spec)
143153

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import logging
2+
from collections.abc import Generator
3+
4+
import pytest
5+
from kubernetes.dynamic import DynamicClient
6+
from ocp_resources.namespace import Namespace
7+
8+
from libs.net.vmspec import lookup_iface_status_ip
9+
from libs.vm.spec import Interface, Multus, Network
10+
from libs.vm.vm import BaseVirtualMachine
11+
from tests.network.libs import cloudinit
12+
from tests.network.libs import cluster_user_defined_network as libcudn
13+
from tests.network.localnet.liblocalnet import (
14+
GUEST_1ST_IFACE_NAME,
15+
LOCALNET_OVS_BRIDGE_INTERFACE,
16+
ip_addresses_from_pool,
17+
libnncp,
18+
localnet_vm,
19+
run_vms,
20+
)
21+
from tests.network.localnet.migration_stuntime import libstuntime
22+
from utilities.infra import get_node_selector_dict
23+
24+
LOGGER = logging.getLogger(__name__)
25+
26+
27+
@pytest.fixture()
def localnet_stuntime_server_vm(
    unprivileged_client: DynamicClient,
    nncp_localnet_on_secondary_node_nic: libnncp.NodeNetworkConfigurationPolicy,
    cudn_localnet_ovs_bridge: libcudn.ClusterUserDefinedNetwork,
    namespace_localnet_1: Namespace,
    ipv4_localnet_address_pool: Generator[str],
    ipv6_localnet_address_pool: Generator[str],
) -> Generator[BaseVirtualMachine]:
    """Provide the Fedora VM on the OVS localnet that is pinged in stuntime tests.

    The VM is created with pod anti-affinity disabled, started through
    run_vms() and yielded to the test.
    """
    vm_namespace = namespace_localnet_1.name
    localnet_network = Network(
        name=LOCALNET_OVS_BRIDGE_INTERFACE,
        multus=Multus(networkName=cudn_localnet_ovs_bridge.name),
    )
    localnet_iface = Interface(name=LOCALNET_OVS_BRIDGE_INTERFACE, bridge={})
    # Guest addresses are drawn from the shared IPv4/IPv6 pools.
    guest_addresses = ip_addresses_from_pool(
        ipv4_pool=ipv4_localnet_address_pool,
        ipv6_pool=ipv6_localnet_address_pool,
    )
    guest_network_data = cloudinit.NetworkData(
        ethernets={GUEST_1ST_IFACE_NAME: cloudinit.EthernetDevice(addresses=guest_addresses)},
    )

    with localnet_vm(
        namespace=vm_namespace,
        name="localnet-stuntime-server",
        client=unprivileged_client,
        networks=[localnet_network],
        interfaces=[localnet_iface],
        network_data=guest_network_data,
        pod_anti_affinity=False,
    ) as server_vm:
        run_vms(vms=(server_vm,))
        yield server_vm
59+
60+
61+
@pytest.fixture()
def localnet_stuntime_client_vm(
    unprivileged_client: DynamicClient,
    cudn_localnet_ovs_bridge: libcudn.ClusterUserDefinedNetwork,
    namespace_localnet_1: Namespace,
    ipv4_localnet_address_pool: Generator[str],
    ipv6_localnet_address_pool: Generator[str],
    localnet_stuntime_server_vm: BaseVirtualMachine,
) -> Generator[BaseVirtualMachine]:
    """Provide the Fedora VM on the OVS localnet that sends the pings.

    The VM is created with a node selector built from the server VM's node
    name and with pod anti-affinity disabled. Once it is running, the node
    selector is cleared from the VM template before the VM is yielded.
    """
    server_node_name = localnet_stuntime_server_vm.vmi.node.name

    vm_namespace = namespace_localnet_1.name
    localnet_network = Network(
        name=LOCALNET_OVS_BRIDGE_INTERFACE,
        multus=Multus(networkName=cudn_localnet_ovs_bridge.name),
    )
    localnet_iface = Interface(name=LOCALNET_OVS_BRIDGE_INTERFACE, bridge={})
    # Guest addresses are drawn from the shared IPv4/IPv6 pools.
    guest_addresses = ip_addresses_from_pool(
        ipv4_pool=ipv4_localnet_address_pool,
        ipv6_pool=ipv6_localnet_address_pool,
    )
    guest_network_data = cloudinit.NetworkData(
        ethernets={GUEST_1ST_IFACE_NAME: cloudinit.EthernetDevice(addresses=guest_addresses)},
    )
    same_node_selector = get_node_selector_dict(node_selector=server_node_name)

    with localnet_vm(
        namespace=vm_namespace,
        name="localnet-stuntime-client",
        client=unprivileged_client,
        networks=[localnet_network],
        interfaces=[localnet_iface],
        network_data=guest_network_data,
        pod_anti_affinity=False,
        node_selector=same_node_selector,
    ) as client_vm:
        run_vms(vms=(client_vm,))
        # Clear node selector to allow migration to any node
        client_vm.update_template_node_selector(node_selector=None)
        yield client_vm
97+
98+
99+
@pytest.fixture()
def stuntime_active_ping(
    request: pytest.FixtureRequest,
    localnet_stuntime_server_vm: BaseVirtualMachine,
    localnet_stuntime_client_vm: BaseVirtualMachine,
) -> Generator:
    """Yield a running client-to-server ping used to measure stuntime.

    The ping is already active when the test body starts. The test is
    expected to call stop_and_get_summary() on the yielded object to obtain
    the results; the surrounding context manager also stops the ping on exit,
    including when the test fails.

    Must be parametrized indirectly: request.param is used as the ip_family.
    """
    server_address = lookup_iface_status_ip(
        vm=localnet_stuntime_server_vm,
        iface_name=LOCALNET_OVS_BRIDGE_INTERFACE,
        ip_family=request.param,
    )
    active_ping = libstuntime.continuous_ping(
        source_vm=localnet_stuntime_client_vm,
        destination_ip=str(server_address),
    )
    with active_ping as ping:
        yield ping
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""Helpers for OVN localnet migration stuntime tests."""
2+
3+
from __future__ import annotations
4+
5+
import ipaddress
6+
import logging
7+
import re
8+
from contextlib import contextmanager
9+
from typing import Final, Generator
10+
11+
from libs.vm.vm import BaseVirtualMachine
12+
13+
LOGGER = logging.getLogger(__name__)
14+
15+
STUNTIME_THRESHOLD_SECONDS: Final[float] = 5.0
16+
STUNTIME_PING_LOG_PATH: Final[str] = "/tmp/stuntime-ping.log"
17+
PING_INTERVAL_SECONDS: Final[float] = 0.1
18+
19+
20+
class InsufficientStuntimeDataError(ValueError):
    """Signal that the ping log lacks the data needed to compute stuntime."""
22+
23+
24+
def _get_ping_ipv6_flag(destination_ip: str) -> str:
25+
"""Return the ping IPv6 flag based on the IP address version."""
26+
ip = ipaddress.ip_address(address=destination_ip)
27+
return " -6" if ip.version == 6 else ""
28+
29+
30+
def compute_stuntime(ping_log: str) -> float:
    """Derive the stuntime from the packet counters of a ping summary.

    Looks for the summary line `<tx> packets transmitted, <rx> received` and
    turns the number of lost packets into a duration using the ping interval.

    Args:
        ping_log: Tail output from ping including summary lines.

    Returns:
        float: Stuntime in seconds; 0.0 when no packet was lost.

    Raises:
        InsufficientStuntimeDataError: When summary line with transmitted/received is missing.
    """
    counters = re.search(r"(\d+)\s+packets transmitted,\s+(\d+)\s+received", ping_log)
    if counters is None:
        raise InsufficientStuntimeDataError("Insufficient data to compute stuntime (missing ping summary line)")

    sent, replied = (int(count) for count in counters.groups())
    lost_packets = sent - replied
    if lost_packets == 0:
        stuntime = 0.0
    else:
        # One extra interval accounts for the gap between the last successful reply
        # before the loss and the first successful reply after recovery.
        stuntime = (lost_packets + 1) * PING_INTERVAL_SECONDS
    LOGGER.info(f"Ping lost={lost_packets}, stuntime={stuntime:.1f}s")
    return stuntime
56+
57+
58+
def _verify_ping_reaches_destination(
    source_vm: BaseVirtualMachine,
    destination_ip: str,
) -> None:
    """Run a short, quiet 3-packet ping from the source VM to the destination.

    Args:
        source_vm: The virtual machine from which to initiate the ping.
        destination_ip: The target IP address (IPv4 or IPv6) to ping.
    """
    # NOTE(review): presumably source_vm.console() fails when the command exits
    # non-zero, which is what makes this a connectivity check - confirm.
    ping_ipv6_flag = _get_ping_ipv6_flag(destination_ip=destination_ip)
    probe_command = f"ping{ping_ipv6_flag} -q -c 3 {destination_ip}"
    source_vm.console(commands=[probe_command], timeout=30)
73+
74+
75+
def _stop_continuous_ping_and_read_log(ping_source_vm: BaseVirtualMachine) -> str:
    """Interrupt the background ping and return the tail of its log file.

    Args:
        ping_source_vm: The virtual machine running the continuous ping.

    Returns:
        str: The last three lines of the ping log, joined with newlines.
    """
    # SIGINT (not SIGKILL) so ping can end normally; "|| true" keeps the command
    # successful when no ping process is left to signal.
    cmd_pkill = "sudo sh -c 'pkill -SIGINT -x ping || true'"
    cmd_tail = f"sudo tail -n 3 {STUNTIME_PING_LOG_PATH}"
    # NOTE(review): tail runs right after pkill; confirm ping has always written
    # its summary to the log by then.
    console_output = ping_source_vm.console(commands=[cmd_pkill, cmd_tail], timeout=120)
    assert console_output is not None, "Failed to stop continuous ping and read log"
    tail_lines = console_output[cmd_tail]
    LOGGER.info(f"Collected ping summary tail from {ping_source_vm.name}")
    return "\n".join(tail_lines)
94+
95+
96+
class ContinuousPingController:
    """Handle used to stop a running continuous ping and fetch its summary."""

    def __init__(self, source_vm: BaseVirtualMachine):
        self._source_vm = source_vm
        # Filled on the first stop_and_get_summary() call and reused afterwards.
        self._ping_summary: str | None = None

    def stop_and_get_summary(self) -> str:
        """Stop the ping (first call only) and return its summary.

        The summary is cached, so repeated calls return the same text without
        touching the VM again.

        Returns:
            str: Ping summary containing packet transmission statistics.
        """
        if self._ping_summary is not None:
            return self._ping_summary
        self._ping_summary = _stop_continuous_ping_and_read_log(ping_source_vm=self._source_vm)
        return self._ping_summary
112+
113+
114+
@contextmanager
def continuous_ping(
    source_vm: BaseVirtualMachine, destination_ip: str
) -> Generator[ContinuousPingController, None, None]:
    """Run a background ping for the duration of the ``with`` block.

    On entry, a short connectivity check is run first, then a ping with a
    fixed interval is started in the background with its output redirected to
    the stuntime log file. On exit the ping is stopped, also when the body
    raises.

    Args:
        source_vm: The virtual machine from which to initiate the continuous ping.
        destination_ip: The target IP address (IPv4 or IPv6) to ping continuously.

    Yields:
        ContinuousPingController: Controller with stop_and_get_summary() method.

    Example:
        >>> with continuous_ping(client_vm, server_ip) as ping:
        ...     migrate_vm_and_verify(vm=client_vm)
        ...     measured = compute_stuntime(ping_log=ping.stop_and_get_summary())
    """
    _verify_ping_reaches_destination(source_vm=source_vm, destination_ip=destination_ip)

    ping_ipv6_flag = _get_ping_ipv6_flag(destination_ip=destination_ip)
    # Trailing "&" backgrounds the ping so the console call returns immediately.
    background_ping = (
        f"ping{ping_ipv6_flag} -O -i {PING_INTERVAL_SECONDS} {destination_ip} >{STUNTIME_PING_LOG_PATH} 2>&1 &"
    )
    source_vm.console(commands=[background_ping], timeout=10)
    LOGGER.info(f"Started continuous ping from {source_vm.name} to {destination_ip} (log {STUNTIME_PING_LOG_PATH})")

    controller = ContinuousPingController(source_vm=source_vm)
    try:
        yield controller
    finally:
        # The controller caches its summary, so this is harmless when the
        # caller has already stopped the ping.
        controller.stop_and_get_summary()

0 commit comments

Comments
 (0)