Skip to content

Commit 5e240b3

Browse files
committed
net, tests, stuntime: Add OVN localnet migration stuntime scenario
Implement the initial stuntime scenario to serve as a baseline for future performance testing.

For now, the global stuntime threshold is set to a 5s placeholder. Once we finish automating the remaining scenarios and have the baseline data to calibrate our expectations, we’ll replace this with a more precise, data-driven value.

Technical changes:
- Annotate get_node_selector_dict to satisfy strict mypy checks (disallow_untyped_calls) for the new localnet migration fixture.
- Stuntime scenarios require two VMs on the same node. To support this, anti-affinity is now configurable; it remains enabled by default to avoid impacting existing callers.

Signed-off-by: Anat Wax <awax@redhat.com>
Assisted by: Cursor
1 parent 9ec6aeb commit 5e240b3

File tree

7 files changed

+280
-11
lines changed

7 files changed

+280
-11
lines changed

libs/vm/spec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class VMISpec:
3131
volumes: list[Volume] | None = None
3232
terminationGracePeriodSeconds: int | None = None # noqa: N815
3333
affinity: Affinity | None = None
34+
nodeSelector: dict[str, str] | None = None # noqa: N815
3435

3536

3637
@dataclass

libs/vm/vm.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,16 @@ def update_template_annotations(self, template_annotations: dict[str, str]) -> N
114114
}
115115
ResourceEditor(patches=patches).update()
116116

117+
def update_template_node_selector(self, node_selector: dict[str, str] | None) -> None:
    """Patch the node selector stored in the VM template spec.

    Args:
        node_selector: Mapping written to ``spec.template.spec.nodeSelector``.
            Pass None to clear the node selector.
    """
    # Build the nested patch from the inside out so each level stays readable.
    template_spec_patch = {"nodeSelector": node_selector}
    vm_patch = {"spec": {"template": {"spec": template_spec_patch}}}
    ResourceEditor(patches={self: vm_patch}).update()
126+
117127
@property
118128
def template_spec(self) -> VMISpec:
119129
return self._spec.template.spec

tests/network/localnet/liblocalnet.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import contextlib
22
import logging
33
import uuid
4-
from typing import Final, Generator
4+
from collections.abc import Generator
5+
from typing import Final
56

67
from kubernetes.client import ApiException
78
from kubernetes.dynamic import DynamicClient
@@ -77,6 +78,8 @@ def localnet_vm(
7778
networks: list[Network],
7879
interfaces: list[Interface],
7980
network_data: cloudinit.NetworkData | None = None,
81+
pod_anti_affinity: bool = True,
82+
node_selector: dict[str, str] | None = None,
8083
) -> BaseVirtualMachine:
8184
"""
8285
Create a Fedora-based Virtual Machine connected to localnet network(s).
@@ -95,6 +98,8 @@ def localnet_vm(
9598
Each Interface should have a name matching a Network, and additional configuration and state.
9699
network_data (cloudinit.NetworkData | None): Cloud-init NetworkData object containing the network
97100
configuration for the VM interfaces. If None, no network configuration is applied via cloud-init.
101+
pod_anti_affinity (bool): When True (default), apply localnet pod anti-affinity like other localnet VMs.
102+
node_selector (dict[str, str] | None): Optional VMI nodeSelector (e.g. pin to a worker hostname).
98103
99104
Returns:
100105
BaseVirtualMachine: The configured VM object ready for creation.
@@ -136,8 +141,14 @@ def localnet_vm(
136141
)
137142
vmi_spec = add_volume_disk(vmi_spec=vmi_spec, volume=volume, disk=disk)
138143

139-
vmi_spec.affinity = new_pod_anti_affinity(label=next(iter(LOCALNET_TEST_LABEL.items())))
140-
vmi_spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaceSelector = {}
144+
if pod_anti_affinity:
145+
vmi_spec.affinity = new_pod_anti_affinity(label=next(iter(LOCALNET_TEST_LABEL.items())))
146+
vmi_spec.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution[0].namespaceSelector = {}
147+
else:
148+
vmi_spec.affinity = None
149+
150+
if node_selector is not None:
151+
vmi_spec.nodeSelector = node_selector
141152

142153
return fedora_vm(namespace=namespace, name=name, client=client, spec=spec)
143154

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import logging
2+
from collections.abc import Generator
3+
4+
import pytest
5+
from kubernetes.dynamic import DynamicClient
6+
from ocp_resources.namespace import Namespace
7+
8+
from libs.vm.spec import Interface, Multus, Network
9+
from libs.vm.vm import BaseVirtualMachine
10+
from tests.network.libs import cloudinit
11+
from tests.network.libs import cluster_user_defined_network as libcudn
12+
from tests.network.localnet.liblocalnet import (
13+
GUEST_1ST_IFACE_NAME,
14+
LOCALNET_OVS_BRIDGE_INTERFACE,
15+
ip_addresses_from_pool,
16+
libnncp,
17+
localnet_vm,
18+
run_vms,
19+
)
20+
from utilities.infra import get_node_selector_dict
21+
22+
LOGGER = logging.getLogger(__name__)
23+
24+
25+
def _create_localnet_stuntime_vm(
    namespace: str,
    name: str,
    client: DynamicClient,
    cudn_name: str,
    ipv4_pool: Generator[str],
    ipv6_pool: Generator[str],
    node_selector: dict[str, str] | None = None,
) -> BaseVirtualMachine:
    """Build a localnet VM object for the stuntime scenarios.

    The VM gets a single bridge interface attached to the given CUDN, guest addresses
    taken from the supplied pools, and pod anti-affinity turned off.

    Args:
        namespace: Namespace name passed to localnet_vm.
        name: VM name passed to localnet_vm.
        client: Client passed to localnet_vm.
        cudn_name: Used as the Multus networkName of the VM network.
        ipv4_pool: IPv4 address pool handed to ip_addresses_from_pool.
        ipv6_pool: IPv6 address pool handed to ip_addresses_from_pool.
        node_selector: Optional node selector forwarded to localnet_vm.

    Returns:
        BaseVirtualMachine: The object returned by localnet_vm.
    """
    # Same construction order as the call arguments: network, interface, then guest addresses.
    localnet_network = Network(name=LOCALNET_OVS_BRIDGE_INTERFACE, multus=Multus(networkName=cudn_name))
    localnet_iface = Interface(name=LOCALNET_OVS_BRIDGE_INTERFACE, bridge={})
    guest_addresses = ip_addresses_from_pool(ipv4_pool=ipv4_pool, ipv6_pool=ipv6_pool)
    guest_network_data = cloudinit.NetworkData(
        ethernets={GUEST_1ST_IFACE_NAME: cloudinit.EthernetDevice(addresses=guest_addresses)},
    )
    return localnet_vm(
        namespace=namespace,
        name=name,
        client=client,
        networks=[localnet_network],
        interfaces=[localnet_iface],
        network_data=guest_network_data,
        pod_anti_affinity=False,
        node_selector=node_selector,
    )
54+
55+
56+
@pytest.fixture()
def ovs_bridge_localnet_same_node_running_server_client(
    unprivileged_client: DynamicClient,
    nncp_localnet_on_secondary_node_nic: libnncp.NodeNetworkConfigurationPolicy,
    cudn_localnet_ovs_bridge: libcudn.ClusterUserDefinedNetwork,
    namespace_localnet_1: Namespace,
    ipv4_localnet_address_pool: Generator[str],
    ipv6_localnet_address_pool: Generator[str],
) -> Generator[tuple[BaseVirtualMachine, BaseVirtualMachine]]:
    """Yield a running (server, client) pair of localnet VMs that share one node.

    The server is started first without a node selector; the client is then created
    with a node selector built from the node the server VMI is running on.
    ``nncp_localnet_on_secondary_node_nic`` is not referenced in the body; it is
    requested so that pytest sets it up before the VMs are created.
    """

    def _stuntime_vm(name: str, node_selector: dict[str, str] | None = None) -> BaseVirtualMachine:
        # Both VMs differ only by name and node selector.
        return _create_localnet_stuntime_vm(
            namespace=namespace_localnet_1.name,
            name=name,
            client=unprivileged_client,
            cudn_name=cudn_localnet_ovs_bridge.name,
            ipv4_pool=ipv4_localnet_address_pool,
            ipv6_pool=ipv6_localnet_address_pool,
            node_selector=node_selector,
        )

    with _stuntime_vm(name="localnet-stuntime-server") as server_vm:
        run_vms(vms=(server_vm,))
        # The node name is only known once the server is running.
        same_node_selector = get_node_selector_dict(node_selector=server_vm.vmi.node.name)
        with _stuntime_vm(name="localnet-stuntime-client", node_selector=same_node_selector) as client_vm:
            run_vms(vms=(client_vm,))
            yield server_vm, client_vm
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""Helpers for OVN localnet migration stuntime tests."""
2+
3+
from __future__ import annotations
4+
5+
import ipaddress
6+
import logging
7+
import re
8+
from typing import Final
9+
10+
from libs.vm.vm import BaseVirtualMachine
11+
12+
LOGGER = logging.getLogger(__name__)
13+
14+
# Upper bound, in seconds, that measured stuntime is asserted against.
# NOTE(review): the introducing commit describes 5s as a placeholder to be replaced by a data-driven value.
GLOBAL_STUNTIME_THRESHOLD_SECONDS: Final[float] = 5.0
# File inside the guest that receives the output of the background ping.
STUNTIME_PING_LOG_PATH: Final[str] = "/tmp/stuntime-ping.log"
# Passed to ping's -i option; compute_stuntime also multiplies the lost-packet count by it.
PING_INTERVAL_SECONDS: Final[float] = 0.1
17+
18+
19+
class InsufficientStuntimeDataError(ValueError):
    """Raised when the ping log lacks the data needed to compute stuntime.

    compute_stuntime raises it when the "packets transmitted, ... received"
    summary line cannot be found in the log.
    """
21+
22+
23+
def _get_ping_ipv6_flag(destination_ip: str) -> str:
24+
"""Return the ping IPv6 flag based on the IP address version."""
25+
ip = ipaddress.ip_address(address=destination_ip)
26+
return " -6" if ip.version == 6 else ""
27+
28+
29+
def compute_stuntime(ping_log: str) -> float:
    """Parse ping summary output and compute stuntime from lost packets.

    Locates the summary line and multiplies the number of lost packets
    (transmitted minus received) by PING_INTERVAL_SECONDS. Two summary
    spellings are accepted:

    - ``<tx> packets transmitted, <rx> received`` (iputils ping)
    - ``<tx> packets transmitted, <rx> packets received`` (BusyBox/BSD ping)

    Args:
        ping_log: Tail output from ping including summary lines.

    Returns:
        float: Stuntime in seconds.

    Raises:
        InsufficientStuntimeDataError: When summary line with transmitted/received is missing.
    """
    # The optional "packets" keeps the iputils form matching while also accepting the BusyBox/BSD form.
    summary_match = re.search(
        r"(\d+)\s+packets transmitted,\s+(\d+)\s+(?:packets\s+)?received",
        ping_log,
    )
    if summary_match is None:
        raise InsufficientStuntimeDataError("Insufficient data to compute stuntime (missing ping summary line)")

    transmitted_packets, received_packets = (int(count) for count in summary_match.groups())
    lost_packets = transmitted_packets - received_packets
    # Every lost packet accounts for one ping interval without connectivity.
    stuntime = lost_packets * PING_INTERVAL_SECONDS
    LOGGER.info(f"Ping lost={lost_packets}, stuntime={stuntime:.1f}s")
    return stuntime
54+
55+
56+
def clear_vm_template_node_selector(vm: BaseVirtualMachine) -> None:
    """Clear the VM template node selector to allow migration to any node.

    Thin wrapper that calls ``vm.update_template_node_selector`` with ``node_selector=None``.

    Args:
        vm: The virtual machine to update.
    """
    vm.update_template_node_selector(node_selector=None)
63+
64+
65+
def verify_ping_reaches_destination(
    source_vm: BaseVirtualMachine,
    destination_ip: str,
) -> None:
    """Run a short, quiet ping from the source VM console towards the destination.

    Args:
        source_vm: The virtual machine whose console runs the ping.
        destination_ip: The target IP address (IPv4 or IPv6) to ping.
    """
    # Three quiet probes; " -6" is inserted only for IPv6 destinations.
    ping_command = f"ping{_get_ping_ipv6_flag(destination_ip=destination_ip)} -q -c 3 {destination_ip}"
    source_vm.console(commands=[ping_command], timeout=30)
80+
81+
82+
def start_continuous_ping(
    source_vm: BaseVirtualMachine,
    destination_ip: str,
) -> None:
    """Start a continuous background ping for stuntime measurement.

    Connectivity is verified first with verify_ping_reaches_destination. The
    continuous ping is then launched in the background from the VM console,
    with stdout and stderr redirected to STUNTIME_PING_LOG_PATH.

    Args:
        source_vm: The virtual machine from which to initiate the continuous ping.
        destination_ip: The target IP address (IPv4 or IPv6) to ping continuously.
    """
    verify_ping_reaches_destination(source_vm=source_vm, destination_ip=destination_ip)
    ping_ipv6_flag = _get_ping_ipv6_flag(destination_ip=destination_ip)
    source_vm.console(
        commands=[
            # -i sets the send interval; the trailing "&" leaves ping running after the console command returns.
            f"ping{ping_ipv6_flag} -O -i {PING_INTERVAL_SECONDS} {destination_ip} >{STUNTIME_PING_LOG_PATH} 2>&1 &",
        ],
        timeout=10,
    )
    # No timestamp option (-D) is passed, so the message must not describe the ping as timestamped.
    LOGGER.info(f"Started continuous ping from {source_vm.name} to {destination_ip} (log {STUNTIME_PING_LOG_PATH})")
101+
102+
103+
def stop_continuous_ping_and_read_log(ping_source_vm: BaseVirtualMachine) -> str:
    """Interrupt the background ping and return the tail of its log.

    Args:
        ping_source_vm: The virtual machine running the continuous ping.

    Returns:
        str: The last three lines of the ping log, joined with newlines.
    """
    # "|| true" keeps the command successful even when pkill reports a failure.
    stop_ping_cmd = "sudo sh -c 'pkill -SIGINT -x ping || true'"
    read_tail_cmd = f"sudo tail -n 3 {STUNTIME_PING_LOG_PATH}"
    console_output = ping_source_vm.console(commands=[stop_ping_cmd, read_tail_cmd], timeout=120)
    assert console_output is not None, "Failed to stop continuous ping and read log"
    # console() output is indexed by the command string; only the tail output is needed.
    summary_tail = "\n".join(console_output[read_tail_cmd])
    LOGGER.info(f"Collected ping summary tail from {ping_source_vm.name}")
    return summary_tail

tests/network/localnet/migration_stuntime/test_migration_stuntime.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,23 @@
2020

2121
import pytest
2222

23-
__test__ = False
23+
from libs.net.vmspec import lookup_iface_status_ip
24+
from tests.network.localnet.liblocalnet import LOCALNET_OVS_BRIDGE_INTERFACE
25+
from tests.network.localnet.migration_stuntime.lib_helpers import (
26+
GLOBAL_STUNTIME_THRESHOLD_SECONDS,
27+
clear_vm_template_node_selector,
28+
compute_stuntime,
29+
start_continuous_ping,
30+
stop_continuous_ping_and_read_log,
31+
)
32+
from utilities.virt import migrate_vm_and_verify
33+
34+
MIGRATION_STUNTIME_IP_FAMILY_PARAMS = (
35+
pytest.param(4, id="ipv4", marks=pytest.mark.ipv4),
36+
pytest.param(6, id="ipv6", marks=pytest.mark.ipv6),
37+
)
2438

2539
"""
26-
Parametrize:
27-
- ip_family:
28-
- ipv4 [Markers: ipv4]
29-
- ipv6 [Markers: ipv6]
30-
3140
Preconditions:
3241
- Shared under-test server VM on OVN localnet secondary network, for the IP family from ip_family parametrization.
3342
- Shared under-test client VM on OVN localnet secondary network, for that same IP family,
@@ -37,8 +46,13 @@
3746

3847
@pytest.mark.incremental
3948
class TestMigrationStuntime:
49+
@pytest.mark.parametrize("ip_family", MIGRATION_STUNTIME_IP_FAMILY_PARAMS)
4050
@pytest.mark.polarion("CNV-15258")
41-
def test_client_migrates_off_server_node(self):
51+
def test_client_migrates_off_server_node(
52+
self,
53+
ovs_bridge_localnet_same_node_running_server_client,
54+
ip_family: int,
55+
):
4256
"""
4357
Test that measured stuntime does not exceed the global threshold when the client
4458
VM migrates from the node hosting the server VM into a different node.
@@ -58,6 +72,24 @@ def test_client_migrates_off_server_node(self):
5872
Expected:
5973
- Measured stuntime does not exceed the global threshold.
6074
"""
75+
server_vm, client_vm = ovs_bridge_localnet_same_node_running_server_client
76+
server_ip = str(
77+
lookup_iface_status_ip(
78+
vm=server_vm,
79+
iface_name=LOCALNET_OVS_BRIDGE_INTERFACE,
80+
ip_family=ip_family,
81+
)
82+
)
83+
clear_vm_template_node_selector(vm=client_vm)
84+
start_continuous_ping(
85+
source_vm=client_vm,
86+
destination_ip=server_ip,
87+
)
88+
migrate_vm_and_verify(vm=client_vm)
89+
measured = compute_stuntime(ping_log=stop_continuous_ping_and_read_log(ping_source_vm=client_vm))
90+
assert measured <= GLOBAL_STUNTIME_THRESHOLD_SECONDS, (
91+
f"IPv{ip_family} stuntime {measured}s exceeds global threshold ({GLOBAL_STUNTIME_THRESHOLD_SECONDS}s)"
92+
)
6193

6294
@pytest.mark.polarion("CNV-15259")
6395
def test_client_migrates_between_non_server_nodes(self):
@@ -168,3 +200,9 @@ def test_server_migrates_to_client_node(self):
168200
Expected:
169201
- Measured stuntime does not exceed the global threshold.
170202
"""
203+
204+
test_client_migrates_between_non_server_nodes.__test__ = False
205+
test_client_migrates_to_server_node.__test__ = False
206+
test_server_migrates_off_client_node.__test__ = False
207+
test_server_migrates_between_non_client_nodes.__test__ = False
208+
test_server_migrates_to_client_node.__test__ = False

utilities/infra.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1142,7 +1142,7 @@ def get_node_selector_name(node_selector):
11421142
return node_selector[f"{Resource.ApiGroup.KUBERNETES_IO}/hostname"]
11431143

11441144

1145-
def get_node_selector_dict(node_selector):
1145+
def get_node_selector_dict(node_selector: str) -> dict[str, str]:
    """Wrap a node name in a single-entry node selector mapping.

    Args:
        node_selector: Value stored under the ``<Resource.ApiGroup.KUBERNETES_IO>/hostname`` key.

    Returns:
        dict[str, str]: ``{"<Resource.ApiGroup.KUBERNETES_IO>/hostname": node_selector}``.
    """
    return {f"{Resource.ApiGroup.KUBERNETES_IO}/hostname": node_selector}
11471147

11481148

0 commit comments

Comments
 (0)