Skip to content

Commit 50800dd

Browse files
authored
[VIRT] Fix descheduler tests (RedHatQE#1987)
* Fix for descheduler test.
* Check boot time instead of the ping process — saves more than 15 minutes of the test run.
* Change profile from DevKubeVirtRelieveAndMigrate to KubeVirtRelieveAndMigrate.
* Updates per review comments: update the wait_for_migration_finished function to use namespace; use cache for is_jira_67515_open.
1 parent 466d48c commit 50800dd

12 files changed

Lines changed: 85 additions & 90 deletions

File tree

tests/scale/test_scale_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ def test_mass_vm_live_migration(
449449
for batch in scale_vms:
450450
for vm in batch:
451451
wait_for_migration_finished(
452-
vm=vm,
452+
namespace=vm.namespace,
453453
migration=vm_migration_info[vm.name][MIGRATION_INSTANCE_STR],
454454
)
455455
verify_vm_migrated(

tests/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,9 @@ def hotplug_instance_type_vm_and_verify(vm, client, instance_type):
183183

184184
def verify_hotplug(vm, client, sockets=None, memory_guest=None):
185185
vmim = get_created_migration_job(vm=vm, client=client)
186-
wait_for_migration_finished(vm=vm, migration=vmim, timeout=TIMEOUT_30MIN if "windows" in vm.name else TIMEOUT_10MIN)
186+
wait_for_migration_finished(
187+
namespace=vm.namespace, migration=vmim, timeout=TIMEOUT_30MIN if "windows" in vm.name else TIMEOUT_10MIN
188+
)
187189
wait_for_ssh_connectivity(vm=vm)
188190
vmi_spec_domain = vm.vmi.instance.spec.domain
189191
if sockets:

tests/virt/node/descheduler/conftest.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,33 @@
55
from ocp_resources.deployment import Deployment
66
from ocp_resources.pod_disruption_budget import PodDisruptionBudget
77
from ocp_resources.resource import Resource, ResourceEditor
8+
from ocp_resources.virtual_machine_instance_migration import VirtualMachineInstanceMigration
89
from ocp_utilities.infra import get_pods_by_name_prefix
910

1011
from tests.virt.node.descheduler.constants import (
1112
DESCHEDULER_LABEL_KEY,
1213
DESCHEDULER_LABEL_VALUE,
1314
DESCHEDULER_TEST_LABEL,
14-
RUNNING_PING_PROCESS_NAME_IN_VM,
1515
)
1616
from tests.virt.node.descheduler.utils import (
1717
calculate_vm_deployment,
1818
create_kube_descheduler,
1919
deploy_vms,
20-
start_vms_with_process,
2120
vm_nodes,
2221
vms_per_nodes,
2322
wait_vmi_failover,
2423
)
25-
from tests.virt.utils import build_node_affinity_dict, get_non_terminated_pods, start_stress_on_vm
26-
from utilities.constants import TIMEOUT_5SEC
24+
from tests.virt.utils import (
25+
build_node_affinity_dict,
26+
get_boot_time_for_multiple_vms,
27+
get_non_terminated_pods,
28+
start_stress_on_vm,
29+
)
30+
from utilities.constants import TIMEOUT_5MIN, TIMEOUT_5SEC
2731
from utilities.infra import wait_for_pods_deletion
2832
from utilities.virt import (
2933
node_mgmt_console,
34+
wait_for_migration_finished,
3035
wait_for_node_schedulable_status,
3136
)
3237

@@ -57,7 +62,7 @@ def descheduler_kubevirt_relieve_and_migrate_profile(
5762
):
5863
with create_kube_descheduler(
5964
admin_client=admin_client,
60-
profiles=["DevKubeVirtRelieveAndMigrate"],
65+
profiles=["KubeVirtRelieveAndMigrate"],
6166
profile_customizations={
6267
"devActualUtilizationProfile": "PrometheusCPUCombined",
6368
},
@@ -124,14 +129,10 @@ def vms_orig_nodes_before_node_drain(deployed_vms_for_descheduler_test):
124129

125130

126131
@pytest.fixture(scope="class")
127-
def vms_started_process_for_node_drain(
132+
def vms_boot_time_before_node_drain(
128133
deployed_vms_for_descheduler_test,
129134
):
130-
return start_vms_with_process(
131-
vms=deployed_vms_for_descheduler_test,
132-
process_name=RUNNING_PING_PROCESS_NAME_IN_VM,
133-
args=LOCALHOST,
134-
)
135+
yield get_boot_time_for_multiple_vms(vm_list=deployed_vms_for_descheduler_test)
135136

136137

137138
@pytest.fixture(scope="class")
@@ -161,6 +162,13 @@ def drain_uncordon_node(
161162
wait_vmi_failover(vm=vm, orig_node=vms_orig_nodes_before_node_drain[vm.name])
162163

163164

165+
@pytest.fixture()
166+
def all_existing_migrations_completed(admin_client, namespace):
167+
# Descheduler may trigger multiple migrations, need to wait when all succeeded
168+
for migration in VirtualMachineInstanceMigration.get(dyn_client=admin_client, namespace=namespace):
169+
wait_for_migration_finished(namespace=namespace.name, migration=migration, timeout=TIMEOUT_5MIN)
170+
171+
164172
@pytest.fixture(scope="class")
165173
def node_with_min_memory_labeled_for_descheduler_test(node_with_least_available_memory):
166174
with ResourceEditor(patches={node_with_least_available_memory: {"metadata": {"labels": DESCHEDULER_TEST_LABEL}}}):
@@ -237,14 +245,10 @@ def deployed_vms_on_labeled_node(
237245

238246

239247
@pytest.fixture(scope="class")
240-
def vms_started_process_for_utilization_imbalance(
248+
def vms_boot_time_before_utilization_imbalance(
241249
deployed_vms_for_utilization_imbalance,
242250
):
243-
return start_vms_with_process(
244-
vms=deployed_vms_for_utilization_imbalance,
245-
process_name=RUNNING_PING_PROCESS_NAME_IN_VM,
246-
args=LOCALHOST,
247-
)
251+
yield get_boot_time_for_multiple_vms(vm_list=deployed_vms_for_utilization_imbalance)
248252

249253

250254
@pytest.fixture(scope="class")
@@ -317,6 +321,7 @@ def node_to_run_stress(schedulable_nodes, deployed_vms_for_descheduler_test):
317321
vm_per_node_counters = vms_per_nodes(vms=vm_nodes(vms=deployed_vms_for_descheduler_test))
318322
for node in schedulable_nodes:
319323
if vm_per_node_counters[node.name] > 0:
324+
LOGGER.info(f"Node to run stress: {node.name}")
320325
return node
321326

322327
raise ValueError("No suitable node to run stress")

tests/virt/node/descheduler/test_descheduler.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55

66
from tests.virt.node.descheduler.constants import DESCHEDULER_TEST_LABEL
77
from tests.virt.node.descheduler.utils import (
8-
assert_running_process_after_failover,
98
assert_vms_consistent_virt_launcher_pods,
109
assert_vms_distribution_after_failover,
1110
verify_at_least_one_vm_migrated,
1211
)
12+
from tests.virt.utils import verify_linux_boot_time
1313

1414
LOGGER = logging.getLogger(__name__)
1515

@@ -39,7 +39,7 @@ def test_descheduler_evicts_vm_after_drain_uncordon(
3939
self,
4040
schedulable_nodes,
4141
deployed_vms_for_descheduler_test,
42-
vms_started_process_for_node_drain,
42+
vms_boot_time_before_node_drain,
4343
drain_uncordon_node,
4444
):
4545
assert_vms_distribution_after_failover(
@@ -55,20 +55,21 @@ def test_descheduler_evicts_vm_after_drain_uncordon(
5555
def test_no_migrations_storm(
5656
self,
5757
deployed_vms_for_descheduler_test,
58+
all_existing_migrations_completed,
5859
):
5960
LOGGER.info(NO_MIGRATION_STORM_ASSERT_MESSAGE)
6061
assert_vms_consistent_virt_launcher_pods(running_vms=deployed_vms_for_descheduler_test)
6162

6263
@pytest.mark.dependency(depends=[f"{TESTS_CLASS_NAME}::test_no_migrations_storm"])
6364
@pytest.mark.polarion("CNV-8288")
64-
def test_running_process_after_migrations_complete(
65+
def test_boot_time_after_migrations_complete(
6566
self,
6667
deployed_vms_for_descheduler_test,
67-
vms_started_process_for_node_drain,
68+
vms_boot_time_before_node_drain,
6869
):
69-
assert_running_process_after_failover(
70-
vms_list=deployed_vms_for_descheduler_test,
71-
process_dict=vms_started_process_for_node_drain,
70+
verify_linux_boot_time(
71+
vm_list=deployed_vms_for_descheduler_test,
72+
initial_boot_time=vms_boot_time_before_node_drain,
7273
)
7374

7475

@@ -95,7 +96,7 @@ def test_descheduler_evicts_vm_from_utilization_imbalance(
9596
node_with_least_available_memory,
9697
node_with_min_memory_labeled_for_descheduler_test,
9798
deployed_vms_for_utilization_imbalance,
98-
vms_started_process_for_utilization_imbalance,
99+
vms_boot_time_before_utilization_imbalance,
99100
utilization_imbalance,
100101
node_with_max_memory_labeled_for_descheduler_test,
101102
):
@@ -111,20 +112,21 @@ def test_descheduler_evicts_vm_from_utilization_imbalance(
111112
def test_no_migrations_storm(
112113
self,
113114
deployed_vms_for_utilization_imbalance,
115+
all_existing_migrations_completed,
114116
):
115117
LOGGER.info(NO_MIGRATION_STORM_ASSERT_MESSAGE)
116118
assert_vms_consistent_virt_launcher_pods(running_vms=deployed_vms_for_utilization_imbalance)
117119

118120
@pytest.mark.dependency(depends=[f"{TESTS_CLASS_NAME}::test_no_migrations_storm"])
119121
@pytest.mark.polarion("CNV-8919")
120-
def test_running_process_after_migrations_complete(
122+
def test_boot_time_after_migrations_complete(
121123
self,
122124
deployed_vms_for_utilization_imbalance,
123-
vms_started_process_for_utilization_imbalance,
125+
vms_boot_time_before_utilization_imbalance,
124126
):
125-
assert_running_process_after_failover(
126-
vms_list=deployed_vms_for_utilization_imbalance,
127-
process_dict=vms_started_process_for_utilization_imbalance,
127+
verify_linux_boot_time(
128+
vm_list=deployed_vms_for_utilization_imbalance,
129+
initial_boot_time=vms_boot_time_before_utilization_imbalance,
128130
)
129131

130132

tests/virt/node/descheduler/test_descheduler_psi_metrics.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import pytest
44

55
from tests.virt.node.descheduler.utils import verify_at_least_one_vm_migrated, wait_for_overutilized_soft_taint
6+
from utilities.constants import TIMEOUT_15MIN
67

78
LOGGER = logging.getLogger(__name__)
89

@@ -45,5 +46,6 @@ def test_rebalancing_when_node_overloaded(
4546
def test_soft_taint_removed_when_node_not_overloaded(
4647
self,
4748
node_to_run_stress,
49+
all_existing_migrations_completed,
4850
):
49-
wait_for_overutilized_soft_taint(node=node_to_run_stress, taint_expected=False)
51+
wait_for_overutilized_soft_taint(node=node_to_run_stress, taint_expected=False, wait_timeout=TIMEOUT_15MIN)

tests/virt/node/descheduler/utils.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44

55
from ocp_resources.deployment import Deployment
66
from ocp_resources.kube_descheduler import KubeDescheduler
7+
from ocp_resources.resource import ResourceEditor
78
from ocp_resources.virtual_machine import VirtualMachine
89
from timeout_sampler import TimeoutExpiredError, TimeoutSampler
910

1011
from tests.virt.node.descheduler.constants import (
1112
DESCHEDULER_DEPLOYMENT_NAME,
1213
DESCHEDULER_SOFT_TAINT_KEY,
1314
DESCHEDULING_INTERVAL_120SEC,
14-
RUNNING_PING_PROCESS_NAME_IN_VM,
1515
)
16+
from tests.virt.utils import is_jira_67515_open
1617
from utilities.constants import (
1718
TIMEOUT_1MIN,
1819
TIMEOUT_5MIN,
@@ -25,9 +26,7 @@
2526
from utilities.virt import (
2627
VirtualMachineForTests,
2728
fedora_vm_body,
28-
fetch_pid_from_linux_vm,
2929
running_vm,
30-
start_and_fetch_processid_on_linux_vm,
3130
)
3231

3332
LOGGER = logging.getLogger(__name__)
@@ -108,23 +107,6 @@ def wait_vmi_failover(vm, orig_node):
108107
raise
109108

110109

111-
def assert_running_process_after_failover(vms_list, process_dict):
112-
LOGGER.info(f"Verify {RUNNING_PING_PROCESS_NAME_IN_VM} is running after migrations.")
113-
failed_vms = []
114-
for vm in vms_list:
115-
vm_name = vm.name
116-
new_pid = None
117-
try:
118-
new_pid = fetch_pid_from_linux_vm(vm=vm, process_name=RUNNING_PING_PROCESS_NAME_IN_VM)
119-
except (ValueError, AssertionError):
120-
failed_vms.append(vm_name)
121-
continue
122-
if new_pid != process_dict[vm_name]:
123-
failed_vms.append(vm_name)
124-
125-
assert not failed_vms, f"The following VMs process ID has changed after migration: {failed_vms}"
126-
127-
128110
def assert_vms_distribution_after_failover(vms, nodes, all_nodes=True):
129111
def _get_vms_per_nodes():
130112
return vms_per_nodes(vms=vm_nodes(vms=vms))
@@ -212,16 +194,6 @@ def _vms_launcher_pod_names():
212194
LOGGER.info("No VMs were migrated.")
213195

214196

215-
def start_vms_with_process(vms, process_name, args):
216-
vms_process_id_dict = {}
217-
for vm in vms:
218-
vms_process_id_dict[vm.name] = start_and_fetch_processid_on_linux_vm(
219-
vm=vm, process_name=process_name, args=args
220-
)
221-
222-
return vms_process_id_dict
223-
224-
225197
def deploy_vms(
226198
vm_prefix,
227199
client,
@@ -261,7 +233,9 @@ def deploy_vms(
261233
vm.delete()
262234

263235
for vm in vms:
264-
vm.wait_deleted()
236+
# Due to the bug - VM may hang in terminating state, need to remove the finalizer from VMI
237+
if not vm.wait_deleted() and is_jira_67515_open():
238+
ResourceEditor(patches={vm.vmi: {"metadata": {"finalizers": []}}}).update()
265239

266240

267241
def verify_at_least_one_vm_migrated(vms, node_before):
@@ -296,10 +270,10 @@ def create_kube_descheduler(admin_client, profiles, profile_customizations):
296270
yield kd
297271

298272

299-
def wait_for_overutilized_soft_taint(node, taint_expected):
273+
def wait_for_overutilized_soft_taint(node, taint_expected, wait_timeout=TIMEOUT_10MIN):
300274
taint_key = f"{DESCHEDULER_SOFT_TAINT_KEY}/overutilized"
301275
sampler = TimeoutSampler(
302-
wait_timeout=TIMEOUT_10MIN,
276+
wait_timeout=wait_timeout,
303277
sleep=TIMEOUT_5SEC,
304278
func=lambda: any(taint_key in taint.values() for taint in node.instance.spec.taints),
305279
)

tests/virt/node/log_verbosity/test_log_virt_launcher.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
from tests.virt.node.log_verbosity.constants import (
88
VIRT_LOG_VERBOSITY_LEVEL_6,
99
)
10+
from tests.virt.utils import is_jira_67515_open
1011
from utilities.constants import MIGRATION_POLICY_VM_LABEL, TIMEOUT_1MIN, TIMEOUT_5SEC
11-
from utilities.infra import is_jira_open
1212
from utilities.virt import (
1313
VirtualMachineForTests,
1414
fedora_vm_body,
@@ -82,7 +82,7 @@ def vm_for_migration_progress_test(
8282
running_vm(vm=vm)
8383
yield vm
8484
# Due to the bug - migration job should be removed before stopping the VM
85-
if is_jira_open(jira_id="CNV-67515"):
85+
if is_jira_67515_open():
8686
clean_up_migration_jobs(client=admin_client, vm=vm)
8787

8888

tests/virt/upgrade/conftest.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
vm_from_template,
1818
wait_for_automatic_vm_migrations,
1919
)
20+
from tests.virt.utils import get_boot_time_for_multiple_vms
2021
from utilities.constants import (
2122
ES_LIVE_MIGRATE_IF_POSSIBLE,
2223
OS_FLAVOR_RHEL,
@@ -311,11 +312,8 @@ def _vm_is_migrateable(vm):
311312

312313

313314
@pytest.fixture(scope="session")
314-
def linux_boot_time_before_upgrade(virt_migratable_vms):
315-
boot_time_dict = {}
316-
for vm in virt_migratable_vms:
317-
boot_time_dict[vm.name] = get_vm_boot_time(vm=vm)
318-
yield boot_time_dict
315+
def linux_boot_time_before_upgrade(vms_for_upgrade):
316+
return get_boot_time_for_multiple_vms(vm_list=vms_for_upgrade)
319317

320318

321319
@pytest.fixture(scope="session")

tests/virt/upgrade/test_upgrade_virt.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,11 @@
1616
)
1717
from tests.virt.upgrade.utils import (
1818
mismatching_src_pvc_names,
19-
verify_linux_boot_time,
2019
verify_run_strategy_vmi_status,
2120
verify_vms_ssh_connectivity,
2221
verify_windows_boot_time,
2322
)
24-
from tests.virt.utils import assert_migration_post_copy_mode
23+
from tests.virt.utils import assert_migration_post_copy_mode, verify_linux_boot_time
2524
from utilities.constants import DATA_SOURCE_NAME, DEPENDENCY_SCOPE_SESSION
2625
from utilities.exceptions import ResourceValueError
2726
from utilities.virt import migrate_vm_and_verify, vm_console_run_commands

tests/virt/upgrade/utils.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,6 @@ def verify_run_strategy_vmi_status(run_strategy_vmi_list):
164164
return run_strategy_vmi_list
165165

166166

167-
def verify_linux_boot_time(vm_list, initial_boot_time):
168-
rebooted_vms = {}
169-
for vm in vm_list:
170-
current_boot_time = get_vm_boot_time(vm=vm)
171-
if initial_boot_time[vm.name] != current_boot_time:
172-
rebooted_vms[vm.name] = {"initial": initial_boot_time[vm.name], "current": current_boot_time}
173-
assert not rebooted_vms, f"Boot time changed for VMs:\n {rebooted_vms}"
174-
175-
176167
def verify_windows_boot_time(windows_vm, initial_boot_time):
177168
current_boot_time = get_vm_boot_time(vm=windows_vm)
178169
assert initial_boot_time == current_boot_time, (

0 commit comments

Comments (0)