diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index 5d3410f5be..c5b09d6f67 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -477,8 +477,17 @@ def get_upcoming_maintenance(lkp: util.Lookup) -> Dict[str, Tuple[str, datetime] upc_maint_map = {} for node, inst in lkp.instances().items(): - if inst.resource_status.upcoming_maintenance: - upc_maint_map[node + "_maintenance"] = (node, inst.resource_status.upcoming_maintenance.window_start_time) + um = inst.resource_status.upcoming_maintenance + if not um: + continue + if um.type != "SCHEDULED": + log.warning(f"Maintenance event: can not handle non-scheduled maintenance of type {um.type} for node {node=}, skipping") + continue + if not um.window_start_time: + log.error(f"Maintenance event: {node=} upcoming scheduled maintenance doesn't have start time, skipping") + continue + + upc_maint_map[node + "_maintenance"] = (node, um.window_start_time) return upc_maint_map diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py index d6b42f82e5..435cfc3fb9 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py @@ -429,20 +429,33 @@ def test_parse_gcp_timestamp(got: str, want: datetime): [ (None, None), (dict( + type="Might", windowStartTime="2025-01-15T00:00:00Z", somethingToIgnore="past failures", - ), 
UpcomingMaintenance(window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC))), + ), UpcomingMaintenance( + type="Might", + window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC))), (dict( + type="And", startTimeWindow=dict( earliest="2025-01-15T00:00:00Z"), somethingToIgnore="past failures", - ), UpcomingMaintenance(window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC))), + ), UpcomingMaintenance( + type="And", + window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC))), (dict( + type="Magic", windowStartTime="2025-01-15T00:00:00Z", startTimeWindow=dict( earliest="2025-01-25T00:00:00Z"), # ignored somethingToIgnore="past failures", - ), UpcomingMaintenance(window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC))), + ), UpcomingMaintenance( + type="Magic", + window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC))), + ( + dict(type="III"), + UpcomingMaintenance(type="III", window_start_time=None), + ), ]) def tests_parse_UpcomingMaintenance_OK(got: dict, want: Optional[UpcomingMaintenance]): assert UpcomingMaintenance.from_json(got) == want @@ -453,8 +466,8 @@ def tests_parse_UpcomingMaintenance_OK(got: dict, want: Optional[UpcomingMainten [ {}, dict( - windowStartTime=dict( - earliest="2025-01-15T00:00:00Z")), + # no type, + windowStartTime=dict(earliest="2025-01-15T00:00:00Z")), ]) def tests_parse_UpcomingMaintenance_FAIL(got: dict): with pytest.raises(ValueError): @@ -483,10 +496,13 @@ def tests_parse_UpcomingMaintenance_FAIL(got: dict): upcoming_maintenance=None)), (dict( physicalHost="/aaa/bbb/ccc", - upcomingMaintenance=dict(windowStartTime="2025-01-15T00:00:00Z")), + upcomingMaintenance=dict( + type="Lilac", + windowStartTime="2025-01-15T00:00:00Z")), InstanceResourceStatus( physical_host="/aaa/bbb/ccc", upcoming_maintenance=UpcomingMaintenance( + type="Lilac", window_start_time=datetime(2025, 1, 15, 0, 0, tzinfo=UTC)))), ]) def test_parse_InstanceResourceStatus(got: dict, want: Optional[InstanceResourceStatus]): diff --git 
@dataclass(frozen=True)
class UpcomingMaintenance:
    """Upcoming maintenance event parsed from an `upcomingMaintenance` JSON object.

    Instances are immutable (frozen dataclass) and compare by value.
    """

    # Value of the "type" key in the source JSON, stored verbatim.
    type: str
    # Start of the maintenance window; None when the JSON carries neither
    # "windowStartTime" nor "startTimeWindow".
    window_start_time: Optional[datetime]

    @classmethod
    def from_json(cls, jo: Optional[dict]) -> Optional["UpcomingMaintenance"]:
        """Build an instance from a decoded `upcomingMaintenance` JSON object.

        Args:
            jo: The decoded JSON object, or None when the field is absent.

        Returns:
            None if `jo` is None; otherwise an UpcomingMaintenance whose
            `window_start_time` comes from "windowStartTime" when present,
            else from "startTimeWindow"."earliest", else is None.

        Raises:
            ValueError: If "type" is missing or the timestamp cannot be
                extracted/parsed. The underlying error is chained as the cause.
        """
        if jo is None:
            return None
        try:
            # Local name deliberately differs from the field so the builtin
            # `type` is not shadowed inside this method.
            maintenance_type = jo["type"]
            # "windowStartTime" wins when both representations are present.
            if "windowStartTime" in jo:
                start = parse_gcp_timestamp(jo["windowStartTime"])
            elif "startTimeWindow" in jo:
                start = parse_gcp_timestamp(jo["startTimeWindow"]["earliest"])
            else:
                start = None
        except Exception as e:
            # Only ordinary errors are translated; KeyboardInterrupt and
            # SystemExit must propagate instead of being reported as a
            # malformed payload.
            raise ValueError(f"Unexpected format for upcomingMaintenance: {jo}") from e
        return cls(type=maintenance_type, window_start_time=start)