|
18 | 18 |
|
19 | 19 | from iris.cluster.providers.gcp.controller import GcpControllerProvider |
20 | 20 | from iris.cluster.providers.gcp.fake import InMemoryGcpService |
21 | | -from iris.cluster.providers.gcp.handles import GcpVmSliceHandle, _build_gce_resource_name |
| 21 | +from iris.cluster.providers.gcp.handles import GcpSliceHandle, GcpVmSliceHandle, _build_gce_resource_name |
22 | 22 | from iris.cluster.providers.remote_exec import DirectSshRemoteExec, GceRemoteExec, GcloudRemoteExec |
23 | 23 | from iris.cluster.providers.gcp.workers import ( |
24 | 24 | GcpWorkerProvider, |
| 25 | + RESERVED_TPU_CLOUD_READY_TIMEOUT, |
| 26 | + _run_tpu_bootstrap, |
25 | 27 | _run_vm_slice_bootstrap, |
26 | 28 | _validate_slice_config, |
27 | 29 | ) |
@@ -929,6 +931,49 @@ def test_gcp_tpu_slice_os_login_prefers_external_ip_for_direct_ssh(): |
929 | 931 | assert status.workers[0]._remote_exec.host == "34.1.2.3" |
930 | 932 |
|
931 | 933 |
|
| 934 | +# ============================================================================= |
| 935 | +# Section 6: TPU Slice Bootstrap Tests |
| 936 | +# ============================================================================= |
| 937 | + |
| 938 | + |
| 939 | +class _ImmediateDeadline: |
| 940 | + def expired(self) -> bool: |
| 941 | + return True |
| 942 | + |
| 943 | + |
def test_reserved_tpu_bootstrap_uses_extended_cloud_timeout():
    """Reserved TPU bootstrap uses the longer queued-resource timeout.

    ``Deadline.from_now`` (as referenced from the workers module) is patched
    with a fake that records every requested duration in seconds and returns
    an already-expired deadline. The bootstrap call is then expected to raise
    ``InfraError`` with a message matching
    ``within {RESERVED_TPU_CLOUD_READY_TIMEOUT}s``, having requested exactly
    one deadline of that length.
    """
    gcp_service = InMemoryGcpService(mode=ServiceMode.DRY_RUN, project_id="test-project")
    handle = GcpSliceHandle(
        _slice_id="test-reserved-tpu",
        _zone="us-central2-b",
        _project_id="test-project",
        _labels={},
        _created_at=Timestamp.now(),
        _label_prefix="iris",
        _accelerator_variant="v4-32",
        _gcp_service=gcp_service,
        _ssh_config=config_pb2.SshConfig(),
        _bootstrapping=True,
        # Queued-resource handle; presumably this is what selects the
        # reserved-TPU timeout in _run_tpu_bootstrap -- confirm against workers.py.
        _is_queued_resource=True,
    )
    worker_config = config_pb2.WorkerConfig(port=10001)
    seen_deadlines = []

    def _fake_deadline_from_now(duration):
        # Record the requested timeout, then hand back a deadline that has
        # already expired.
        seen_deadlines.append(duration.to_seconds())
        return _ImmediateDeadline()

    deadline_patch = unittest.mock.patch(
        "iris.cluster.providers.gcp.workers.Deadline.from_now",
        side_effect=_fake_deadline_from_now,
    )
    with deadline_patch, pytest.raises(InfraError, match=rf"within {RESERVED_TPU_CLOUD_READY_TIMEOUT}s"):
        _run_tpu_bootstrap(gcp_service, "test-project", handle, worker_config)

    # Exactly one deadline was created, and it used the reserved-TPU timeout.
    assert seen_deadlines == [RESERVED_TPU_CLOUD_READY_TIMEOUT]
| 975 | + |
| 976 | + |
932 | 977 | # ============================================================================= |
933 | 978 | # Section 6: VM Slice Bootstrap Tests |
934 | 979 | # |
|
0 commit comments