diff --git a/lib/iris/src/iris/cluster/platform/base.py b/lib/iris/src/iris/cluster/platform/base.py index ccb94d09f5..03264d7c3d 100644 --- a/lib/iris/src/iris/cluster/platform/base.py +++ b/lib/iris/src/iris/cluster/platform/base.py @@ -30,9 +30,11 @@ import datetime import logging +import os import socket import threading import uuid +from pathlib import Path from collections.abc import Callable from contextlib import AbstractContextManager from dataclasses import dataclass, field @@ -95,9 +97,17 @@ def find_free_port(start: int = -1) -> int: s.bind(("", 0)) return s.getsockname()[1] for port in range(start, start + 1000): + lock = Path(f"/tmp/iris/port_{port}") + try: + os.kill(int(lock.read_text()), 0) + continue # port locked by a live process + except (FileNotFoundError, ValueError, ProcessLookupError, PermissionError): + pass with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: try: s.bind(("", port)) + lock.parent.mkdir(parents=True, exist_ok=True) + lock.write_text(str(os.getpid())) return port except OSError: continue diff --git a/lib/iris/src/iris/cluster/platform/coreweave.py b/lib/iris/src/iris/cluster/platform/coreweave.py index 3bd442b37b..0f81cc6b4b 100644 --- a/lib/iris/src/iris/cluster/platform/coreweave.py +++ b/lib/iris/src/iris/cluster/platform/coreweave.py @@ -1500,7 +1500,7 @@ def _coreweave_tunnel( exits (e.g. konnectivity timeout), it is relaunched automatically. """ if local_port is None: - local_port = find_free_port() + local_port = find_free_port(start=10000) proc: subprocess.Popen | None = None diff --git a/lib/iris/src/iris/cluster/platform/gcp.py b/lib/iris/src/iris/cluster/platform/gcp.py index 082d1a1caa..904b95c19b 100644 --- a/lib/iris/src/iris/cluster/platform/gcp.py +++ b/lib/iris/src/iris/cluster/platform/gcp.py @@ -1609,7 +1609,7 @@ def _gcp_tunnel( Picks a free port automatically if none is specified. """ if local_port is None: - local_port = find_free_port() + local_port = find_free_port(start=10000) labels = Labels(label_prefix) label_filter = f"labels.{labels.iris_controller}=true AND status=RUNNING"