ray-project
diff --git a/.buildkite/_images.rayci.yml
Lines changed: 1 addition & 0 deletions b/.buildkite/_images.rayci.yml
Lines changed: 1 addition & 0 deletions
diff --git a/.buildkite/release/build.rayci.yml
Lines changed: 3 additions & 12 deletions b/.buildkite/release/build.rayci.yml
Lines changed: 3 additions & 12 deletions
diff --git a/ci/docker/ray-image.Dockerfile
Lines changed: 18 additions & 0 deletions b/ci/docker/ray-image.Dockerfile
Lines changed: 18 additions & 0 deletions
diff --git a/ci/ray_ci/test_ray_docker_container.py
Lines changed: 3 additions & 0 deletions b/ci/ray_ci/test_ray_docker_container.py
Lines changed: 3 additions & 0 deletions
diff --git a/ci/raydepsets/configs/rayimg.depsets.yaml
Lines changed: 90 additions & 5 deletions b/ci/raydepsets/configs/rayimg.depsets.yaml
Lines changed: 90 additions & 5 deletions
diff --git a/ci/raydepsets/configs/release_compiled_graph_gpu_cu130.depsets.yaml
Lines changed: 0 additions & 20 deletions b/ci/raydepsets/configs/release_compiled_graph_gpu_cu130.depsets.yaml
Lines changed: 0 additions & 20 deletions
diff --git a/ci/raydepsets/configs/release_gpu_cu130.depsets.yaml
Lines changed: 45 additions & 0 deletions b/ci/raydepsets/configs/release_gpu_cu130.depsets.yaml
Lines changed: 45 additions & 0 deletions
@@ -234,6 +234,7 @@ steps:
         - "12.6.3"
         - "12.8.1"
         - "12.9.1"
+        - "13.0.0"
     env:
       PYTHON_VERSION: "{{array.python}}"
       CUDA_VERSION: "{{array.cuda}}"
 
@@ -26,10 +26,7 @@ steps:
         - "3.13"
       cuda:
         - "12.3.2-cudnn9"
-      adjustments:
-        - with:
-            python: "3.12"
-            cuda: "13.0.0-cudnn"
+        - "13.0.0-cudnn"
     env:
       PYTHON_VERSION: "{{array.python}}"
       CUDA_VERSION: "{{array.cuda}}"
@@ -95,17 +92,14 @@ steps:
     array:
       gpu:
         - "cu12.3.2-cudnn9"
+        - "cu13.0.0-cudnn"
       python:
         # This list should be kept in sync with the list of supported Python in
         # release test suite
         - "3.10"
         - "3.11"
         - "3.12"
         - "3.13"
-      adjustments:
-        - with:
-            python: "3.12"
-            gpu: "cu13.0.0-cudnn"
     env:
       PYTHON_VERSION: "{{array.python}}"
       GPU: "{{array.gpu}}"
@@ -156,15 +150,12 @@ steps:
     array:
       gpu:
         - cu12.3.2-cudnn9
+        - cu13.0.0-cudnn
       python:
         - "3.10"
         - "3.11"
         - "3.12"
         - "3.13"
-      adjustments:
-        - with:
-            python: "3.12"
-            gpu: cu13.0.0-cudnn
 
   - name: ray-llm-anyscale-cuda-build
     label: "wanda: ray-llm-anyscale py{{array.python}} {{array.gpu}}"
 
@@ -17,6 +17,7 @@ FROM ${RAY_WHEEL_IMAGE} AS wheel-source
 FROM ${BASE_IMAGE}
 
 ARG IMAGE_TYPE=ray
+ARG PLATFORM=cpu
 ARG RAY_COMMIT=unknown-commit
 ARG RAY_VERSION=3.0.0.dev0
 
@@ -47,10 +48,27 @@ else
   RAY_EXTRAS="all"
 fi
 
+# TODO(cu130): ray[all]'s cgraph extra hard-pins cupy-cuda12x, so this install
+# always pulls the CUDA-12 build even on cu130 images (no PEP 508 marker exists
+# to select cupy by CUDA version). Until the cgraph extra can resolve cupy per
+# CUDA runtime (or cupy ships a unified package), we patch it up with the
+# uninstall/reinstall swap below. Drop that swap once this install can pick the
+# right cupy directly.
 $HOME/anaconda3/bin/pip --no-cache-dir install \
     -c /home/ray/requirements_compiled.txt \
     "${WHEEL_FILE}[${RAY_EXTRAS}]"
 
+# ray[all]'s cgraph extra hard-pins cupy-cuda12x (a CUDA-12 build), but cu130
+# images ship a CUDA-13 runtime where that build is broken. Swap it for the
+# matching CUDA-13 build. cupy-cuda12x and cupy-cuda13x both own the top-level
+# `cupy` package and cannot coexist, so this is an uninstall-then-install.
+# Scoped to IMAGE_TYPE=ray (covers ray + ray-extra); ray-llm flows through this
+# same Dockerfile but manages cupy via its own llm locks, so leave it untouched.
+if [[ "${IMAGE_TYPE}" == "ray" && "${PLATFORM}" == cu13* ]]; then
+    $HOME/anaconda3/bin/pip --no-cache-dir uninstall -y cupy-cuda12x
+    $HOME/anaconda3/bin/pip --no-cache-dir install "cupy-cuda13x==13.6.0"
+fi
+
 $HOME/anaconda3/bin/pip freeze > /home/ray/pip-freeze.txt
 
 echo "Ray version: $($HOME/anaconda3/bin/python -c 'import ray; print(ray.__version__)')"
 
@@ -469,6 +469,9 @@ def test_get_platform_tag(self) -> None:
         container = RayDockerContainer(v, "cu12.9.1-cudnn", "ray")
         assert container._get_platform_tag() == "-cu129"
 
+        container = RayDockerContainer(v, "cu13.0.0-cudnn", "ray")
+        assert container._get_platform_tag() == "-cu130"
+
     def test_should_upload(self) -> None:
         v = DEFAULT_PYTHON_TAG_VERSION
         test_cases = [
 
@@ -1,21 +1,49 @@
 build_arg_sets:
+  # Regular (non-GPU) build args: no CUDA_CODE, so depsets that template
+  # ${CUDA_CODE} cannot accidentally pull a CUDA index for a CPU Python build.
   py310:
     PYTHON_VERSION: "3.10"
     PYTHON_SHORT: "310"
   py311:
     PYTHON_VERSION: "3.11"
     PYTHON_SHORT: "311"
-    CUDA_CODE: cu128
   py312:
     PYTHON_VERSION: "3.12"
     PYTHON_SHORT: "312"
-    CUDA_CODE: cu130
   py313:
     PYTHON_VERSION: "3.13"
     PYTHON_SHORT: "313"
   py314:
     PYTHON_VERSION: "3.14"
     PYTHON_SHORT: "314"
+  py310_cu128:
+    PYTHON_VERSION: "3.10"
+    PYTHON_SHORT: "310"
+    CUDA_CODE: cu128
+  py311_cu128:
+    PYTHON_VERSION: "3.11"
+    PYTHON_SHORT: "311"
+    CUDA_CODE: cu128
+  py310_cu130:
+    PYTHON_VERSION: "3.10"
+    PYTHON_SHORT: "310"
+    CUDA_CODE: cu130
+  py311_cu130:
+    PYTHON_VERSION: "3.11"
+    PYTHON_SHORT: "311"
+    CUDA_CODE: cu130
+  py312_cu130:
+    PYTHON_VERSION: "3.12"
+    PYTHON_SHORT: "312"
+    CUDA_CODE: cu130
+  py313_cu130:
+    PYTHON_VERSION: "3.13"
+    PYTHON_SHORT: "313"
+    CUDA_CODE: cu130
+  py314_cu130:
+    PYTHON_VERSION: "3.14"
+    PYTHON_SHORT: "314"
+    CUDA_CODE: cu130
 
 depsets:
   - name: ray_img_depset_${PYTHON_SHORT}
@@ -39,6 +67,29 @@ depsets:
       - ci/raydepsets/pre_hooks/build-placeholder-wheel.sh
       - ci/raydepsets/pre_hooks/remove-compiled-headers.sh ${PYTHON_VERSION}
 
+  # cu130 variant of the core ray image deps: ray[all] pins cupy-cuda12x (a
+  # CUDA 12.x build) unconditionally, and that build is broken on the cu130
+  # CUDA-13 runtime. This depset relaxes the cupy-cuda12x pin so the cu130 gpu
+  # base layer can pin the matching cupy-cuda13x build instead.
+  # TODO(cu130): this relax exists only because the cgraph extra can't select
+  # cupy by CUDA version (no PEP 508 marker for it) — same root cause as the
+  # cupy swap in ci/docker/ray-image.Dockerfile. Drop this relax (and the gpu
+  # base's requirements_byod_gpu_cu130.in) once cupy resolves correctly per CUDA
+  # runtime, so the cu130 chain no longer needs a special-cased core image lock.
+  - name: ray_img_cu130_${PYTHON_SHORT}
+    operation: relax
+    source_depset: ray_img_depset_${PYTHON_SHORT}
+    packages:
+      - cupy-cuda12x
+    output: python/deplocks/ray_img/ray_img_cu130_py${PYTHON_SHORT}.lock
+    # py3.10-3.13 only: cupy-cuda13x==13.6.0 has no cp314 wheel, and no cu130
+    # gpu release test / published image targets py3.14.
+    build_arg_sets:
+      - py310_cu130
+      - py311_cu130
+      - py312_cu130
+      - py313_cu130
+
   - name: ray_base_extra_testdeps_${PYTHON_SHORT}
     operation: expand
     requirements:
@@ -76,8 +127,8 @@ depsets:
       - --python-version=${PYTHON_VERSION}
       - --python-platform=linux
     build_arg_sets:
-      - py311
-      - py312
+      - py311_cu128
+      - py312_cu130
 
   - name: ray_base_extra_testdeps_gpu_${PYTHON_SHORT}
     operation: expand
@@ -96,8 +147,42 @@ depsets:
       - --python-version=${PYTHON_VERSION}
       - --python-platform=linux
     build_arg_sets:
-      - py310
+      - py310_cu128
+
+  - name: ray_base_extra_testdeps_gpu_${CUDA_CODE}_${PYTHON_SHORT}
+    operation: expand
+    requirements:
+      # cu130 gpu base: the cupy-cuda12x-relaxed core image + the matching
+      # cupy-cuda13x==13.6.0 build (requirements_byod_gpu_cu130.in) + base
+      # layers. torch is layered per-test via python_depset (which expands this
+      # base, so it inherits cupy-cuda13x). The published rayproject/ray cu130
+      # image performs the same 12x->13x swap at Docker build time, so all
+      # layers agree on cupy-cuda13x==13.6.0.
+      - release/ray_release/byod/requirements_byod_gpu_cu130.in
+      - docker/base-deps/requirements.in
+      - docker/base-extra/requirements.in
+    constraints:
+      - /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
+    depsets:
+      - ray_img_cu130_${PYTHON_SHORT}
+    output: python/deplocks/base_extra_testdeps/ray-gpu-${CUDA_CODE}-base_extra_testdeps_py${PYTHON_VERSION}.lock
+    append_flags:
+      - --index https://download.pytorch.org/whl/${CUDA_CODE}
+      - --unsafe-package ray
+      - --python-version=${PYTHON_VERSION}
+      - --python-platform=linux
+    # py3.10-3.13 only: cupy-cuda13x==13.6.0 has no cp314 wheel, and no cu130
+    # gpu release test / published image targets py3.14.
+    build_arg_sets:
+      - py310_cu130
+      - py311_cu130
+      - py312_cu130
+      - py313_cu130
 
+  # ray-ml only ships a CUDA 12.x (cu128) image and is Python 3.10 only
+  # (requirements_ml_byod_<ver>.in exists only for 3.10). The single lock below
+  # is consumed by the ray-ml base-extra-testdeps image build (byod.Dockerfile,
+  # IMAGE_TYPE=ray-ml), which reads the non-cuda-coded filename.
   - name: ray_ml_base_extra_testdeps_cuda_${PYTHON_SHORT}
     operation: expand
     requirements:
 
@@ -0,0 +1,45 @@
+build_arg_sets:
+  py310:
+    PYTHON_VERSION: "3.10"
+    PYTHON_SHORT: "310"
+  py311:
+    PYTHON_VERSION: "3.11"
+    PYTHON_SHORT: "311"
+  py312:
+    PYTHON_VERSION: "3.12"
+    PYTHON_SHORT: "312"
+  py313:
+    PYTHON_VERSION: "3.13"
+    PYTHON_SHORT: "313"
+
+depsets:
+  # Shared torch layer for cu130 GPU release tests. Expands the gpu-cu130 base
+  # image deps with a CUDA 13.x torch build (torch is not in ray[all], so the
+  # core ray image lacks it). That base has the cupy-cuda12x pin relaxed in
+  # rayimg.depsets.yaml and pins cupy-cuda13x==13.6.0 through
+  # requirements_byod_gpu_cu130.in. Consumed via `python_depset` by
+  # compiled_graphs_GPU_cu130 and jobs_check_cuda_available (cu130 variants).
+  # Because the base lock already pins cupy-cuda13x==13.6.0, installing this
+  # full closure resolves to the same cupy as the published image's
+  # Docker-build cupy swap, so no post_build_script is needed.
+  - name: gpu_cu130_py${PYTHON_SHORT}
+    operation: expand
+    depsets:
+      - ray_base_extra_testdeps_gpu_cu130_${PYTHON_SHORT}
+    requirements:
+      - release/ray_release/byod/requirements_gpu_cu130.in
+    # Constrain to the gpu-cu130 base image lock so this torch layer stays a
+    # consistent superset of the image it is installed onto (e.g. cupy-cuda13x
+    # matches the base instead of floating to latest).
+    constraints:
+      - python/deplocks/base_extra_testdeps/ray-gpu-cu130-base_extra_testdeps_py${PYTHON_VERSION}.lock
+    output: release/ray_release/byod/gpu_cu130_py${PYTHON_VERSION}.lock
+    append_flags:
+      - --index https://download.pytorch.org/whl/cu130
+      - --python-version=${PYTHON_VERSION}
+      - --unsafe-package ray
+      - --python-platform=linux
+    build_arg_sets:
+      - py310
+      - py311
+      - py312
+      - py313