From 629452ea050e2f57e2243172828e385b2a48a08f Mon Sep 17 00:00:00 2001 From: Evelynn-V Date: Tue, 3 Feb 2026 15:34:47 +0800 Subject: [PATCH 1/5] add register_collective_backend api for customized collective libs Signed-off-by: Evelynn-V --- python/ray/util/collective/__init__.py | 6 ++ .../ray/util/collective/backend_registry.py | 47 +++++++++++++++ python/ray/util/collective/collective.py | 60 ++++++++++--------- .../collective_group/base_collective_group.py | 6 ++ .../collective_group/nccl_collective_group.py | 27 ++++++++- .../torch_gloo_collective_group.py | 11 +++- .../examples/gloo_allreduce_example.py | 55 +++++++++++++++++ 7 files changed, 181 insertions(+), 31 deletions(-) create mode 100644 python/ray/util/collective/backend_registry.py create mode 100644 python/ray/util/collective/examples/gloo_allreduce_example.py diff --git a/python/ray/util/collective/__init__.py b/python/ray/util/collective/__init__.py index 09423ad37c11..68bf95031031 100644 --- a/python/ray/util/collective/__init__.py +++ b/python/ray/util/collective/__init__.py @@ -1,3 +1,7 @@ +from ray.util.collective.backend_registry import ( + get_backend_registry, + register_collective_backend, +) from ray.util.collective.collective import ( allgather, allgather_multigpu, @@ -50,4 +54,6 @@ "recv", "recv_multigpu", "get_group_handle", + "get_backend_registry", + "register_collective_backend", ] diff --git a/python/ray/util/collective/backend_registry.py b/python/ray/util/collective/backend_registry.py new file mode 100644 index 000000000000..7c8e44f11a14 --- /dev/null +++ b/python/ray/util/collective/backend_registry.py @@ -0,0 +1,47 @@ +from typing import Dict, Type + +from .collective_group.base_collective_group import BaseGroup + + +class BackendRegistry: + _instance = None + _map: Dict[str, Type[BaseGroup]] = {} + + def __new__(cls): + if cls._instance is None: + cls._instance = super(BackendRegistry, cls).__new__(cls) + return cls._instance + + def put(self, name: str, group_cls: Type[BaseGroup]) -> None: + if not issubclass(group_cls, BaseGroup): + raise TypeError(f"{group_cls} is not a subclass of BaseGroup") + if name.upper() in self._map: + raise ValueError(f"Backend {name} already registered") + self._map[name.upper()] = group_cls + + def get(self, name: str) -> Type[BaseGroup]: + name = name.upper() + if name not in self._map: + raise ValueError(f"Backend {name} not registered") + return self._map[name] + + def check(self, name: str) -> bool: + try: + cls = self.get(name) + return cls.check_backend_availability() + except (ValueError, AttributeError): + return False + + def list_backends(self) -> list: + return list(self._map.keys()) + + +_global_registry = BackendRegistry() + + +def register_collective_backend(name: str, group_cls: Type[BaseGroup]) -> None: + _global_registry.put(name, group_cls) + + +def get_backend_registry() -> BackendRegistry: + return _global_registry diff --git a/python/ray/util/collective/collective.py b/python/ray/util/collective/collective.py index 8803da0219eb..cc2cbed0a00e 100644 --- a/python/ray/util/collective/collective.py +++ b/python/ray/util/collective/collective.py @@ -13,6 +13,10 @@ import ray.experimental.internal_kv as _internal_kv from . 
import types from ray._common.network_utils import find_free_port, is_ipv6 +from ray.util.collective.backend_registry import ( + get_backend_registry, + register_collective_backend, +) from ray.util.collective.collective_group.torch_gloo_collective_group import ( get_master_address_metadata_key as _get_master_addr_key, ) @@ -38,6 +42,11 @@ except ImportError: _TORCH_DISTRIBUTED_AVAILABLE = False +if _NCCL_AVAILABLE: + register_collective_backend("NCCL", NCCLGroup) +if _TORCH_DISTRIBUTED_AVAILABLE: + register_collective_backend("GLOO", TorchGLOOGroup) + def nccl_available(): global _LOG_NCCL_WARNING @@ -57,10 +66,6 @@ def gloo_available(): return _TORCH_DISTRIBUTED_AVAILABLE -def torch_distributed_available(): - return _TORCH_DISTRIBUTED_AVAILABLE - - def get_address_and_port() -> Tuple[str, int]: """Returns the IP address and a free port on this node.""" addr = ray.util.get_node_ip_address() @@ -78,18 +83,25 @@ class GroupManager(object): def __init__(self): self._name_group_map = {} + self._registry = get_backend_registry() def create_collective_group( - self, backend, world_size, rank, group_name, gloo_timeout + self, backend, world_size, rank, group_name, gloo_timeout=None ): """The entry to create new collective groups in the manager. Put the registration and the group information into the manager metadata as well. """ - backend = types.Backend(backend) - if backend == types.Backend.GLOO: - # Rendezvous: ensure a MASTER_ADDR:MASTER_PORT is published in internal_kv. + backend = backend.upper() + backend_cls = self._registry.get(backend) + + if not backend_cls.check_backend_availability(): + raise RuntimeError( + f"Backend {backend} is not available. Please check the installation." + ) + + if backend == "GLOO": metadata_key = _get_master_addr_key(group_name) if rank == 0: addr, port = get_address_and_port() @@ -112,13 +124,9 @@ def create_collective_group( logger.debug( "Creating torch.distributed GLOO group: '{}'...".format(group_name) ) - g = TorchGLOOGroup(world_size, rank, group_name, gloo_timeout) - elif backend == types.Backend.NCCL: - _check_backend_availability(backend) - logger.debug("Creating NCCL group: '{}'...".format(group_name)) - g = NCCLGroup(world_size, rank, group_name) + g = backend_cls(world_size, rank, group_name, gloo_timeout) else: - raise RuntimeError(f"Unexpected backend: {backend}") + g = backend_cls(world_size, rank, group_name) self._name_group_map[group_name] = g return self._name_group_map[group_name] @@ -188,10 +196,15 @@ def init_collective_group( """ _check_inside_actor() backend = types.Backend(backend) - _check_backend_availability(backend) + global _group_mgr global _group_mgr_lock + backend_cls = _group_mgr._registry.get(backend) + if backend_cls is None: + raise ValueError("Backend '{}' is not supported.".format(backend)) + if not backend_cls.check_backend_availability(): + raise RuntimeError("Backend '{}' is not available.".format(backend)) # TODO(Hao): implement a group auto-counter. 
if not group_name: raise ValueError("group_name '{}' needs to be a string.".format(group_name)) @@ -231,7 +244,11 @@ def create_collective_group( None """ backend = types.Backend(backend) - _check_backend_availability(backend) + backend_cls = _group_mgr._registry.get(backend) + if backend_cls is None: + raise ValueError("Backend '{}' is not supported.".format(backend)) + if not backend_cls.check_backend_availability(): + raise RuntimeError("Backend '{}' is not available.".format(backend)) name = "info_" + group_name try: @@ -805,17 +822,6 @@ def _check_single_tensor_input(tensor): ) -def _check_backend_availability(backend: types.Backend): - """Check whether the backend is available.""" - if backend == types.Backend.GLOO: - # Now we have deprecated pygloo, and use torch_gloo in all cases. - if not torch_distributed_available(): - raise RuntimeError("torch.distributed is not available.") - elif backend == types.Backend.NCCL: - if not nccl_available(): - raise RuntimeError("NCCL is not available.") - - def _check_inside_actor(): """Check if currently it is inside a Ray actor/task.""" worker = ray._private.worker.global_worker diff --git a/python/ray/util/collective/collective_group/base_collective_group.py b/python/ray/util/collective/collective_group/base_collective_group.py index eff07fb16c67..0ce3911efbb1 100644 --- a/python/ray/util/collective/collective_group/base_collective_group.py +++ b/python/ray/util/collective/collective_group/base_collective_group.py @@ -50,6 +50,12 @@ def backend(cls): """The backend of this collective group.""" raise NotImplementedError() + @classmethod + @abstractmethod + def check_backend_availability(cls) -> bool: + """Check if the backend is available.""" + raise NotImplementedError() + @abstractmethod def allreduce(self, tensor, allreduce_options=AllReduceOptions()): raise NotImplementedError() diff --git a/python/ray/util/collective/collective_group/nccl_collective_group.py b/python/ray/util/collective/collective_group/nccl_collective_group.py index 07e3da29686a..2c5987d79389 100644 --- a/python/ray/util/collective/collective_group/nccl_collective_group.py +++ b/python/ray/util/collective/collective_group/nccl_collective_group.py @@ -2,9 +2,6 @@ import logging import time -import cupy -import torch - import ray from ray.util.collective.collective_group import nccl_util from ray.util.collective.collective_group.base_collective_group import BaseGroup @@ -25,6 +22,18 @@ logger = logging.getLogger(__name__) +global _LOG_NCCL_WARNING, _NCCL_AVAILABLE + +try: + import cupy + import torch + + _NCCL_AVAILABLE = True + _LOG_NCCL_WARNING = False +except ImportError: + _NCCL_AVAILABLE = False + _LOG_NCCL_WARNING = True + class Rendezvous: """A rendezvous class for different actor/task processes to meet. @@ -165,6 +174,18 @@ def destroy_group(self): def backend(cls): return Backend.NCCL + @classmethod + def check_backend_availability(cls) -> bool: + global _LOG_NCCL_WARNING, _NCCL_AVAILABLE + if ray.get_gpu_ids() and _LOG_NCCL_WARNING: + logger.warning( + "NCCL seems unavailable. Please install Cupy " + "following the guide at: " + "https://docs.cupy.dev/en/stable/install.html." + ) + _LOG_NCCL_WARNING = False + return _NCCL_AVAILABLE + def allreduce(self, tensors, allreduce_options=AllReduceOptions()): """AllReduce tensors across the collective group following options. 
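A note before the next file: the lazy-import pattern above (a try/except around the cupy/torch imports plus a class-level check_backend_availability) is the contract the new registry expects from any backend. A minimal sketch of an out-of-tree backend following it; the mycclx module and MyCCLXGroup class are hypothetical, used only for illustration:

from ray.util.collective.collective_group.base_collective_group import BaseGroup

try:
    import mycclx  # hypothetical third-party collective library

    _MYCCLX_AVAILABLE = True
except ImportError:
    _MYCCLX_AVAILABLE = False


class MyCCLXGroup(BaseGroup):
    # GroupManager instantiates non-GLOO backends as
    # backend_cls(world_size, rank, group_name), so a custom group keeps
    # BaseGroup's (world_size, rank, group_name) constructor.

    @classmethod
    def backend(cls):
        return "MYCCLX"

    @classmethod
    def check_backend_availability(cls) -> bool:
        # Probed by BackendRegistry.check() and by GroupManager before the
        # group class is instantiated, so a missing dependency surfaces as
        # a clean RuntimeError instead of an ImportError at group creation.
        return _MYCCLX_AVAILABLE

(The remaining abstract collective methods — allreduce, broadcast, and so on — would be implemented on top of mycclx; they are omitted here.)
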
diff --git a/python/ray/util/collective/collective_group/torch_gloo_collective_group.py b/python/ray/util/collective/collective_group/torch_gloo_collective_group.py index cf06728739c3..96068108ac3e 100644 --- a/python/ray/util/collective/collective_group/torch_gloo_collective_group.py +++ b/python/ray/util/collective/collective_group/torch_gloo_collective_group.py @@ -3,7 +3,6 @@ import numpy as np import torch -import torch.distributed as dist import ray.experimental.internal_kv as internal_kv from ray.util.collective.collective_group.base_collective_group import BaseGroup @@ -23,6 +22,12 @@ if TYPE_CHECKING: import torch +try: + import torch.distributed as dist + + _TORCH_DISTRIBUTED_AVAILABLE = True +except ImportError: + _TORCH_DISTRIBUTED_AVAILABLE = False TORCH_REDUCE_OP_MAP = { ReduceOp.SUM: dist.ReduceOp.SUM, @@ -108,6 +113,10 @@ def backend(cls): """The backend of this collective group.""" return Backend.GLOO + @classmethod + def check_backend_availability(cls) -> bool: + return _TORCH_DISTRIBUTED_AVAILABLE + def _check_tensor_input(self, tensor: List["torch.Tensor"]) -> "torch.Tensor": """ray.util.collective wraps tensor arguments in a list. Accept a single torch.Tensor or numpy.ndarray and unwrap/convert it. diff --git a/python/ray/util/collective/examples/gloo_allreduce_example.py b/python/ray/util/collective/examples/gloo_allreduce_example.py new file mode 100644 index 000000000000..cee7d05a3e94 --- /dev/null +++ b/python/ray/util/collective/examples/gloo_allreduce_example.py @@ -0,0 +1,55 @@ +import torch + +import ray +from ray.util.collective import ( + allreduce, + create_collective_group, + init_collective_group, +) +from ray.util.collective.backend_registry import get_backend_registry +from ray.util.collective.types import Backend, ReduceOp + + +def test_gloo_via_registry(): + ray.init() + + registry = get_backend_registry() + assert "GLOO" in registry.list_backends() + assert registry.check("GLOO") + + @ray.remote + class Worker: + def __init__(self, rank): + self.rank = rank + self.tensor = None + + def setup(self, world_size): + init_collective_group( + world_size=world_size, + rank=self.rank, + backend=Backend.GLOO, + group_name="default", + gloo_timeout=30000, + ) + + def compute(self): + self.tensor = torch.tensor([self.rank + 1], dtype=torch.float32) + allreduce(self.tensor, op=ReduceOp.SUM) + return self.tensor.item() + + actors = [Worker.remote(rank=i) for i in range(2)] + create_collective_group( + actors=actors, + world_size=2, + ranks=[0, 1], + backend=Backend.GLOO, + group_name="default", + gloo_timeout=30000, + ) + + ray.get([a.setup.remote(2) for a in actors]) + results = ray.get([a.compute.remote() for a in actors]) + + assert results == [3.0, 3.0], f"Expected [3.0, 3.0], got {results}" + + ray.shutdown() From 1761527ba084d239dd2b2a1617693fcd9885ae05 Mon Sep 17 00:00:00 2001 From: Evelynn-V Date: Thu, 5 Feb 2026 15:20:40 +0800 Subject: [PATCH 2/5] Fix the review comments and add the NCCL test Signed-off-by: Evelynn-V --- ci/lint/pydoclint-baseline.txt | 1 - .../ray/util/collective/backend_registry.py | 3 +- python/ray/util/collective/collective.py | 10 +--- .../collective_group/nccl_collective_group.py | 3 +- .../torch_gloo_collective_group.py | 17 +++--- ....py => gloo_allreduce_register_example.py} | 6 +-- .../nccl_allreduce_register_example.py | 54 +++++++++++++++++++ python/ray/util/collective/types.py | 21 -------- 8 files changed, 70 insertions(+), 45 deletions(-) rename python/ray/util/collective/examples/{gloo_allreduce_example.py => 
gloo_allreduce_register_example.py} (91%) create mode 100644 python/ray/util/collective/examples/nccl_allreduce_register_example.py diff --git a/ci/lint/pydoclint-baseline.txt b/ci/lint/pydoclint-baseline.txt index 683e71488691..4eb5bed0b370 100644 --- a/ci/lint/pydoclint-baseline.txt +++ b/ci/lint/pydoclint-baseline.txt @@ -2281,7 +2281,6 @@ python/ray/util/client/worker.py -------------------- python/ray/util/collective/collective.py DOC101: Function `init_collective_group`: Docstring contains fewer arguments than in function signature. - DOC107: Function `init_collective_group`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints DOC103: Function `init_collective_group`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [gloo_timeout: int]. DOC202: Function `init_collective_group` has a return section in docstring, but there are no return statements or annotations DOC101: Function `create_collective_group`: Docstring contains fewer arguments than in function signature. diff --git a/python/ray/util/collective/backend_registry.py b/python/ray/util/collective/backend_registry.py index 7c8e44f11a14..3e11f3f1d2a6 100644 --- a/python/ray/util/collective/backend_registry.py +++ b/python/ray/util/collective/backend_registry.py @@ -5,11 +5,12 @@ class BackendRegistry: _instance = None - _map: Dict[str, Type[BaseGroup]] = {} + _map: Dict[str, Type[BaseGroup]] def __new__(cls): if cls._instance is None: cls._instance = super(BackendRegistry, cls).__new__(cls) + cls._instance._map = {} return cls._instance def put(self, name: str, group_cls: Type[BaseGroup]) -> None: diff --git a/python/ray/util/collective/collective.py b/python/ray/util/collective/collective.py index cc2cbed0a00e..f6545b692773 100644 --- a/python/ray/util/collective/collective.py +++ b/python/ray/util/collective/collective.py @@ -179,7 +179,7 @@ def is_group_initialized(group_name): def init_collective_group( world_size: int, rank: int, - backend=types.Backend.NCCL, + backend: str = "NCCL", group_name: str = "default", gloo_timeout: int = 30000, ): @@ -195,14 +195,11 @@ def init_collective_group( None """ _check_inside_actor() - backend = types.Backend(backend) global _group_mgr global _group_mgr_lock backend_cls = _group_mgr._registry.get(backend) - if backend_cls is None: - raise ValueError("Backend '{}' is not supported.".format(backend)) if not backend_cls.check_backend_availability(): raise RuntimeError("Backend '{}' is not available.".format(backend)) # TODO(Hao): implement a group auto-counter. 
@@ -225,7 +222,7 @@ def create_collective_group( actors, world_size: int, ranks: List[int], - backend=types.Backend.NCCL, + backend: str = "NCCL", group_name: str = "default", gloo_timeout: int = 30000, ): @@ -243,10 +240,7 @@ def create_collective_group( Returns: None """ - backend = types.Backend(backend) backend_cls = _group_mgr._registry.get(backend) - if backend_cls is None: - raise ValueError("Backend '{}' is not supported.".format(backend)) if not backend_cls.check_backend_availability(): raise RuntimeError("Backend '{}' is not available.".format(backend)) diff --git a/python/ray/util/collective/collective_group/nccl_collective_group.py b/python/ray/util/collective/collective_group/nccl_collective_group.py index 2c5987d79389..40432c0bd2cf 100644 --- a/python/ray/util/collective/collective_group/nccl_collective_group.py +++ b/python/ray/util/collective/collective_group/nccl_collective_group.py @@ -10,7 +10,6 @@ from ray.util.collective.types import ( AllGatherOptions, AllReduceOptions, - Backend, BarrierOptions, BroadcastOptions, RecvOptions, @@ -172,7 +171,7 @@ def destroy_group(self): @classmethod def backend(cls): - return Backend.NCCL + return "NCCL" @classmethod def check_backend_availability(cls) -> bool: diff --git a/python/ray/util/collective/collective_group/torch_gloo_collective_group.py b/python/ray/util/collective/collective_group/torch_gloo_collective_group.py index 96068108ac3e..2b337fdf007b 100644 --- a/python/ray/util/collective/collective_group/torch_gloo_collective_group.py +++ b/python/ray/util/collective/collective_group/torch_gloo_collective_group.py @@ -9,7 +9,6 @@ from ray.util.collective.types import ( AllGatherOptions, AllReduceOptions, - Backend, BarrierOptions, BroadcastOptions, RecvOptions, @@ -26,15 +25,15 @@ import torch.distributed as dist _TORCH_DISTRIBUTED_AVAILABLE = True + TORCH_REDUCE_OP_MAP = { + ReduceOp.SUM: dist.ReduceOp.SUM, + ReduceOp.PRODUCT: dist.ReduceOp.PRODUCT, + ReduceOp.MIN: dist.ReduceOp.MIN, + ReduceOp.MAX: dist.ReduceOp.MAX, + } except ImportError: _TORCH_DISTRIBUTED_AVAILABLE = False - -TORCH_REDUCE_OP_MAP = { - ReduceOp.SUM: dist.ReduceOp.SUM, - ReduceOp.PRODUCT: dist.ReduceOp.PRODUCT, - ReduceOp.MIN: dist.ReduceOp.MIN, - ReduceOp.MAX: dist.ReduceOp.MAX, -} + TORCH_REDUCE_OP_MAP = None def get_master_address_metadata_key(group_name: str): @@ -111,7 +110,7 @@ def destroy_group(self): @classmethod def backend(cls): """The backend of this collective group.""" - return Backend.GLOO + return "GLOO" @classmethod def check_backend_availability(cls) -> bool: diff --git a/python/ray/util/collective/examples/gloo_allreduce_example.py b/python/ray/util/collective/examples/gloo_allreduce_register_example.py similarity index 91% rename from python/ray/util/collective/examples/gloo_allreduce_example.py rename to python/ray/util/collective/examples/gloo_allreduce_register_example.py index cee7d05a3e94..f019ebcbd8de 100644 --- a/python/ray/util/collective/examples/gloo_allreduce_example.py +++ b/python/ray/util/collective/examples/gloo_allreduce_register_example.py @@ -7,7 +7,7 @@ init_collective_group, ) from ray.util.collective.backend_registry import get_backend_registry -from ray.util.collective.types import Backend, ReduceOp +from ray.util.collective.types import ReduceOp def test_gloo_via_registry(): @@ -27,7 +27,7 @@ def setup(self, world_size): init_collective_group( world_size=world_size, rank=self.rank, - backend=Backend.GLOO, + backend="GLOO", group_name="default", gloo_timeout=30000, ) @@ -42,7 +42,7 @@ def compute(self): 
actors=actors, world_size=2, ranks=[0, 1], - backend=Backend.GLOO, + backend="GLOO", group_name="default", gloo_timeout=30000, ) diff --git a/python/ray/util/collective/examples/nccl_allreduce_register_example.py b/python/ray/util/collective/examples/nccl_allreduce_register_example.py new file mode 100644 index 000000000000..514799b88ddb --- /dev/null +++ b/python/ray/util/collective/examples/nccl_allreduce_register_example.py @@ -0,0 +1,54 @@ +import torch + +import ray +from ray.util.collective import ( + allreduce, + create_collective_group, + init_collective_group, +) +from ray.util.collective.backend_registry import get_backend_registry +from ray.util.collective.types import ReduceOp + + +def test_nccl_via_registry(): + ray.init(num_gpus=8) + + registry = get_backend_registry() + assert "NCCL" in registry.list_backends() + assert registry.check("NCCL") + + @ray.remote(num_gpus=1) + class Worker: + def __init__(self, rank): + self.rank = rank + self.tensor = None + + def setup(self, world_size): + init_collective_group( + world_size=world_size, + rank=self.rank, + backend="NCCL", + group_name="default", + ) + + def compute(self): + device = torch.cuda.current_device() + self.tensor = torch.tensor([float(self.rank + 1)], device=device) + allreduce(self.tensor, op=ReduceOp.SUM, group_name="default") + return self.tensor.cpu().item() + + actors = [Worker.remote(rank=i) for i in range(2)] + create_collective_group( + actors=actors, + world_size=2, + ranks=[0, 1], + backend="NCCL", + group_name="default", + ) + + ray.get([a.setup.remote(2) for a in actors]) + results = ray.get([a.compute.remote() for a in actors]) + + assert results == [3.0, 3.0], f"Expected [3.0, 3.0], got {results}" + + ray.shutdown() diff --git a/python/ray/util/collective/types.py b/python/ray/util/collective/types.py index 23d43cdae005..9c494dc65b4e 100644 --- a/python/ray/util/collective/types.py +++ b/python/ray/util/collective/types.py @@ -31,27 +31,6 @@ def torch_available(): return _TORCH_AVAILABLE -class Backend(object): - """A class to represent different backends.""" - - NCCL = "NCCL" - GLOO = "GLOO" - UNRECOGNIZED = "unrecognized" - - def __new__(cls, name: str): - upper_name = name.upper() - backend = getattr(Backend, upper_name, Backend.UNRECOGNIZED) - if backend == Backend.UNRECOGNIZED: - if upper_name == "TORCH_GLOO": - return Backend.GLOO - raise ValueError( - "Unrecognized backend: '{}'. Only NCCL and GLOO are supported".format( - name - ) - ) - return backend - - class ReduceOp(Enum): SUM = 0 PRODUCT = 1 From 4c6c39b58570814ca52fe96c0af826c5b8fccc8e Mon Sep 17 00:00:00 2001 From: Evelynn-V Date: Thu, 5 Feb 2026 16:26:25 +0800 Subject: [PATCH 3/5] Fix the construction of the CI Chinese documents Signed-off-by: Evelynn-V --- ci/lint/pydoclint-baseline.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/lint/pydoclint-baseline.txt b/ci/lint/pydoclint-baseline.txt index 4eb5bed0b370..7b840aacb654 100644 --- a/ci/lint/pydoclint-baseline.txt +++ b/ci/lint/pydoclint-baseline.txt @@ -2281,6 +2281,7 @@ python/ray/util/client/worker.py -------------------- python/ray/util/collective/collective.py DOC101: Function `init_collective_group`: Docstring contains fewer arguments than in function signature. + DOC107: Function `init_collective_group`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints. DOC103: Function `init_collective_group`: Docstring arguments are different from function arguments. 
(Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [gloo_timeout: int]. DOC202: Function `init_collective_group` has a return section in docstring, but there are no return statements or annotations DOC101: Function `create_collective_group`: Docstring contains fewer arguments than in function signature. From 5b5f82d8457f23e9597258a2403ce1ae1a39788d Mon Sep 17 00:00:00 2001 From: Evelynn-V Date: Thu, 5 Feb 2026 16:50:28 +0800 Subject: [PATCH 4/5] change ci/lint/pydoclint-baseline.txt Signed-off-by: Evelynn-V --- ci/lint/pydoclint-baseline.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/lint/pydoclint-baseline.txt b/ci/lint/pydoclint-baseline.txt index 7b840aacb654..4eb5bed0b370 100644 --- a/ci/lint/pydoclint-baseline.txt +++ b/ci/lint/pydoclint-baseline.txt @@ -2281,7 +2281,6 @@ python/ray/util/client/worker.py -------------------- python/ray/util/collective/collective.py DOC101: Function `init_collective_group`: Docstring contains fewer arguments than in function signature. - DOC107: Function `init_collective_group`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints. DOC103: Function `init_collective_group`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [gloo_timeout: int]. DOC202: Function `init_collective_group` has a return section in docstring, but there are no return statements or annotations DOC101: Function `create_collective_group`: Docstring contains fewer arguments than in function signature. From 2e368ee295e9ea39ab40c08a44a359f7dd7ddce8 Mon Sep 17 00:00:00 2001 From: Evelynn-V Date: Fri, 6 Feb 2026 09:28:13 +0800 Subject: [PATCH 5/5] reset type.backend Signed-off-by: Evelynn-V --- python/ray/util/collective/types.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/python/ray/util/collective/types.py b/python/ray/util/collective/types.py index 9c494dc65b4e..23d43cdae005 100644 --- a/python/ray/util/collective/types.py +++ b/python/ray/util/collective/types.py @@ -31,6 +31,27 @@ def torch_available(): return _TORCH_AVAILABLE +class Backend(object): + """A class to represent different backends.""" + + NCCL = "NCCL" + GLOO = "GLOO" + UNRECOGNIZED = "unrecognized" + + def __new__(cls, name: str): + upper_name = name.upper() + backend = getattr(Backend, upper_name, Backend.UNRECOGNIZED) + if backend == Backend.UNRECOGNIZED: + if upper_name == "TORCH_GLOO": + return Backend.GLOO + raise ValueError( + "Unrecognized backend: '{}'. Only NCCL and GLOO are supported".format( + name + ) + ) + return backend + + class ReduceOp(Enum): SUM = 0 PRODUCT = 1
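
End-to-end usage sketch (illustration only, not part of the series): registering and exercising a custom backend through the new API. MyCCLXGroup is the hypothetical group class sketched after the nccl changes in PATCH 1; everything else is the API this series adds. Registration is per-process and duplicate names raise ValueError, so each actor registers before creating its group, and registry.check() is used because it returns False rather than raising when a backend is unavailable or unregistered.

import ray
import ray.util.collective as col


@ray.remote
class Worker:
    def setup(self, world_size: int, rank: int):
        # Built-in NCCL/GLOO backends are registered automatically when
        # ray.util.collective is imported; custom ones must be registered
        # in every process that creates a group.
        # MyCCLXGroup is the hypothetical group class from the earlier sketch.
        col.register_collective_backend("mycclx", MyCCLXGroup)

        # Names are stored upper-cased, so lookups are case-insensitive.
        registry = col.get_backend_registry()
        assert "MYCCLX" in registry.list_backends()

        # check() probes MyCCLXGroup.check_backend_availability() and
        # returns False (rather than raising) if mycclx is not installed.
        if registry.check("MYCCLX"):
            col.init_collective_group(world_size, rank, backend="MYCCLX")


workers = [Worker.remote() for _ in range(2)]
ray.get([w.setup.remote(2, i) for i, w in enumerate(workers)])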