Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions tests/workers/reward_manager/test_registry_on_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,21 @@ def method(setup):

assert OriginalClass().method() == 42
assert REWARD_MANAGER_REGISTRY["return_test"] == OriginalClass


def test_idempotent_registration_same_class(setup):
"""Test that registering the same class twice is idempotent (no error).
This is important for distributed environments like Ray where modules
may be imported multiple times across workers.
"""

@register("idempotent_test")
class IdempotentManager:
pass

# Register the same class again - should not raise
decorated = register("idempotent_test")(IdempotentManager)

assert decorated == IdempotentManager
assert REWARD_MANAGER_REGISTRY["idempotent_test"] == IdempotentManager
30 changes: 26 additions & 4 deletions verl/workers/reward_manager/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,41 @@
REWARD_MANAGER_REGISTRY: dict[str, type[AbstractRewardManager]] = {}


def _get_class_identifier(cls: type) -> str:
"""Get a unique identifier for a class based on its module and qualified name.
This is used instead of direct class comparison because in distributed environments
(like Ray), the same class may be imported multiple times with different object ids.
"""
return f"{cls.__module__}.{cls.__qualname__}"


def register(name: str) -> Callable[[type[AbstractRewardManager]], type[AbstractRewardManager]]:
"""Decorator to register a reward manager class with a given name.
Args:
name: `(str)`
The name of the reward manager.
Note:
Registration is idempotent - registering the same class multiple times
(identified by module and qualname) will not raise an error. This is
necessary for distributed environments like Ray where modules may be
imported multiple times across workers.
"""

def decorator(cls: type[AbstractRewardManager]) -> type[AbstractRewardManager]:
if name in REWARD_MANAGER_REGISTRY and REWARD_MANAGER_REGISTRY[name] != cls:
raise ValueError(
f"Reward manager {name} has already been registered: {REWARD_MANAGER_REGISTRY[name]} vs {cls}"
)
if name in REWARD_MANAGER_REGISTRY:
existing_cls = REWARD_MANAGER_REGISTRY[name]
existing_id = _get_class_identifier(existing_cls)
new_id = _get_class_identifier(cls)
if existing_id != new_id:
raise ValueError(
f"Reward manager '{name}' has already been registered with a different class: "
f"{existing_id} vs {new_id}"
)
# Same class (by qualname) - idempotent registration, skip silently
return cls
REWARD_MANAGER_REGISTRY[name] = cls
return cls
Comment on lines +48 to 60
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current implementation for idempotent registration has a potential issue. In the case of re-registering the same class (where existing_id == new_id), the function returns early without updating the REWARD_MANAGER_REGISTRY.

If a module is reloaded (e.g., during development or with importlib.reload), a new class object is created. While it has the same identifier, it's a different object in memory. The registry would continue to hold a reference to the old, stale class object, which could lead to unexpected behavior.

It's safer to always update the registry with the new class object. This simplifies the logic and makes it more robust against module reloads.

I suggest refactoring the decorator function to check for conflicts first, and then perform the registration/update.

        if name in REWARD_MANAGER_REGISTRY:
            existing_id = _get_class_identifier(REWARD_MANAGER_REGISTRY[name])
            new_id = _get_class_identifier(cls)
            if existing_id != new_id:
                raise ValueError(
                    f"Reward manager '{name}' has already been registered with a different class: "
                    f"{existing_id} vs {new_id}"
                )
        REWARD_MANAGER_REGISTRY[name] = cls
        return cls


Expand Down