[reward] feat: Add warning for ignored rate limits after initialization

JoyboyBrian · JoyboyBrian · commit d8d3491da434 · 2025-12-29T22:35:20.000-08:00
- Introduced a warning in `RateLimitedRewardManager` that alerts users when a later instantiation tries to change the global RPM/TPM settings after the class has already been initialized (typically with default values).
- Updated the `init_class` classmethod so that the warning is logged when a new configuration's rate limits are ignored due to prior initialization.
- Enhanced test coverage to verify the warning behavior when changing rate limits post-initialization.
diff --git a/tests/experimental/reward_loop/test_rate_limited_reward_manager_on_cpu.py b/tests/experimental/reward_loop/test_rate_limited_reward_manager_on_cpu.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import asyncio
+import logging
 import time
 
 import pytest
@@ -469,6 +470,32 @@ async def test_class_initialization_once(self, tokenizer):
         # Should be the same object
         assert first_semaphore is second_semaphore
 
+    def test_warn_when_rate_limits_are_ignored_due_to_prior_init(self, tokenizer, caplog):
+        """Warn when a new config attempts to change global RPM/TPM after the class has been initialized."""
+        caplog.set_level(logging.WARNING)
+
+        # First instantiation without a config (legacy signature) initializes global limiters with defaults.
+        _ = RateLimitedRewardManager(
+            tokenizer=tokenizer,
+            compute_score=mock_async_reward_function,
+            num_examine=0,
+            reward_fn_key="data_source",
+        )
+
+        # Second instantiation attempts to set RPM limits, but will be ignored due to global initialization.
+        config = DictConfig({"reward_model": {"max_concurrent": 10, "max_rpm": 60, "timeout": 10.0}})
+        _ = RateLimitedRewardManager(
+            config=config,
+            tokenizer=tokenizer,
+            compute_score=mock_async_reward_function,
+        )
+
+        assert any(
+            "RateLimitedRewardManager has already been initialized" in record.getMessage()
+            and "ignored" in record.getMessage()
+            for record in caplog.records
+        ), "Expected a warning when attempting to change global rate limits after initialization."
+
     @pytest.mark.asyncio
     async def test_extra_info_handling(self, tokenizer):
         """Test that extra_info is properly passed to reward function."""
diff --git a/verl/experimental/reward_loop/reward_manager/limited.py b/verl/experimental/reward_loop/reward_manager/limited.py
@@ -264,27 +264,58 @@ class RateLimitedRewardManager(RewardManagerBase):
     @classmethod
     def init_class(cls, config: DictConfig, tokenizer: AutoTokenizer):
         """Initialize class state shared across all instances."""
-        # Check if already initialized before calling parent
+        # Check if already initialized before calling parent.
+        #
+        # NOTE: This class owns a *global*, class-level set of rate limiters. Once the class has been
+        # initialized, subsequent instantiations cannot change the shared limiters. This is by design,
+        # but it can be surprising (and dangerous) when the first initialization happens with default
+        # values (often "unlimited") and later code tries to apply limits.
         if cls._class_initialized:
+            rm_cfg = config.get("reward_model") or {}
+            incoming_max_rpm = rm_cfg.get("max_rpm", None)
+            incoming_max_tpm = rm_cfg.get("max_tpm", None)
+
+            # Warn when a caller is trying to change the global RPM/TPM limits after initialization.
+            # This commonly happens if the first instance was created without a config (legacy signature),
+            # which initializes the global limiters to their defaults and locks them in.
+            if (incoming_max_rpm != cls._max_rpm) or (incoming_max_tpm != cls._max_tpm):
+                if (
+                    incoming_max_rpm is not None
+                    or incoming_max_tpm is not None
+                    or cls._max_rpm is not None
+                    or cls._max_tpm is not None
+                ):
+                    logger.warning(
+                        "RateLimitedRewardManager has already been initialized and its rate limiters are shared "
+                        "globally across instances. The incoming (max_rpm/max_tpm) settings will be ignored. "
+                        "This can lead to unexpected behavior (e.g., exceeding API rate limits) if the first "
+                        "initialization used defaults (often unlimited). "
+                        f"Existing: max_rpm={cls._max_rpm}, max_tpm={cls._max_tpm}. "
+                        f"Incoming: max_rpm={incoming_max_rpm}, max_tpm={incoming_max_tpm}. "
+                        "To apply different limits, ensure the first RateLimitedRewardManager created in this "
+                        "process uses the desired configuration (or restart/reset the process)."
+                    )
             return
 
         super().init_class(config, tokenizer)
 
+        rm_cfg = config.get("reward_model") or {}
+
         # Concurrency limiter
-        cls._max_concurrent = config.reward_model.get("max_concurrent", 1)
+        cls._max_concurrent = rm_cfg.get("max_concurrent", 1)
         cls._semaphore = asyncio.Semaphore(cls._max_concurrent)
 
         # Request rate limiter (RPM)
-        cls._max_rpm = config.reward_model.get("max_rpm", None)
+        cls._max_rpm = rm_cfg.get("max_rpm", None)
         if cls._max_rpm is not None:
             requests_per_second = cls._max_rpm / 60.0
             cls._rpm_limiter = AsyncTokenBucket(rate_limit=requests_per_second, max_tokens=requests_per_second)
         else:
             cls._rpm_limiter = None
 
         # Token rate limiter (TPM)
-        cls._max_tpm = config.reward_model.get("max_tpm", None)
-        cls._estimated_tokens_per_request = config.reward_model.get("estimated_tokens_per_request", 2000)
+        cls._max_tpm = rm_cfg.get("max_tpm", None)
+        cls._estimated_tokens_per_request = rm_cfg.get("estimated_tokens_per_request", 2000)
         if cls._max_tpm is not None:
             tokens_per_second = cls._max_tpm / 60.0
             cls._tpm_limiter = AsyncTokenBucket(rate_limit=tokens_per_second, max_tokens=tokens_per_second)