@@ -264,27 +264,58 @@ class RateLimitedRewardManager(RewardManagerBase):
264264 @classmethod
265265 def init_class (cls , config : DictConfig , tokenizer : AutoTokenizer ):
266266 """Initialize class state shared across all instances."""
267- # Check if already initialized before calling parent
267+ # Check if already initialized before calling parent.
268+ #
269+ # NOTE: This class owns a *global*, class-level set of rate limiters. Once the class has been
270+ # initialized, subsequent instantiations cannot change the shared limiters. This is by design,
271+ # but it can be surprising (and dangerous) when the first initialization happens with default
272+ # values (often "unlimited") and later code tries to apply limits.
268273 if cls ._class_initialized :
274+ rm_cfg = config .get ("reward_model" ) or {}
275+ incoming_max_rpm = rm_cfg .get ("max_rpm" , None )
276+ incoming_max_tpm = rm_cfg .get ("max_tpm" , None )
277+
278+ # Warn when a caller is trying to change the global RPM/TPM limits after initialization.
279+ # This commonly happens if the first instance was created without a config (legacy signature),
280+ # which initializes the global limiters to their defaults and locks them in.
281+ if (incoming_max_rpm != cls ._max_rpm ) or (incoming_max_tpm != cls ._max_tpm ):
282+ if (
283+ incoming_max_rpm is not None
284+ or incoming_max_tpm is not None
285+ or cls ._max_rpm is not None
286+ or cls ._max_tpm is not None
287+ ):
288+ logger .warning (
289+ "RateLimitedRewardManager has already been initialized and its rate limiters are shared "
290+ "globally across instances. The incoming (max_rpm/max_tpm) settings will be ignored. "
291+ "This can lead to unexpected behavior (e.g., exceeding API rate limits) if the first "
292+ "initialization used defaults (often unlimited). "
293+ f"Existing: max_rpm={ cls ._max_rpm } , max_tpm={ cls ._max_tpm } . "
294+ f"Incoming: max_rpm={ incoming_max_rpm } , max_tpm={ incoming_max_tpm } . "
295+ "To apply different limits, ensure the first RateLimitedRewardManager created in this "
296+ "process uses the desired configuration (or restart/reset the process)."
297+ )
269298 return
270299
271300 super ().init_class (config , tokenizer )
272301
302+ rm_cfg = config .get ("reward_model" ) or {}
303+
273304 # Concurrency limiter
274- cls ._max_concurrent = config . reward_model .get ("max_concurrent" , 1 )
305+ cls ._max_concurrent = rm_cfg .get ("max_concurrent" , 1 )
275306 cls ._semaphore = asyncio .Semaphore (cls ._max_concurrent )
276307
277308 # Request rate limiter (RPM)
278- cls ._max_rpm = config . reward_model .get ("max_rpm" , None )
309+ cls ._max_rpm = rm_cfg .get ("max_rpm" , None )
279310 if cls ._max_rpm is not None :
280311 requests_per_second = cls ._max_rpm / 60.0
281312 cls ._rpm_limiter = AsyncTokenBucket (rate_limit = requests_per_second , max_tokens = requests_per_second )
282313 else :
283314 cls ._rpm_limiter = None
284315
285316 # Token rate limiter (TPM)
286- cls ._max_tpm = config . reward_model .get ("max_tpm" , None )
287- cls ._estimated_tokens_per_request = config . reward_model .get ("estimated_tokens_per_request" , 2000 )
317+ cls ._max_tpm = rm_cfg .get ("max_tpm" , None )
318+ cls ._estimated_tokens_per_request = rm_cfg .get ("estimated_tokens_per_request" , 2000 )
288319 if cls ._max_tpm is not None :
289320 tokens_per_second = cls ._max_tpm / 60.0
290321 cls ._tpm_limiter = AsyncTokenBucket (rate_limit = tokens_per_second , max_tokens = tokens_per_second )
0 commit comments