Skip to content

Commit a3a17eb

Browse files
committed
update
1 parent 39edc60 commit a3a17eb

File tree

1 file changed

+3
-3
lines changed
  • verl/experimental/reward_loop/reward_manager

1 file changed

+3
-3
lines changed

verl/experimental/reward_loop/reward_manager/remote.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
13 13
# limitations under the License.
14 14

15 15
import inspect
16+
import itertools
16 17

17 18
import ray
18 19

@@ -66,11 +67,10 @@ def __init__(self, config, tokenizer, compute_score=None, reward_router_address=
66 67
).remote(self.compute_score)
67 68
for _ in range(num_reward_workers)
68 69
]
69-
self._curr_worker_idx = -1
70+
self.reward_worker_pool = itertools.cycle(self.reward_worker)
70 71

71 72
def choose_reward_worker(self):
72-
self._curr_worker_idx = (self._curr_worker_idx + 1) % len(self.reward_worker)
73-
return self.reward_worker[self._curr_worker_idx]
73+
return next(self.reward_worker_pool)
74 74

75 75
async def run_single(self, data: DataProto) -> dict:
76 76
assert len(data) == 1, "Only support single data item"

0 commit comments

Comments
 (0)