docs/ascend_tutorial/ascend_profiling_en.rst (4 changes: 2 additions & 2 deletions)
@@ -1,10 +1,10 @@
-Data collection based on FSDP backend on Ascend devices(en)
+Performance data collection based on FSDP or MindSpeed(Megatron) on Ascend devices(en)
 ==========================================================================================
 
 Last updated: 08/14/2025.
 
 This is a tutorial for data collection using the GRPO or DAPO algorithm
-based on FSDP on Ascend devices.
+based on FSDP or MindSpeed(Megatron) on Ascend devices.
 
 Configuration
 -------------
docs/ascend_tutorial/ascend_profiling_zh.rst (6 changes: 3 additions & 3 deletions)
@@ -1,11 +1,11 @@
-Data collection based on FSDP backend on Ascend devices(zh)
+Performance data collection based on FSDP or MindSpeed(Megatron) on Ascend devices(zh)
 ====================================
 
-在昇腾设备上基于FSDP后端进行数据采集
+在昇腾设备上基于FSDP或MindSpeed(Megatron)后端进行性能数据采集
 
 Last updated: 08/14/2025.
 
-这是一份在昇腾设备上基于FSDP后端使用GRPO或DAPO算法进行数据采集的教程
+这是一份在昇腾设备上基于FSDP或MindSpeed(Megatron)后端,使用GRPO或DAPO算法进行数据采集的教程
 
 配置
 ----
verl/workers/fsdp_workers.py (6 changes: 3 additions & 3 deletions)
@@ -1486,7 +1486,7 @@ def init_model(self):
         )
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="critic"))
-    @DistProfiler.annotate(color="cyan")
+    @DistProfiler.annotate(color="cyan", role="compute_values")
     def compute_values(self, data: DataProto):
         if self._is_offload_param:
             load_fsdp_model_to_gpu(self.critic_module)
@@ -1506,7 +1506,7 @@ def compute_values(self, data: DataProto):
         return output
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="critic"))
-    @DistProfiler.annotate(color="pink")
+    @DistProfiler.annotate(color="pink", role="critic_update")
     def update_critic(self, data: DataProto):
         if self._is_offload_param:
             load_fsdp_model_to_gpu(self.critic_module)
@@ -1874,7 +1874,7 @@ def _switch_chat_template(self, data: DataProto):
         return DataProto.from_dict(rm_inputs)
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="reward"))
-    @DistProfiler.annotate(color="brown")
+    @DistProfiler.annotate(color="brown", role="compute_rm_score")
     def compute_rm_score(self, data: DataProto):
         import itertools
 
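The fsdp_workers.py changes attach an explicit role label to each profiler annotation, so collected ranges can be grouped by pipeline stage (critic value computation, critic update, reward scoring) rather than by color alone. As a rough illustration of the decorator pattern (a toy sketch only, not verl's actual DistProfiler, which dispatches to the configured backend profiler and its per-rank settings):

```python
# Toy sketch of an annotate-style profiler decorator. ToyDistProfiler is
# hypothetical; it only demonstrates how a color/role tag can wrap a method
# call in a named, timed range.
import functools
import time


class ToyDistProfiler:
    """Minimal stand-in that logs a color/role-tagged range per call."""

    enabled = True

    @classmethod
    def annotate(cls, color=None, role=None):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(self, *args, **kwargs):
                if not cls.enabled:
                    return func(self, *args, **kwargs)
                tag = role or func.__name__  # fall back to the method name
                start = time.perf_counter()
                try:
                    return func(self, *args, **kwargs)
                finally:
                    elapsed = time.perf_counter() - start
                    print(f"[profile] role={tag} color={color} took {elapsed:.3f}s")

            return wrapper

        return decorator


class Worker:
    @ToyDistProfiler.annotate(color="cyan", role="compute_values")
    def compute_values(self, data):
        return sum(data)  # placeholder for the real forward pass


Worker().compute_values([1, 2, 3])  # prints one role-tagged timing line
```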
verl/workers/megatron_workers.py (14 changes: 7 additions & 7 deletions)
@@ -724,7 +724,7 @@ async def trainer_mode(self):
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="actor"))
     @GPUMemoryLogger(role="update_actor", logger=logger)
-    @DistProfiler.annotate(color="red")
+    @DistProfiler.annotate(color="red", role="actor_update")
     def update_actor(self, data: DataProto):
         assert self._is_actor
         if self._is_offload_param:
@@ -767,7 +767,7 @@ def update_actor(self, data: DataProto):
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="rollout"))
     @GPUMemoryLogger(role="generate_sequences", logger=logger)
-    @DistProfiler.annotate(color="red")
+    @DistProfiler.annotate(color="red", role="rollout_generate")
     def generate_sequences(self, prompts: DataProto):
         assert self._is_rollout
         prompts = prompts.to(get_device_name())
@@ -817,7 +817,7 @@ def generate_sequences(self, prompts: DataProto):
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="actor"))
     @GPUMemoryLogger(role="compute_ref_log_prob", logger=logger)
-    @DistProfiler.annotate(color="olive")
+    @DistProfiler.annotate(color="olive", role="ref_compute_log_prob")
     def compute_ref_log_prob(self, data: DataProto):
         assert self._is_ref
         if self._ref_is_offload_param:
@@ -839,7 +839,7 @@ def compute_ref_log_prob(self, data: DataProto):
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="actor"))
     @GPUMemoryLogger(role="compute_log_prob", logger=logger)
-    @DistProfiler.annotate(color="blue")
+    @DistProfiler.annotate(color="blue", role="actor_compute_log_prob")
     def compute_log_prob(self, data: DataProto):
         assert self._is_actor
         if self._is_offload_param:
@@ -1207,7 +1207,7 @@ def init_model(self):
         )
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="critic"))
-    @DistProfiler.annotate(color="cyan")
+    @DistProfiler.annotate(color="cyan", role="compute_values")
     def compute_values(self, data: DataProto):
         micro_batch_size = self.config.ppo_micro_batch_size_per_gpu
         data.meta_info["micro_batch_size"] = micro_batch_size
@@ -1224,7 +1224,7 @@ def compute_values(self, data: DataProto):
         return output
 
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="critic"))
-    @DistProfiler.annotate(color="pink")
+    @DistProfiler.annotate(color="pink", role="critic_update")
     def update_critic(self, data: DataProto):
         data = data.to(get_device_id())
 
@@ -1448,7 +1448,7 @@ def init_model(self):
     # TODO: reward model use itself tokenizer instead of sft tokenizer
     # the input_ids, responses, attention_mask and position_ids may be different!
     @register(dispatch_mode=make_nd_compute_dataproto_dispatch_fn(mesh_name="reward"))
-    @DistProfiler.annotate(color="brown")
+    @DistProfiler.annotate(color="brown", role="compute_rm_score")
     def compute_rm_score(self, data: DataProto):
         data.meta_info["micro_batch_size"] = self.config.micro_batch_size_per_gpu
         data.meta_info["max_token_len"] = self.config.forward_max_token_len_per_gpu
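The megatron_workers.py edits mirror the FSDP ones, so both backends now emit one shared role vocabulary (actor_update, rollout_generate, ref_compute_log_prob, actor_compute_log_prob, compute_values, critic_update, compute_rm_score). One way such labels can pay off, sketched under assumed names (PROFILE_ROLES and should_profile are hypothetical, not verl config or API), is stage-level filtering of what gets collected:

```python
# Hypothetical illustration of role-based filtering; verl's real behavior is
# governed by its profiler configuration, not by this snippet.
PROFILE_ROLES = {"rollout_generate", "actor_update"}  # assumed allow-list


def should_profile(role: str) -> bool:
    """Profile everything when the allow-list is empty, else only listed roles."""
    return not PROFILE_ROLES or role in PROFILE_ROLES


for role in ("rollout_generate", "ref_compute_log_prob", "critic_update"):
    print(role, "->", "profiled" if should_profile(role) else "skipped")
# rollout_generate -> profiled
# ref_compute_log_prob -> skipped
# critic_update -> skipped
```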