
Commit 4a6b453

fix(isp.py): fix isp overlap backward allgather twice when activation ckpt 0.x (#366)
1 parent 86c3b6c commit 4a6b453

6 files changed: +8 additions, -8 deletions


internlm/core/parallel/comm/isp.py (1 addition & 1 deletion)

@@ -488,8 +488,8 @@ def _pre_forward_hook_for_module(self, module: nn.Module, *args):  # pylint: disable=W0613
         self._wait_handle(module)

     def _post_forward_hook_for_module(self, module: nn.Module, *args):  # pylint: disable=W0613
-        self._clear_handle(module)
         if not ((self._module_to_index[module] < self._ckpt_block_num) and self.is_forward is False):
+            self._clear_handle(module)
             self._clear_weight(module)

     def _pre_backward_hook_for_module(self, module: nn.Module, *args):  # pylint: disable=W0613
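The isp.py change is the substantive fix. Blocks with index below `_ckpt_block_num` are activation-checkpointed, so their forward runs a second time during backward recomputation (`self.is_forward is False`). Previously the all-gather handle was cleared unconditionally even though the gathered weight was kept for those blocks; judging by the commit title, that mismatch caused the backward pass to issue the weight all-gather a second time. A minimal sketch of the corrected hook logic (the condition and hook names come from the diff; the handle/weight bookkeeping is hypothetical stand-in plumbing, not the real ISP communicator):

```python
import torch.nn as nn


class ISPPostForwardSketch:
    """Hypothetical stand-in for the ISP communicator's post-forward hook."""

    def __init__(self, ckpt_block_num: int):
        self._ckpt_block_num = ckpt_block_num
        self._module_to_index: dict = {}  # module -> transformer block index
        self._handles: dict = {}          # module -> in-flight all-gather handle
        self._weights: dict = {}          # module -> gathered full weight
        self.is_forward = True            # False while recomputing a ckpt block

    def _post_forward_hook_for_module(self, module: nn.Module, *args):
        recomputing_ckpt_block = (
            self._module_to_index[module] < self._ckpt_block_num
            and self.is_forward is False
        )
        if not recomputing_ckpt_block:
            # Release the handle and the gathered weight together. Clearing
            # the handle alone (the pre-fix behavior) left a kept weight with
            # no recorded handle, and the backward path re-issued the
            # all-gather for checkpointed blocks.
            self._handles.pop(module, None)   # stands in for _clear_handle
            self._weights.pop(module, None)   # stands in for _clear_weight
```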

internlm/model/modeling_internlm.py (1 addition & 1 deletion)

@@ -138,7 +138,7 @@ def __init__(
             mlp_layer_fusion=mlp_layer_fusion,
             multiple_of=multiple_of,
             # TODO: to support more activation functions
-            activation_type="swiglu" if use_swiglu else "swiglu",
+            activation_type="swiglu" if use_swiglu else "gelu",
         )

         self.use_swiglu = use_swiglu
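The remaining changes are all instances of the same one-line conditional bug: `"swiglu" if use_swiglu else "swiglu"` returns `"swiglu"` on both branches, so setting `use_swiglu=False` could never select `gelu`. A self-contained illustration:

```python
use_swiglu = False

# Before the fix: both branches are "swiglu", so the flag is silently ignored.
activation_type = "swiglu" if use_swiglu else "swiglu"
assert activation_type == "swiglu"  # always, regardless of use_swiglu

# After the fix: the else branch actually selects gelu.
activation_type = "swiglu" if use_swiglu else "gelu"
assert activation_type == "gelu"
```

The identical fix is applied in modeling_internlm2.py, modeling_llama.py, and (twice each, once per MLP/MoE branch) in modeling_mixtral.py and modeling_moe.py below.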

internlm/model/modeling_internlm2.py (1 addition & 1 deletion)

@@ -165,7 +165,7 @@ def __init__(
             mlp_layer_fusion=mlp_layer_fusion,
             multiple_of=multiple_of,
             # TODO: to support more activation functions
-            activation_type="swiglu" if use_swiglu else "swiglu",
+            activation_type="swiglu" if use_swiglu else "gelu",
         )

         self.use_swiglu = use_swiglu

internlm/model/modeling_llama.py (1 addition & 1 deletion)

@@ -157,7 +157,7 @@ def __init__(
             mlp_layer_fusion=mlp_layer_fusion,
             multiple_of=multiple_of,
             # TODO: to support more activation functions
-            activation_type="swiglu" if use_swiglu else "swiglu",
+            activation_type="swiglu" if use_swiglu else "gelu",
         )

         self.use_swiglu = use_swiglu

internlm/model/modeling_mixtral.py (2 additions & 2 deletions)

@@ -138,7 +138,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )
         else:
             # replace mlp by MoE module. The expert in MoE is a FeedForward module.
@@ -156,7 +156,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )

         self.use_swiglu = use_swiglu

internlm/model/modeling_moe.py (2 additions & 2 deletions)

@@ -129,7 +129,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
             )
         else:
             # replace mlp by MoE module. The expert in MoE is a FeedForward module.
@@ -147,7 +147,7 @@ def __init__(
                 mlp_layer_fusion=mlp_layer_fusion,
                 multiple_of=multiple_of,
                 # TODO: to support more activation functions
-                activation_type="swiglu" if use_swiglu else "swiglu",
+                activation_type="swiglu" if use_swiglu else "gelu",
            )

         self.use_swiglu = use_swiglu
