File tree Expand file tree Collapse file tree 1 file changed +3
-2
lines changed
bionemo-recipes/recipes/esm2_native_te Expand file tree Collapse file tree 1 file changed +3
-2
lines changed Original file line number Diff line number Diff line change @@ -153,7 +153,8 @@ def main(args: DictConfig) -> float | None:
153153 loss .backward ()
154154
155155 # Compute and clip gradient norms.
156- total_norm = torch .nn .utils .clip_grad_norm_ (model .parameters (), max_norm = 1.0 ).item ()
156+ # FIXME: Gradient clipping is temporarily disabled because clip_grad_norm_ hangs training in the 25.12 torch base image for multi-process mFSDP.
157+ # total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0).item()
157158
158159 # Step optimizer.
159160 optimizer .step ()
@@ -164,7 +165,7 @@ def main(args: DictConfig) -> float | None:
164165 step = step ,
165166 batch = batch ,
166167 outputs = outputs ,
167- grad_norm = total_norm ,
168+ grad_norm = 0.0 , # total_norm,
168169 lr = optimizer .param_groups [0 ]["lr" ],
169170 )
170171
You can’t perform that action at this time.
0 commit comments