Skip to content

Commit c4946a1

Browse files
authored
Merge branch 'master' into zenflow_z1_2_example
2 parents 0b18cca + 3d83278 commit c4946a1

File tree

2 files changed: +4 −2 lines changed (4 additions, 2 deletions)

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
1 | 1 |   [submodule "training/DeepSpeed-Domino/Megatron-LM"]
2 | 2 |   path = training/DeepSpeed-Domino/Megatron-LM
3 |   | - url = git@github.com:NVIDIA/Megatron-LM.git
  | 3 | + url = https://github.com/NVIDIA/Megatron-LM.git

applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
11 | 11 |   )
12 | 12 |   from huggingface_hub import snapshot_download
13 | 13 |   from transformers.integrations.deepspeed import HfDeepSpeedConfig
   | 14 | + from transformers.modeling_utils import no_init_weights
14 | 15 |
15 | 16 |   from dschat.utils.model.reward_model import RewardModel
16 | 17 |   from dschat.utils.utils import load_state_dict_into_model, print_rank_0
@@ -99,7 +100,8 @@ def create_hf_model(model_class,
99 | 100 |   dschf = None
100 | 101 |   if rlhf_training:
101 | 102 |   # the weight loading is handled by create critic model
102 |     | - model = model_class.from_config(model_config)
    | 103 | + with no_init_weights():
    | 104 | + model = model_class.from_config(model_config)
103 | 105 |   else:
104 | 106 |   model = model_class.from_pretrained(
105 | 107 |   model_name_or_path,

0 commit comments

Comments
 (0)