Skip to content

TypeError: (InvalidType) Type promotion only support calculations between floating-point numbers and between complex and real numbers. But got different data type x: int64, y: int32. (at /paddle/paddle/phi/common/type_promotion.h:228) #1392

@Cipheeer

Description

@Cipheeer

[环境]
paddlepaddle-gpu==3.2.0
erniekit==0.0.0 /home/aistudio/ERNIE

[运行]
erniekit train ERNIE/examples/configs/ERNIE-4.5-21B-A3B/sft/run_sft_lora_8k.yaml

[报错]
Traceback (most recent call last):
File "/home/ERNIE-develop/erniekit/launcher.py", line 46, in <module>
launch()
File "/home/ERNIE-develop/erniekit/launcher.py", line 34, in launch
run_tuner()
File "/home/ERNIE-develop/erniekit/train/tuner.py", line 65, in run_tuner
_training_function(config={"args": args})
File "/home/ERNIE-develop/erniekit/train/tuner.py", line 49, in _training_function
run_sft(model_args, data_args, generating_args, finetuning_args)
File "/home/ERNIE-develop/erniekit/train/sft/workflow.py", line 532, in run_sft
train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleformers/trainer/trainer.py", line 1172, in train
return self._inner_training_loop(
File "/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleformers/trainer/trainer.py", line 1424, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, step_control=step_control)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleformers/trainer/trainer.py", line 2782, in training_step
loss = self.compute_loss(model, inputs)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleformers/trainer/trainer.py", line 2722, in compute_loss
outputs = model(**inputs)
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1917, in forward
outputs = self.ernie(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1379, in forward
layer_outputs = self.recompute_training(
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1252, in recompute_training
hidden_states = recompute(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/distributed/fleet/utils/__init__.py", line 150, in recompute
return fleet.recompute.recompute(function, *args, **kwargs)
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/distributed/fleet/recompute/recompute.py", line 715, in recompute
return RecomputeFunction.apply(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/distributed/fleet/recompute/recompute.py", line 238, in forward
outputs = run_function(*args, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1248, in custom_forward
return module(*inputs, output_gate_logits=False)
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 732, in forward
hidden_states, _, router_loss, gate_logits = self.mlp(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/moe/moe_all_gather_layer.py", line 1122, in forward
AlltoAllSmart.apply(
File "/home/ERNIE-develop/ernie/moe/moe_all_gather_layer.py", line 406, in forward
distributed_input_to_alltoall_out = paddle.maximum(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/base/dygraph/generated_tensor_methods_patch.py", line 73, in _maximum
return _C_ops.maximum(*args, **kwargs)
TypeError: (InvalidType) Type promotion only support calculations between floating-point numbers and between complex and real numbers. But got different data type x: int64, y: int32. (at /paddle/paddle/phi/common/type_promotion.h:228)

LAUNCH INFO 2025-12-05 03:28:16,962 Pod failed
LAUNCH ERROR 2025-12-05 03:28:16,962 Container failed !!!
Container rank 0 status failed cmd ['/opt/conda/envs/python35-paddle120-env/bin/python', '-u', '/home/ERNIE-develop/erniekit/launcher.py', 'train', 'ERNIE/examples/configs/ERNIE-4.5-21B-A3B/sft/run_sft_lora_8k.yaml'] code 1 log erniekit_dist_log/workerlog.0
LAUNCH INFO 2025-12-05 03:28:16,962 ------------------------- ERROR LOG DETAIL -------------------------
ers/trainer/trainer.py", line 1424, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, step_control=step_control)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleformers/trainer/trainer.py", line 2782, in training_step
loss = self.compute_loss(model, inputs)
File "/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleformers/trainer/trainer.py", line 2722, in compute_loss
outputs = model(**inputs)
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1917, in forward
outputs = self.ernie(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1379, in forward
layer_outputs = self.recompute_training(
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1252, in recompute_training
hidden_states = recompute(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/distributed/fleet/utils/__init__.py", line 150, in recompute
return fleet.recompute.recompute(function, *args, **kwargs)
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/distributed/fleet/recompute/recompute.py", line 715, in recompute
return RecomputeFunction.apply(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/distributed/fleet/recompute/recompute.py", line 238, in forward
outputs = run_function(*args, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 1248, in custom_forward
return module(*inputs, output_gate_logits=False)
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/modeling_moe.py", line 732, in forward
hidden_states, _, router_loss, gate_logits = self.mlp(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/nn/layer/layers.py", line 1576, in __call__
return self.forward(*inputs, **kwargs)
File "/home/ERNIE-develop/ernie/moe/moe_all_gather_layer.py", line 1122, in forward
AlltoAllSmart.apply(
File "/home/ERNIE-develop/ernie/moe/moe_all_gather_layer.py", line 406, in forward
distributed_input_to_alltoall_out = paddle.maximum(
File "/home/aistudio/external-libraries/lib/python3.10/site-packages/paddle/base/dygraph/generated_tensor_methods_patch.py", line 73, in _maximum
return _C_ops.maximum(*args, **kwargs)
TypeError: (InvalidType) Type promotion only support calculations between floating-point numbers and between complex and real numbers. But got different data type x: int64, y: int32. (at /paddle/paddle/phi/common/type_promotion.h:228)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions