[eager_fail_to_run] torchrec_dlrm fails to run in eager mode with --bfloat16 #2572
Open
Description
Command to reproduce:
python benchmarks/dynamo/torchbench.py --accuracy --no-translation-validation --inference --bfloat16 --backend inductor --disable-cudagraphs --device cuda --only torchrec_dlrm
Traceback (most recent call last):
File "/data/users/ivankobzarev/a/pytorch/benchmarks/dynamo/common.py", line 2744, in validate_model
self.model_iter_fn(model, example_inputs)
File "/data/users/ivankobzarev/a/pytorch/benchmarks/dynamo/torchbench.py", line 444, in forward_pass
return mod(*inputs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torchrec/models/dlrm.py", line 897, in forward
logits = self.model(batch.dense_features, batch.sparse_features)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torchrec/models/dlrm.py", line 576, in forward
concatenated_dense = self.inter_arch(
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torchrec/models/dlrm.py", line 288, in forward
return self.crossnet(combined_values.reshape([B, -1]))
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ivankobzarev/local/a/pytorch-env/lib/python3.10/site-packages/torchrec/modules/crossnet.py", line 177, in forward
x_l_v = torch.nn.functional.linear(x_l, self.V_kernels[layer])
RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::BFloat16
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data/users/ivankobzarev/a/pytorch/benchmarks/dynamo/common.py", line 4857, in run
) = runner.load_model(
File "/data/users/ivankobzarev/a/pytorch/benchmarks/dynamo/torchbench.py", line 372, in load_model
self.validate_model(model, example_inputs)
File "/data/users/ivankobzarev/a/pytorch/benchmarks/dynamo/common.py", line 2746, in validate_model
raise RuntimeError("Eager run failed") from e
RuntimeError: Eager run failed
eager_fail_to_run
Metadata
Assignees
Labels
No labels