Description
bash training_scripts/single_node/run_1.3b_lora.sh
Traceback (most recent call last):
File "main.py", line 328, in <module>
main()
File "main.py", line 301, in main
model.backward(loss)
File "/home/kemove/anaconda3/envs/deepspeed/lib/python3.8/site-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/home/kemove/anaconda3/envs/deepspeed/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1827, in backward
self.optimizer.backward(loss, retain_graph=retain_graph)
File "/home/kemove/anaconda3/envs/deepspeed/lib/python3.8/site-packages/deepspeed/runtime/fp16/fused_optimizer.py", line 353, in backward
scaled_loss.backward(create_graph=create_graph, retain_graph=retain_graph)
File "/home/kemove/anaconda3/envs/deepspeed/lib/python3.8/site-packages/torch/_tensor.py", line 487, in backward
torch.autograd.backward(
File "/home/kemove/anaconda3/envs/deepspeed/lib/python3.8/site-packages/torch/autograd/__init__.py", line 197, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling cublasCreate(handle)