Open
Description
I'm working on integrating the XLA backend with DeepSpeed and encountered the following error:
File "/home/kojoe/.local/lib/python3.10/site-packages/deepspeed/__init__.py", line 209, in initialize
engine = PipelineEngine(args=args,
File "/home/kojoe/.local/lib/python3.10/site-packages/deepspeed/runtime/pipe/engine.py", line 239, in __init__
p2p.send(self.loss, self.next_stage)
File "/home/kojoe/.local/lib/python3.10/site-packages/deepspeed/runtime/pipe/p2p.py", line 60, in send
return dist.send(tensor, dest_rank)
File "/home/kojoe/.local/lib/python3.10/site-packages/deepspeed/comm/comm.py", line 117, in log_wrapper
return func(*args, **kwargs)
File "/home/kojoe/.local/lib/python3.10/site-packages/deepspeed/comm/comm.py", line 358, in send
return cdb.send(tensor=tensor, dst=dst, group=group, tag=tag)
File "/home/kojoe/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn
return fn(*args, **kwargs)
File "/home/kojoe/.local/lib/python3.10/site-packages/deepspeed/comm/torch.py", line 296, in send
return torch.distributed.send(tensor=tensor, dst=dst, group=group, tag=tag)
File "/home/kojoe/.local/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 83, in wrapper
return func(*args, **kwargs)
File "/home/kojoe/.local/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 2148, in send
default_pg.send([tensor], dst, tag).wait()
File "/usr/local/lib/python3.10/dist-packages/torch_xla/distributed/xla_backend.py", line 249, in send
channel_id = self.make_send_channel_id(dst_rank, tag)
File "/usr/local/lib/python3.10/dist-packages/torch_xla/distributed/xla_backend.py", line 242, in make_send_channel_id
raise NotImplementedError
NotImplementedError