@@ -60,10 +60,10 @@ limitations under the License.
6060#include " xla/stream_executor/stream_executor.h"
6161#include " xla/tsl/platform/env.h"
6262#include " xla/tsl/platform/logging.h"
63+ #include " xla/tsl/platform/status_macros.h"
6364#include " xla/tsl/platform/threadpool.h"
6465#include " xla/util.h"
6566#include " tsl/platform/casts.h"
66- #include " tsl/platform/numbers.h"
6767#include " tsl/profiler/lib/traceme.h"
6868
6969namespace xla ::gpu {
@@ -239,12 +239,6 @@ bool NcclCollectives::SupportsOneSidedComm() const {
239239 return NCCL_VERSION_CODE >= 22900 ;
240240}
241241
242- size_t NcclCollectives::SymmetricMemoryAlignment () const {
243- // TODO(ezhulenev): Query memory alignment from CUDA executor for multicast
244- // memory (CU_MULTICAST_GRANULARITY_MINIMUM). Find how to query it for NCCL.
245- return 4096 ;
246- }
247-
248242static absl::StatusOr<ncclConfig_t> AsNcclConfig (
249243 const GpuCollectives::Config& config,
250244 const se::StreamExecutor* stream_executor) {
@@ -504,44 +498,6 @@ static absl::StatusOr<xla::gpu::GpuCollectives*> GetNvshmemCollectives() {
504498 return nvshmem_collectives;
505499}
506500
507- absl::StatusOr<void *> NcclCollectives::Allocate (uint64_t bytes) {
508- if (xla::GetDebugOptionsFromFlags ().xla_gpu_experimental_enable_nvshmem ()) {
509- ASSIGN_OR_RETURN (auto * nvshmem_collectives, GetNvshmemCollectives ());
510- return nvshmem_collectives->Allocate (bytes);
511- }
512-
513- void * ptr = nullptr ;
514- ncclResult_t res = ncclMemAlloc (&ptr, bytes);
515- if (res != ncclSuccess) {
516- return Internal (
517- " Failed to allocate %s (%llu bytes) from device collective memory: %s, "
518- " Last NCCL warning(error) log entry (may be unrelated): %s" ,
519- tsl::strings::HumanReadableNumBytes (bytes), bytes,
520- ncclGetErrorString (res), ncclGetLastError (nullptr ));
521- }
522- VLOG (2 ) << " Allocated collective memory " << ptr << " of " << bytes
523- << " bytes" ;
524- return ptr;
525- }
526-
527- absl::Status NcclCollectives::Deallocate (void * location) {
528- if (xla::GetDebugOptionsFromFlags ().xla_gpu_experimental_enable_nvshmem ()) {
529- ASSIGN_OR_RETURN (auto * nvshmem_collectives, GetNvshmemCollectives ());
530- return nvshmem_collectives->Deallocate (location);
531- }
532-
533- ncclResult_t res = ncclMemFree (location);
534- if (res != ncclSuccess) {
535- return Internal (
536- " Failed to free device collective memory at %p; result: %s, Last NCCL "
537- " warning(error) log entry (may be unrelated): %s" ,
538- location, ncclGetErrorString (res), ncclGetLastError (nullptr ));
539- }
540-
541- VLOG (2 ) << " Deallocated collective memory " << location;
542- return absl::OkStatus ();
543- }
544-
545501absl::StatusOr<GpuCollectives::CliqueIdCallback>
546502NcclCollectives::InitializeTopology (const Topology& topology) {
547503 if (xla::GetDebugOptionsFromFlags ().xla_gpu_experimental_enable_nvshmem ()) {
0 commit comments