Skip to content

Commit deca42f

Browse files
committed
Merge branch 'main' into qinghuazhou/mscclpp_datatype_revise
2 parents c2b4d21 + a19bca9 commit deca42f

32 files changed

Lines changed: 254 additions & 215 deletions

apps/nccl/src/allgather.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ class AllgatherAlgo6 : public mscclpp::AlgorithmBuilder {
217217

218218
private:
219219
bool disableChannelCache_;
220-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
220+
std::vector<mscclpp::Connection> conns_;
221221
std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>> memorySemaphores_;
222222
const int nChannelsPerConnection_ = 35;
223223

@@ -236,7 +236,7 @@ class AllgatherAlgo8 : public mscclpp::AlgorithmBuilder {
236236
mscclpp::Algorithm build() override;
237237

238238
private:
239-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
239+
std::vector<mscclpp::Connection> conns_;
240240

241241
void initialize(std::shared_ptr<mscclpp::Communicator> comm,
242242
std::unordered_map<std::string, std::shared_ptr<void>>& extras);

apps/nccl/src/allreduce.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,7 +1139,7 @@ class AllreducePacket : public mscclpp::AlgorithmBuilder {
11391139
size_t scratchBufferSize_;
11401140
std::shared_ptr<char> scratchBuffer_;
11411141
const int nSegmentsForScratchBuffer_ = 2;
1142-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
1142+
std::vector<mscclpp::Connection> conns_;
11431143

11441144
std::shared_ptr<uint32_t> deviceFlag7_;
11451145
std::shared_ptr<uint32_t> deviceFlag28_;
@@ -1166,7 +1166,7 @@ class AllreduceNvls : public mscclpp::AlgorithmBuilder {
11661166
uint32_t nSwitchChannels_;
11671167
std::shared_ptr<mscclpp::DeviceHandle<mscclpp::BaseMemoryChannel>> memoryChannelsDeviceHandle_;
11681168
std::vector<mscclpp::BaseMemoryChannel> baseChannels_;
1169-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
1169+
std::vector<mscclpp::Connection> conns_;
11701170
};
11711171

11721172
class AllreduceNvlsWithCopy : public mscclpp::AlgorithmBuilder {
@@ -1190,7 +1190,7 @@ class AllreduceNvlsWithCopy : public mscclpp::AlgorithmBuilder {
11901190
uint32_t nSwitchChannels_;
11911191
std::shared_ptr<mscclpp::DeviceHandle<mscclpp::BaseMemoryChannel>> memoryChannelsDeviceHandle_;
11921192
std::vector<mscclpp::BaseMemoryChannel> baseChannels_;
1193-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
1193+
std::vector<mscclpp::Connection> conns_;
11941194
};
11951195

11961196
class Allreduce8 : public mscclpp::AlgorithmBuilder {
@@ -1211,7 +1211,7 @@ class Allreduce8 : public mscclpp::AlgorithmBuilder {
12111211
size_t scratchBufferSize_;
12121212
std::shared_ptr<mscclpp::Communicator> comm_;
12131213
int nChannelsPerConnection_;
1214-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
1214+
std::vector<mscclpp::Connection> conns_;
12151215
std::shared_ptr<char> scratchBuffer_;
12161216
std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>> outputSemaphores_;
12171217
std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>> inputScratchSemaphores_;

apps/nccl/src/broadcast.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ class BroadcastAlgo6 : public mscclpp::AlgorithmBuilder {
172172
void* output, size_t, mscclpp::DataType);
173173
mscclpp::AlgorithmCtxKey generateBroadcastContextKey(const void*, void*, size_t, mscclpp::DataType);
174174

175-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
175+
std::vector<mscclpp::Connection> conns_;
176176
size_t scratchMemSize_;
177177
std::shared_ptr<char> scratchBuffer_;
178178
};

apps/nccl/src/common.cu

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,41 @@ std::vector<mscclpp::RegisteredMemory> setupRemoteMemories(std::shared_ptr<msccl
2020
}
2121

2222
std::vector<mscclpp::MemoryChannel> setupMemoryChannels(
23-
const std::vector<std::shared_ptr<mscclpp::Connection>>& connections,
23+
const std::vector<mscclpp::Connection>& connections,
2424
const std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>>& memorySemaphores,
2525
const std::vector<mscclpp::RegisteredMemory>& remoteMemories, mscclpp::RegisteredMemory localMemory,
2626
int nChannelsPerConnection) {
2727
std::vector<mscclpp::MemoryChannel> channels;
2828
size_t nConnections = connections.size();
2929
for (int idx = 0; idx < nChannelsPerConnection; ++idx) {
3030
for (size_t cid = 0; cid < nConnections; ++cid) {
31-
if (connections[cid]->transport() == mscclpp::Transport::CudaIpc) {
31+
if (connections[cid].transport() == mscclpp::Transport::CudaIpc) {
3232
channels.emplace_back(memorySemaphores[idx * nConnections + cid], remoteMemories[cid], localMemory, nullptr);
3333
}
3434
}
3535
}
3636
return channels;
3737
}
3838

39-
std::vector<std::shared_ptr<mscclpp::Connection>> setupConnections(std::shared_ptr<mscclpp::Communicator> comm) {
40-
std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connectionFutures;
39+
std::vector<mscclpp::Connection> setupConnections(std::shared_ptr<mscclpp::Communicator> comm) {
40+
std::vector<std::shared_future<mscclpp::Connection>> connectionFutures;
4141
for (int i = 0; i < comm->bootstrap()->getNranks(); i++) {
4242
if (i == comm->bootstrap()->getRank()) continue;
4343
connectionFutures.push_back(comm->connect(mscclpp::Transport::CudaIpc, i));
4444
}
45-
std::vector<std::shared_ptr<mscclpp::Connection>> connections;
45+
std::vector<mscclpp::Connection> connections;
4646
std::transform(connectionFutures.begin(), connectionFutures.end(), std::back_inserter(connections),
4747
[](const auto& future) { return future.get(); });
4848
return connections;
4949
}
5050

5151
std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>> setupMemorySemaphores(
52-
std::shared_ptr<mscclpp::Communicator> comm, const std::vector<std::shared_ptr<mscclpp::Connection>>& connections,
52+
std::shared_ptr<mscclpp::Communicator> comm, const std::vector<mscclpp::Connection>& connections,
5353
int nChannelsPerConnection) {
5454
std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>> memorySemaphores;
5555
for (int idx = 0; idx < nChannelsPerConnection; ++idx) {
5656
for (size_t cid = 0; cid < connections.size(); ++cid) {
57-
if (connections[cid]->transport() == mscclpp::Transport::CudaIpc) {
57+
if (connections[cid].transport() == mscclpp::Transport::CudaIpc) {
5858
memorySemaphores.emplace_back(
5959
std::make_shared<mscclpp::MemoryDevice2DeviceSemaphore>(*(comm), connections[cid]));
6060
}
@@ -117,14 +117,14 @@ std::shared_ptr<mscclpp::DeviceHandle<mscclpp::SwitchChannel>> setupNvlsChannelD
117117
}
118118

119119
std::vector<mscclpp::BaseMemoryChannel> setupBaseMemoryChannels(
120-
const std::vector<std::shared_ptr<mscclpp::Connection>>& connections,
120+
const std::vector<mscclpp::Connection>& connections,
121121
const std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>>& memorySemaphores,
122122
int nChannelsPerConnection) {
123123
std::vector<mscclpp::BaseMemoryChannel> channels;
124124
size_t nConnections = connections.size();
125125
for (int idx = 0; idx < nChannelsPerConnection; ++idx) {
126126
for (size_t cid = 0; cid < nConnections; ++cid) {
127-
if (connections[cid]->transport() == mscclpp::Transport::CudaIpc) {
127+
if (connections[cid].transport() == mscclpp::Transport::CudaIpc) {
128128
channels.emplace_back(memorySemaphores[idx * nConnections + cid]);
129129
}
130130
}

apps/nccl/src/common.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ std::vector<mscclpp::RegisteredMemory> setupRemoteMemories(std::shared_ptr<msccl
3333
mscclpp::RegisteredMemory localMemory);
3434

3535
std::vector<mscclpp::MemoryChannel> setupMemoryChannels(
36-
const std::vector<std::shared_ptr<mscclpp::Connection>>& connections,
36+
const std::vector<mscclpp::Connection>& connections,
3737
const std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>>& memorySemaphores,
3838
const std::vector<mscclpp::RegisteredMemory>& remoteMemories, mscclpp::RegisteredMemory localMemory,
3939
int nChannelsPerConnection);
4040

41-
std::vector<std::shared_ptr<mscclpp::Connection>> setupConnections(std::shared_ptr<mscclpp::Communicator> comm);
41+
std::vector<mscclpp::Connection> setupConnections(std::shared_ptr<mscclpp::Communicator> comm);
4242

4343
std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>> setupMemorySemaphores(
44-
std::shared_ptr<mscclpp::Communicator> comm, const std::vector<std::shared_ptr<mscclpp::Connection>>& connections,
44+
std::shared_ptr<mscclpp::Communicator> comm, const std::vector<mscclpp::Connection>& connections,
4545
int nChannelsPerConnection);
4646

4747
std::shared_ptr<mscclpp::DeviceHandle<mscclpp::MemoryChannel>> setupMemoryChannelDeviceHandles(
@@ -57,7 +57,7 @@ std::shared_ptr<mscclpp::DeviceHandle<mscclpp::SwitchChannel>> setupNvlsChannelD
5757
const std::vector<mscclpp::SwitchChannel>& nvlsChannels);
5858

5959
std::vector<mscclpp::BaseMemoryChannel> setupBaseMemoryChannels(
60-
const std::vector<std::shared_ptr<mscclpp::Connection>>& connections,
60+
const std::vector<mscclpp::Connection>& connections,
6161
const std::vector<std::shared_ptr<mscclpp::MemoryDevice2DeviceSemaphore>>& memorySemaphores,
6262
int nChannelsPerConnection);
6363

docs/tutorials/01-basic-concepts.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ The connection is created by calling `connect` on the context object:
8383
8484
```cpp
8585
// From gpu_ping_pong.cu, lines 76 and 82
86-
std::shared_ptr<mscclpp::Connection> conn0 = ctx->connect(/*localEndpoint*/ ep0, /*remoteEndpoint*/ ep1);
87-
std::shared_ptr<mscclpp::Connection> conn1 = ctx->connect(/*localEndpoint*/ ep1, /*remoteEndpoint*/ ep0);
86+
mscclpp::Connection conn0 = ctx->connect(/*localEndpoint*/ ep0, /*remoteEndpoint*/ ep1);
87+
mscclpp::Connection conn1 = ctx->connect(/*localEndpoint*/ ep1, /*remoteEndpoint*/ ep0);
8888
```
8989

9090
The `localEndpoint` and `remoteEndpoint` parameters specify which endpoints are used for the connection. A connection is asymmetric by nature, meaning that we need to create one connection for each endpoint. In this case, `conn0` is created for `ep0` to communicate with `ep1`, and `conn1` is created for `ep1` to communicate with `ep0`.
@@ -101,7 +101,7 @@ sendToProcessB(serializedEp0); // send serializedEp0 to Process B using any IPC
101101
mscclpp::Endpoint ep1 = ctx->createEndpoint({transport, {mscclpp::DeviceType::GPU, 1}});
102102
std::vector<char> serializedEp0 = recvFromProcessA(); // receive serializedEp0 from Process A
103103
mscclpp::Endpoint ep0 = mscclpp::Endpoint::deserialize(serializedEp0);
104-
std::shared_ptr<mscclpp::Connection> conn1 = ctx->connect(/*localEndpoint*/ ep1, /*remoteEndpoint*/ ep0);
104+
mscclpp::Connection conn1 = ctx->connect(/*localEndpoint*/ ep1, /*remoteEndpoint*/ ep0);
105105
```
106106
107107
## SemaphoreStub and Semaphore

examples/customized-collective-algorithm/customized_allgather.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,18 +107,18 @@ class AllgatherAlgoBuilder : public mscclpp::AlgorithmBuilder {
107107
}
108108

109109
private:
110-
std::vector<std::shared_ptr<mscclpp::Connection>> conns_;
110+
std::vector<mscclpp::Connection> conns_;
111111
std::shared_ptr<mscclpp::ProxyService> proxyService_;
112112
int worldSize_;
113113

114114
void initialize(std::shared_ptr<mscclpp::Communicator> comm) {
115-
std::vector<std::shared_future<std::shared_ptr<mscclpp::Connection>>> connectionFutures;
115+
std::vector<std::shared_future<mscclpp::Connection>> connectionFutures;
116116
worldSize_ = comm->bootstrap()->getNranks();
117117
for (int i = 0; i < worldSize_; i++) {
118118
if (i == comm->bootstrap()->getRank()) continue;
119119
connectionFutures.push_back(comm->connect(mscclpp::Transport::CudaIpc, i));
120120
}
121-
std::vector<std::shared_ptr<mscclpp::Connection>> connections;
121+
std::vector<mscclpp::Connection> connections;
122122
std::transform(connectionFutures.begin(), connectionFutures.end(), std::back_inserter(connections),
123123
[](const auto& future) { return future.get(); });
124124
this->conns_ = std::move(connections);

examples/tutorials/01-basic-concepts/gpu_ping_pong.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,13 @@ int main() {
7373
log("GPU 0: Creating a connection and a semaphore stub ...");
7474

7575
MSCCLPP_CUDATHROW(cudaSetDevice(0));
76-
std::shared_ptr<mscclpp::Connection> conn0 = ctx->connect(/*localEndpoint*/ ep0, /*remoteEndpoint*/ ep1);
76+
mscclpp::Connection conn0 = ctx->connect(/*localEndpoint*/ ep0, /*remoteEndpoint*/ ep1);
7777
mscclpp::SemaphoreStub semaStub0(conn0);
7878

7979
log("GPU 1: Creating a connection and a semaphore stub ...");
8080

8181
MSCCLPP_CUDATHROW(cudaSetDevice(1));
82-
std::shared_ptr<mscclpp::Connection> conn1 = ctx->connect(/*localEndpoint*/ ep1, /*remoteEndpoint*/ ep0);
82+
mscclpp::Connection conn1 = ctx->connect(/*localEndpoint*/ ep1, /*remoteEndpoint*/ ep0);
8383
mscclpp::SemaphoreStub semaStub1(conn1);
8484

8585
log("GPU 0: Creating a semaphore and a memory channel ...");

include/mscclpp/core.hpp

Lines changed: 29 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ struct EndpointConfig {
426426

427427
class Context;
428428
class Connection;
429+
class BaseConnection;
429430
class RegisteredMemory;
430431
class SemaphoreStub;
431432
class Semaphore;
@@ -475,7 +476,7 @@ class Endpoint {
475476
std::shared_ptr<Impl> pimpl_;
476477

477478
friend class Context;
478-
friend class Connection;
479+
friend class BaseConnection;
479480
};
480481

481482
/// Context for communication. This provides a low-level interface for forming connections in use-cases
@@ -522,8 +523,8 @@ class Context : public std::enable_shared_from_this<Context> {
522523
///
523524
/// @param localEndpoint The local endpoint.
524525
/// @param remoteEndpoint The remote endpoint.
525-
/// @return A shared pointer to the connection.
526-
std::shared_ptr<Connection> connect(const Endpoint& localEndpoint, const Endpoint& remoteEndpoint);
526+
/// @return A connection object.
527+
Connection connect(const Endpoint& localEndpoint, const Endpoint& remoteEndpoint);
527528

528529
private:
529530
Context();
@@ -532,7 +533,7 @@ class Context : public std::enable_shared_from_this<Context> {
532533
std::unique_ptr<Impl> pimpl_;
533534

534535
friend class Endpoint;
535-
friend class Connection;
536+
friend class BaseConnection;
536537
friend class RegisteredMemory;
537538
friend class SemaphoreStub;
538539
};
@@ -579,7 +580,7 @@ class RegisteredMemory {
579580
std::shared_ptr<Impl> pimpl_;
580581

581582
friend class Context;
582-
friend class Connection;
583+
friend class BaseConnection;
583584
friend class SemaphoreStub;
584585
friend class Semaphore;
585586
};
@@ -588,12 +589,7 @@ class RegisteredMemory {
588589
class Connection {
589590
public:
590591
/// Constructor.
591-
/// @param context The context associated with the connection.
592-
/// @param localEndpoint The local endpoint of the connection.
593-
Connection(std::shared_ptr<Context> context, const Endpoint& localEndpoint);
594-
595-
/// Destructor.
596-
virtual ~Connection() = default;
592+
Connection() = default;
597593

598594
/// Write data from a source RegisteredMemory to a destination RegisteredMemory.
599595
///
@@ -602,28 +598,27 @@ class Connection {
602598
/// @param src The source RegisteredMemory.
603599
/// @param srcOffset The offset in bytes from the start of the source RegisteredMemory.
604600
/// @param size The number of bytes to write.
605-
virtual void write(RegisteredMemory dst, uint64_t dstOffset, RegisteredMemory src, uint64_t srcOffset,
606-
uint64_t size) = 0;
601+
void write(RegisteredMemory dst, uint64_t dstOffset, RegisteredMemory src, uint64_t srcOffset, uint64_t size);
607602

608603
/// Update an 8-byte value in a destination RegisteredMemory and synchronize the change with the remote process.
609604
///
610605
/// @param dst The destination RegisteredMemory.
611606
/// @param dstOffset The offset in bytes from the start of the destination RegisteredMemory.
612607
/// @param src A pointer to the value to update.
613608
/// @param newValue The new value to write.
614-
virtual void updateAndSync(RegisteredMemory dst, uint64_t dstOffset, uint64_t* src, uint64_t newValue) = 0;
609+
void updateAndSync(RegisteredMemory dst, uint64_t dstOffset, uint64_t* src, uint64_t newValue);
615610

616611
/// Flush any pending writes to the remote process.
617612
/// @param timeoutUsec Timeout in microseconds. Default: -1 (no timeout)
618-
virtual void flush(int64_t timeoutUsec = -1) = 0;
613+
void flush(int64_t timeoutUsec = -1);
619614

620615
/// Get the transport used by the local process.
621616
/// @return The transport used by the local process.
622-
virtual Transport transport() const = 0;
617+
Transport transport() const;
623618

624619
/// Get the transport used by the remote process.
625620
/// @return The transport used by the remote process.
626-
virtual Transport remoteTransport() const = 0;
621+
Transport remoteTransport() const;
627622

628623
/// Get the context associated with this connection.
629624
/// @return A shared pointer to the context associated with this connection.
@@ -637,22 +632,23 @@ class Connection {
637632
/// @return The maximum number of write requests that can be queued.
638633
int getMaxWriteQueueSize() const;
639634

640-
protected:
641-
static const Endpoint::Impl& getImpl(const Endpoint& endpoint);
642-
static const RegisteredMemory::Impl& getImpl(const RegisteredMemory& memory);
643-
static Context::Impl& getImpl(Context& context);
635+
private:
636+
Connection(std::shared_ptr<BaseConnection> impl);
637+
std::shared_ptr<BaseConnection> impl_;
644638

645-
std::shared_ptr<Context> context_;
646-
Endpoint localEndpoint_;
647-
int maxWriteQueueSize_;
639+
friend class Context;
640+
friend class Communicator;
641+
friend class SemaphoreStub;
642+
friend class Semaphore;
643+
friend class ProxyService;
648644
};
649645

650646
/// SemaphoreStub object only used for constructing Semaphore, not for direct use by the user.
651647
class SemaphoreStub {
652648
public:
653649
/// Constructor.
654-
/// @param connection A shared pointer to the connection associated with this semaphore.
655-
SemaphoreStub(std::shared_ptr<Connection> connection);
650+
/// @param connection The connection associated with this semaphore.
651+
SemaphoreStub(const Connection& connection);
656652

657653
/// Get the memory associated with this semaphore.
658654
/// @return A reference to the registered memory for this semaphore.
@@ -687,8 +683,8 @@ class Semaphore {
687683
Semaphore(const SemaphoreStub& localStub, const SemaphoreStub& remoteStub);
688684

689685
/// Get the connection associated with this semaphore.
690-
/// @return A shared pointer to the connection.
691-
std::shared_ptr<Connection> connection() const;
686+
/// @return The connection.
687+
Connection& connection();
692688

693689
/// Get the local memory associated with this semaphore.
694690
/// @return A reference to the local registered memory.
@@ -874,34 +870,23 @@ class Communicator {
874870
/// @param localEndpoint The local endpoint.
875871
/// @param remoteRank The rank of the remote process.
876872
/// @param tag The tag to use for identifying the send and receive.
877-
/// @return A future of shared pointer to the connection.
873+
/// @return A future of the connection.
878874
///
879-
std::shared_future<std::shared_ptr<Connection>> connect(const Endpoint& localEndpoint, int remoteRank, int tag = 0);
875+
std::shared_future<Connection> connect(const Endpoint& localEndpoint, int remoteRank, int tag = 0);
880876

881877
/// Connect to a remote rank. Wrapper of `connect(localEndpoint, remoteRank, tag)`.
882878
/// @param localConfig The configuration for the local endpoint.
883879
/// @param remoteRank The rank of the remote process.
884880
/// @param tag The tag to use for identifying the send and receive.
885-
/// @return A future of shared pointer to the connection.
886-
std::shared_future<std::shared_ptr<Connection>> connect(const EndpointConfig& localConfig, int remoteRank,
887-
int tag = 0);
888-
889-
[[deprecated("Use connect(localConfig, remoteRank, tag) instead. This will be removed in a future release.")]] std::
890-
shared_future<std::shared_ptr<Connection>>
891-
connect(int remoteRank, int tag, EndpointConfig localConfig);
892-
893-
[[deprecated("Use connect() instead. This will be removed in a future release.")]] NonblockingFuture<
894-
std::shared_ptr<Connection>>
895-
connectOnSetup(int remoteRank, int tag, EndpointConfig localConfig) {
896-
return connect(localConfig, remoteRank, tag);
897-
}
881+
/// @return A future of the connection.
882+
std::shared_future<Connection> connect(const EndpointConfig& localConfig, int remoteRank, int tag = 0);
898883

899884
/// Build a semaphore for cross-process synchronization.
900885
/// @param connection The connection associated with this semaphore.
901886
/// @param remoteRank The rank of the remote process.
902887
/// @param tag The tag to use for identifying the operation.
903888
/// @return A future of the built semaphore.
904-
std::shared_future<Semaphore> buildSemaphore(std::shared_ptr<Connection> connection, int remoteRank, int tag = 0);
889+
std::shared_future<Semaphore> buildSemaphore(const Connection& connection, int remoteRank, int tag = 0);
905890

906891
/// Get the remote rank a connection is connected to.
907892
///

0 commit comments

Comments
 (0)