Skip to content

Commit 109f4bc

Browse files
committed
handle errors gracefully to prevent SEGV
oob_allgather_test() does not check whether the isend() call succeeded, so on error oob_req->reqs[] may be used uninitialized, leading to a SEGV. Signed-off-by: Bruno Faccini <[email protected]>
1 parent 01da1c4 commit 109f4bc

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

ompi/mca/coll/ucc/coll_ucc_module.c

+12-4
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2021 Mellanox Technologies. All rights reserved.
33
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
44
* All Rights reserved.
5-
* Copyright (c) 2022-2024 NVIDIA Corporation. All rights reserved.
5+
* Copyright (c) 2022-2025 NVIDIA Corporation. All rights reserved.
66
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
77
* $COPYRIGHT$
88
*
@@ -150,7 +150,7 @@ static ucc_status_t oob_allgather_test(void *req)
150150
size_t msglen = oob_req->msglen;
151151
int probe_count = 5;
152152
int rank, size, sendto, recvfrom, recvdatafrom,
153-
senddatafrom, completed, probe;
153+
senddatafrom, completed, probe, rc;
154154

155155
size = ompi_comm_size(comm);
156156
rank = ompi_comm_rank(comm);
@@ -175,10 +175,16 @@ static ucc_status_t oob_allgather_test(void *req)
175175
senddatafrom = (rank - oob_req->iter + size) % size;
176176
tmprecv = (char*)oob_req->rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)msglen;
177177
tmpsend = (char*)oob_req->rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)msglen;
178-
MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
178+
rc = MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
179179
MCA_PML_BASE_SEND_STANDARD, comm, &oob_req->reqs[0]));
180-
MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
180+
if (OMPI_SUCCESS != rc) {
181+
return UCC_ERR_NO_MESSAGE;
182+
}
183+
rc = MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
181184
MCA_COLL_BASE_TAG_UCC, comm, &oob_req->reqs[1]));
185+
if (OMPI_SUCCESS != rc) {
186+
return UCC_ERR_NO_MESSAGE;
187+
}
182188
}
183189
probe = 0;
184190
do {
@@ -206,6 +212,8 @@ static ucc_status_t oob_allgather(void *sbuf, void *rbuf, size_t msglen,
206212
oob_req->msglen = msglen;
207213
oob_req->oob_coll_ctx = oob_coll_ctx;
208214
oob_req->iter = 0;
215+
oob_req->reqs[0] = MPI_REQUEST_NULL;
216+
oob_req->reqs[1] = MPI_REQUEST_NULL;
209217
*req = oob_req;
210218
return UCC_OK;
211219
}

0 commit comments

Comments
 (0)