-
Notifications
You must be signed in to change notification settings - Fork 128
TL/UCP: fix ep close timeout #1271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,20 +7,19 @@ | |
| #include "tl_ucp.h" | ||
| #include "tl_ucp_ep.h" | ||
|
|
||
| //NOLINTNEXTLINE | ||
| // NOLINTNEXTLINE | ||
| static void ucc_tl_ucp_err_handler(void *arg, ucp_ep_h ep, ucs_status_t status) | ||
| { | ||
| /* In case we don't have OOB barrier, errors are expected. | ||
| * This cb will suppress UCX from raising errors*/ | ||
| ; | ||
| } | ||
|
|
||
| static inline ucc_status_t ucc_tl_ucp_connect_ep(ucc_tl_ucp_context_t *ctx, | ||
| int is_service, ucp_ep_h *ep, | ||
| void *ucp_address) | ||
| static inline ucc_status_t ucc_tl_ucp_connect_ep( | ||
| ucc_tl_ucp_context_t *ctx, int is_service, ucp_ep_h *ep, void *ucp_address) | ||
| { | ||
| ucp_worker_h worker = | ||
| (is_service) ? ctx->service_worker.ucp_worker : ctx->worker.ucp_worker; | ||
| ucp_worker_h worker = (is_service) ? ctx->service_worker.ucp_worker | ||
| : ctx->worker.ucp_worker; | ||
| ucp_ep_params_t ep_params; | ||
| ucs_status_t status; | ||
| if (*ep) { | ||
|
|
@@ -34,28 +33,33 @@ static inline ucc_status_t ucc_tl_ucp_connect_ep(ucc_tl_ucp_context_t *ctx, | |
| ep_params.err_mode = UCP_ERR_HANDLING_MODE_PEER; | ||
| ep_params.err_handler.cb = ucc_tl_ucp_err_handler; | ||
| ep_params.err_handler.arg = NULL; | ||
| ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | | ||
| UCP_EP_PARAM_FIELD_ERR_HANDLER; | ||
| ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | | ||
| UCP_EP_PARAM_FIELD_ERR_HANDLER; | ||
| } | ||
| status = ucp_ep_create(worker, &ep_params, ep); | ||
|
|
||
| if (ucc_unlikely(UCS_OK != status)) { | ||
| tl_error(ctx->super.super.lib, "ucp returned connect error: %s", | ||
| ucs_status_string(status)); | ||
| tl_error( | ||
| ctx->super.super.lib, | ||
| "ucp returned connect error: %s", | ||
| ucs_status_string(status)); | ||
| return ucs_status_to_ucc_status(status); | ||
| } | ||
| return UCC_OK; | ||
| } | ||
|
|
||
| ucc_status_t ucc_tl_ucp_connect_team_ep(ucc_tl_ucp_team_t *team, | ||
| ucc_rank_t core_rank, ucp_ep_h *ep) | ||
| ucc_status_t ucc_tl_ucp_connect_team_ep( | ||
| ucc_tl_ucp_team_t *team, ucc_rank_t core_rank, ucp_ep_h *ep) | ||
| { | ||
| ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(team); | ||
| ucc_tl_ucp_context_t *ctx = UCC_TL_UCP_TEAM_CTX(team); | ||
| int use_service_worker = USE_SERVICE_WORKER(team); | ||
| void *addr; | ||
|
|
||
| addr = ucc_get_team_ep_addr(UCC_TL_CORE_CTX(team), UCC_TL_CORE_TEAM(team), | ||
| core_rank, ucc_tl_ucp.super.super.id); | ||
| addr = ucc_get_team_ep_addr( | ||
| UCC_TL_CORE_CTX(team), | ||
| UCC_TL_CORE_TEAM(team), | ||
| core_rank, | ||
| ucc_tl_ucp.super.super.id); | ||
| addr = use_service_worker ? TL_UCP_EP_ADDR_WORKER_SERVICE(addr) | ||
| : TL_UCP_EP_ADDR_WORKER(addr); | ||
|
|
||
|
|
@@ -65,8 +69,8 @@ ucc_status_t ucc_tl_ucp_connect_team_ep(ucc_tl_ucp_team_t *team, | |
| /* Finds next non-NULL ep in the storage and returns that handle | ||
| for closure. In case of "hash" storage it pops the item, | ||
| in case of "array" sets it to NULL */ | ||
| static inline ucp_ep_h get_next_ep_to_close(ucc_tl_ucp_worker_t * worker, | ||
| ucc_tl_ucp_context_t *ctx, int *i) | ||
| static inline ucp_ep_h get_next_ep_to_close( | ||
| ucc_tl_ucp_worker_t *worker, ucc_tl_ucp_context_t *ctx, int *i) | ||
| { | ||
| ucp_ep_h ep = NULL; | ||
| ucc_rank_t size; | ||
|
|
@@ -84,39 +88,82 @@ static inline ucp_ep_h get_next_ep_to_close(ucc_tl_ucp_worker_t * worker, | |
| return ep; | ||
| } | ||
|
|
||
| void ucc_tl_ucp_close_eps(ucc_tl_ucp_worker_t * worker, | ||
| ucc_tl_ucp_context_t *ctx) | ||
| void ucc_tl_ucp_close_eps( | ||
| ucc_tl_ucp_worker_t *worker, ucc_tl_ucp_context_t *ctx) | ||
| { | ||
| int i = 0; | ||
| ucp_ep_h ep; | ||
| ucs_status_t status; | ||
| ucs_status_ptr_t close_req; | ||
| ucp_request_param_t param; | ||
| int i = 0; | ||
| int n_reqs = 0; | ||
| int n_inflight; | ||
| int j; | ||
| ucp_ep_h ep; | ||
| ucs_status_t status; | ||
| ucs_status_ptr_t close_req; | ||
| ucp_request_param_t param; | ||
| size_t max_eps; | ||
| ucs_status_ptr_t *reqs; | ||
|
|
||
| param.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS; | ||
| param.flags = 0; // 0 means FLUSH | ||
| ep = get_next_ep_to_close(worker, ctx, &i); | ||
| while (ep) { | ||
| close_req = ucp_ep_close_nbx(ep, ¶m); | ||
| max_eps = worker->eps | ||
| ? (size_t)ctx->super.super.ucc_context->params.oob.n_oob_eps | ||
| : kh_size(worker->ep_hash); | ||
|
Comment on lines
+110
to
+112
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When `worker->eps` is set, `max_eps` is taken directly from `n_oob_eps` regardless of how many endpoints were actually created. On very large jobs (e.g., 100k-process runs) where a single context closes only a handful of endpoints, this over-allocates a potentially large array during teardown — at precisely the time when memory pressure may already be elevated. Consider counting the actual non-NULL entries in `worker->eps` instead: if (worker->eps) {
size_t n_oob = ctx->super.super.ucc_context->params.oob.n_oob_eps;
for (size_t k = 0; k < n_oob; k++) {
if (worker->eps[k]) max_eps++;
}
} else {
max_eps = kh_size(worker->ep_hash);
} |
||
| if (max_eps == 0) { | ||
| return; | ||
| } | ||
| reqs = (ucs_status_ptr_t *)ucc_calloc(max_eps, sizeof(*reqs), "close_reqs"); | ||
| if (!reqs) { | ||
| tl_error( | ||
| ctx->super.super.lib, "failed to allocate close requests array"); | ||
| return; | ||
| } | ||
ikryukov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /* Use graceful flush with OOB, force close otherwise */ | ||
| param.op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS; | ||
| param.flags = UCC_TL_CTX_HAS_OOB(ctx) ? 0 : UCP_EP_CLOSE_FLAG_FORCE; | ||
ikryukov marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ep = get_next_ep_to_close(worker, ctx, &i); | ||
| while (ep) { | ||
| close_req = ucp_ep_close_nbx(ep, ¶m); | ||
| if (UCS_PTR_IS_PTR(close_req)) { | ||
| reqs[n_reqs++] = close_req; | ||
| } else { | ||
| status = UCS_PTR_STATUS(close_req); | ||
| ucc_assert(status <= UCS_OK); | ||
| if (status != UCS_OK) { | ||
| tl_error( | ||
| ctx->super.super.lib, | ||
| "error during ucp ep close, ep %p, status %s", | ||
| ep, | ||
| ucs_status_string(status)); | ||
| } | ||
| } | ||
| ep = get_next_ep_to_close(worker, ctx, &i); | ||
| } | ||
|
|
||
| n_inflight = n_reqs; | ||
| while (n_inflight > 0) { | ||
| ucp_worker_progress(ctx->worker.ucp_worker); | ||
| if (ctx->cfg.service_worker != 0) { | ||
| ucp_worker_progress(ctx->service_worker.ucp_worker); | ||
| } | ||
| n_inflight = 0; | ||
| for (j = 0; j < n_reqs; j++) { | ||
| if (!reqs[j]) { | ||
| continue; | ||
| } | ||
| status = ucp_request_check_status(reqs[j]); | ||
| if (status != UCS_INPROGRESS) { | ||
| ucc_assert(status <= UCS_OK); | ||
| if (status != UCS_OK) { | ||
| tl_error( | ||
| ctx->super.super.lib, | ||
| "error during ucp ep close, status %s", | ||
| ucs_status_string(status)); | ||
| } | ||
| ucp_request_free(reqs[j]); | ||
| reqs[j] = NULL; | ||
| } else { | ||
| n_inflight++; | ||
| } | ||
| } | ||
| } | ||
ikryukov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (UCS_PTR_IS_PTR(close_req)) { | ||
| do { | ||
| ucp_worker_progress(ctx->worker.ucp_worker); | ||
| if (ctx->cfg.service_worker != 0) { | ||
| ucp_worker_progress(ctx->service_worker.ucp_worker); | ||
| } | ||
| status = ucp_request_check_status(close_req); | ||
| } while (status == UCS_INPROGRESS); | ||
| ucp_request_free(close_req); | ||
| } else { | ||
| status = UCS_PTR_STATUS(close_req); | ||
| } | ||
| ucc_assert(status <= UCS_OK); | ||
| if (status != UCS_OK) { | ||
| tl_error(ctx->super.super.lib, | ||
| "error during ucp ep close, ep %p, status %s", | ||
| ep, ucs_status_string(status)); | ||
| } | ||
| ep = get_next_ep_to_close(worker, ctx, &i); | ||
| } | ||
| ucc_free(reqs); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `status` variable is declared without an initializer. In the sequential-fallback path, if
`timed_out` is already 1 on entry to an iteration, the do/while wait-loop is skipped (lines 135–150), so `status` is never assigned before the guard at line 156. While the guard at line 156 (`if (!timed_out)`) prevents use of the uninitialized `status`, static analyzers can still flag this as a potential issue. Initializing at declaration removes the ambiguity: