Skip to content

Commit 8fb2eaa

Browse files
committed
prov/shm: add unexpected message buffering on demand when needed
To support unlimited unexpected messaging, add a flag, SMR_BUFFER_RECV, that the sender sets to tell the receiver that resources are limited and that the whole message should be buffered on the target. This allows the command to be returned to the sender immediately, so the sender is never blocked by unexpected messages at the target. Buffering unexpected messages hurts performance, so the default is to wait until only a single command is left before requesting buffering; an environment variable (FI_SHM_BUFFER_THRESHOLD) is also added to adjust this threshold, either for debugging or as a workaround.

Signed-off-by: Alexia Ingerson <alexia.ingerson@intel.com>
1 parent e48c9a9 commit 8fb2eaa

File tree

5 files changed

+234
-71
lines changed

5 files changed

+234
-71
lines changed

man/fi_shm.7.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,14 @@ The *shm* provider checks for the following environment variables:
176176
chunks. This environment variable is provided to fine tune performance
177177
on different systems. Default 262144
178178

179+
*FI_SHM_BUFFER_THRESHOLD*
180+
: Number of available commands at or below which the sender starts requesting forced buffering of unexpected messages. When this
181+
threshold is reached, the sender will notify the receiver to force buffering
182+
of the entire message if it is unexpected. If the message is matched when
183+
received, the request has no effect. Requesting unexpected message buffering allows
184+
shm to support unlimited unexpected messaging (memory permitting).
185+
Default: 1
186+
179187
# SEE ALSO
180188

181189
[`fabric`(7)](fabric.7.html),

prov/shm/src/smr_ep.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,10 @@ static ssize_t smr_do_inject(struct smr_ep *ep, struct smr_region *peer_smr,
478478
smr_generic_format(cmd, tx_id, rx_id, op, tag, data, op_flags);
479479
smr_format_inject(ep, cmd, pend);
480480

481+
if (smr_freestack_avail(smr_cmd_stack(ep->region)) <=
482+
smr_env.buffer_threshold)
483+
cmd->hdr.op_flags |= SMR_BUFFER_RECV;
484+
481485
return FI_SUCCESS;
482486
}
483487

@@ -499,6 +503,10 @@ static ssize_t smr_do_iov(struct smr_ep *ep, struct smr_region *peer_smr,
499503
smr_generic_format(cmd, tx_id, rx_id, op, tag, data, op_flags);
500504
smr_format_iov(cmd, pend);
501505

506+
if (smr_freestack_avail(smr_cmd_stack(ep->region)) <=
507+
smr_env.buffer_threshold)
508+
cmd->hdr.op_flags |= SMR_BUFFER_RECV;
509+
502510
return FI_SUCCESS;
503511
}
504512

@@ -566,6 +574,10 @@ static ssize_t smr_do_ipc(struct smr_ep *ep, struct smr_region *peer_smr,
566574

567575
smr_format_tx_pend(pend, cmd, context, desc, iov, iov_count, op_flags);
568576

577+
if (smr_freestack_avail(smr_cmd_stack(ep->region)) <=
578+
smr_env.buffer_threshold)
579+
cmd->hdr.op_flags |= SMR_BUFFER_RECV;
580+
569581
return FI_SUCCESS;
570582
}
571583

prov/shm/src/smr_init.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ struct smr_env smr_env = {
4242
.use_dsa_sar = false,
4343
.max_gdrcopy_size = 3072,
4444
.use_xpmem = false,
45+
.buffer_threshold = 1,
4546
};
4647

4748
static void smr_init_env(void)
@@ -51,6 +52,8 @@ static void smr_init_env(void)
5152
fi_param_get_bool(&smr_prov, "disable_cma", &smr_env.disable_cma);
5253
fi_param_get_bool(&smr_prov, "use_dsa_sar", &smr_env.use_dsa_sar);
5354
fi_param_get_bool(&smr_prov, "use_xpmem", &smr_env.use_xpmem);
55+
fi_param_get_size_t(&smr_prov, "buffer_threshold",
56+
&smr_env.buffer_threshold);
5457
}
5558

5659
static void smr_resolve_addr(const char *node, const char *service,
@@ -216,6 +219,9 @@ SHM_INI
216219
fi_param_define(&smr_prov, "use_xpmem", FI_PARAM_BOOL,
217220
"Enable XPMEM over CMA when possible "
218221
"(default: false)");
222+
fi_param_define(&smr_prov, "buffer_threshold", FI_PARAM_SIZE_T,
223+
"When to start requesting forced unexpected messaging "
224+
"buffering. (default: 1)");
219225

220226
smr_init_env();
221227

0 commit comments

Comments
 (0)