Skip to content

Commit 532e73b

Browse files
authored
Merge pull request #43 from zheng871026/huawei
Reduce the memory of the step structure
2 parents 2c0f0e9 + 2ac3e46 commit 532e73b

File tree

3 files changed: 28 additions and 28 deletions (+28 / -28 lines changed)

builtin/ops/builtin_cb.inl

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ static int ucg_builtin_comp_recv_one_cb(ucg_builtin_request_t *req,
239239
static int ucg_builtin_comp_recv_noncontig_one_cb(ucg_builtin_request_t *req,
240240
uint64_t offset, void *data, size_t length)
241241
{
242-
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state.dt.generic.state,
242+
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state,
243243
offset, data, length);
244244
(void) ucg_builtin_comp_step_cb(req, NULL);
245245
return 1;
@@ -257,7 +257,7 @@ static int ucg_builtin_comp_recv_one_then_send_cb(ucg_builtin_request_t *req,
257257
static int ucg_builtin_comp_recv_noncontig_one_then_send_cb(ucg_builtin_request_t *req,
258258
uint64_t offset, void *data, size_t length)
259259
{
260-
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state.dt.generic.state,
260+
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state,
261261
offset, data, length);
262262
req->recv_comp = 1;
263263
(void) ucg_builtin_step_execute(req, NULL);
@@ -274,7 +274,7 @@ static int ucg_builtin_comp_recv_many_cb(ucg_builtin_request_t *req,
274274
static int ucg_builtin_comp_recv_noncontig_many_cb(ucg_builtin_request_t *req,
275275
uint64_t offset, void *data, size_t length)
276276
{
277-
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state.dt.generic.state,
277+
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state,
278278
offset, data, length);
279279
return ucg_builtin_comp_step_check_cb(req);
280280
}
@@ -289,7 +289,7 @@ static int ucg_builtin_comp_recv_many_then_send_pipe_cb(ucg_builtin_request_t *r
289289
static int ucg_builtin_comp_recv_noncontig_many_then_send_pipe_cb(ucg_builtin_request_t *req,
290290
uint64_t offset, void *data, size_t length)
291291
{
292-
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state.dt.generic.state,
292+
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state,
293293
offset, data, length);
294294
return ucg_builtin_comp_send_check_frag_cb(req, offset);
295295
}
@@ -307,7 +307,7 @@ static int ucg_builtin_comp_recv_many_then_send_cb(ucg_builtin_request_t *req,
307307
static int ucg_builtin_comp_recv_noncontig_many_then_send_cb(ucg_builtin_request_t *req,
308308
uint64_t offset, void *data, size_t length)
309309
{
310-
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state.dt.generic.state,
310+
req->op->recv_dt->ops.unpack(req->step->non_contig.unpack_state,
311311
offset, data, length);
312312
if (req->pending == 1) {
313313
req->recv_comp = 1;
@@ -348,8 +348,8 @@ UCS_PROFILE_FUNC(int, ucg_builtin_comp_reduce_full_cb, (req, offset, data, lengt
348348
char *tmp_buffer = NULL;
349349
char *netdata = (char *)req->step->phase->recv_cache_buffer;
350350
ucp_dt_generic_t *gen_dt = req->op->recv_dt;
351-
void *state_pack = req->step->non_contig.pack_state_recv.dt.generic.state;
352-
void *state_unpack = req->step->non_contig.unpack_state.dt.generic.state;
351+
void *state_pack = req->step->non_contig.pack_state_recv;
352+
void *state_unpack = req->step->non_contig.unpack_state;
353353
ucg_collective_params_t *params = &req->op->super.params;
354354
size_t dt_len = (gen_dt == NULL) ? params->recv.dt_len :
355355
ucg_builtin_get_dt_len(gen_dt);
@@ -720,21 +720,21 @@ ucg_builtin_init_state(ucg_builtin_op_step_t *step, int option,
720720
state_gen = dt_gen->ops.start_unpack(dt_gen->context, step->recv_buffer,
721721
params->recv.count);
722722

723-
step->non_contig.unpack_state.dt.generic.state = state_gen;
723+
step->non_contig.unpack_state = state_gen;
724724
break;
725725

726726
case 1:
727727
state_gen = dt_gen->ops.start_pack(dt_gen->context, step->send_buffer,
728728
params->send.count);
729729

730-
step->non_contig.pack_state.dt.generic.state = state_gen;
730+
step->non_contig.pack_state = state_gen;
731731
break;
732732

733733
case 2:
734734
state_gen = dt_gen->ops.start_pack(dt_gen->context, step->recv_buffer,
735735
params->recv.count);
736736

737-
step->non_contig.pack_state_recv.dt.generic.state = state_gen;
737+
step->non_contig.pack_state_recv = state_gen;
738738
break;
739739

740740
default:
@@ -756,15 +756,15 @@ ucg_builtin_finalize_state(ucg_builtin_op_step_t *step, int option,
756756

757757
switch (option) {
758758
case 0:
759-
dt_gen->ops.finish(step->non_contig.unpack_state.dt.generic.state);
759+
dt_gen->ops.finish(step->non_contig.unpack_state);
760760
break;
761761

762762
case 1:
763-
dt_gen->ops.finish(step->non_contig.pack_state.dt.generic.state);
763+
dt_gen->ops.finish(step->non_contig.pack_state);
764764
break;
765765

766766
case 2:
767-
dt_gen->ops.finish(step->non_contig.pack_state_recv.dt.generic.state);
767+
dt_gen->ops.finish(step->non_contig.pack_state_recv);
768768
break;
769769

770770
default:

builtin/ops/builtin_ops.c

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t ucg_builtin_step_am_short_one(ucg_builti
5151
ucs_debug("am_short_one step %u length %zu", step->am_header.step_idx, step->buffer_length);
5252

5353
int8_t *send_buffer = step->send_buffer;
54-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
54+
void *dt_state = step->non_contig.pack_state;
5555
if (dt_state != NULL) {
5656
req->op->send_dt->ops.pack(dt_state, 0, step->non_contig.contig_buffer, step->buffer_length);
5757
send_buffer = step->non_contig.contig_buffer;
@@ -68,7 +68,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t ucg_builtin_step_am_short_max(ucg_builti
6868
unsigned am_id = step->am_id;
6969
ucg_offset_t frag_size = step->fragment_length;
7070
int8_t *send_buffer = step->send_buffer;
71-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
71+
void *dt_state = step->non_contig.pack_state;
7272

7373
if (dt_state != NULL) {
7474
req->op->send_dt->ops.pack(dt_state, 0, step->non_contig.contig_buffer, step->buffer_length);
@@ -123,7 +123,7 @@ static size_t ucg_builtin_step_am_bcopy_single_frag_packer(void *dest, void *arg
123123
ucg_builtin_request_t *req = (ucg_builtin_request_t*)arg;
124124
ucg_builtin_op_step_t *step = req->step;
125125
ucg_builtin_header_t *header_ptr = (ucg_builtin_header_t*)dest;
126-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
126+
void *dt_state = step->non_contig.pack_state;
127127
header_ptr->header = step->am_header.header;
128128

129129
if (dt_state != NULL) {
@@ -139,7 +139,7 @@ static size_t ucg_builtin_step_am_bcopy_full_frag_packer(void *dest, void *arg)
139139
ucg_builtin_request_t *req = (ucg_builtin_request_t*)arg;
140140
ucg_builtin_op_step_t *step = req->step;
141141
ucg_builtin_header_t *header_ptr = (ucg_builtin_header_t*)dest;
142-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
142+
void *dt_state = step->non_contig.pack_state;
143143
header_ptr->header = step->am_header.header;
144144

145145
if (dt_state != NULL) {
@@ -156,7 +156,7 @@ static size_t ucg_builtin_step_am_bcopy_partial_frag_packer(void *dest, void *ar
156156
ucg_builtin_op_step_t *step = req->step;
157157
ucg_offset_t last_frag_length = step->buffer_length - step->iter_offset;
158158
ucg_builtin_header_t *header_ptr = (ucg_builtin_header_t*)dest;
159-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
159+
void *dt_state = step->non_contig.pack_state;
160160
header_ptr->header = step->am_header.header;
161161

162162
if (dt_state != NULL) {
@@ -237,7 +237,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t ucg_builtin_step_am_zcopy_one(ucg_builti
237237
uct_ep_h ep, int is_single_send)
238238
{
239239
int8_t *send_buffer = step->send_buffer;
240-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
240+
void *dt_state = step->non_contig.pack_state;
241241

242242
if (dt_state != NULL) {
243243
req->op->send_dt->ops.pack(dt_state, 0, step->non_contig.contig_buffer, step->buffer_length);
@@ -272,7 +272,7 @@ static UCS_F_ALWAYS_INLINE ucs_status_t ucg_builtin_step_am_zcopy_max(ucg_builti
272272
step->am_header.remote_offset = (is_single_send) ? step->iter_offset :
273273
step->am_header.remote_offset;
274274
int8_t *send_buffer = step->send_buffer;
275-
void *dt_state = step->non_contig.pack_state.dt.generic.state;
275+
void *dt_state = step->non_contig.pack_state;
276276
if (dt_state != NULL) {
277277
req->op->send_dt->ops.pack(dt_state, 0, step->non_contig.contig_buffer, step->buffer_length);
278278
send_buffer = step->non_contig.contig_buffer;
@@ -1069,9 +1069,9 @@ ucs_status_t ucg_builtin_step_create(ucg_builtin_plan_phase_t *phase,
10691069
step->send_cb = NULL;
10701070

10711071
step->non_contig.contig_buffer = NULL;
1072-
step->non_contig.pack_state.dt.generic.state = NULL;
1073-
step->non_contig.unpack_state.dt.generic.state = NULL;
1074-
step->non_contig.pack_state_recv.dt.generic.state = NULL;
1072+
step->non_contig.pack_state = NULL;
1073+
step->non_contig.unpack_state = NULL;
1074+
step->non_contig.pack_state_recv = NULL;
10751075

10761076
/* special parameter of buffer length should be set for allgather with bruck plan */
10771077
if (phase->method == UCG_PLAN_METHOD_ALLGATHER_BRUCK) {
@@ -1341,8 +1341,8 @@ void ucg_builtin_swap_net_recv(char *netdata, size_t length, size_t offset,
13411341
{
13421342
ucg_builtin_op_step_t *step = req->step;
13431343
ucp_dt_generic_t *gen_dt = req->op->recv_dt;
1344-
void *state_pack = step->non_contig.pack_state_recv.dt.generic.state;
1345-
void *state_unpack = step->non_contig.unpack_state.dt.generic.state;
1344+
void *state_pack = step->non_contig.pack_state_recv;
1345+
void *state_unpack = step->non_contig.unpack_state;
13461346
char *recv_buffer = (char *)step->recv_buffer;
13471347
char *tmp_buffer = NULL;
13481348

builtin/ops/builtin_ops.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,9 @@ typedef struct ucg_builtin_op_step {
138138
/* Fields intended for non-contig datatypes */
139139
struct {
140140
int8_t *contig_buffer;
141-
ucp_dt_state_t pack_state;
142-
ucp_dt_state_t unpack_state;
143-
ucp_dt_state_t pack_state_recv;
141+
void *pack_state;
142+
void *unpack_state;
143+
void *pack_state_recv;
144144
} non_contig;
145145

146146
/* Fields intended for zero-copy */

0 commit comments

Comments (0)