Skip to content

Commit 0b771b6

Browse files
committed
Replace sleeping with condition variables
1 parent 175e6b1 commit 0b771b6

File tree

1 file changed

+23
-36
lines changed

1 file changed

+23
-36
lines changed

source/adios2/toolkit/sst/dp/rdma_dp.c

+23 -36
Original file line number | Diff line number | Diff line change
@@ -132,8 +132,11 @@ struct cq_event_list
132132
struct cq_manual_progress
133133
{
134134
struct fid_cq *cq_signal;
135-
pthread_mutex_t cq_event_list_mutex;
135+
136136
struct cq_event_list *cq_event_list;
137+
pthread_mutex_t cq_event_list_mutex;
138+
pthread_cond_t cq_even_list_signal;
139+
char cq_event_list_filled;
137140

138141
CP_Services Svcs;
139142
void *Stream;
@@ -156,46 +159,43 @@ void cq_manual_progress_push(struct cq_manual_progress *self, struct cq_event_li
156159
}
157160
head->next = item;
158161
}
162+
self->cq_event_list_filled = 1;
159163
pthread_mutex_unlock(&self->cq_event_list_mutex);
164+
pthread_cond_signal(&self->cq_even_list_signal);
160165
}
161166

162167
struct fi_cq_data_entry *cq_manual_progress_pop(struct cq_manual_progress *self)
163168
{
164169
struct fi_cq_data_entry *res;
165170
pthread_mutex_lock(&self->cq_event_list_mutex);
166-
if (!self->cq_event_list)
171+
while (!self->cq_event_list_filled)
167172
{
168-
res = NULL;
169-
}
170-
else
171-
{
172-
struct cq_event_list *head = self->cq_event_list;
173-
res = head->value;
174-
self->cq_event_list = head->next;
175-
free(head);
173+
pthread_cond_wait(&self->cq_even_list_signal, &self->cq_event_list_mutex);
176174
}
175+
assert(self->cq_event_list);
176+
struct cq_event_list *head = self->cq_event_list;
177+
res = head->value;
178+
self->cq_event_list = head->next;
179+
self->cq_event_list_filled = self->cq_event_list ? 1 : 0;
177180
pthread_mutex_unlock(&self->cq_event_list_mutex);
181+
free(head);
178182
return res;
179183
}
180184

181-
#define SST_BACKOFF_SECONDS_MAX 5
182-
183185
static void *make_progress(void *params_)
184186
{
185187
struct cq_manual_progress *params = (struct cq_manual_progress *)params_;
186188
size_t const batch_size = 100;
187-
struct fi_cq_data_entry *CQEntries = malloc(batch_size * sizeof(struct fi_cq_data_entry));
189+
struct fi_cq_data_entry CQEntries[batch_size];
188190

189191
while (params->do_continue)
190192
{
191193
/*
192194
* The main purpose of this worker thread is to make repeated blocking calls to the blocking
193-
* fi_cq_sread() with a timeout of 5 seconds. Some providers don't make progress in a timely
194-
* fashion otherwise (e.g. shm).
195+
* fi_cq_sread(). Some providers don't make progress in a timely fashion otherwise (e.g.
196+
* shm).
195197
*/
196-
printf("Going into fi_cq_sread()\n");
197198
ssize_t rc = fi_cq_sread(params->cq_signal, (void *)CQEntries, batch_size, NULL, -1);
198-
printf("fi_cq_sread()=%ld\n", rc);
199199
if (rc < 1)
200200
{
201201
struct fi_cq_err_entry error = {.err = 0};
@@ -222,8 +222,6 @@ static void *make_progress(void *params_)
222222
}
223223
}
224224
}
225-
free(CQEntries);
226-
printf("Returning from thread\n");
227225
return NULL;
228226
}
229227

@@ -259,22 +257,10 @@ struct fabric_state
259257
void cq_read(struct fabric_state *fabric, struct fi_cq_data_entry *CQEntry)
260258
{
261259
unsigned int current_backoff_seconds = 0;
262-
while (1)
263-
{
264-
struct fi_cq_data_entry *res = cq_manual_progress_pop(fabric->cq_manual_progress);
265-
if (res == NULL)
266-
{
267-
sleep(current_backoff_seconds);
268-
if(current_backoff_seconds < SST_BACKOFF_SECONDS_MAX)
269-
{
270-
++current_backoff_seconds;
271-
}
272-
continue;
273-
}
274-
memcpy(CQEntry, res, sizeof(struct fi_cq_data_entry));
275-
free(res);
276-
return;
277-
}
260+
struct fi_cq_data_entry *res = cq_manual_progress_pop(fabric->cq_manual_progress);
261+
memcpy(CQEntry, res, sizeof(struct fi_cq_data_entry));
262+
free(res);
263+
return;
278264
}
279265

280266
/*
@@ -670,9 +656,11 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
670656
return;
671657
}
672658
manual_progress->cq_event_list = NULL;
659+
manual_progress->cq_event_list_filled = 0;
673660
manual_progress->Svcs = Svcs;
674661
manual_progress->Stream = CP_Stream;
675662
manual_progress->do_continue = 1;
663+
pthread_cond_init(&manual_progress->cq_even_list_signal, NULL);
676664

677665
fabric->cq_manual_progress = manual_progress;
678666

@@ -1559,7 +1547,6 @@ static DP_WSR_Stream RdmaInitWriterPerReader(CP_Services Svcs, DP_WS_Stream WS_S
15591547
&WSR_Stream->rrmr, Fabric->ctx, Fabric->signal,
15601548
Fabric->info->domain_attr->mr_mode);
15611549
ReaderRollHandle->Key = fi_mr_key(WSR_Stream->rrmr);
1562-
printf("Key: %lu\n", ReaderRollHandle->Key);
15631550

15641551
WSR_Stream->WriterContactInfo = ContactInfo;
15651552

0 commit comments

Comments
 (0)