@@ -95,29 +95,40 @@ extern "C" {
#define OFI_CACHE_LINE_SIZE (64)

typedef void (*ofi_aq_init_fn)(void *);
+ enum {
+         OFI_AQ_FREE = 0,
+         OFI_AQ_READY,
+         OFI_AQ_NOOP,
+ };
+
/*
 * Base address of atomic queue must be cache line aligned to maximize atomic
 * value performance benefits
 */
#define OFI_DECLARE_ATOMIC_Q(entrytype, name) \
struct name ## _entry { \
-         ofi_atomic64_t seq; \
-         bool noop; \
+         ofi_atomic64_t state; \
          entrytype buf; \
} __attribute__((__aligned__(64))); \
\
struct name { \
          ofi_atomic64_t write_pos; \
          uint8_t pad0[OFI_CACHE_LINE_SIZE - \
                       sizeof(ofi_atomic64_t)]; \
-         ofi_atomic64_t read_pos; \
+         int64_t read_pos; \
          ofi_aq_init_fn init_fn; \
          uint8_t pad1[OFI_CACHE_LINE_SIZE - \
-                      (sizeof(ofi_atomic64_t) + \
+                      (sizeof(int64_t) + \
                       sizeof(ofi_aq_init_fn))]; \
+         ofi_atomic64_t claim_avail; \
+         uint8_t pad2[OFI_CACHE_LINE_SIZE - \
+                      sizeof(ofi_atomic64_t)]; \
+         ofi_atomic64_t discard_avail; \
+         uint8_t pad3[OFI_CACHE_LINE_SIZE - \
+                      sizeof(ofi_atomic64_t)]; \
          int size; \
          int size_mask; \
-         uint8_t pad2[OFI_CACHE_LINE_SIZE - \
+         uint8_t pad4[OFI_CACHE_LINE_SIZE - \
                       (sizeof(int) * 2)]; \
          struct name ## _entry entry[]; \
} __attribute__((__aligned__(64))); \
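
The alignment comment above is the heart of this layout: after this change, write_pos, read_pos/init_fn, claim_avail, discard_avail, and the size fields each sit on their own cache line (via pad0..pad4), so producers and the consumer never false-share a line. A minimal standalone sketch of the same guard in plain C11; the names here are illustrative and not part of this header:

#include <stdalign.h>
#include <stdatomic.h>
#include <assert.h>

#define CACHE_LINE 64

/* Illustrative only: alignas() pushes each hot counter onto its own cache
 * line, playing the same role as the pad0..pad4 arrays in the macro above. */
struct padded_counters {
        alignas(CACHE_LINE) atomic_int_fast64_t write_pos;     /* producers */
        alignas(CACHE_LINE) atomic_int_fast64_t claim_avail;   /* producers */
        alignas(CACHE_LINE) atomic_int_fast64_t discard_avail; /* consumer */
};

/* Three full lines on typical 64-bit targets, so no false sharing. */
static_assert(sizeof(struct padded_counters) == 3 * CACHE_LINE,
              "each counter occupies a full cache line");
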
@@ -132,12 +143,13 @@ static inline void name ## _init(struct name *aq, size_t size, \
        aq->size_mask = aq->size - 1; \
        aq->init_fn = init_fn; \
        ofi_atomic_initialize64(&aq->write_pos, 0); \
-         ofi_atomic_initialize64(&aq->read_pos, 0); \
+         aq->read_pos = 0; \
+         ofi_atomic_initialize64(&aq->discard_avail, 0); \
+         ofi_atomic_initialize64(&aq->claim_avail, size); \
        for (i = 0; i < size; i++) { \
-                 ofi_atomic_initialize64(&aq->entry[i].seq, i); \
                if (aq->init_fn) \
                        aq->init_fn(&aq->entry[i].buf); \
-                 aq->entry[i].noop = false; \
+                 ofi_atomic_initialize64(&aq->entry[i].state, OFI_AQ_FREE); \
        } \
} \
\
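
Note that the index arithmetic throughout the macro (pos & aq->size_mask) assumes size is a power of two, which is what makes size_mask = size - 1 a valid wrap mask. A toy check with hypothetical values:

#include <assert.h>

int main(void)
{
        int size = 8, size_mask = size - 1; /* size must be a power of two */
        assert((9 & size_mask) == 1);       /* pos 9 lands in slot 1 */
        assert((16 & size_mask) == 0);      /* pos 16 wraps back to slot 0 */
        assert((7 & size_mask) == 7);       /* pos 7 stays in slot 7 */
        return 0;
}
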
@@ -158,96 +170,106 @@ static inline void name ## _free(struct name *aq) \
{ \
        free(aq); \
} \
- static inline int name ## _next(struct name *aq, \
-                                 entrytype **buf, int64_t *pos) \
+ static inline bool name ## _claim(struct name *aq) \
{ \
-         struct name ## _entry *ce; \
-         int64_t diff, seq; \
-         *pos = ofi_atomic_load_explicit64(&aq->write_pos, \
-                                           memory_order_relaxed); \
-         for (;;) { \
-                 ce = &aq->entry[*pos & aq->size_mask]; \
-                 seq = ofi_atomic_load_explicit64(&(ce->seq), \
-                                                  memory_order_acquire); \
-                 diff = seq - *pos; \
-                 if (diff == 0) { \
-                         if (ofi_atomic_compare_exchange_weak64( \
-                                         &aq->write_pos, pos, \
-                                         *pos + 1)) \
-                                 break; \
-                 } else if (diff < 0) { \
-                         return -FI_ENOENT; \
-                 } else { \
-                         *pos = ofi_atomic_load_explicit64( \
-                                         &aq->write_pos, \
-                                         memory_order_relaxed); \
-                 } \
+         int64_t avail, discard_avail; \
+         avail = ofi_atomic_sub_explicit64(&aq->claim_avail, 1, \
+                                           memory_order_relaxed); \
+         if (avail > 0) \
+                 return true; \
+         \
+         discard_avail = ofi_atomic_load_explicit64( \
+                                         &aq->discard_avail, \
+                                         memory_order_acquire); \
+         if (discard_avail) { \
+                 if (!ofi_atomic_compare_exchange_weak64( \
+                                         &aq->discard_avail, \
+                                         &discard_avail, 0)) \
+                         goto out; \
+                 ofi_atomic_add_explicit64(&aq->claim_avail, \
+                                           discard_avail, \
+                                           memory_order_relaxed); \
+         } \
+ out: \
+         ofi_atomic_add_explicit64(&aq->claim_avail, 1, \
+                                   memory_order_relaxed); \
+         return false; \
+         \
+ } \
+ static inline entrytype *name ## _assign(struct name *aq) \
+ { \
+         int64_t pos; \
+         while (1) { \
+                 pos = ofi_atomic_load_explicit64( \
+                                         &aq->write_pos, \
+                                         memory_order_acquire); \
+                 if (ofi_atomic_compare_exchange_weak64( \
+                                         &aq->write_pos, &pos, \
+                                         pos + 1)) \
+                         break; \
        } \
-         *buf = &ce->buf; \
-         return FI_SUCCESS; \
+         return &aq->entry[pos & aq->size_mask].buf; \
+ } \
+ static inline entrytype *name ## _claim_assign(struct name *aq) \
+ { \
+         if (name ## _claim(aq)) { \
+                 return name ## _assign(aq); \
+         } \
+         return NULL; \
} \
static inline void name ## _release(struct name *aq, \
-                                     entrytype *buf, \
-                                     int64_t pos) \
+                                     entrytype *buf) \
{ \
+         int64_t state = OFI_AQ_FREE; \
        struct name ## _entry *ce; \
        ce = container_of(buf, struct name ## _entry, buf); \
        if (aq->init_fn) \
                aq->init_fn(&ce->buf); \
-         ofi_atomic_store_explicit64(&ce->seq, \
-                                     pos + aq->size, \
+         ofi_atomic_store_explicit64(&ce->state, state, \
                                      memory_order_release); \
+         aq->read_pos++; \
+ } \
+ static inline void name ## _discard(struct name *aq) \
+ { \
+         ofi_atomic_add_explicit64(&aq->discard_avail, 1, \
+                                   memory_order_relaxed); \
} \
- static inline int name ## _head(struct name *aq, \
-                                 entrytype **buf, int64_t *pos) \
+ static inline void name ## _release_discard(struct name *aq, \
+                                             entrytype *buf) \
+ { \
+         name ## _release(aq, buf); \
+         name ## _discard(aq); \
+ } \
+ static inline entrytype *name ## _head(struct name *aq) \
{ \
-         int64_t diff, seq; \
        struct name ## _entry *ce; \
+         int64_t state; \
again: \
-         *pos = ofi_atomic_load_explicit64(&aq->read_pos, \
-                                           memory_order_relaxed); \
-         for (;;) { \
-                 ce = &aq->entry[*pos & aq->size_mask]; \
-                 seq = ofi_atomic_load_explicit64(&(ce->seq), \
-                                                  memory_order_acquire); \
-                 diff = seq - (*pos + 1); \
-                 if (diff == 0) { \
-                         if (ofi_atomic_compare_exchange_weak64( \
-                                         &aq->read_pos, pos, \
-                                         *pos + 1)) \
-                                 break; \
-                 } else if (diff < 0) { \
-                         return -FI_ENOENT; \
-                 } else { \
-                         *pos = ofi_atomic_load_explicit64( \
-                                         &aq->read_pos, \
-                                         memory_order_relaxed); \
-                 } \
-         } \
-         *buf = &ce->buf; \
-         if (ce->noop) { \
-                 ce->noop = false; \
-                 name ## _release(aq, *buf, *pos); \
+         ce = &aq->entry[aq->read_pos & aq->size_mask]; \
+         state = ofi_atomic_load_explicit64(&ce->state, \
+                                            memory_order_acquire); \
+         if (state == OFI_AQ_FREE) \
+                 return NULL; \
+         if (state == OFI_AQ_NOOP) { \
+                 name ## _release_discard(aq, &ce->buf); \
                goto again; \
        } \
-         return FI_SUCCESS; \
+         return &ce->buf; \
} \
- static inline void name ## _commit(entrytype *buf, \
-                                    int64_t pos) \
+ static inline void name ## _commit(entrytype *buf) \
{ \
        struct name ## _entry *ce; \
+         int64_t state = OFI_AQ_READY; \
        ce = container_of(buf, struct name ## _entry, buf); \
-         ofi_atomic_store_explicit64(&ce->seq, pos + 1, \
-                                     memory_order_release); \
+         ofi_atomic_store_explicit64(&ce->state, state, \
+                                     memory_order_release); \
} \
- static inline void name ## _discard(entrytype *buf, \
-                                     int64_t pos) \
+ static inline void name ## _cancel(entrytype *buf) \
{ \
        struct name ## _entry *ce; \
        ce = container_of(buf, struct name ## _entry, buf); \
-         ce->noop = true; \
-         ofi_atomic_store_explicit64(&ce->seq, pos + 1, \
-                                     memory_order_release); \
+         ofi_atomic_store_explicit64(&ce->state, OFI_AQ_NOOP, \
+                                     memory_order_release); \
} \
void dummy ## name (void) /* work-around global ; scope */
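
For reference, here is a sketch of how the reworked API could be driven. None of this is from the commit: the msg_t and msgq names are hypothetical, queue allocation plus the generated msgq_init are assumed to happen elsewhere, and the libfabric-internal headers (ofi_atomic64_t and friends) are assumed available. Producers claim a credit and a slot, fill it, then commit it (or cancel it, which marks the slot OFI_AQ_NOOP so the reader skips it). Making read_pos a plain int64_t suggests a single-consumer design; the consumer returns each credit with release_discard:

#include <stdbool.h>
#include <stdint.h>

typedef struct { uint64_t op; uint64_t data; } msg_t;  /* hypothetical */
OFI_DECLARE_ATOMIC_Q(msg_t, msgq);

/* Any producer thread: reserve a slot, fill it, publish it. */
static bool msgq_produce(struct msgq *q, uint64_t op, uint64_t data)
{
        msg_t *m = msgq_claim_assign(q); /* NULL when no credit is left */
        if (!m)
                return false;
        m->op = op;
        m->data = data;
        msgq_commit(m);                  /* state -> OFI_AQ_READY */
        return true;
}

/* The single consumer thread: drain committed entries in order. */
static bool msgq_consume(struct msgq *q, uint64_t *data)
{
        msg_t *m = msgq_head(q);         /* NULL while head slot is FREE */
        if (!m)
                return false;
        *data = m->data;
        msgq_release_discard(q, m);      /* slot -> FREE, credit returned */
        return true;
}
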