Skip to content

Commit 065eb29

Browse files
committed
[busyclosure] Rename ReadyDeque to BusyClosure and update struct members and functions to better reflect the semantics of this new structure, as storing pointers to the busy closures that workers are currently working on.
1 parent 3e59e69 commit 065eb29

File tree

8 files changed

+246
-298
lines changed

8 files changed

+246
-298
lines changed

runtime/busyclosure.h

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#ifndef _BUSYCLOSURE_H
2+
#define _BUSYCLOSURE_H
3+
4+
#include "closure.h"
5+
#include "debug.h"
6+
#include "global.h"
7+
#include "rts-config.h"
8+
#include "worker_coord.h"
9+
#include <atomic>
10+
11+
struct alignas(CILK_CACHE_LINE) BusyClosure {
12+
Closure *closure;
13+
alignas(CILK_CACHE_LINE) std::atomic<worker_id> mutex_owner;
14+
15+
/*********************************************************
16+
* Management of BusyClosure
17+
*********************************************************/
18+
19+
void assert_ownership([[maybe_unused]] worker_id self) {
20+
CILK_ASSERT(mutex_owner.load(std::memory_order_relaxed) == self);
21+
}
22+
23+
static void assert_ownership(BusyClosure *busy, worker_id self,
24+
worker_id pn) {
25+
busy[pn].assert_ownership(self);
26+
}
27+
28+
void lock(worker_id id) {
29+
while (true) {
30+
worker_id current_owner =
31+
mutex_owner.load(std::memory_order_relaxed);
32+
if ((current_owner == NO_WORKER) &&
33+
mutex_owner.compare_exchange_weak(current_owner, id,
34+
std::memory_order_acq_rel,
35+
std::memory_order_relaxed))
36+
return;
37+
busy_loop_pause();
38+
}
39+
}
40+
41+
static void lock(BusyClosure *busy, worker_id self, worker_id pn) {
42+
busy[pn].lock(self);
43+
}
44+
45+
static void lock_self(BusyClosure *busy, worker_id self) {
46+
busy[self].lock(self);
47+
}
48+
static void unlock_self(BusyClosure *busy, worker_id self) {
49+
worker_id id = self;
50+
busy[id].mutex_owner.store(NO_WORKER, std::memory_order_release);
51+
}
52+
53+
bool trylock(worker_id id) {
54+
worker_id current_owner = mutex_owner.load(std::memory_order_relaxed);
55+
if (current_owner == NO_WORKER)
56+
return mutex_owner.compare_exchange_weak(current_owner, id,
57+
std::memory_order_acq_rel,
58+
std::memory_order_relaxed);
59+
return false;
60+
}
61+
62+
static bool trylock(BusyClosure *busy, worker_id self, worker_id pn) {
63+
return busy[pn].trylock(self);
64+
}
65+
66+
void unlock([[maybe_unused]] worker_id self) {
67+
mutex_owner.store(NO_WORKER, std::memory_order_release);
68+
}
69+
70+
static void unlock(BusyClosure *busy, worker_id self, worker_id pn) {
71+
busy[pn].unlock(self);
72+
}
73+
74+
/*
75+
* Add/remove the busy closure
76+
*
77+
* The precondition of these functions is that the worker self
78+
* must have locked worker pn's busy closure before entering the function.
79+
*/
80+
81+
static Closure *xtract(BusyClosure *busy, worker_id self, worker_id pn) {
82+
return busy[pn].xtract(self);
83+
}
84+
85+
Closure *xtract(worker_id self) {
86+
// make sure w has the lock on worker pn's busy closure
87+
assert_ownership(self);
88+
89+
if (Closure *cl = closure) {
90+
closure = nullptr;
91+
cl->owner = NO_WORKER;
92+
return cl;
93+
}
94+
return nullptr;
95+
}
96+
97+
Closure *peek(worker_id self) {
98+
// make sure w has the lock on worker pn's busy closure
99+
assert_ownership(self);
100+
// returns the closure but does not unlink
101+
return closure;
102+
}
103+
104+
static Closure *peek(BusyClosure *busy, worker_id self, worker_id pn) {
105+
return busy[pn].peek(self);
106+
}
107+
108+
/*
109+
* This allows self to make Closure cl worker pn's busy closure.
110+
*/
111+
void set(Closure *cl, worker_id self, worker_id pn) {
112+
assert_ownership(self);
113+
114+
CILK_ASSERT(cl->owner == NO_WORKER);
115+
CILK_ASSERT_NULL(closure);
116+
closure = cl;
117+
cl->owner = pn;
118+
}
119+
120+
static void set(BusyClosure *busy, Closure *cl, worker_id self,
121+
worker_id pn) {
122+
return busy[pn].set(cl, self, pn);
123+
}
124+
};
125+
126+
#endif

runtime/closure.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
#include "cilk-internal.h"
55
#include "local-hypertable.h"
6+
#include "rts-config.h"
67
#include <atomic>
78

8-
// Forward declaration
9-
typedef struct Closure Closure;
9+
// Forward declarations
10+
struct Closure;
11+
struct BusyClosure;
1012

1113
enum ClosureStatus : unsigned char {
1214
/* Closure.status == 0 is invalid */
@@ -37,7 +39,7 @@ struct __attribute__((visibility("hidden"))) Closure {
3739
struct cilk_fiber *ext_fiber = nullptr;
3840
struct cilk_fiber *ext_fiber_child = nullptr;
3941

40-
worker_id owner_ready_deque = NO_WORKER; /* debug only */
42+
worker_id owner = NO_WORKER; /* debug only */
4143

4244
enum ClosureStatus status = CLOSURE_PRE_INVALID;
4345
bool exception_pending = false;
@@ -92,10 +94,9 @@ struct __attribute__((visibility("hidden"))) Closure {
9294
static void destroy(Closure *, struct global_state *);
9395

9496
// This method is used for sync.
95-
void suspend(struct ReadyDeque *deques, worker_id self);
97+
void suspend(BusyClosure *busy, worker_id self);
9698
// This method is used for steal.
97-
void suspend_victim(struct ReadyDeque *deques, worker_id thief,
98-
worker_id victim);
99+
void suspend_victim(BusyClosure *busy, worker_id thief, worker_id victim);
99100

100101
void add_callee(Closure *new_callee);
101102
void remove_callee();

runtime/global.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
#ifdef __FreeBSD__
88
#include <pthread_np.h>
99
#endif
10+
#include "busyclosure.h"
1011
#include "debug.h"
1112
#include "global.h"
12-
#include "readydeque.h"
13+
#include "local.h"
1314
#include <cstdio>
1415
#include <cstring>
1516
#include <sched.h>
@@ -177,8 +178,8 @@ global_state *global_state_init(int argc, char *argv[]) {
177178
(struct worker_args *)calloc(active_size, sizeof(struct worker_args));
178179
g->workers =
179180
(__cilkrts_worker **)calloc(active_size, sizeof(__cilkrts_worker *));
180-
g->deques = (ReadyDeque *)cilk_aligned_alloc(
181-
__alignof__(ReadyDeque), active_size * sizeof(ReadyDeque));
181+
g->busy = (BusyClosure *)cilk_aligned_alloc(
182+
__alignof__(BusyClosure), active_size * sizeof(BusyClosure));
182183
g->threads = new std::thread[active_size];
183184
g->index_to_worker = (worker_id *)calloc(active_size, sizeof(worker_id));
184185
g->worker_to_index = (worker_id *)calloc(active_size, sizeof(worker_id));

runtime/global.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ extern unsigned __cilkrts_nproc;
1919

2020
struct __cilkrts_worker;
2121
struct Closure;
22+
struct BusyClosure;
2223

2324
// clang-format off
2425
#define DEFAULT_OPTIONS \
@@ -64,8 +65,8 @@ struct CHEETAH_INTERNAL global_state {
6465
unsigned int nworkers; /* size of next 4 arrays */
6566
struct worker_args *worker_args;
6667
struct __cilkrts_worker **workers;
67-
/* dynamically-allocated array of deques, one per processor */
68-
struct ReadyDeque *deques;
68+
/* dynamically-allocated array of busy closures, one per processor */
69+
BusyClosure *busy;
6970
std::thread *threads;
7071
struct Closure *root_closure;
7172

runtime/init.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
#endif
1717
#include <unistd.h>
1818

19+
#include "busyclosure.h"
1920
#include "cilk-internal.h"
2021
#include "debug.h"
2122
#include "fiber.h"
2223
#include "global.h"
2324
#include "init.h"
2425
#include "local.h"
25-
#include "readydeque.h"
2626
#include "sched_stats.h"
2727
#include "scheduler.h"
2828

@@ -57,11 +57,11 @@ static void worker_local_destroy(local_state *l, global_state *g) {
5757
/* currently nothing to do here */
5858
}
5959

60-
static void deques_init(global_state *g) {
61-
cilkrts_alert(BOOT, "(deques_init) Initializing deques");
60+
static void busy_init(global_state *g) {
61+
cilkrts_alert(BOOT, "(busy_init) Initializing busy closures");
6262
for (unsigned int i = 0; i < g->options.nproc; i++) {
63-
g->deques[i].bottom = nullptr;
64-
g->deques[i].mutex_owner = NO_WORKER;
63+
g->busy[i].closure = nullptr;
64+
g->busy[i].mutex_owner = NO_WORKER;
6565
}
6666
}
6767

@@ -363,7 +363,7 @@ global_state *__cilkrts_startup(int argc, char *argv[]) {
363363
cilkrts_alert(BOOT, "(__cilkrts_startup) argc %d", argc);
364364
global_state *g = global_state_init(argc, argv);
365365
workers_init(g);
366-
deques_init(g);
366+
busy_init(g);
367367

368368
// Create the root closure and a fiber to go with it. Use worker 0 to
369369
// allocate the closure and fiber.
@@ -560,7 +560,7 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g,
560560

561561
worker_id self = w->self;
562562
const bool is_boss = (0 == self);
563-
ReadyDeque *deques = g->deques;
563+
BusyClosure *busy = g->busy;
564564

565565
// Mark the computation as done. Also "sleep" the workers: update global
566566
// flags so workers who exit the work-stealing loop will return to waiting
@@ -579,15 +579,15 @@ void __cilkrts_internal_exit_cilkified_root(global_state *g,
579579
w->extension = nullptr;
580580
}
581581

582-
// Clear this worker's deque. Nobody can successfully steal from this deque
583-
// at this point, because head == tail, but we still want any subsequent
584-
// Cilkified region to start with an empty deque. We go ahead and grab the
585-
// deque lock to make sure no other worker has a lingering pointer to the
586-
// closure.
587-
ReadyDeque::lock_self(deques, self);
588-
deques[self].bottom = nullptr;
589-
WHEN_CILK_DEBUG(g->root_closure->owner_ready_deque = NO_WORKER);
590-
ReadyDeque::unlock_self(deques, self);
582+
// Clear this worker's busy closure. Nobody can successfully steal from
583+
// this worker's deque at this point, because head == tail, but any
584+
// subsequent Cilkified region should still start with an empty busy
585+
// closure. We go ahead and grab the lock to make sure no other worker
586+
// has a lingering pointer to the closure.
587+
BusyClosure::lock_self(busy, self);
588+
busy[self].closure = nullptr;
589+
WHEN_CILK_DEBUG(g->root_closure->owner = NO_WORKER);
590+
BusyClosure::unlock_self(busy, self);
591591

592592
// Clear the flags in sf. This routine runs before leave_frame in a Cilk
593593
// function, but leave_frame is executed conditionally in Cilk functions
@@ -682,8 +682,8 @@ static void global_state_deinit(global_state *g) {
682682
free(g->workers);
683683
g->workers = nullptr;
684684
g->nworkers = 0;
685-
free(g->deques);
686-
g->deques = nullptr;
685+
free(g->busy);
686+
g->busy = nullptr;
687687
delete [] g->threads;
688688
g->threads = nullptr;
689689
free(g->index_to_worker);
@@ -693,10 +693,10 @@ static void global_state_deinit(global_state *g) {
693693
free(g);
694694
}
695695

696-
static void deques_deinit(global_state *g) {
697-
cilkrts_alert(BOOT, "(deques_deinit) Clean up deques");
696+
static void busy_deinit(global_state *g) {
697+
cilkrts_alert(BOOT, "(busy_deinit) Clean up busy closures");
698698
for (unsigned int i = 0; i < g->options.nproc; i++) {
699-
CILK_ASSERT(g->deques[i].mutex_owner == NO_WORKER);
699+
CILK_ASSERT(g->busy[i].mutex_owner == NO_WORKER);
700700
}
701701
}
702702

@@ -790,7 +790,7 @@ void __cilkrts_shutdown(global_state *g) {
790790
// internal-malloc that does not include all the free fibers.
791791
global_state_terminate(g);
792792
workers_deinit(g);
793-
deques_deinit(g);
793+
busy_deinit(g);
794794
global_state_deinit(g);
795795
}
796796

runtime/personality.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include "busyclosure.h"
12
#include "cilk-internal.h"
23
#include "cilk2c.h"
34
#include "cilk2c_inlined.h"
@@ -7,7 +8,6 @@
78
#include "frame.h"
89
#include "init.h"
910
#include "local-reducer-api.h"
10-
#include "readydeque.h"
1111
#include <cilk/cilk_api.h>
1212
#include <cstdint>
1313
#include <cstring>
@@ -120,7 +120,7 @@ __attribute__((noinline)) static void
120120
sync_in_personality(__cilkrts_worker *w, __cilkrts_stack_frame *sf,
121121
struct _Unwind_Exception *ue_header) {
122122
worker_id self = w->self;
123-
ReadyDeque *deques = w->g->deques;
123+
BusyClosure *busy = w->g->busy;
124124
// save floating point state
125125
sysdep_save_fp_ctrl_state(sf);
126126

@@ -129,16 +129,16 @@ sync_in_personality(__cilkrts_worker *w, __cilkrts_stack_frame *sf,
129129
struct closure_exception *exn_r = get_exception_reducer(w);
130130
exn_r->exn = (char *)ue_header;
131131

132-
ReadyDeque::lock_self(deques, self);
133-
Closure *t = ReadyDeque::peek_bottom(deques, self, self);
132+
BusyClosure::lock_self(busy, self);
133+
Closure *t = BusyClosure::peek(busy, self, self);
134134
t->lock(self);
135135

136136
// ensure that we return here after a cilk_sync.
137137
exn_r->parent_rsp = t->orig_rsp;
138138
t->orig_rsp = (char *)SP(sf);
139139

140140
t->unlock(self);
141-
ReadyDeque::unlock_self(deques, self);
141+
BusyClosure::unlock_self(busy, self);
142142

143143
// save the current fiber for further stack unwinding.
144144
if (exn_r->throwing_fiber == nullptr) {

0 commit comments

Comments
 (0)