2525
// For every patch i whose pack_node_index is not u64_max, overwrite new_owner_table[i] with the
// owner stored at new_owner_table[pack_node_index]. Patches with pack_node_index == u64_max are
// left untouched. (Diff view: '-' lines are the former SYCL kernel, '+' lines the OpenMP loop.)
2626inline void apply_node_patch_packing (
2727 std::vector<shamrock::patch::Patch> &global_patch_list, std::vector<i32 > &new_owner_table) {
28- using namespace shamrock ::patch;
29- sycl::buffer<i32 > new_owner (new_owner_table.data (), new_owner_table.size ());
30- sycl::buffer<Patch> patch_buf (global_patch_list.data (), global_patch_list.size ());
3128
32- sycl::range<1 > range{global_patch_list.size ()};
33-
34- // pack nodes: a packed patch takes the owner of the patch it is packed with
35- shamsys::instance::get_alt_queue ()
36- .submit ([&](sycl::handler &cgh) {
37- auto ptch = patch_buf.get_access <sycl::access::mode::read>(cgh);
38- // auto pdt = dt_buf.get_access<sycl::access::mode::read>(cgh);
39- auto chosen_node = new_owner.get_access <sycl::access::mode::write>(cgh);
40-
41- cgh.parallel_for (range, [=](sycl::item<1 > item) {
42- u64 i = (u64 ) item.get_id (0 );
43-
44- if (ptch[i].pack_node_index != u64_max) {
45- chosen_node[i] = chosen_node[ptch[i].pack_node_index ];
46- }
47- });
48- })
49- .wait ();
29+ // Note: this loop looks like a data race, since new_owner_table is read and written concurrently.
30+ // However, this should never happen: a packing index only points toward a patch without
31+ // packing, so the entries read here should never be modified during this loop.
32+ #pragma omp parallel for
33+ for (size_t i = 0 ; i < global_patch_list.size (); i++) {
34+ if (global_patch_list[i].pack_node_index != u64_max) {
35+ new_owner_table[i] = new_owner_table[global_patch_list[i].pack_node_index ];
36+ }
37+ }
5038}
5139
5240namespace shamrock ::scheduler {
@@ -102,17 +90,17 @@ namespace shamrock::scheduler {
10290
10391 // TODO add bool for optional print verbosity
10492 // std::cout << i << " : " << old_owner << " -> " << new_owner << std::endl;
93+ if (new_owner != old_owner) {
10594
106- using ChangeOp = LoadBalancingChangeList::ChangeOp;
95+ using ChangeOp = LoadBalancingChangeList::ChangeOp;
10796
108- ChangeOp op;
109- op.patch_idx = i;
110- op.patch_id = global_patch_list[i].id_patch ;
111- op.rank_owner_new = new_owner;
112- op.rank_owner_old = old_owner;
113- op.tag_comm = tags_it_node[old_owner];
97+ ChangeOp op;
98+ op.patch_idx = i;
99+ op.patch_id = global_patch_list[i].id_patch ;
100+ op.rank_owner_new = new_owner;
101+ op.rank_owner_old = old_owner;
102+ op.tag_comm = tags_it_node[old_owner];
114103
115- if (new_owner != old_owner) {
116104 change_list.change_ops .push_back (op);
117105 tags_it_node[old_owner]++;
118106 }
@@ -126,23 +114,31 @@ namespace shamrock::scheduler {
126114 f64 avg = 0 ;
127115 f64 var = 0 ;
128116
129- for (i32 nid = 0 ; nid < shamcomm::world_size (); nid++) {
117+ i32 world_size = shamcomm::world_size ();
118+
119+ #pragma omp parallel for reduction(min : min) reduction(max : max) reduction(+ : avg)
120+ for (i32 nid = 0 ; nid < world_size; nid++) {
130121 f64 val = load_per_node[nid];
131122 min = sycl::fmin (min, val);
132123 max = sycl::fmax (max, val);
133124 avg += val;
125+ }
134126
135- if (shamcomm::world_rank () == 0 ) {
127+ if (shamcomm::world_rank () == 0
128+ && shamcomm::logs::get_loglevel () >= shamcomm::logs::log_debug) {
129+ for (i32 nid = 0 ; nid < world_size; nid++) {
136130 shamlog_debug_ln (
137131 " HilbertLoadBalance" , " node :" , nid, " load :" , load_per_node[nid]);
138132 }
139133 }
140- avg /= shamcomm::world_size ();
141- for (i32 nid = 0 ; nid < shamcomm::world_size (); nid++) {
134+ avg /= world_size;
135+
136+ #pragma omp parallel for reduction(+ : var)
137+ for (i32 nid = 0 ; nid < world_size; nid++) {
142138 f64 val = load_per_node[nid];
143139 var += (val - avg) * (val - avg);
144140 }
145- var /= shamcomm:: world_size() ;
141+ var /= world_size;
146142
147143 if (shamcomm::world_rank () == 0 ) {
148144 std::string str = " Loadbalance stats : \n " ;
0 commit comments