Skip to content

Commit 229bd4b

Browse files
Merge branch 'no_spart_memmove' into 'master'
Replaced the memmove in cell_add_spart() with a faster alternative See merge request swift/swiftsim!2321
2 parents 4806bed + 4288d3a commit 229bd4b

1 file changed

Lines changed: 84 additions & 36 deletions

File tree

src/cell_convert_part.c

Lines changed: 84 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -29,44 +29,91 @@
2929
#include "active.h"
3030
#include "engine.h"
3131
#include "hydro.h"
32+
#include "memswap.h"
3233
#include "sink_properties.h"
3334

3435
/**
3536
* @brief Recursively update the pointer and counter for #spart after the
3637
* addition of a new particle.
3738
*
39+
* Recall that the particles are in depth-first ordering. This means all
40+
* the leaves' content is next to each other in memory.
41+
* To insert a empty spot in the desired leaf (the one at the bottom of
42+
* the progeny list), we can:
43+
* (1) take the first particle in a leaf and move it to the
44+
* start of the next leaf.
45+
* (2a) If the leaf is the one where we want the gap, increase the particle
46+
* counter.
47+
* (2b) If not, shift the particle array pointer by one spot to account
48+
* for the gap which was created on the main progeny branch.
49+
* (3) Since we know the delta a spart is moved, we can update its #gpart
50+
* connection at the same time.
51+
* (4) Since all the cells beyond the insertion point have had their
52+
* particle order altered, we need to void the sort arrays.
53+
* (5) In cells that are not leaves, don't touch the particles, but move the
54+
* counter or pointer.
55+
*
3856
* @param c The cell we are working on.
3957
* @param progeny_list The list of the progeny index at each level for the
4058
* leaf-cell where the particle was added.
41-
* @param main_branch Are we in a cell directly above the leaf where the new
42-
* particle was added?
59+
* @param main_branch Are we in a cell directly above the leaf where we
60+
* want to create a gap?
61+
* @param spart_to_insert The #spart to insert at the start of the next
62+
* leaf encountered in the hierarchy.
4363
*/
4464
void cell_recursively_shift_sparts(struct cell *c,
4565
const int progeny_list[space_cell_maxdepth],
46-
const int main_branch) {
66+
const int main_branch,
67+
struct spart *const spart_to_insert) {
68+
69+
/* Recurse to the lower levels */
4770
if (c->split) {
48-
/* No need to recurse in progenies located before the insestion point */
71+
72+
/* No need to recurse in progenies located before the insertion point */
4973
const int first_progeny = main_branch ? progeny_list[(int)c->depth] : 0;
5074

5175
for (int k = first_progeny; k < 8; ++k) {
52-
if (c->progeny[k] != NULL)
76+
if (c->progeny[k] != NULL) {
77+
78+
/* Is the progeny on the main branch? */
79+
const int progeny_on_main_branch = main_branch && (k == first_progeny);
80+
5381
cell_recursively_shift_sparts(c->progeny[k], progeny_list,
54-
main_branch && (k == first_progeny));
82+
progeny_on_main_branch, spart_to_insert);
83+
}
5584
}
5685
}
5786

58-
/* When directly above the leaf with the new particle: increase the particle
59-
* count */
60-
/* When after the leaf with the new particle: shift by one position */
87+
/* Indicate that the cell is not sorted and cancel the pointer sorting
88+
* arrays. */
89+
c->stars.sorted = 0;
90+
cell_free_stars_sorts(c);
91+
92+
/* If we are in a leaf, we can operate on the particle arrays */
93+
if (!c->split) {
94+
95+
/* If we are in a leaf, then we want to:
96+
* - Swap the first particle in the array with the one in our copy buffer
97+
* - Update the gpart link of the new particle in the buffer since we know
98+
* where it is going (it is moving by exactly the cell's content).
99+
*/
100+
if (c->stars.count > 0) {
101+
102+
memswap(c->stars.parts, spart_to_insert, sizeof(struct spart));
103+
spart_to_insert->gpart->id_or_neg_offset -= c->stars.count;
104+
105+
} else {
106+
/* Nothing to do */
107+
}
108+
}
109+
110+
/* Now we can increase the cell counter or shift the pointer*/
61111
if (main_branch) {
62-
c->stars.count++;
63112

64-
/* Indicate that the cell is not sorted and cancel the pointer sorting
65-
* arrays. */
66-
c->stars.sorted = 0;
67-
cell_free_stars_sorts(c);
113+
c->stars.count++;
68114

69115
} else {
116+
70117
c->stars.parts++;
71118
}
72119
}
@@ -154,6 +201,7 @@ void cell_recursively_shift_gparts(struct cell *c,
154201
* time bin.
155202
*/
156203
struct spart *cell_add_spart(struct engine *e, struct cell *const c) {
204+
157205
/* Perform some basic consitency checks */
158206
if (c->nodeID != engine_rank) error("Adding spart on a foreign node");
159207
if (c->stars.ti_old_part != e->ti_current) error("Undrifted cell!");
@@ -205,39 +253,36 @@ struct spart *cell_add_spart(struct engine *e, struct cell *const c) {
205253
return NULL;
206254
}
207255

256+
#ifdef SWIFT_DEBUG_CHECKS
208257
/* Number of particles to shift in order to get a free space. */
209258
const size_t n_copy = &top->stars.parts[top->stars.count] - c->stars.parts;
210-
211-
#ifdef SWIFT_DEBUG_CHECKS
212259
if (c->stars.parts + n_copy > top->stars.parts + top->stars.count)
213260
error("Copying beyond the allowed range");
214261
#endif
215262

216-
if (n_copy > 0) {
217-
// MATTHIEU: This can be improved. We don't need to copy everything, just
218-
// need to swap a few particles.
219-
memmove(&c->stars.parts[1], &c->stars.parts[0],
220-
n_copy * sizeof(struct spart));
221-
222-
/* Update the spart->gpart links (shift by 1) */
223-
for (size_t i = 0; i < n_copy; ++i) {
224-
263+
/* Recursively shift all the stars to get a free spot at the start of the
264+
* current cell */
265+
struct spart temp;
225266
#ifdef SWIFT_DEBUG_CHECKS
226-
if (c->stars.parts[i + 1].gpart == NULL) {
227-
error("Incorrectly linked spart!");
228-
}
267+
bzero(&temp, sizeof(struct spart));
268+
temp.id = 666;
229269
#endif
230-
c->stars.parts[i + 1].gpart->id_or_neg_offset--;
231-
}
232-
}
270+
cell_recursively_shift_sparts(top, progeny, /* main_branch=*/1, &temp);
233271

234-
/* Recursively shift all the stars to get a free spot at the start of the
235-
* current cell*/
236-
cell_recursively_shift_sparts(top, progeny, /* main_branch=*/1);
272+
/* Finish all the operations by putting the last particle
273+
* we have in hands in the first spot beyond the top-level cell's
274+
* current range.
275+
* Note that top->stars.count has been updated in the recursion */
276+
struct spart *final_spart = top->stars.parts + (top->stars.count - 1);
277+
#ifdef SWIFT_DEBUG_CHECKS
278+
/* Verify that we are indeed overwriting an 'extra' spart */
279+
if (final_spart->id != -42)
280+
error("Incorrect shift beyond the original top-level range");
281+
#endif
282+
memcpy(final_spart, &temp, sizeof(struct spart));
237283

238284
/* Make sure the gravity will be recomputed for this particle in the next
239-
* step
240-
*/
285+
* step */
241286
struct cell *top2 = c;
242287
while (top2->parent != NULL) {
243288
top2->stars.ti_old_part = e->ti_current;
@@ -251,6 +296,9 @@ struct spart *cell_add_spart(struct engine *e, struct cell *const c) {
251296

252297
/* We now have an empty spart as the first particle in that cell */
253298
struct spart *sp = &c->stars.parts[0];
299+
#ifdef SWIFT_DEBUG_CHECKS
300+
if (sp->id != 666) error("Did not get the right particle!");
301+
#endif
254302
bzero(sp, sizeof(struct spart));
255303

256304
/* Give it a decent position */

0 commit comments

Comments
 (0)