Skip to content

Commit ef54342

Browse files
committed
path-walk: reorder object visits
The path-walk API currently uses a stack-based approach to recursing through the list of paths within the repository. This guarantees that after a tree path is explored, all paths contained within that tree path will be explored before continuing to explore siblings of that tree path. The initial motivation of this depth-first approach was to minimize memory pressure while exploring the repository. A breadth-first approach would have too many "active" paths being stored in the paths_to_lists map. We can take this approach one step further by making sure that blob paths are visited before tree paths. This allows the API to free the memory for these blob objects before continuing to perform the depth-first search. This modifies the order in which we visit siblings, but does not change the fact that we are performing depth-first search. To achieve this goal, use a priority queue with a custom sorting method. The sort needs to handle tags, blobs, and trees (commits are handled slightly differently). When objects share a type, we can sort by path name. This will keep children of the latest path to leave the stack be preferred over the rest of the paths in the stack, since they agree in prefix up to and including a directory separator. When the types are different, we can prefer tags over other types and blobs over trees. This causes significant adjustments to t6601-path-walk.sh to rearrange the order of the visited paths. Signed-off-by: Derrick Stolee <[email protected]>
1 parent f2ffc32 commit ef54342

File tree

2 files changed

+110
-74
lines changed

2 files changed

+110
-74
lines changed

path-walk.c

+48-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "list-objects.h"
1212
#include "object.h"
1313
#include "oid-array.h"
14+
#include "prio-queue.h"
1415
#include "revision.h"
1516
#include "string-list.h"
1617
#include "strmap.h"
@@ -49,24 +50,58 @@ struct path_walk_context {
4950
struct strmap paths_to_lists;
5051

5152
/**
52-
* Store the current list of paths in a stack, to
53-
* facilitate depth-first-search without recursion.
53+
* Store the current list of paths in a priority queue,
54+
* using object type as a sorting mechanism, mostly to
55+
* make sure blobs are popped off the stack first. No
56+
* other sort is made, so within each object type it acts
57+
* like a stack and performs a DFS within the trees.
5458
*
5559
* Use path_stack_pushed to indicate whether a path
5660
* was previously added to path_stack.
5761
*/
58-
struct string_list path_stack;
62+
struct prio_queue path_stack;
5963
struct strset path_stack_pushed;
6064
};
6165

66+
static int compare_by_type(const void *one, const void *two, void *cb_data)
67+
{
68+
struct type_and_oid_list *list1, *list2;
69+
const char *str1 = one;
70+
const char *str2 = two;
71+
struct path_walk_context *ctx = cb_data;
72+
73+
list1 = strmap_get(&ctx->paths_to_lists, str1);
74+
list2 = strmap_get(&ctx->paths_to_lists, str2);
75+
76+
/*
77+
* If object types are equal, then use path comparison.
78+
*/
79+
if (!list1 || !list2 || list1->type == list2->type)
80+
return strcmp(str1, str2);
81+
82+
/* Prefer tags to be popped off first. */
83+
if (list1->type == OBJ_TAG)
84+
return -1;
85+
if (list2->type == OBJ_TAG)
86+
return 1;
87+
88+
/* Prefer blobs to be popped off second. */
89+
if (list1->type == OBJ_BLOB)
90+
return -1;
91+
if (list2->type == OBJ_BLOB)
92+
return 1;
93+
94+
return 0;
95+
}
96+
6297
static void push_to_stack(struct path_walk_context *ctx,
6398
const char *path)
6499
{
65100
if (strset_contains(&ctx->path_stack_pushed, path))
66101
return;
67102

68103
strset_add(&ctx->path_stack_pushed, path);
69-
string_list_append(&ctx->path_stack, path);
104+
prio_queue_put(&ctx->path_stack, xstrdup(path));
70105
}
71106

72107
static int add_tree_entries(struct path_walk_context *ctx,
@@ -378,8 +413,8 @@ static int setup_pending_objects(struct path_walk_info *info,
378413
const char *tagged_blob_path = "/tagged-blobs";
379414
tagged_blobs->type = OBJ_BLOB;
380415
tagged_blobs->maybe_interesting = 1;
381-
push_to_stack(ctx, tagged_blob_path);
382416
strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
417+
push_to_stack(ctx, tagged_blob_path);
383418
} else {
384419
oid_array_clear(&tagged_blobs->oids);
385420
free(tagged_blobs);
@@ -390,8 +425,8 @@ static int setup_pending_objects(struct path_walk_info *info,
390425
const char *tag_path = "/tags";
391426
tags->type = OBJ_TAG;
392427
tags->maybe_interesting = 1;
393-
push_to_stack(ctx, tag_path);
394428
strmap_put(&ctx->paths_to_lists, tag_path, tags);
429+
push_to_stack(ctx, tag_path);
395430
} else {
396431
oid_array_clear(&tags->oids);
397432
free(tags);
@@ -418,7 +453,10 @@ int walk_objects_by_path(struct path_walk_info *info)
418453
.repo = info->revs->repo,
419454
.revs = info->revs,
420455
.info = info,
421-
.path_stack = STRING_LIST_INIT_DUP,
456+
.path_stack = {
457+
.compare = compare_by_type,
458+
.cb_data = &ctx
459+
},
422460
.path_stack_pushed = STRSET_INIT,
423461
.paths_to_lists = STRMAP_INIT
424462
};
@@ -504,8 +542,7 @@ int walk_objects_by_path(struct path_walk_info *info)
504542

505543
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
506544
while (!ret && ctx.path_stack.nr) {
507-
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
508-
ctx.path_stack.nr--;
545+
char *path = prio_queue_get(&ctx.path_stack);
509546
paths_nr++;
510547

511548
ret = walk_path(&ctx, path);
@@ -522,8 +559,7 @@ int walk_objects_by_path(struct path_walk_info *info)
522559
push_to_stack(&ctx, entry->key);
523560

524561
while (!ret && ctx.path_stack.nr) {
525-
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
526-
ctx.path_stack.nr--;
562+
char *path = prio_queue_get(&ctx.path_stack);
527563
paths_nr++;
528564

529565
ret = walk_path(&ctx, path);
@@ -537,7 +573,7 @@ int walk_objects_by_path(struct path_walk_info *info)
537573

538574
clear_paths_to_lists(&ctx.paths_to_lists);
539575
strset_clear(&ctx.path_stack_pushed);
540-
string_list_clear(&ctx.path_stack, 0);
576+
clear_prio_queue(&ctx.path_stack);
541577
return ret;
542578
}
543579

t/t6601-path-walk.sh

+62-62
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,20 @@ test_expect_success 'all' '
8484
3:tree::$(git rev-parse refs/tags/tree-tag^{})
8585
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
8686
4:blob:a:$(git rev-parse base~2:a)
87-
5:tree:right/:$(git rev-parse topic:right)
88-
5:tree:right/:$(git rev-parse base~1:right)
89-
5:tree:right/:$(git rev-parse base~2:right)
90-
6:blob:right/d:$(git rev-parse base~1:right/d)
91-
7:blob:right/c:$(git rev-parse base~2:right/c)
92-
7:blob:right/c:$(git rev-parse topic:right/c)
93-
8:tree:left/:$(git rev-parse base:left)
94-
8:tree:left/:$(git rev-parse base~2:left)
95-
9:blob:left/b:$(git rev-parse base~2:left/b)
96-
9:blob:left/b:$(git rev-parse base:left/b)
97-
10:tree:a/:$(git rev-parse base:a)
98-
11:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
99-
12:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
100-
13:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
87+
5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
88+
6:tree:a/:$(git rev-parse base:a)
89+
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
90+
8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
91+
9:tree:left/:$(git rev-parse base:left)
92+
9:tree:left/:$(git rev-parse base~2:left)
93+
10:blob:left/b:$(git rev-parse base~2:left/b)
94+
10:blob:left/b:$(git rev-parse base:left/b)
95+
11:tree:right/:$(git rev-parse topic:right)
96+
11:tree:right/:$(git rev-parse base~1:right)
97+
11:tree:right/:$(git rev-parse base~2:right)
98+
12:blob:right/c:$(git rev-parse base~2:right/c)
99+
12:blob:right/c:$(git rev-parse topic:right/c)
100+
13:blob:right/d:$(git rev-parse base~1:right/d)
101101
blobs:10
102102
commits:4
103103
tags:7
@@ -154,19 +154,19 @@ test_expect_success 'branches and indexed objects mix well' '
154154
1:tree::$(git rev-parse base^{tree})
155155
1:tree::$(git rev-parse base~1^{tree})
156156
1:tree::$(git rev-parse base~2^{tree})
157-
2:blob:a:$(git rev-parse base~2:a)
158-
3:tree:right/:$(git rev-parse topic:right)
159-
3:tree:right/:$(git rev-parse base~1:right)
160-
3:tree:right/:$(git rev-parse base~2:right)
161-
4:blob:right/d:$(git rev-parse base~1:right/d)
162-
4:blob:right/d:$(git rev-parse :right/d)
163-
5:blob:right/c:$(git rev-parse base~2:right/c)
164-
5:blob:right/c:$(git rev-parse topic:right/c)
165-
6:tree:left/:$(git rev-parse base:left)
166-
6:tree:left/:$(git rev-parse base~2:left)
167-
7:blob:left/b:$(git rev-parse base:left/b)
168-
7:blob:left/b:$(git rev-parse base~2:left/b)
169-
8:tree:a/:$(git rev-parse refs/tags/third:a)
157+
2:tree:a/:$(git rev-parse refs/tags/third:a)
158+
3:tree:left/:$(git rev-parse base:left)
159+
3:tree:left/:$(git rev-parse base~2:left)
160+
4:blob:left/b:$(git rev-parse base:left/b)
161+
4:blob:left/b:$(git rev-parse base~2:left/b)
162+
5:tree:right/:$(git rev-parse topic:right)
163+
5:tree:right/:$(git rev-parse base~1:right)
164+
5:tree:right/:$(git rev-parse base~2:right)
165+
6:blob:right/c:$(git rev-parse base~2:right/c)
166+
6:blob:right/c:$(git rev-parse topic:right/c)
167+
7:blob:right/d:$(git rev-parse base~1:right/d)
168+
7:blob:right/d:$(git rev-parse :right/d)
169+
8:blob:a:$(git rev-parse base~2:a)
170170
blobs:7
171171
commits:4
172172
tags:0
@@ -186,15 +186,15 @@ test_expect_success 'topic only' '
186186
1:tree::$(git rev-parse topic^{tree})
187187
1:tree::$(git rev-parse base~1^{tree})
188188
1:tree::$(git rev-parse base~2^{tree})
189-
2:tree:right/:$(git rev-parse topic:right)
190-
2:tree:right/:$(git rev-parse base~1:right)
191-
2:tree:right/:$(git rev-parse base~2:right)
192-
3:blob:right/d:$(git rev-parse base~1:right/d)
193-
4:blob:right/c:$(git rev-parse base~2:right/c)
194-
4:blob:right/c:$(git rev-parse topic:right/c)
195-
5:tree:left/:$(git rev-parse base~2:left)
196-
6:blob:left/b:$(git rev-parse base~2:left/b)
197-
7:blob:a:$(git rev-parse base~2:a)
189+
2:blob:a:$(git rev-parse base~2:a)
190+
3:tree:left/:$(git rev-parse base~2:left)
191+
4:blob:left/b:$(git rev-parse base~2:left/b)
192+
5:tree:right/:$(git rev-parse topic:right)
193+
5:tree:right/:$(git rev-parse base~1:right)
194+
5:tree:right/:$(git rev-parse base~2:right)
195+
6:blob:right/c:$(git rev-parse base~2:right/c)
196+
6:blob:right/c:$(git rev-parse topic:right/c)
197+
7:blob:right/d:$(git rev-parse base~1:right/d)
198198
blobs:5
199199
commits:3
200200
tags:0
@@ -210,12 +210,12 @@ test_expect_success 'topic, not base' '
210210
cat >expect <<-EOF &&
211211
0:commit::$(git rev-parse topic)
212212
1:tree::$(git rev-parse topic^{tree})
213-
2:tree:right/:$(git rev-parse topic:right)
214-
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
215-
4:blob:right/c:$(git rev-parse topic:right/c)
216-
5:tree:left/:$(git rev-parse topic:left):UNINTERESTING
217-
6:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
218-
7:blob:a:$(git rev-parse topic:a):UNINTERESTING
213+
2:blob:a:$(git rev-parse topic:a):UNINTERESTING
214+
3:tree:left/:$(git rev-parse topic:left):UNINTERESTING
215+
4:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
216+
5:tree:right/:$(git rev-parse topic:right)
217+
6:blob:right/c:$(git rev-parse topic:right/c)
218+
7:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
219219
blobs:4
220220
commits:1
221221
tags:0
@@ -233,12 +233,12 @@ test_expect_success 'fourth, blob-tag2, not base' '
233233
1:tag:/tags:$(git rev-parse fourth)
234234
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
235235
3:tree::$(git rev-parse topic^{tree})
236-
4:tree:right/:$(git rev-parse topic:right)
237-
5:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
238-
6:blob:right/c:$(git rev-parse topic:right/c)
239-
7:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
240-
8:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
241-
9:blob:a:$(git rev-parse base~1:a):UNINTERESTING
236+
4:blob:a:$(git rev-parse base~1:a):UNINTERESTING
237+
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
238+
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
239+
7:tree:right/:$(git rev-parse topic:right)
240+
8:blob:right/c:$(git rev-parse topic:right/c)
241+
9:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
242242
blobs:5
243243
commits:1
244244
tags:1
@@ -253,10 +253,10 @@ test_expect_success 'topic, not base, only blobs' '
253253
-- topic --not base >out &&
254254
255255
cat >expect <<-EOF &&
256-
0:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
257-
1:blob:right/c:$(git rev-parse topic:right/c)
258-
2:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
259-
3:blob:a:$(git rev-parse topic:a):UNINTERESTING
256+
0:blob:a:$(git rev-parse topic:a):UNINTERESTING
257+
1:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
258+
2:blob:right/c:$(git rev-parse topic:right/c)
259+
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
260260
blobs:4
261261
commits:0
262262
tags:0
@@ -289,8 +289,8 @@ test_expect_success 'topic, not base, only trees' '
289289
290290
cat >expect <<-EOF &&
291291
0:tree::$(git rev-parse topic^{tree})
292-
1:tree:right/:$(git rev-parse topic:right)
293-
2:tree:left/:$(git rev-parse topic:left):UNINTERESTING
292+
1:tree:left/:$(git rev-parse topic:left):UNINTERESTING
293+
2:tree:right/:$(git rev-parse topic:right)
294294
commits:0
295295
blobs:0
296296
tags:0
@@ -308,14 +308,14 @@ test_expect_success 'topic, not base, boundary' '
308308
0:commit::$(git rev-parse base~1):UNINTERESTING
309309
1:tree::$(git rev-parse topic^{tree})
310310
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
311-
2:tree:right/:$(git rev-parse topic:right)
312-
2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
313-
3:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
314-
4:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
315-
4:blob:right/c:$(git rev-parse topic:right/c)
316-
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
317-
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
318-
7:blob:a:$(git rev-parse base~1:a):UNINTERESTING
311+
2:blob:a:$(git rev-parse base~1:a):UNINTERESTING
312+
3:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
313+
4:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
314+
5:tree:right/:$(git rev-parse topic:right)
315+
5:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
316+
6:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
317+
6:blob:right/c:$(git rev-parse topic:right/c)
318+
7:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
319319
blobs:5
320320
commits:2
321321
tags:0

0 commit comments

Comments
 (0)