Skip to content

Commit e716672

Browse files
committed
path-walk: reorder object visits
The path-walk API currently uses a stack-based approach to recursing through the list of paths within the repository. This guarantees that after a tree path is explored, all paths contained within that tree path will be explored before continuing to explore siblings of that tree path. The initial motivation of this depth-first approach was to minimize memory pressure while exploring the repository. A breadth-first approach would have too many "active" paths being stored in the paths_to_lists map. We can take this approach one step further by making sure that blob paths are visited before tree paths. This allows the API to free the memory for these blob objects before continuing to perform the depth-first search. This modifies the order in which we visit siblings, but does not change the fact that we are performing depth-first search. To achieve this goal, use a priority queue with a custom sorting method. The sort needs to handle tags, blobs, and trees (commits are handled slightly differently). When objects share a type, we can sort by path name. This will keep children of the latest path to leave the stack be preferred over the rest of the paths in the stack, since they agree in prefix up to and including a directory separator. When the types are different, we can prefer tags over other types and blobs over trees. This causes significant adjustments to t6601-path-walk.sh to rearrange the order of the visited paths. Signed-off-by: Derrick Stolee <[email protected]>
1 parent 0f1e6c5 commit e716672

File tree

2 files changed

+109
-73
lines changed

2 files changed

+109
-73
lines changed

path-walk.c

+48-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "list-objects.h"
1212
#include "object.h"
1313
#include "oid-array.h"
14+
#include "prio-queue.h"
1415
#include "revision.h"
1516
#include "string-list.h"
1617
#include "strmap.h"
@@ -50,24 +51,58 @@ struct path_walk_context {
5051
struct strmap paths_to_lists;
5152

5253
/**
53-
* Store the current list of paths in a stack, to
54-
* facilitate depth-first-search without recursion.
54+
* Store the current list of paths in a priority queue,
55+
* using object type as a sorting mechanism, mostly to
56+
* make sure blobs are popped off the stack first. No
57+
* other sort is made, so within each object type it acts
58+
* like a stack and performs a DFS within the trees.
5559
*
5660
* Use path_stack_pushed to indicate whether a path
5761
* was previously added to path_stack.
5862
*/
59-
struct string_list path_stack;
63+
struct prio_queue path_stack;
6064
struct strset path_stack_pushed;
6165
};
6266

67+
static int compare_by_type(const void *one, const void *two, void *cb_data)
68+
{
69+
struct type_and_oid_list *list1, *list2;
70+
const char *str1 = one;
71+
const char *str2 = two;
72+
struct path_walk_context *ctx = cb_data;
73+
74+
list1 = strmap_get(&ctx->paths_to_lists, str1);
75+
list2 = strmap_get(&ctx->paths_to_lists, str2);
76+
77+
/*
78+
* If object types are equal, then use path comparison.
79+
*/
80+
if (!list1 || !list2 || list1->type == list2->type)
81+
return strcmp(str1, str2);
82+
83+
/* Prefer tags to be popped off first. */
84+
if (list1->type == OBJ_TAG)
85+
return -1;
86+
if (list2->type == OBJ_TAG)
87+
return 1;
88+
89+
/* Prefer blobs to be popped off second. */
90+
if (list1->type == OBJ_BLOB)
91+
return -1;
92+
if (list2->type == OBJ_BLOB)
93+
return 1;
94+
95+
return 0;
96+
}
97+
6398
static void push_to_stack(struct path_walk_context *ctx,
6499
const char *path)
65100
{
66101
if (strset_contains(&ctx->path_stack_pushed, path))
67102
return;
68103

69104
strset_add(&ctx->path_stack_pushed, path);
70-
string_list_append(&ctx->path_stack, path);
105+
prio_queue_put(&ctx->path_stack, xstrdup(path));
71106
}
72107

73108
static int add_children(struct path_walk_context *ctx,
@@ -372,8 +407,8 @@ static void setup_pending_objects(struct path_walk_info *info,
372407
const char *tagged_blob_path = "/tagged-blobs";
373408
tagged_blobs->type = OBJ_BLOB;
374409
tagged_blobs->maybe_interesting = 1;
375-
push_to_stack(ctx, tagged_blob_path);
376410
strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
411+
push_to_stack(ctx, tagged_blob_path);
377412
} else {
378413
oid_array_clear(&tagged_blobs->oids);
379414
free(tagged_blobs);
@@ -384,8 +419,8 @@ static void setup_pending_objects(struct path_walk_info *info,
384419
const char *tag_path = "/tags";
385420
tags->type = OBJ_TAG;
386421
tags->maybe_interesting = 1;
387-
push_to_stack(ctx, tag_path);
388422
strmap_put(&ctx->paths_to_lists, tag_path, tags);
423+
push_to_stack(ctx, tag_path);
389424
} else {
390425
oid_array_clear(&tags->oids);
391426
free(tags);
@@ -410,7 +445,10 @@ int walk_objects_by_path(struct path_walk_info *info)
410445
.repo = info->revs->repo,
411446
.revs = info->revs,
412447
.info = info,
413-
.path_stack = STRING_LIST_INIT_DUP,
448+
.path_stack = {
449+
.compare = compare_by_type,
450+
.cb_data = &ctx
451+
},
414452
.path_stack_pushed = STRSET_INIT,
415453
.paths_to_lists = STRMAP_INIT
416454
};
@@ -493,8 +531,7 @@ int walk_objects_by_path(struct path_walk_info *info)
493531

494532
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
495533
while (!ret && ctx.path_stack.nr) {
496-
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
497-
ctx.path_stack.nr--;
534+
char *path = prio_queue_get(&ctx.path_stack);
498535
paths_nr++;
499536

500537
ret = walk_path(&ctx, path);
@@ -511,8 +548,7 @@ int walk_objects_by_path(struct path_walk_info *info)
511548
push_to_stack(&ctx, entry->key);
512549

513550
while (!ret && ctx.path_stack.nr) {
514-
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
515-
ctx.path_stack.nr--;
551+
char *path = prio_queue_get(&ctx.path_stack);
516552
paths_nr++;
517553

518554
ret = walk_path(&ctx, path);
@@ -526,6 +562,6 @@ int walk_objects_by_path(struct path_walk_info *info)
526562

527563
clear_strmap(&ctx.paths_to_lists);
528564
strset_clear(&ctx.path_stack_pushed);
529-
string_list_clear(&ctx.path_stack, 0);
565+
clear_prio_queue(&ctx.path_stack);
530566
return ret;
531567
}

t/t6601-path-walk.sh

+61-61
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,20 @@ test_expect_success 'all' '
8484
3:tree::$(git rev-parse refs/tags/tree-tag^{})
8585
3:tree::$(git rev-parse refs/tags/tree-tag2^{})
8686
4:blob:a:$(git rev-parse base~2:a)
87-
5:tree:right/:$(git rev-parse topic:right)
88-
5:tree:right/:$(git rev-parse base~1:right)
89-
5:tree:right/:$(git rev-parse base~2:right)
90-
6:blob:right/d:$(git rev-parse base~1:right/d)
91-
7:blob:right/c:$(git rev-parse base~2:right/c)
92-
7:blob:right/c:$(git rev-parse topic:right/c)
93-
8:tree:left/:$(git rev-parse base:left)
94-
8:tree:left/:$(git rev-parse base~2:left)
95-
9:blob:left/b:$(git rev-parse base~2:left/b)
96-
9:blob:left/b:$(git rev-parse base:left/b)
97-
10:tree:a/:$(git rev-parse base:a)
98-
11:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
99-
12:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
100-
13:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
87+
5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2)
88+
6:tree:a/:$(git rev-parse base:a)
89+
7:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
90+
8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file)
91+
9:tree:left/:$(git rev-parse base:left)
92+
9:tree:left/:$(git rev-parse base~2:left)
93+
10:blob:left/b:$(git rev-parse base~2:left/b)
94+
10:blob:left/b:$(git rev-parse base:left/b)
95+
11:tree:right/:$(git rev-parse topic:right)
96+
11:tree:right/:$(git rev-parse base~1:right)
97+
11:tree:right/:$(git rev-parse base~2:right)
98+
12:blob:right/c:$(git rev-parse base~2:right/c)
99+
12:blob:right/c:$(git rev-parse topic:right/c)
100+
13:blob:right/d:$(git rev-parse base~1:right/d)
101101
blobs:10
102102
commits:4
103103
tags:7
@@ -155,18 +155,18 @@ test_expect_success 'branches and indexed objects mix well' '
155155
1:tree::$(git rev-parse base~1^{tree})
156156
1:tree::$(git rev-parse base~2^{tree})
157157
2:blob:a:$(git rev-parse base~2:a)
158-
3:tree:right/:$(git rev-parse topic:right)
159-
3:tree:right/:$(git rev-parse base~1:right)
160-
3:tree:right/:$(git rev-parse base~2:right)
161-
4:blob:right/d:$(git rev-parse base~1:right/d)
162-
4:blob:right/d:$(git rev-parse :right/d)
163-
5:blob:right/c:$(git rev-parse base~2:right/c)
164-
5:blob:right/c:$(git rev-parse topic:right/c)
165-
6:tree:left/:$(git rev-parse base:left)
166-
6:tree:left/:$(git rev-parse base~2:left)
167-
7:blob:left/b:$(git rev-parse base:left/b)
168-
7:blob:left/b:$(git rev-parse base~2:left/b)
169-
8:tree:a/:$(git rev-parse refs/tags/third:a)
158+
3:tree:a/:$(git rev-parse refs/tags/third:a)
159+
4:tree:left/:$(git rev-parse base:left)
160+
4:tree:left/:$(git rev-parse base~2:left)
161+
5:blob:left/b:$(git rev-parse base:left/b)
162+
5:blob:left/b:$(git rev-parse base~2:left/b)
163+
6:tree:right/:$(git rev-parse topic:right)
164+
6:tree:right/:$(git rev-parse base~1:right)
165+
6:tree:right/:$(git rev-parse base~2:right)
166+
7:blob:right/c:$(git rev-parse base~2:right/c)
167+
7:blob:right/c:$(git rev-parse topic:right/c)
168+
8:blob:right/d:$(git rev-parse base~1:right/d)
169+
8:blob:right/d:$(git rev-parse :right/d)
170170
blobs:7
171171
commits:4
172172
tags:0
@@ -186,15 +186,15 @@ test_expect_success 'topic only' '
186186
1:tree::$(git rev-parse topic^{tree})
187187
1:tree::$(git rev-parse base~1^{tree})
188188
1:tree::$(git rev-parse base~2^{tree})
189-
2:tree:right/:$(git rev-parse topic:right)
190-
2:tree:right/:$(git rev-parse base~1:right)
191-
2:tree:right/:$(git rev-parse base~2:right)
192-
3:blob:right/d:$(git rev-parse base~1:right/d)
193-
4:blob:right/c:$(git rev-parse base~2:right/c)
194-
4:blob:right/c:$(git rev-parse topic:right/c)
195-
5:tree:left/:$(git rev-parse base~2:left)
196-
6:blob:left/b:$(git rev-parse base~2:left/b)
197-
7:blob:a:$(git rev-parse base~2:a)
189+
2:blob:a:$(git rev-parse base~2:a)
190+
3:tree:left/:$(git rev-parse base~2:left)
191+
4:blob:left/b:$(git rev-parse base~2:left/b)
192+
5:tree:right/:$(git rev-parse topic:right)
193+
5:tree:right/:$(git rev-parse base~1:right)
194+
5:tree:right/:$(git rev-parse base~2:right)
195+
6:blob:right/c:$(git rev-parse base~2:right/c)
196+
6:blob:right/c:$(git rev-parse topic:right/c)
197+
7:blob:right/d:$(git rev-parse base~1:right/d)
198198
blobs:5
199199
commits:3
200200
tags:0
@@ -210,12 +210,12 @@ test_expect_success 'topic, not base' '
210210
cat >expect <<-EOF &&
211211
0:commit::$(git rev-parse topic)
212212
1:tree::$(git rev-parse topic^{tree})
213-
2:tree:right/:$(git rev-parse topic:right)
214-
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
215-
4:blob:right/c:$(git rev-parse topic:right/c)
216-
5:tree:left/:$(git rev-parse topic:left):UNINTERESTING
217-
6:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
218-
7:blob:a:$(git rev-parse topic:a):UNINTERESTING
213+
2:blob:a:$(git rev-parse topic:a):UNINTERESTING
214+
3:tree:left/:$(git rev-parse topic:left):UNINTERESTING
215+
4:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
216+
5:tree:right/:$(git rev-parse topic:right)
217+
6:blob:right/c:$(git rev-parse topic:right/c)
218+
7:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
219219
blobs:4
220220
commits:1
221221
tags:0
@@ -233,12 +233,12 @@ test_expect_success 'fourth, blob-tag2, not base' '
233233
1:tag:/tags:$(git rev-parse fourth)
234234
2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
235235
3:tree::$(git rev-parse topic^{tree})
236-
4:tree:right/:$(git rev-parse topic:right)
237-
5:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
238-
6:blob:right/c:$(git rev-parse topic:right/c)
239-
7:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
240-
8:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
241-
9:blob:a:$(git rev-parse base~1:a):UNINTERESTING
236+
4:blob:a:$(git rev-parse base~1:a):UNINTERESTING
237+
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
238+
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
239+
7:tree:right/:$(git rev-parse topic:right)
240+
8:blob:right/c:$(git rev-parse topic:right/c)
241+
9:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
242242
blobs:5
243243
commits:1
244244
tags:1
@@ -253,10 +253,10 @@ test_expect_success 'topic, not base, only blobs' '
253253
-- topic --not base >out &&
254254
255255
cat >expect <<-EOF &&
256-
0:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
257-
1:blob:right/c:$(git rev-parse topic:right/c)
258-
2:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
259-
3:blob:a:$(git rev-parse topic:a):UNINTERESTING
256+
0:blob:a:$(git rev-parse topic:a):UNINTERESTING
257+
1:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING
258+
2:blob:right/c:$(git rev-parse topic:right/c)
259+
3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING
260260
blobs:4
261261
commits:0
262262
tags:0
@@ -289,8 +289,8 @@ test_expect_success 'topic, not base, only trees' '
289289
290290
cat >expect <<-EOF &&
291291
0:tree::$(git rev-parse topic^{tree})
292-
1:tree:right/:$(git rev-parse topic:right)
293-
2:tree:left/:$(git rev-parse topic:left):UNINTERESTING
292+
1:tree:left/:$(git rev-parse topic:left):UNINTERESTING
293+
2:tree:right/:$(git rev-parse topic:right)
294294
commits:0
295295
blobs:0
296296
tags:0
@@ -308,14 +308,14 @@ test_expect_success 'topic, not base, boundary' '
308308
0:commit::$(git rev-parse base~1):UNINTERESTING
309309
1:tree::$(git rev-parse topic^{tree})
310310
1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING
311-
2:tree:right/:$(git rev-parse topic:right)
312-
2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
313-
3:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
314-
4:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
315-
4:blob:right/c:$(git rev-parse topic:right/c)
316-
5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
317-
6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
318-
7:blob:a:$(git rev-parse base~1:a):UNINTERESTING
311+
2:blob:a:$(git rev-parse base~1:a):UNINTERESTING
312+
3:tree:left/:$(git rev-parse base~1:left):UNINTERESTING
313+
4:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
314+
5:tree:right/:$(git rev-parse topic:right)
315+
5:tree:right/:$(git rev-parse base~1:right):UNINTERESTING
316+
6:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
317+
6:blob:right/c:$(git rev-parse topic:right/c)
318+
7:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
319319
blobs:5
320320
commits:2
321321
tags:0

0 commit comments

Comments
 (0)