@@ -17,12 +17,42 @@ import (
17
17
"github.com/ipld/go-ipld-prime/traversal"
18
18
)
19
19
20
+ // TraverseResumer allows resuming a progress from a previously encountered path
21
+ // in the selector.
22
+ type TraverseResumer interface {
23
+ RewindToPath (from datamodel.Path ) error
24
+ RewindToOffset (offset uint64 ) error
25
+ Position () uint64
26
+ }
27
+
28
+ // TraversalResumerPathState tracks a traversal state for the purpose of
29
+ // building a CAR. For each block in the CAR it tracks the path to that block,
30
+ // the Link of the block and where in the CAR the block is located.
31
+ //
32
+ // A TraversalResumerPathState shared across multiple traversals using the same
33
+ // selector and DAG will yield the same state. This allows us to resume at
34
+ // arbitrary points within the DAG and load the minimal additional blocks
35
+ // required to resume the traversal at that point.
36
+ type TraversalResumerPathState interface {
37
+ AddPath (path []datamodel.PathSegment , link datamodel.Link , atOffset uint64 )
38
+ GetLinks (root datamodel.Path ) []datamodel.Link
39
+ GetOffsetAfter (root datamodel.Path ) (uint64 , error )
40
+ }
41
+
20
42
type pathNode struct {
21
43
link datamodel.Link
22
44
offset uint64
23
45
children map [datamodel.PathSegment ]* pathNode
24
46
}
25
47
48
+ // NewTraversalResumerPathState creates a new TraversalResumerPathState.
49
+ //
50
+ // Note that the TraversalResumerPathState returned by this factory is not
51
+ // thread-safe.
52
+ func NewTraversalResumerPathState () TraversalResumerPathState {
53
+ return newPath (nil , 0 )
54
+ }
55
+
26
56
func newPath (link datamodel.Link , at uint64 ) * pathNode {
27
57
return & pathNode {
28
58
link : link ,
@@ -31,15 +61,15 @@ func newPath(link datamodel.Link, at uint64) *pathNode {
31
61
}
32
62
}
33
63
34
- func (pn pathNode ) addPath (p []datamodel.PathSegment , link datamodel.Link , at uint64 ) {
64
+ func (pn pathNode ) AddPath (p []datamodel.PathSegment , link datamodel.Link , atOffset uint64 ) {
35
65
if len (p ) == 0 {
36
66
return
37
67
}
38
68
if _ , ok := pn .children [p [0 ]]; ! ok {
39
- child := newPath (link , at )
69
+ child := newPath (link , atOffset )
40
70
pn .children [p [0 ]] = child
41
71
}
42
- pn .children [p [0 ]].addPath (p [1 :], link , at )
72
+ pn .children [p [0 ]].AddPath (p [1 :], link , atOffset )
43
73
}
44
74
45
75
func (pn pathNode ) allLinks () []datamodel.Link {
@@ -57,7 +87,7 @@ func (pn pathNode) allLinks() []datamodel.Link {
57
87
}
58
88
59
89
// GetLinks returns the links recorded in the tree rooted at 'root'
60
- func (pn pathNode ) getLinks (root datamodel.Path ) []datamodel.Link {
90
+ func (pn pathNode ) GetLinks (root datamodel.Path ) []datamodel.Link {
61
91
segs := root .Segments ()
62
92
switch len (segs ) {
63
93
case 0 :
@@ -80,12 +110,12 @@ func (pn pathNode) getLinks(root datamodel.Path) []datamodel.Link {
80
110
// base case 2: not registered sub-path.
81
111
return []datamodel.Link {}
82
112
}
83
- return pn .children [next ].getLinks (datamodel .NewPathNocopy (segs [1 :]))
113
+ return pn .children [next ].GetLinks (datamodel .NewPathNocopy (segs [1 :]))
84
114
}
85
115
86
116
var errInvalid = fmt .Errorf ("invalid path" )
87
117
88
- func (pn pathNode ) offsetAfter (root datamodel.Path ) (uint64 , error ) {
118
+ func (pn pathNode ) GetOffsetAfter (root datamodel.Path ) (uint64 , error ) {
89
119
// we look for offset of next sibling.
90
120
// if no next sibling recurse up the path segments until we find a next sibling.
91
121
segs := root .Segments ()
@@ -100,7 +130,7 @@ func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
100
130
closest := chld .offset
101
131
// try recursive path
102
132
if len (segs ) > 1 {
103
- co , err := chld .offsetAfter (datamodel .NewPathNocopy (segs [1 :]))
133
+ co , err := chld .GetOffsetAfter (datamodel .NewPathNocopy (segs [1 :]))
104
134
if err == nil {
105
135
return co , err
106
136
}
@@ -121,35 +151,28 @@ func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
121
151
return 0 , errInvalid
122
152
}
123
153
124
- // TraverseResumer allows resuming a progress from a previously encountered path in the selector.
125
- type TraverseResumer interface {
126
- RewindToPath (from datamodel.Path ) error
127
- RewindToOffset (offset uint64 ) error
128
- Position () uint64
129
- }
130
-
131
154
type traversalState struct {
132
155
wrappedLinksystem * linking.LinkSystem
133
156
lsCounter * loader.Counter
134
- blockNumber int
135
- pathOrder map [int ]datamodel.Path
136
- pathTree * pathNode
157
+ pathTree TraversalResumerPathState
137
158
rewindPathTarget * datamodel.Path
138
159
rewindOffsetTarget uint64
139
160
pendingBlockStart uint64 // on rewinds, we store where the counter was in order to know the length of the last read block.
140
161
progress * traversal.Progress
141
162
}
142
163
164
+ var _ TraverseResumer = (* traversalState )(nil )
165
+
143
166
func (ts * traversalState ) RewindToPath (from datamodel.Path ) error {
144
167
if ts .progress == nil {
145
168
return nil
146
169
}
147
170
// reset progress and traverse until target.
148
171
ts .progress .SeenLinks = make (map [datamodel.Link ]struct {})
149
- ts .blockNumber = 0
150
172
ts .pendingBlockStart = ts .lsCounter .Size ()
151
173
ts .lsCounter .TotalRead = 0
152
174
ts .rewindPathTarget = & from
175
+ ts .rewindOffsetTarget = 0
153
176
return nil
154
177
}
155
178
@@ -163,10 +186,10 @@ func (ts *traversalState) RewindToOffset(offset uint64) error {
163
186
}
164
187
// reset progress and traverse until target.
165
188
ts .progress .SeenLinks = make (map [datamodel.Link ]struct {})
166
- ts .blockNumber = 0
167
189
ts .pendingBlockStart = ts .lsCounter .Size ()
168
190
ts .lsCounter .TotalRead = 0
169
191
ts .rewindOffsetTarget = offset
192
+ ts .rewindPathTarget = nil
170
193
return nil
171
194
}
172
195
@@ -177,9 +200,7 @@ func (ts *traversalState) Position() uint64 {
177
200
func (ts * traversalState ) traverse (lc linking.LinkContext , l ipld.Link ) (io.Reader , error ) {
178
201
// when not in replay mode, we track metadata
179
202
if ts .rewindPathTarget == nil && ts .rewindOffsetTarget == 0 {
180
- ts .pathOrder [ts .blockNumber ] = lc .LinkPath
181
- ts .pathTree .addPath (lc .LinkPath .Segments (), l , ts .lsCounter .Size ())
182
- ts .blockNumber ++
203
+ ts .pathTree .AddPath (lc .LinkPath .Segments (), l , ts .lsCounter .Size ())
183
204
return ts .wrappedLinksystem .StorageReadOpener (lc , l )
184
205
}
185
206
@@ -205,12 +226,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
205
226
break
206
227
}
207
228
if targetSegments [i ].String () != s .String () {
208
- links := ts .pathTree .getLinks (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
229
+ links := ts .pathTree .GetLinks (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
209
230
for _ , l := range links {
210
231
ts .progress .SeenLinks [l ] = struct {}{}
211
232
}
212
233
var err error
213
- ts .lsCounter .TotalRead , err = ts .pathTree .offsetAfter (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
234
+ ts .lsCounter .TotalRead , err = ts .pathTree .GetOffsetAfter (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
214
235
if err == errInvalid {
215
236
ts .lsCounter .TotalRead = ts .pendingBlockStart
216
237
} else if err != nil {
@@ -222,12 +243,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
222
243
}
223
244
}
224
245
if ts .rewindOffsetTarget != 0 {
225
- links := ts .pathTree .getLinks (lc .LinkPath )
246
+ links := ts .pathTree .GetLinks (lc .LinkPath )
226
247
for _ , l := range links {
227
248
ts .progress .SeenLinks [l ] = struct {}{}
228
249
}
229
250
var err error
230
- ts .lsCounter .TotalRead , err = ts .pathTree .offsetAfter (lc .LinkPath )
251
+ ts .lsCounter .TotalRead , err = ts .pathTree .GetOffsetAfter (lc .LinkPath )
231
252
if err == errInvalid {
232
253
ts .lsCounter .TotalRead = ts .pendingBlockStart
233
254
} else if err != nil {
@@ -243,13 +264,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
243
264
// WithTraversingLinksystem extends a progress for traversal such that it can
244
265
// subsequently resume and perform subsets of the walk efficiently from
245
266
// an arbitrary position within the selector traversal.
246
- func WithTraversingLinksystem (p * traversal.Progress ) (TraverseResumer , error ) {
267
+ func WithTraversingLinksystem (p * traversal.Progress , pathState TraversalResumerPathState ) (TraverseResumer , error ) {
247
268
wls , ctr := loader .CountingLinkSystem (p .Cfg .LinkSystem )
248
269
ts := & traversalState {
249
270
wrappedLinksystem : & wls ,
250
271
lsCounter : ctr .(* loader.Counter ),
251
- pathOrder : make (map [int ]datamodel.Path ),
252
- pathTree : newPath (nil , 0 ),
272
+ pathTree : pathState ,
253
273
progress : p ,
254
274
}
255
275
p .Cfg .LinkSystem .StorageReadOpener = ts .traverse
0 commit comments