Skip to content

Commit d2e5313

Browse files
committed
feat: extract TraversalResumerPathState interface and allow it to be shared across traversals
1 parent e9c7d8a commit d2e5313

File tree

4 files changed

+73
-33
lines changed

4 files changed

+73
-33
lines changed

v2/options.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/multiformats/go-multicodec"
99

1010
"github.com/ipld/go-car/v2/internal/carv1"
11+
resumetraversal "github.com/ipld/go-car/v2/traversal"
1112
)
1213

1314
// DefaultMaxIndexCidSize specifies the maximum size in bytes accepted as a section CID by CARv2 index.
@@ -62,6 +63,7 @@ type Options struct {
6263
TraversalPrototypeChooser traversal.LinkTargetNodePrototypeChooser
6364
DataPayloadSize uint64
6465
SkipOffset uint64
66+
TraversalResumerPathState resumetraversal.TraversalResumerPathState
6567

6668
MaxAllowedHeaderSize uint64
6769
MaxAllowedSectionSize uint64

v2/selective.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,20 @@ func WithDataPayloadSize(size uint64) Option {
5050
}
5151
}
5252

53+
// WithTraversalResumerPathState provides a custom TraversalResumerPathState
54+
// that can be reused between selective CAR creations where traversals may need
55+
// to be resumed at arbitrary points within the DAG.
56+
//
57+
// A TraversalResumerPathState shared across multiple traversals using the same
58+
// selector and DAG will yield the same state. This allows us to resume at
59+
// arbitrary points within the DAG and load the minimal additional blocks
60+
// required to resume the traversal at that point.
61+
func WithTraversalResumerPathState(pathState resumetraversal.TraversalResumerPathState) Option {
62+
return func(o *Options) {
63+
o.TraversalResumerPathState = pathState
64+
}
65+
}
66+
5367
// NewSelectiveWriter walks through the proposed dag traversal to learn its total size in order to be able to
5468
// stream out a car to a writer in the expected traversal order in one go.
5569
func NewSelectiveWriter(ctx context.Context, ls *ipld.LinkSystem, root cid.Cid, selector ipld.Node, opts ...Option) (Writer, error) {
@@ -321,7 +335,11 @@ func (tc *traversalCar) setup(ctx context.Context, ls *ipld.LinkSystem, opts Opt
321335
}
322336

323337
ls.TrustedStorage = true
324-
resumer, err := resumetraversal.WithTraversingLinksystem(&progress)
338+
pathState := opts.TraversalResumerPathState
339+
if pathState == nil {
340+
pathState = resumetraversal.NewTraversalResumerPathState()
341+
}
342+
resumer, err := resumetraversal.WithTraversingLinksystem(&progress, pathState)
325343
if err != nil {
326344
return err
327345
}

v2/traversal/resumption.go

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,42 @@ import (
1717
"github.com/ipld/go-ipld-prime/traversal"
1818
)
1919

20+
// TraverseResumer allows resuming a progress from a previously encountered path
21+
// in the selector.
22+
type TraverseResumer interface {
23+
RewindToPath(from datamodel.Path) error
24+
RewindToOffset(offset uint64) error
25+
Position() uint64
26+
}
27+
28+
// TraversalResumerPathState tracks a traversal state for the purpose of
29+
// building a CAR. For each block in the CAR it tracks the path to that block,
30+
// the Link of the block and where in the CAR the block is located.
31+
//
32+
// A TraversalResumerPathState shared across multiple traversals using the same
33+
// selector and DAG will yield the same state. This allows us to resume at
34+
// arbitrary points within the DAG and load the minimal additional blocks
35+
// required to resume the traversal at that point.
36+
type TraversalResumerPathState interface {
37+
AddPath(path []datamodel.PathSegment, link datamodel.Link, atOffset uint64)
38+
GetLinks(root datamodel.Path) []datamodel.Link
39+
GetOffsetAfter(root datamodel.Path) (uint64, error)
40+
}
41+
2042
type pathNode struct {
2143
link datamodel.Link
2244
offset uint64
2345
children map[datamodel.PathSegment]*pathNode
2446
}
2547

48+
// NewTraversalResumerPathState creates a new TraversalResumerPathState.
49+
//
50+
// Note that the TraversalResumerPathState returned by this factory is not
51+
// thread-safe.
52+
func NewTraversalResumerPathState() TraversalResumerPathState {
53+
return newPath(nil, 0)
54+
}
55+
2656
func newPath(link datamodel.Link, at uint64) *pathNode {
2757
return &pathNode{
2858
link: link,
@@ -31,15 +61,15 @@ func newPath(link datamodel.Link, at uint64) *pathNode {
3161
}
3262
}
3363

34-
func (pn pathNode) addPath(p []datamodel.PathSegment, link datamodel.Link, at uint64) {
64+
func (pn pathNode) AddPath(p []datamodel.PathSegment, link datamodel.Link, atOffset uint64) {
3565
if len(p) == 0 {
3666
return
3767
}
3868
if _, ok := pn.children[p[0]]; !ok {
39-
child := newPath(link, at)
69+
child := newPath(link, atOffset)
4070
pn.children[p[0]] = child
4171
}
42-
pn.children[p[0]].addPath(p[1:], link, at)
72+
pn.children[p[0]].AddPath(p[1:], link, atOffset)
4373
}
4474

4575
func (pn pathNode) allLinks() []datamodel.Link {
@@ -57,7 +87,7 @@ func (pn pathNode) allLinks() []datamodel.Link {
5787
}
5888

5989
// GetLinks returns the links in the tree rooted at 'root'
60-
func (pn pathNode) getLinks(root datamodel.Path) []datamodel.Link {
90+
func (pn pathNode) GetLinks(root datamodel.Path) []datamodel.Link {
6191
segs := root.Segments()
6292
switch len(segs) {
6393
case 0:
@@ -80,12 +110,12 @@ func (pn pathNode) getLinks(root datamodel.Path) []datamodel.Link {
80110
// base case 2: not registered sub-path.
81111
return []datamodel.Link{}
82112
}
83-
return pn.children[next].getLinks(datamodel.NewPathNocopy(segs[1:]))
113+
return pn.children[next].GetLinks(datamodel.NewPathNocopy(segs[1:]))
84114
}
85115

86116
var errInvalid = fmt.Errorf("invalid path")
87117

88-
func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
118+
func (pn pathNode) GetOffsetAfter(root datamodel.Path) (uint64, error) {
89119
// we look for offset of next sibling.
90120
// if no next sibling recurse up the path segments until we find a next sibling.
91121
segs := root.Segments()
@@ -100,7 +130,7 @@ func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
100130
closest := chld.offset
101131
// try recursive path
102132
if len(segs) > 1 {
103-
co, err := chld.offsetAfter(datamodel.NewPathNocopy(segs[1:]))
133+
co, err := chld.GetOffsetAfter(datamodel.NewPathNocopy(segs[1:]))
104134
if err == nil {
105135
return co, err
106136
}
@@ -121,35 +151,28 @@ func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
121151
return 0, errInvalid
122152
}
123153

124-
// TraverseResumer allows resuming a progress from a previously encountered path in the selector.
125-
type TraverseResumer interface {
126-
RewindToPath(from datamodel.Path) error
127-
RewindToOffset(offset uint64) error
128-
Position() uint64
129-
}
130-
131154
type traversalState struct {
132155
wrappedLinksystem *linking.LinkSystem
133156
lsCounter *loader.Counter
134-
blockNumber int
135-
pathOrder map[int]datamodel.Path
136-
pathTree *pathNode
157+
pathTree TraversalResumerPathState
137158
rewindPathTarget *datamodel.Path
138159
rewindOffsetTarget uint64
139160
pendingBlockStart uint64 // on rewinds, we store where the counter was in order to know the length of the last read block.
140161
progress *traversal.Progress
141162
}
142163

164+
var _ TraverseResumer = (*traversalState)(nil)
165+
143166
func (ts *traversalState) RewindToPath(from datamodel.Path) error {
144167
if ts.progress == nil {
145168
return nil
146169
}
147170
// reset progress and traverse until target.
148171
ts.progress.SeenLinks = make(map[datamodel.Link]struct{})
149-
ts.blockNumber = 0
150172
ts.pendingBlockStart = ts.lsCounter.Size()
151173
ts.lsCounter.TotalRead = 0
152174
ts.rewindPathTarget = &from
175+
ts.rewindOffsetTarget = 0
153176
return nil
154177
}
155178

@@ -163,10 +186,10 @@ func (ts *traversalState) RewindToOffset(offset uint64) error {
163186
}
164187
// reset progress and traverse until target.
165188
ts.progress.SeenLinks = make(map[datamodel.Link]struct{})
166-
ts.blockNumber = 0
167189
ts.pendingBlockStart = ts.lsCounter.Size()
168190
ts.lsCounter.TotalRead = 0
169191
ts.rewindOffsetTarget = offset
192+
ts.rewindPathTarget = nil
170193
return nil
171194
}
172195

@@ -177,9 +200,7 @@ func (ts *traversalState) Position() uint64 {
177200
func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Reader, error) {
178201
// when not in replay mode, we track metadata
179202
if ts.rewindPathTarget == nil && ts.rewindOffsetTarget == 0 {
180-
ts.pathOrder[ts.blockNumber] = lc.LinkPath
181-
ts.pathTree.addPath(lc.LinkPath.Segments(), l, ts.lsCounter.Size())
182-
ts.blockNumber++
203+
ts.pathTree.AddPath(lc.LinkPath.Segments(), l, ts.lsCounter.Size())
183204
return ts.wrappedLinksystem.StorageReadOpener(lc, l)
184205
}
185206

@@ -205,12 +226,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
205226
break
206227
}
207228
if targetSegments[i].String() != s.String() {
208-
links := ts.pathTree.getLinks(datamodel.NewPathNocopy(seg[0 : i+1]))
229+
links := ts.pathTree.GetLinks(datamodel.NewPathNocopy(seg[0 : i+1]))
209230
for _, l := range links {
210231
ts.progress.SeenLinks[l] = struct{}{}
211232
}
212233
var err error
213-
ts.lsCounter.TotalRead, err = ts.pathTree.offsetAfter(datamodel.NewPathNocopy(seg[0 : i+1]))
234+
ts.lsCounter.TotalRead, err = ts.pathTree.GetOffsetAfter(datamodel.NewPathNocopy(seg[0 : i+1]))
214235
if err == errInvalid {
215236
ts.lsCounter.TotalRead = ts.pendingBlockStart
216237
} else if err != nil {
@@ -222,12 +243,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
222243
}
223244
}
224245
if ts.rewindOffsetTarget != 0 {
225-
links := ts.pathTree.getLinks(lc.LinkPath)
246+
links := ts.pathTree.GetLinks(lc.LinkPath)
226247
for _, l := range links {
227248
ts.progress.SeenLinks[l] = struct{}{}
228249
}
229250
var err error
230-
ts.lsCounter.TotalRead, err = ts.pathTree.offsetAfter(lc.LinkPath)
251+
ts.lsCounter.TotalRead, err = ts.pathTree.GetOffsetAfter(lc.LinkPath)
231252
if err == errInvalid {
232253
ts.lsCounter.TotalRead = ts.pendingBlockStart
233254
} else if err != nil {
@@ -243,13 +264,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
243264
// WithTraversingLinksystem extends a progress for traversal such that it can
244265
// subsequently resume and perform subsets of the walk efficiently from
245266
// an arbitrary position within the selector traversal.
246-
func WithTraversingLinksystem(p *traversal.Progress) (TraverseResumer, error) {
267+
func WithTraversingLinksystem(p *traversal.Progress, pathState TraversalResumerPathState) (TraverseResumer, error) {
247268
wls, ctr := loader.CountingLinkSystem(p.Cfg.LinkSystem)
248269
ts := &traversalState{
249270
wrappedLinksystem: &wls,
250271
lsCounter: ctr.(*loader.Counter),
251-
pathOrder: make(map[int]datamodel.Path),
252-
pathTree: newPath(nil, 0),
272+
pathTree: pathState,
253273
progress: p,
254274
}
255275
p.Cfg.LinkSystem.StorageReadOpener = ts.traverse

v2/traversal/resumption_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func TestWalkResumeByPath(t *testing.T) {
8989
LinkTargetNodePrototypeChooser: basicnode.Chooser,
9090
},
9191
}
92-
resumer, err := cartraversal.WithTraversingLinksystem(&p)
92+
resumer, err := cartraversal.WithTraversingLinksystem(&p, cartraversal.NewTraversalResumerPathState())
9393
if err != nil {
9494
t.Fatal(err)
9595
}
@@ -154,7 +154,7 @@ func TestWalkResumeByPathPartialWalk(t *testing.T) {
154154
LinkTargetNodePrototypeChooser: basicnode.Chooser,
155155
},
156156
}
157-
resumer, err := cartraversal.WithTraversingLinksystem(&p)
157+
resumer, err := cartraversal.WithTraversingLinksystem(&p, cartraversal.NewTraversalResumerPathState())
158158
if err != nil {
159159
t.Fatal(err)
160160
}
@@ -195,7 +195,7 @@ func TestWalkResumeByOffset(t *testing.T) {
195195
LinkTargetNodePrototypeChooser: basicnode.Chooser,
196196
},
197197
}
198-
resumer, err := cartraversal.WithTraversingLinksystem(&p)
198+
resumer, err := cartraversal.WithTraversingLinksystem(&p, cartraversal.NewTraversalResumerPathState())
199199
if err != nil {
200200
t.Fatal(err)
201201
}

0 commit comments

Comments
 (0)