feat: port blobindex package from go-libstoracha#14
Conversation
There was a problem hiding this comment.
Pull request overview
This PR ports a blobindex package into libforge, adding an in-memory sharded DAG index representation plus CAR-based archive/extract support using a CBOR/DAG-JSON datamodel (with generated codecs).
Changes:
- Add `blobindex` APIs (`ShardedDagIndex`, `MapShardedDagIndex`) and multihash-keyed map helpers.
- Implement CAR decoding/encoding for sharded DAG indexes (`Extract`, `Archive`) with deterministic output ordering.
- Introduce the `blobindex/datamodel` schema + generated CBOR/DAG-JSON codecs and add round-trip/determinism tests.
Reviewed changes
Copilot reviewed 7 out of 12 changed files in this pull request and generated 6 comments.
Show a summary per file
| File | Description |
|---|---|
| go.mod | Adds new direct/indirect dependencies needed for CAR + block/CID handling. |
| go.sum | Updates dependency lockfile for the newly introduced modules. |
| blobindex/types.go | Defines core blobindex interfaces/types (e.g., ShardedDagIndex, Range). |
| blobindex/multihashmap.go | Adds constructor helper for multihash-keyed maps. |
| blobindex/shardeddagindex.go | Implements CAR extract/archive + in-memory sharded DAG index implementation. |
| blobindex/shardeddagindex_test.go | Adds round-trip and determinism coverage for archive/extract behavior. |
| blobindex/datamodel/shardeddagindex.go | Defines the CBOR/DAG-JSON data model structs for the index format. |
| blobindex/datamodel/gen/main.go | Adds generator entrypoint for CBOR/DAG-JSON encoder/decoder generation. |
| blobindex/datamodel/cbor_gen.maps.go | Generated CBOR map encoders/decoders for the datamodel. |
| blobindex/datamodel/cbor_gen.tuples.go | Generated CBOR tuple encoders/decoders for the datamodel. |
| blobindex/datamodel/json_gen.maps.go | Generated DAG-JSON map encoders/decoders for the datamodel. |
| blobindex/datamodel/json_gen.tuples.go | Generated DAG-JSON tuple encoders/decoders for the datamodel. |
Files not reviewed (4)
- blobindex/datamodel/cbor_gen.maps.go: Language not supported
- blobindex/datamodel/cbor_gen.tuples.go: Language not supported
- blobindex/datamodel/json_gen.maps.go: Language not supported
- blobindex/datamodel/json_gen.tuples.go: Language not supported
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| b := buf.Bytes() | ||
| l, err := cid.V1Builder{Codec: cid.DagCBOR, MhType: mh.SHA2_256}.Sum(b) | ||
| if err != nil { | ||
| return nil, err | ||
| } |
| // BlobIndexModel is the golang structure for encoding a shard of CIDs in a block | ||
| type BlobIndexModel struct { | ||
| Digest multihash.Multihash | ||
| Slices []BlobSliceModel | ||
| } |
| slices.SortFunc(list, func(a, b E) int { | ||
| decodedA := decodeds.Get(getDigest(a)) | ||
| decodedB := decodeds.Get(getDigest(b)) | ||
| return bytes.Compare(decodedA.Digest, decodedB.Digest) |
| return fmt.Errorf(fmt.Sprintf("unknown format: %s", reason), args...) | ||
| } | ||
|
|
||
| // NewDecodeFailureError returns an error for a decode failure. | ||
| func NewDecodeFailureError(reason string, args ...any) error { | ||
| return fmt.Errorf(fmt.Sprintf("decode failure: %s", reason), args...) |
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
| func Archive(index ShardedDagIndex) (io.Reader, error) { | ||
| // assemble blob index shards | ||
| blobIndexDatas, err := toList(index.Shards(), func(shardHash mh.Multihash, shard MultihashMap[Range]) (dm.BlobIndexModel, error) { | ||
| // assemble blob slices | ||
| blobSliceDatas, err := toList(shard, func(sliceHash mh.Multihash, byteRange Range) (dm.BlobSliceModel, error) { | ||
| return dm.BlobSliceModel{Digest: sliceHash, Range: byteRange}, nil | ||
| }) | ||
| if err != nil { | ||
| return dm.BlobIndexModel{}, err | ||
| } | ||
| // sort blob slices | ||
| if err := sortByDigest(blobSliceDatas, func(bsm dm.BlobSliceModel) mh.Multihash { | ||
| return bsm.Digest | ||
| }); err != nil { | ||
| return dm.BlobIndexModel{}, err | ||
| } | ||
| return dm.BlobIndexModel{ | ||
| Digest: shardHash, | ||
| Slices: blobSliceDatas, | ||
| }, nil | ||
| }) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| // sort blob index shards | ||
| if err := sortByDigest(blobIndexDatas, func(bim dm.BlobIndexModel) mh.Multihash { | ||
| return bim.Digest | ||
| }); err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| // initialize root sharded dag index | ||
| shardedDagIndex := dm.ShardedDagIndexModel_0_1{ | ||
| Shards: make([]cid.Cid, 0, len(blobIndexDatas)), | ||
| } | ||
| // encode blob index shards to blocks and add links to sharded dag index | ||
| blks := make([]blocks.Block, 0, len(blobIndexDatas)+1) | ||
| for _, shard := range blobIndexDatas { | ||
| var buf bytes.Buffer | ||
| err := shard.MarshalCBOR(&buf) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| b := buf.Bytes() | ||
| l, err := cid.V1Builder{Codec: cid.DagCBOR, MhType: mh.SHA2_256}.Sum(b) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| blk, err := blocks.NewBlockWithCid(b, l) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| blks = append(blks, blk) | ||
| shardedDagIndex.Shards = append(shardedDagIndex.Shards, l) | ||
| } | ||
|
|
||
| // encode the root block | ||
| model := dm.ShardedDagIndexModel{DagO_1: &shardedDagIndex} | ||
| var rootData bytes.Buffer | ||
| if err := model.MarshalCBOR(&rootData); err != nil { | ||
| return nil, err | ||
| } | ||
| root, err := cid.V1Builder{Codec: cid.DagCBOR, MhType: mh.SHA2_256}.Sum(rootData.Bytes()) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| rootBlock, err := blocks.NewBlockWithCid(rootData.Bytes(), root) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| reader, writer := io.Pipe() | ||
| go func() { | ||
| err := car.WriteHeader(&car.CarHeader{Roots: []cid.Cid{root}, Version: 1}, writer) | ||
| if err != nil { | ||
| writer.CloseWithError(fmt.Errorf("writing CAR header: %w", err)) | ||
| return | ||
| } | ||
| for _, block := range append(blks, rootBlock) { | ||
| err = util.LdWrite(writer, block.Cid().Bytes(), block.RawData()) | ||
| if err != nil { | ||
| writer.CloseWithError(fmt.Errorf("writing CAR blocks: %w", err)) | ||
| return | ||
| } | ||
| } | ||
| writer.Close() | ||
| }() | ||
| return reader, nil | ||
| } |
There was a problem hiding this comment.
I'd really like to remove this goroutine at the end. Sorry this comment has to land here; I realize you're just porting things. Could we instead flip the signature?
```go
func Archive(index ShardedDagIndex, w io.Writer) error
```
Callers that want streaming pass a pipe writer (and run the goroutine on their side); callers that want a buffer pass a `bytes.Buffer`. Ditto for files and `http.ResponseWriter`s.
Then we could wrap this like:
```go
func ArchiveReader(index ShardedDagIndex) io.Reader {
	r, w := io.Pipe()
	go func() {
		w.CloseWithError(Archive(index, w))
	}()
	return r
}
```
to enable the current style if we need it.
There was a problem hiding this comment.
Yep, done. I also switched to a new CAR library that doesn't bring in the whole of boxo; benchmarks seem to suggest it's faster than ipld/go-car and makes fewer allocations.
I'm not super happy that `go-car` pulls in boxo; maybe I'll write a simple CAR reader/writer for Go at some point.