Skip to content

Commit da38805

Browse files
committed
refactor + fix + tests
1 parent ff8ec8c commit da38805

File tree

2 files changed

+333
-67
lines changed

2 files changed

+333
-67
lines changed

x/fibre/blob.go

Lines changed: 151 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,19 @@ import (
55
"encoding/hex"
66
"errors"
77
"fmt"
8-
"math"
98
"runtime"
109

1110
"github.com/celestiaorg/rsema1d"
1211
"github.com/celestiaorg/rsema1d/field"
1312
)
1413

15-
var (
	// ErrBlobTooLarge reports that a blob's size is above the configured
	// MaxBlobSize limit.
	ErrBlobTooLarge = errors.New("blob size exceeds maximum allowed size")
)
1718

18-
// Commitment is a commitment to fibre [Blob].
19-
// TODO(@Wondertan): merge with rsema1d.Commitment and move these methods.
19+
// Commitment is a commitment to a blob.
20+
// TODO(@Wondertan): merge with rsema1d.Commitment once it has these methods.
2021
type Commitment rsema1d.Commitment
2122

2223
// UnmarshalBinary decodes a [Commitment] from bytes.
@@ -47,7 +48,7 @@ type BlobConfig struct {
4748
// MaxBlobSize is the maximum allowed blob size.
4849
MaxBlobSize int
4950
// BlobVersion is the version of the row format.
50-
BlobVersion uint32
51+
BlobVersion uint8
5152
// CodingWorkers is the number of workers to use for encoding and decoding rsema1d.
5253
CodingWorkers int
5354
}
@@ -72,32 +73,34 @@ type Blob struct {
7273
commitment Commitment
7374
rlcOrig []field.GF128
7475

75-
// rows holds the shards for both encoded data and reconstruction.
76-
rows [][]byte
76+
// holds meta fields about the blob
77+
header blobHeaderV0
78+
// data holds the decoded original data (without header).
79+
data []byte
7780
}
7881

79-
// NewBlob creates a new [Blob] instance by encoding the original data.
80-
// It takes the original data and a [BlobConfig].
81-
// The data is prefixed with a header containing the blob version and original data size.
82-
func NewBlob(originalData []byte, cfg BlobConfig) (d *Blob, err error) {
83-
if len(originalData) == 0 {
82+
// NewBlob creates a new [Blob] instance by encoding the data.
83+
// It takes the data and a [BlobConfig].
84+
// The data is prefixed with a header containing the blob version and data size.
85+
func NewBlob(data []byte, cfg BlobConfig) (d *Blob, err error) {
86+
if len(data) == 0 {
8487
return nil, fmt.Errorf("data cannot be empty")
8588
}
86-
if len(originalData) > cfg.MaxBlobSize {
87-
return nil, fmt.Errorf("%w: data size %d exceeds maximum %d", ErrBlobTooLarge, len(originalData), cfg.MaxBlobSize)
89+
if len(data) > cfg.MaxBlobSize {
90+
return nil, fmt.Errorf("%w: data size %d exceeds maximum %d", ErrBlobTooLarge, len(data), cfg.MaxBlobSize)
8891
}
8992

9093
d = &Blob{
91-
cfg: cfg,
94+
cfg: cfg,
95+
header: newBlobHeaderV0(len(data)),
96+
data: data,
9297
}
9398

94-
rowSize := d.calculateRowSize(len(originalData))
95-
d.rows = d.splitIntoRows(originalData, rowSize)
96-
97-
d.extendedData, d.commitment, d.rlcOrig, err = rsema1d.Encode(d.rows, &rsema1d.Config{
99+
rows := d.header.encodeToRows(data, cfg)
100+
d.extendedData, d.commitment, d.rlcOrig, err = rsema1d.Encode(rows, &rsema1d.Config{
98101
K: cfg.OriginalRows,
99102
N: cfg.ParityRows,
100-
RowSize: rowSize,
103+
RowSize: len(rows[0]),
101104
WorkerCount: cfg.CodingWorkers,
102105
})
103106
if err != nil {
@@ -109,7 +112,7 @@ func NewBlob(originalData []byte, cfg BlobConfig) (d *Blob, err error) {
109112

110113
// Commitment returns the commitment to the blob.
111114
func (d *Blob) Commitment() Commitment {
112-
return d.commitment
115+
return Commitment(d.commitment)
113116
}
114117

115118
// RLCOrig returns the original RLC coefficients.
@@ -118,30 +121,23 @@ func (d *Blob) RLCOrig() []field.GF128 {
118121
}
119122

120123
// RowSize returns the size of each row in bytes.
124+
// Returns 0 if no original data available to determine row size.
121125
func (d *Blob) RowSize() int {
122-
if len(d.rows) > 0 && len(d.rows[0]) > 0 {
123-
return len(d.rows[0])
126+
if len(d.data) == 0 {
127+
return 0
124128
}
125-
return 0
129+
130+
return d.header.calculateRowSize(len(d.data), d.cfg)
126131
}
127132

128133
// DataSize returns the size of the original data (without header) by reading from the blob header.
129-
// Returns 0 if the header cannot be read.
134+
// Returns 0 if no original data available to determine its size.
130135
func (d *Blob) DataSize() int {
131-
if len(d.rows) == 0 {
132-
return 0
133-
}
134-
135-
// extract size from header (first row)
136-
if len(d.rows[0]) < blobHeaderSize {
137-
return 0
138-
}
139-
140-
return int(binary.BigEndian.Uint32(d.rows[0][math.MaxUint32 : math.MaxUint32*2]))
136+
return len(d.data)
141137
}
142138

143139
// Size returns the total size of the blob including the header overhead.
144-
// Returns 0 if the size cannot be determined.
140+
// Returns 0 if no original data available to determine blob size.
145141
func (d *Blob) Size() int {
146142
dataSize := d.DataSize()
147143
if dataSize == 0 {
@@ -150,6 +146,12 @@ func (d *Blob) Size() int {
150146
return blobHeaderSize + dataSize
151147
}
152148

149+
// Data returns the cached original data (without header).
150+
// Returns nil if the data has not been decoded yet (call Reconstruct first for received blobs).
151+
func (d *Blob) Data() []byte {
152+
return d.data
153+
}
154+
153155
// Row returns the [rsema1d.RowProof] for the given index from the extended data.
154156
func (d *Blob) Row(index int) (*rsema1d.RowProof, error) {
155157
if d.extendedData == nil {
@@ -160,66 +162,148 @@ func (d *Blob) Row(index int) (*rsema1d.RowProof, error) {
160162
}
161163

162164
const (
	// uint8SizeBytes is the width of a uint8 in bytes.
	uint8SizeBytes = 1
	// uint32SizeBytes is the width of a uint32 in bytes.
	uint32SizeBytes = 4
	// blobHeaderSize is the total size of the blob header in bytes.
	// Wire format: 1 byte version (uint8) + 4 bytes blob size (uint32).
	blobHeaderSize = uint8SizeBytes + uint32SizeBytes
)
167173

168-
// calculateRowSize computes the row size for a given data length.
169-
// Row size must be a multiple of RowSizeMin and is calculated as:
170-
// ceil(dataLen / OriginalRows) rounded up to the nearest multiple of RowSizeMin.
171-
// Prepends blob header size automatically.
172-
func (d *Blob) calculateRowSize(dataLen int) int {
173-
dataLen += blobHeaderSize
174-
// calculate minimum row size needed
175-
minRowSize := (dataLen + d.cfg.OriginalRows - 1) / d.cfg.OriginalRows // ceil(dataLen / OriginalRows)
174+
// blobHeaderV0 represents the version 0 blob header at the start of the first row.
175+
// Format: 1 byte version (uint8, always 0) + 4 bytes data size (uint32)
176+
type blobHeaderV0 struct {
177+
dataSize uint32
178+
}
176179

177-
// round up to nearest multiple of RowSizeMin
178-
if minRowSize%d.cfg.RowSizeMin != 0 {
179-
minRowSize = ((minRowSize / d.cfg.RowSizeMin) + 1) * d.cfg.RowSizeMin
180+
// newBlobHeaderV0 creates a new version 0 blob header with the given data size.
181+
// The version field is implicitly 0 for this header type.
182+
func newBlobHeaderV0(dataSize int) blobHeaderV0 {
183+
return blobHeaderV0{
184+
dataSize: uint32(dataSize),
180185
}
181-
182-
return minRowSize
183186
}
184187

185-
// splitIntoRows splits data into a 2D byte slice where each slice is row data.
186-
// The first row is prefixed with a header containing the blob version and original data size,
187-
// avoiding a full data copy. Returns OriginalRows rows of rowSize bytes each, padding with zeros as needed.
188-
// Uses slices from the original data when possible, only allocating for the header row and padding.
189-
func (d *Blob) splitIntoRows(data []byte, rowSize int) [][]byte {
190-
rows := make([][]byte, d.cfg.OriginalRows)
188+
// encodeToRows encodes the data into rows with version 0 header format.
189+
// Returns OriginalRows rows of calculated rowSize bytes each, padding with zeros as needed.
190+
// The first row contains the header followed by data.
191+
func (h blobHeaderV0) encodeToRows(data []byte, cfg BlobConfig) [][]byte {
192+
rowSize := h.calculateRowSize(len(data), cfg)
193+
rows := make([][]byte, cfg.OriginalRows)
191194

192-
// first row: allocate and write header + beginning of data
195+
// First row: allocate and write header + beginning of data
193196
rows[0] = make([]byte, rowSize)
194-
binary.BigEndian.PutUint32(rows[0][0:math.MaxUint32], d.cfg.BlobVersion)
195-
binary.BigEndian.PutUint32(rows[0][math.MaxUint32:math.MaxUint32*2], uint32(len(data)))
197+
h.encode(rows[0])
196198

197-
// copy as much data as fits in the first row after the header
199+
// Copy as much data as fits in the first row after the header
198200
firstRowDataSize := rowSize - blobHeaderSize
199201
if firstRowDataSize > len(data) {
200202
firstRowDataSize = len(data)
201203
}
202204
copy(rows[0][blobHeaderSize:], data[:firstRowDataSize])
203205

204-
// remaining rows: use slices from data (offset by what we already used)
206+
// Remaining rows: use slices from data (offset by what we already used)
205207
dataOffset := firstRowDataSize
206-
for i := 1; i < d.cfg.OriginalRows; i++ {
208+
for i := 1; i < cfg.OriginalRows; i++ {
207209
start := dataOffset
208210
end := start + rowSize
209211
dataOffset += rowSize
210212

211213
if end <= len(data) {
212-
// full row available in data - use slice directly
214+
// Full row available in data - use slice directly
213215
rows[i] = data[start:end:end]
214216
continue
215217
}
216-
// some or no data left - allocate zero-filled padded row
218+
// Some or no data left - allocate zero-filled padded row
217219
rows[i] = make([]byte, rowSize)
218220
if start < len(data) {
219-
// partial row - insert the remaining data into the row
221+
// Partial row - insert the remaining data into the row
220222
copy(rows[i], data[start:])
221223
}
222224
}
223225

224226
return rows
225227
}
228+
229+
// decodeFromRows decodes the data from rows with version 0 header format.
230+
// Decodes the header from the first row, validates it, then extracts the original data.
231+
// Returns error if rows are invalid, header cannot be decoded, or data cannot be extracted.
232+
func (h *blobHeaderV0) decodeFromRows(rows [][]byte, cfg BlobConfig) ([]byte, error) {
233+
if len(rows) == 0 {
234+
return nil, fmt.Errorf("no rows to decode")
235+
}
236+
237+
if len(rows[0]) < blobHeaderSize {
238+
return nil, fmt.Errorf("first row too small: need at least %d bytes for header, got %d", blobHeaderSize, len(rows[0]))
239+
}
240+
241+
// decode header from first row
242+
if err := h.decode(rows[0]); err != nil {
243+
return nil, fmt.Errorf("decoding header: %w", err)
244+
}
245+
246+
size := blobHeaderSize + int(h.dataSize)
247+
248+
// Pre-allocate the exact size needed
249+
data := make([]byte, size)
250+
251+
// Copy data from rows into the pre-allocated buffer
252+
copied := 0
253+
for i := 0; i < cfg.OriginalRows && copied < size; i++ {
254+
rowData := rows[i]
255+
if len(rowData) == 0 {
256+
continue
257+
}
258+
259+
// Determine how much to copy from this row
260+
toCopy := len(rowData)
261+
if copied+toCopy > size {
262+
toCopy = size - copied
263+
}
264+
265+
copy(data[copied:], rowData[:toCopy])
266+
copied += toCopy
267+
}
268+
269+
if copied < size {
270+
return nil, fmt.Errorf("not enough data in rows: copied %d bytes, need %d", copied, size)
271+
}
272+
273+
// Return original data without header
274+
return data[blobHeaderSize:size:size], nil
275+
}
276+
277+
// calculateRowSize computes the row size for the given data length and config.
278+
// Row size is calculated as ceil((dataLen + headerSize) / OriginalRows),
279+
// rounded up to the nearest multiple of RowSizeMin.
280+
func (h blobHeaderV0) calculateRowSize(dataLen int, cfg BlobConfig) int {
281+
totalLen := dataLen + blobHeaderSize
282+
minRowSize := (totalLen + cfg.OriginalRows - 1) / cfg.OriginalRows // ceil(totalLen / OriginalRows)
283+
284+
// Round up to nearest multiple of RowSizeMin
285+
if minRowSize%cfg.RowSizeMin != 0 {
286+
minRowSize = ((minRowSize / cfg.RowSizeMin) + 1) * cfg.RowSizeMin
287+
}
288+
289+
return minRowSize
290+
}
291+
292+
// encode writes the version 0 blob header into the provided buffer.
293+
// The buffer must be at least blobHeaderSize bytes long.
294+
// Always writes version byte as 0.
295+
func (h blobHeaderV0) encode(buf []byte) {
296+
buf[0] = 0 // version 0
297+
binary.BigEndian.PutUint32(buf[uint8SizeBytes:blobHeaderSize], h.dataSize)
298+
}
299+
300+
// decode reads the blob header from the provided buffer.
301+
// The buffer must be at least blobHeaderSize bytes long.
302+
// Returns an error if the version byte is not 0.
303+
func (h *blobHeaderV0) decode(buf []byte) error {
304+
if buf[0] != 0 {
305+
return fmt.Errorf("invalid blob version: expected 0, got %d", buf[0])
306+
}
307+
h.dataSize = binary.BigEndian.Uint32(buf[uint8SizeBytes:blobHeaderSize])
308+
return nil
309+
}

0 commit comments

Comments
 (0)