@@ -5,18 +5,19 @@ import (
55 "encoding/hex"
66 "errors"
77 "fmt"
8- "math"
98 "runtime"
109
1110 "github.com/celestiaorg/rsema1d"
1211 "github.com/celestiaorg/rsema1d/field"
1312)
1413
// ErrBlobTooLarge is returned when the blob size exceeds MaxBlobSize.
var ErrBlobTooLarge = errors.New("blob size exceeds maximum allowed size")
1718
18- // Commitment is a commitment to fibre [Blob] .
19- // TODO(@Wondertan): merge with rsema1d.Commitment and move these methods.
19+ // Commitment is a commitment to a blob .
20+ // TODO(@Wondertan): merge with rsema1d.Commitment once it has these methods.
2021type Commitment rsema1d.Commitment
2122
2223// UnmarshalBinary decodes a [Commitment] from bytes.
@@ -47,7 +48,7 @@ type BlobConfig struct {
4748 // MaxBlobSize is the maximum allowed blob size.
4849 MaxBlobSize int
4950 // BlobVersion is the version of the row format.
50- BlobVersion uint32
51+ BlobVersion uint8
5152 // CodingWorkers is the number of workers to use for encoding and decoding rsema1d.
5253 CodingWorkers int
5354}
@@ -72,32 +73,34 @@ type Blob struct {
7273 commitment Commitment
7374 rlcOrig []field.GF128
7475
75- // rows holds the shards for both encoded data and reconstruction.
76- rows [][]byte
76+ // holds meta fields about the blob
77+ header blobHeaderV0
78+ // data holds the decoded original data (without header).
79+ data []byte
7780}
7881
79- // NewBlob creates a new [Blob] instance by encoding the original data.
80- // It takes the original data and a [BlobConfig].
81- // The data is prefixed with a header containing the blob version and original data size.
82- func NewBlob (originalData []byte , cfg BlobConfig ) (d * Blob , err error ) {
83- if len (originalData ) == 0 {
82+ // NewBlob creates a new [Blob] instance by encoding the data.
83+ // It takes the data and a [BlobConfig].
84+ // The data is prefixed with a header containing the blob version and data size.
85+ func NewBlob (data []byte , cfg BlobConfig ) (d * Blob , err error ) {
86+ if len (data ) == 0 {
8487 return nil , fmt .Errorf ("data cannot be empty" )
8588 }
86- if len (originalData ) > cfg .MaxBlobSize {
87- return nil , fmt .Errorf ("%w: data size %d exceeds maximum %d" , ErrBlobTooLarge , len (originalData ), cfg .MaxBlobSize )
89+ if len (data ) > cfg .MaxBlobSize {
90+ return nil , fmt .Errorf ("%w: data size %d exceeds maximum %d" , ErrBlobTooLarge , len (data ), cfg .MaxBlobSize )
8891 }
8992
9093 d = & Blob {
91- cfg : cfg ,
94+ cfg : cfg ,
95+ header : newBlobHeaderV0 (len (data )),
96+ data : data ,
9297 }
9398
94- rowSize := d .calculateRowSize (len (originalData ))
95- d .rows = d .splitIntoRows (originalData , rowSize )
96-
97- d .extendedData , d .commitment , d .rlcOrig , err = rsema1d .Encode (d .rows , & rsema1d.Config {
99+ rows := d .header .encodeToRows (data , cfg )
100+ d .extendedData , d .commitment , d .rlcOrig , err = rsema1d .Encode (rows , & rsema1d.Config {
98101 K : cfg .OriginalRows ,
99102 N : cfg .ParityRows ,
100- RowSize : rowSize ,
103+ RowSize : len ( rows [ 0 ]) ,
101104 WorkerCount : cfg .CodingWorkers ,
102105 })
103106 if err != nil {
@@ -109,7 +112,7 @@ func NewBlob(originalData []byte, cfg BlobConfig) (d *Blob, err error) {
109112
110113// Commitment returns the commitment to the blob.
111114func (d * Blob ) Commitment () Commitment {
112- return d .commitment
115+ return Commitment ( d .commitment )
113116}
114117
115118// RLCOrig returns the original RLC coefficients.
@@ -118,30 +121,23 @@ func (d *Blob) RLCOrig() []field.GF128 {
118121}
119122
120123// RowSize returns the size of each row in bytes.
124+ // Returns 0 if no original data available to determine row size.
121125func (d * Blob ) RowSize () int {
122- if len (d .rows ) > 0 && len ( d . rows [ 0 ]) > 0 {
123- return len ( d . rows [ 0 ])
126+ if len (d .data ) == 0 {
127+ return 0
124128 }
125- return 0
129+
130+ return d .header .calculateRowSize (len (d .data ), d .cfg )
126131}
127132
128133// DataSize returns the size of the original data (without header) by reading from the blob header.
129- // Returns 0 if the header cannot be read .
134+ // Returns 0 if no original data available to determine its size .
130135func (d * Blob ) DataSize () int {
131- if len (d .rows ) == 0 {
132- return 0
133- }
134-
135- // extract size from header (first row)
136- if len (d .rows [0 ]) < blobHeaderSize {
137- return 0
138- }
139-
140- return int (binary .BigEndian .Uint32 (d .rows [0 ][math .MaxUint32 : math .MaxUint32 * 2 ]))
136+ return len (d .data )
141137}
142138
143139// Size returns the total size of the blob including the header overhead.
144- // Returns 0 if the size cannot be determined .
140+ // Returns 0 if no original data available to determine blob size .
145141func (d * Blob ) Size () int {
146142 dataSize := d .DataSize ()
147143 if dataSize == 0 {
@@ -150,6 +146,12 @@ func (d *Blob) Size() int {
150146 return blobHeaderSize + dataSize
151147}
152148
149+ // Data returns the cached original data (without header).
150+ // Returns nil if the data has not been decoded yet (call Reconstruct first for received blobs).
151+ func (d * Blob ) Data () []byte {
152+ return d .data
153+ }
154+
153155// Row returns the [rsema1d.RowProof] for the given index from the extended data.
154156func (d * Blob ) Row (index int ) (* rsema1d.RowProof , error ) {
155157 if d .extendedData == nil {
@@ -160,66 +162,148 @@ func (d *Blob) Row(index int) (*rsema1d.RowProof, error) {
160162}
161163
const (
	// uint32SizeBytes is the size of a uint32 in bytes.
	uint32SizeBytes = 4
	// uint8SizeBytes is the size of a uint8 in bytes.
	uint8SizeBytes = 1

	// blobHeaderSize is the size of the blob header in bytes.
	// Format: 1 byte version (uint8) + 4 bytes blob size (uint32).
	blobHeaderSize = uint8SizeBytes + uint32SizeBytes
)
167173
// blobHeaderV0 is the version 0 blob header stored at the start of the
// first row.
// Wire format: 1 byte version (uint8, always 0) followed by 4 bytes
// big-endian data size (uint32).
type blobHeaderV0 struct {
	// dataSize is the length in bytes of the original payload (header excluded).
	dataSize uint32
}
176179
177- // round up to nearest multiple of RowSizeMin
178- if minRowSize % d .cfg .RowSizeMin != 0 {
179- minRowSize = ((minRowSize / d .cfg .RowSizeMin ) + 1 ) * d .cfg .RowSizeMin
180+ // newBlobHeaderV0 creates a new version 0 blob header with the given data size.
181+ // The version field is implicitly 0 for this header type.
182+ func newBlobHeaderV0 (dataSize int ) blobHeaderV0 {
183+ return blobHeaderV0 {
184+ dataSize : uint32 (dataSize ),
180185 }
181-
182- return minRowSize
183186}
184187
185- // splitIntoRows splits data into a 2D byte slice where each slice is row data .
186- // The first row is prefixed with a header containing the blob version and original data size,
187- // avoiding a full data copy. Returns OriginalRows rows of rowSize bytes each, padding with zeros as needed .
188- // Uses slices from the original data when possible, only allocating for the header row and padding.
189- func ( d * Blob ) splitIntoRows ( data [] byte , rowSize int ) [][] byte {
190- rows := make ([][]byte , d . cfg .OriginalRows )
188+ // encodeToRows encodes the data into rows with version 0 header format .
189+ // Returns OriginalRows rows of calculated rowSize bytes each, padding with zeros as needed.
190+ // The first row contains the header followed by data .
191+ func ( h blobHeaderV0 ) encodeToRows ( data [] byte , cfg BlobConfig ) [][] byte {
192+ rowSize := h . calculateRowSize ( len ( data ), cfg )
193+ rows := make ([][]byte , cfg .OriginalRows )
191194
192- // first row: allocate and write header + beginning of data
195+ // First row: allocate and write header + beginning of data
193196 rows [0 ] = make ([]byte , rowSize )
194- binary .BigEndian .PutUint32 (rows [0 ][0 :math .MaxUint32 ], d .cfg .BlobVersion )
195- binary .BigEndian .PutUint32 (rows [0 ][math .MaxUint32 :math .MaxUint32 * 2 ], uint32 (len (data )))
197+ h .encode (rows [0 ])
196198
197- // copy as much data as fits in the first row after the header
199+ // Copy as much data as fits in the first row after the header
198200 firstRowDataSize := rowSize - blobHeaderSize
199201 if firstRowDataSize > len (data ) {
200202 firstRowDataSize = len (data )
201203 }
202204 copy (rows [0 ][blobHeaderSize :], data [:firstRowDataSize ])
203205
204- // remaining rows: use slices from data (offset by what we already used)
206+ // Remaining rows: use slices from data (offset by what we already used)
205207 dataOffset := firstRowDataSize
206- for i := 1 ; i < d . cfg .OriginalRows ; i ++ {
208+ for i := 1 ; i < cfg .OriginalRows ; i ++ {
207209 start := dataOffset
208210 end := start + rowSize
209211 dataOffset += rowSize
210212
211213 if end <= len (data ) {
212- // full row available in data - use slice directly
214+ // Full row available in data - use slice directly
213215 rows [i ] = data [start :end :end ]
214216 continue
215217 }
216- // some or no data left - allocate zero-filled padded row
218+ // Some or no data left - allocate zero-filled padded row
217219 rows [i ] = make ([]byte , rowSize )
218220 if start < len (data ) {
219- // partial row - insert the remaining data into the row
221+ // Partial row - insert the remaining data into the row
220222 copy (rows [i ], data [start :])
221223 }
222224 }
223225
224226 return rows
225227}
228+
229+ // decodeFromRows decodes the data from rows with version 0 header format.
230+ // Decodes the header from the first row, validates it, then extracts the original data.
231+ // Returns error if rows are invalid, header cannot be decoded, or data cannot be extracted.
232+ func (h * blobHeaderV0 ) decodeFromRows (rows [][]byte , cfg BlobConfig ) ([]byte , error ) {
233+ if len (rows ) == 0 {
234+ return nil , fmt .Errorf ("no rows to decode" )
235+ }
236+
237+ if len (rows [0 ]) < blobHeaderSize {
238+ return nil , fmt .Errorf ("first row too small: need at least %d bytes for header, got %d" , blobHeaderSize , len (rows [0 ]))
239+ }
240+
241+ // decode header from first row
242+ if err := h .decode (rows [0 ]); err != nil {
243+ return nil , fmt .Errorf ("decoding header: %w" , err )
244+ }
245+
246+ size := blobHeaderSize + int (h .dataSize )
247+
248+ // Pre-allocate the exact size needed
249+ data := make ([]byte , size )
250+
251+ // Copy data from rows into the pre-allocated buffer
252+ copied := 0
253+ for i := 0 ; i < cfg .OriginalRows && copied < size ; i ++ {
254+ rowData := rows [i ]
255+ if len (rowData ) == 0 {
256+ continue
257+ }
258+
259+ // Determine how much to copy from this row
260+ toCopy := len (rowData )
261+ if copied + toCopy > size {
262+ toCopy = size - copied
263+ }
264+
265+ copy (data [copied :], rowData [:toCopy ])
266+ copied += toCopy
267+ }
268+
269+ if copied < size {
270+ return nil , fmt .Errorf ("not enough data in rows: copied %d bytes, need %d" , copied , size )
271+ }
272+
273+ // Return original data without header
274+ return data [blobHeaderSize :size :size ], nil
275+ }
276+
277+ // calculateRowSize computes the row size for the given data length and config.
278+ // Row size is calculated as ceil((dataLen + headerSize) / OriginalRows),
279+ // rounded up to the nearest multiple of RowSizeMin.
280+ func (h blobHeaderV0 ) calculateRowSize (dataLen int , cfg BlobConfig ) int {
281+ totalLen := dataLen + blobHeaderSize
282+ minRowSize := (totalLen + cfg .OriginalRows - 1 ) / cfg .OriginalRows // ceil(totalLen / OriginalRows)
283+
284+ // Round up to nearest multiple of RowSizeMin
285+ if minRowSize % cfg .RowSizeMin != 0 {
286+ minRowSize = ((minRowSize / cfg .RowSizeMin ) + 1 ) * cfg .RowSizeMin
287+ }
288+
289+ return minRowSize
290+ }
291+
292+ // encode writes the version 0 blob header into the provided buffer.
293+ // The buffer must be at least blobHeaderSize bytes long.
294+ // Always writes version byte as 0.
295+ func (h blobHeaderV0 ) encode (buf []byte ) {
296+ buf [0 ] = 0 // version 0
297+ binary .BigEndian .PutUint32 (buf [uint8SizeBytes :blobHeaderSize ], h .dataSize )
298+ }
299+
300+ // decode reads the blob header from the provided buffer.
301+ // The buffer must be at least blobHeaderSize bytes long.
302+ // Returns an error if the version byte is not 0.
303+ func (h * blobHeaderV0 ) decode (buf []byte ) error {
304+ if buf [0 ] != 0 {
305+ return fmt .Errorf ("invalid blob version: expected 0, got %d" , buf [0 ])
306+ }
307+ h .dataSize = binary .BigEndian .Uint32 (buf [uint8SizeBytes :blobHeaderSize ])
308+ return nil
309+ }
0 commit comments