Skip to content

Commit eb403e1

Browse files
authored
minor improvements to file scanners (#22)
1 parent fb950ba commit eb403e1

File tree

8 files changed

+51
-114
lines changed

8 files changed

+51
-114
lines changed

internal/format/au.go

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,19 @@ func ScanSunAudio(r *Reader) (*ScanResult, error) {
6363
return nil, fmt.Errorf("failed to read AU header: %w", err)
6464
}
6565

66-
// 1. Check Magic Number (Big Endian)
66+
// Check Magic Number (Big Endian)
6767
magic := binary.BigEndian.Uint32(headerBuf[0:4])
6868
if magic != AU_MAGIC {
6969
return nil, fmt.Errorf("reader does not start with AU magic signature")
7070
}
7171

72-
// 2. Read Header Size (Big Endian)
72+
// Read Header Size (Big Endian)
7373
headerSize := binary.BigEndian.Uint32(headerBuf[4:8])
7474
if headerSize < MIN_AU_HEADER_SIZE {
7575
return nil, fmt.Errorf("AU header size (%d) is invalid", headerSize)
7676
}
7777

78-
// 3. Read Data Size (Big Endian)
78+
// Read Data Size (Big Endian)
7979
dataSize := binary.BigEndian.Uint32(headerBuf[8:12])
8080

8181
bytesRead := uint64(MIN_AU_HEADER_SIZE)
@@ -97,32 +97,24 @@ func ScanSunAudio(r *Reader) (*ScanResult, error) {
9797

9898
var totalAUSize uint64
9999
if dataSize == AU_DATA_SIZE_UNKNOWN {
100-
// Data extends to the end of the file.
101-
// We've read the header, so the remaining size is what's left in the reader.
102-
// Since we don't know the full length, we'll return the bytes read so far.
103-
// The caller would typically read until EOF for the data.
104-
return &ScanResult{Size: bytesRead}, nil // Indicate that the valid part up to the header is found
105-
} else {
106-
// Data size is explicitly defined.
107-
totalAUSize = uint64(headerSize) + uint64(dataSize)
108-
109-
// We need to advance the reader past the data chunk for the returned bytesRead to be accurate.
110-
// Calculate how many bytes of data are yet to be read.
111-
dataBytesToRead := int64(totalAUSize - bytesRead)
112-
113-
if dataBytesToRead > 0 { // Only skip if there's data left to read
114-
skipped, err := io.CopyN(io.Discard, r, dataBytesToRead)
115-
if err != nil {
116-
if err == io.EOF && skipped < dataBytesToRead {
117-
// Data chunk is truncated. The valid AU ends here.
118-
return &ScanResult{Size: bytesRead + uint64(skipped)}, nil
119-
}
120-
return nil, fmt.Errorf("failed to skip AU data: %w", err)
100+
// Data size is not explicitly defined. Return an error for now.
101+
return &ScanResult{Size: bytesRead}, fmt.Errorf("unknown AU file size")
102+
}
103+
104+
totalAUSize = uint64(headerSize) + uint64(dataSize)
105+
106+
bytesToSkip := int(totalAUSize - bytesRead)
107+
108+
if bytesToSkip > 0 {
109+
skipped, err := r.Discard(bytesToSkip)
110+
if err != nil {
111+
if err == io.EOF && skipped < bytesToSkip {
112+
// Data chunk is truncated. The valid AU ends here.
113+
return &ScanResult{Size: bytesRead + uint64(skipped)}, nil
121114
}
122-
bytesRead += uint64(skipped)
115+
return nil, fmt.Errorf("failed to skip AU data: %w", err)
123116
}
117+
bytesRead += uint64(skipped)
124118
}
125-
126-
// If we reach here, we've successfully scanned and potentially skipped all valid AU data.
127119
return &ScanResult{Size: totalAUSize}, nil
128120
}

internal/format/bmp.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func ScanBMP(r *Reader) (*ScanResult, error) {
8181
var bmpHeader BMPHeader
8282
var dibHeader DIBHeader
8383

84-
// 1. Read the BMP File Header (14 bytes)
84+
// Read the BMP File Header (14 bytes)
8585
err := binary.Read(r, binary.LittleEndian, &bmpHeader)
8686
if err != nil {
8787
if errors.Is(err, io.EOF) {
@@ -104,7 +104,7 @@ func ScanBMP(r *Reader) (*ScanResult, error) {
104104
return nil, errors.New("invalid BMP header: data offset is before the BMP file header")
105105
}
106106

107-
// 2. Read the DIB Header (assume BITMAPINFOHEADER for commonality, 40 bytes)
107+
// Read the DIB Header (assume BITMAPINFOHEADER for commonality, 40 bytes)
108108
// We need to peek at the HeaderSize field to determine the exact DIB header type.
109109
// For simplicity, we'll assume BITMAPINFOHEADER (40 bytes) and then read it.
110110
// A more robust solution might read only the HeaderSize first, then seek/read the rest.
@@ -121,7 +121,7 @@ func ScanBMP(r *Reader) (*ScanResult, error) {
121121
}
122122
dibHeader.HeaderSize = binary.LittleEndian.Uint32(buf[:])
123123

124-
// More robust validation of DIB Header fields
124+
// Validate DIB Header fields
125125
if dibHeader.HeaderSize != 40 &&
126126
dibHeader.HeaderSize != 12 && // BITMAPCOREHEADER
127127
dibHeader.HeaderSize != 64 && // BITMAPINFOHEADER V2
@@ -130,8 +130,7 @@ func ScanBMP(r *Reader) (*ScanResult, error) {
130130
return nil, fmt.Errorf("unsupported DIB header size: %d", dibHeader.HeaderSize)
131131
}
132132

133-
// Now read the rest of the DIBHeader based on its size
134-
// We've already read the first 4 bytes (HeaderSize), so adjust the read length.
133+
// Read the rest of the DIBHeader based on its size
135134
n, err = r.Read(buf[4:dibHeader.HeaderSize])
136135
if err != nil {
137136
if errors.Is(err, io.EOF) {

internal/format/gif.go

Lines changed: 14 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ package format
1717
import (
1818
"errors"
1919
"fmt"
20-
"io"
2120
)
2221

2322
var gifFileHeader = FileHeader{
@@ -55,7 +54,7 @@ const (
5554

5655
type gifDecoder struct {
5756
loopCount int
58-
r io.Reader
57+
r *Reader
5958

6059
width, height int
6160
imageFields byte
@@ -77,7 +76,7 @@ func ScanGIF(r *Reader) (*ScanResult, error) {
7776
}
7877

7978
for {
80-
c, err := readByte(d.r.(io.ByteReader))
79+
c, err := d.r.ReadByte()
8180
if err != nil {
8281
return nil, fmt.Errorf("gif: reading frames: %v", err)
8382
}
@@ -108,7 +107,7 @@ func ScanGIF(r *Reader) (*ScanResult, error) {
108107
}
109108

110109
func (d *gifDecoder) readExtension() error {
111-
extension, err := readByte(d.r.(io.ByteReader))
110+
extension, err := d.r.ReadByte()
112111
if err != nil {
113112
return fmt.Errorf("gif: reading extension: %v", err)
114113
}
@@ -121,7 +120,7 @@ func (d *gifDecoder) readExtension() error {
121120
case eComment:
122121
// nothing to do but read the data.
123122
case eApplication:
124-
b, err := readByte(d.r.(io.ByteReader))
123+
b, err := d.r.ReadByte()
125124
if err != nil {
126125
return fmt.Errorf("gif: reading extension: %v", err)
127126
}
@@ -131,7 +130,7 @@ func (d *gifDecoder) readExtension() error {
131130
return fmt.Errorf("gif: unknown extension 0x%.2x", extension)
132131
}
133132
if size > 0 {
134-
if err := readFull(d.r, d.tmp[:size]); err != nil {
133+
if _, err := d.r.Read(d.tmp[:size]); err != nil {
135134
return fmt.Errorf("gif: reading extension: %v", err)
136135
}
137136
}
@@ -162,7 +161,7 @@ func (d *gifDecoder) readExtension() error {
162161
}
163162

164163
func (d *gifDecoder) readGraphicControl() error {
165-
if err := readFull(d.r, d.tmp[:6]); err != nil {
164+
if _, err := d.r.Read(d.tmp[:6]); err != nil {
166165
return fmt.Errorf("gif: can't read graphic control: %s", err)
167166
}
168167
if d.tmp[0] != 4 {
@@ -175,7 +174,7 @@ func (d *gifDecoder) readGraphicControl() error {
175174
}
176175

177176
func (d *gifDecoder) parseImageDescriptorBounds() error {
178-
if err := readFull(d.r, d.tmp[:9]); err != nil {
177+
if _, err := d.r.Read(d.tmp[:9]); err != nil {
179178
return fmt.Errorf("gif: can't read image descriptor: %s", err)
180179
}
181180
left := int(d.tmp[0]) + int(d.tmp[1])<<8
@@ -220,7 +219,7 @@ func (d *gifDecoder) readImageDescriptor() error {
220219
return errors.New("gif: no color table")
221220
}
222221

223-
litWidth, err := readByte(d.r.(io.ByteReader))
222+
litWidth, err := d.r.ReadByte()
224223
if err != nil {
225224
return fmt.Errorf("gif: reading image data: %v", err)
226225
}
@@ -230,15 +229,15 @@ func (d *gifDecoder) readImageDescriptor() error {
230229

231230
// discard LZW encoded blocks
232231
for {
233-
size, err := readByte(d.r.(io.ByteReader)) // read LZW minimum code size
232+
size, err := d.r.ReadByte() // read the length of the next data sub-block
234233
if err != nil {
235234
return fmt.Errorf("gif: reading image data: %v", err)
236235
}
237236
if size == 0 {
238237
// 0 means end of LZW data.
239238
break
240239
}
241-
if err := discard(int(size), d.r); err != nil {
240+
if _, err := d.r.Discard(int(size)); err != nil {
242241
return err
243242
}
244243
}
@@ -248,18 +247,18 @@ func (d *gifDecoder) readImageDescriptor() error {
248247
}
249248

250249
func (d *gifDecoder) readBlock() (int, error) {
251-
n, err := readByte(d.r.(io.ByteReader))
250+
n, err := d.r.ReadByte()
252251
if n == 0 || err != nil {
253252
return 0, err
254253
}
255-
if err := readFull(d.r, d.tmp[:n]); err != nil {
254+
if _, err := d.r.Read(d.tmp[:n]); err != nil {
256255
return 0, err
257256
}
258257
return int(n), nil
259258
}
260259

261260
func (d *gifDecoder) readHeaderAndScreenDescriptor() error {
262-
err := readFull(d.r, d.tmp[:13])
261+
_, err := d.r.Read(d.tmp[:13])
263262
if err != nil {
264263
return fmt.Errorf("gif: reading header: %v", err)
265264
}
@@ -286,28 +285,9 @@ func (d *gifDecoder) readHeaderAndScreenDescriptor() error {
286285

287286
func (d *gifDecoder) skipColorTable(fields byte) error {
288287
n := 1 << (1 + uint(fields&fColorTableBitsMask))
289-
err := readFull(d.r, d.tmp[:3*n])
288+
_, err := d.r.Read(d.tmp[:3*n])
290289
if err != nil {
291290
return fmt.Errorf("gif: reading color table: %s", err)
292291
}
293292
return nil
294293
}
295-
296-
func readByte(r io.ByteReader) (byte, error) {
297-
b, err := r.ReadByte()
298-
if err != nil {
299-
if err == io.EOF {
300-
return 0, io.ErrUnexpectedEOF
301-
}
302-
return 0, fmt.Errorf("gif: reading byte: %v", err)
303-
}
304-
return b, nil
305-
}
306-
307-
func readFull(r io.Reader, b []byte) error {
308-
_, err := io.ReadFull(r, b)
309-
if err == io.EOF {
310-
err = io.ErrUnexpectedEOF
311-
}
312-
return err
313-
}

internal/format/jpeg.go

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ package format
1717

1818
import (
1919
"fmt"
20-
"io"
2120
)
2221

2322
var jpegFileHeader = FileHeader{
@@ -50,11 +49,6 @@ const (
5049
app15Marker = 0xef
5150
)
5251

53-
func discard(n int, r io.Reader) error {
54-
_, err := io.CopyN(io.Discard, r, int64(n))
55-
return err
56-
}
57-
5852
// ScanJPEG attempts to validate a JPEG file read from the start of the
5953
// reader r and determine its total size. This function is adapted from the
6054
// standard library's 'image/jpeg' package's internal scanning logic,
@@ -158,10 +152,10 @@ func ScanJPEG(r *Reader) (*ScanResult, error) {
158152
case sof0Marker, sof1Marker, sof2Marker,
159153
dhtMarker, dqtMarker, sosMarker,
160154
driMarker, app0Marker, app14Marker:
161-
err = discard(n, r)
155+
_, err = r.Discard(n)
162156
default:
163157
if app0Marker <= marker && marker <= app15Marker || marker == comMarker {
164-
err = discard(n, r)
158+
_, err = r.Discard(n)
165159
} else if marker < 0xc0 { // See Table B.1 "Marker code assignments".
166160
err = fmt.Errorf("unknown marker")
167161
} else {

internal/format/pcx.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ func ScanPCX(r *Reader) (*ScanResult, error) {
179179
if header.Encoding == 0 { // Uncompressed
180180
expectedImageDataSize := uint32(header.BytesPerLine) * uint32(header.NumPlanes) * height
181181
// We've only read the header. Now, skip the image data.
182-
skipped, err := io.CopyN(io.Discard, r, int64(expectedImageDataSize))
182+
skipped, err := r.Discard(int(expectedImageDataSize))
183183
if err != nil {
184184
if errors.Is(err, io.EOF) {
185185
return nil, errors.New("unexpected EOF while skipping uncompressed image data")

internal/format/pdf.go

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,23 +41,9 @@ var (
4141
// ScanPDF reads a byte stream from the Reader r, identifies a potential PDF file,
4242
// and returns its carved size.
4343
//
44-
// This function assumes that the entire potential PDF segment can be read into
45-
// memory. For extremely large files (gigabytes), a more advanced streaming
46-
// parser with limited look-behind would be necessary.
47-
//
4844
// It searches for the first occurrence of the standard PDF header (%PDF-X.Y)
4945
// and the last occurrence of the end-of-file marker (%%EOF). The carved size
5046
// is determined by the position of the last %%EOF marker plus its length.
51-
//
52-
// Parameters:
53-
//
54-
// r io.Reader: The input stream from which to read the PDF data.
55-
//
56-
// Returns:
57-
//
58-
// uint64: The size of the carved PDF file in bytes.
59-
// error: An error if the PDF header or EOF marker is not found, or if the
60-
// EOF marker appears before the header.
6147
func ScanPDF(r *Reader) (*ScanResult, error) {
6248
var headerBuf [5]byte
6349
_, err := r.Read(headerBuf[:])

internal/format/wav.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,9 @@ const (
4747
// WAV data, or nil and an error if no valid WAV file is found at the beginning.
4848
// The reader's position will be at the end of the WAV data upon successful return.
4949
func ScanWAV(r *Reader) (*ScanResult, error) {
50-
// We'll use a small buffer for reading headers and sizes.
51-
5250
var headerBuf [8]byte // For ChunkID and ChunkSize
5351

54-
// 1. Check RIFF chunk
52+
// Check RIFF chunk
5553
// Read Offset 0-3: ChunkID "RIFF" and Offset 4-7: ChunkSize
5654
n, err := io.ReadFull(r, headerBuf[:])
5755
if err != nil {
@@ -81,7 +79,7 @@ func ScanWAV(r *Reader) (*ScanResult, error) {
8179

8280
bytesRead := uint64(12) // RIFF (8 bytes) + WAVE (4 bytes)
8381

84-
// 2. Find and parse 'fmt ' sub-chunk
82+
// Find and parse 'fmt ' sub-chunk
8583
fmtChunkFound := false
8684
for bytesRead < uint64(riffChunkSize)+8 { // Ensure we don't read beyond the declared RIFF chunk
8785
n, err = io.ReadFull(r, headerBuf[:])
@@ -130,7 +128,7 @@ func ScanWAV(r *Reader) (*ScanResult, error) {
130128
return nil, fmt.Errorf("missing 'fmt ' sub-chunk")
131129
}
132130

133-
// 3. Find and parse 'data' sub-chunk
131+
// Find and parse 'data' sub-chunk
134132
dataChunkFound := false
135133
dataChunkSize := uint32(0)
136134

@@ -159,9 +157,9 @@ func ScanWAV(r *Reader) (*ScanResult, error) {
159157
}
160158

161159
// Skip over the current chunk's data
162-
skipped, err := io.CopyN(io.Discard, r, int64(chunkSize))
160+
skipped, err := r.Discard(int(chunkSize))
163161
if err != nil {
164-
if err == io.EOF && skipped < int64(chunkSize) {
162+
if err == io.EOF && skipped < int(chunkSize) {
165163
// Truncated chunk data, can't determine full WAV size
166164
bytesRead += uint64(skipped)
167165
return &ScanResult{Size: bytesRead}, nil // Return what was read before truncation

0 commit comments

Comments
 (0)