Skip to content

Commit 97a9fe4

Browse files
authored
Merge pull request #1918 from braingram/junk_data
allow non-null post tree pre block padding
2 parents a7af342 + 74010e7 commit 97a9fe4

3 files changed

Lines changed: 48 additions & 51 deletions

File tree

asdf/_block/reader.py

Lines changed: 35 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import weakref
33

44
from asdf import constants
5-
from asdf.exceptions import AsdfBlockIndexWarning, AsdfWarning
5+
from asdf.exceptions import AsdfBlockIndexWarning, AsdfWarning, DelimiterNotFoundError
66

77
from . import io as bio
88
from .exceptions import BlockIndexError
@@ -122,55 +122,42 @@ def _read_blocks_serially(fd, memmap=False, lazy_load=False, validate_checksums=
122122
For parameter and return value descriptions see `read_blocks`.
123123
"""
124124
blocks = []
125-
buff = b""
126125
magic_len = len(constants.BLOCK_MAGIC)
127-
while True:
128-
# the expectation is that this will begin PRIOR to the block magic
129-
# read 4 bytes
130-
if not after_magic:
131-
buff += fd.read(magic_len - len(buff))
132-
if len(buff) == 0:
133-
# we are done, there are no more blocks and no index
134-
break
135-
elif len(buff) < magic_len:
136-
# we have less than magic_len bytes, this is likely an error
137-
# in the input file/bytes
138-
if all([b == 0 for b in buff]):
139-
# if these are all 0, assume this was a 'truncated' file
140-
# so don't issue a warning
141-
break
142-
# if these are non-0 bytes issue a warning that the file
143-
# is likely corrupt
144-
msg = f"Read invalid bytes {buff!r} after blocks, your file might be corrupt"
145-
warnings.warn(msg, AsdfWarning)
146-
break
147-
148-
if buff == constants.INDEX_HEADER[:magic_len]:
149-
# we hit the block index, which is not useful here
150-
break
151-
152-
if after_magic or buff == constants.BLOCK_MAGIC:
153-
# this is another block
154-
offset, header, data_offset, data = bio.read_block(fd, memmap=memmap, lazy_load=lazy_load)
155-
blocks.append(
156-
ReadBlock(
157-
offset, fd, memmap, lazy_load, validate_checksums, header=header, data_offset=data_offset, data=data
158-
)
126+
127+
if not after_magic:
128+
# seek until the first magic is found
129+
try:
130+
fd.seek_until(b"(" + constants.BLOCK_MAGIC + b")", magic_len)
131+
except DelimiterNotFoundError:
132+
return blocks
133+
after_magic = True
134+
135+
buff = constants.BLOCK_MAGIC
136+
while buff == constants.BLOCK_MAGIC:
137+
# read the block
138+
offset, header, data_offset, data = bio.read_block(fd, memmap=memmap, lazy_load=lazy_load)
139+
blocks.append(
140+
ReadBlock(
141+
offset, fd, memmap, lazy_load, validate_checksums, header=header, data_offset=data_offset, data=data
159142
)
160-
if blocks[-1].header["flags"] & constants.BLOCK_FLAG_STREAMED:
161-
# a file can only have 1 streamed block and it must be at the end so we
162-
# can stop looking for more blocks
163-
break
164-
buff = b""
165-
after_magic = False
166-
else:
167-
if len(blocks) or buff[0] != 0:
168-
# if this is not the first block or we haven't found any
169-
# blocks and the first byte is non-zero
170-
msg = f"Invalid bytes while reading blocks {buff}"
171-
raise OSError(msg)
172-
# this is the first block, allow empty bytes before block
173-
buff = buff.strip(b"\0")
143+
)
144+
if blocks[-1].header["flags"] & constants.BLOCK_FLAG_STREAMED:
145+
# a file can only have 1 streamed block and it must be at the end so we
146+
# can stop looking for more blocks
147+
return blocks
148+
149+
# check for the next block
150+
buff = fd.read(magic_len)
151+
152+
# check remaining bytes
153+
if buff == constants.INDEX_HEADER[: len(buff)]:
154+
# remaining bytes are the start of the block index
155+
return blocks
156+
if buff == b"\0" * len(buff):
157+
# remaining bytes are null
158+
return blocks
159+
msg = f"Read invalid bytes {buff!r} after blocks, your file might be corrupt"
160+
warnings.warn(msg, AsdfWarning)
174161
return blocks
175162

176163

asdf/_tests/_block/test_reader.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,18 @@ def test_read(tmp_path, lazy_load, memmap, with_index, validate_checksums, paddi
7878
assert r[0].cached_data is r[0].cached_data
7979

8080

81-
def test_read_invalid_padding():
82-
with gen_blocks(padding=1, padding_byte=b"\1") as (fd, check):
83-
with pytest.raises(OSError, match="Invalid bytes.*"):
81+
@pytest.mark.parametrize("padding", (1, 4, 7))
82+
@pytest.mark.parametrize("padding_byte", (b"\1", b"\0", b" ", b"\xd3", b"B", b"L", b"K", b"\xd3BL"))
83+
def test_read_valid_padding(padding, padding_byte):
84+
"""Test that reader allows padding bytes before the first block"""
85+
with gen_blocks(padding=padding, padding_byte=padding_byte) as (fd, check):
86+
check(read_blocks(fd))
87+
88+
89+
@pytest.mark.parametrize("padding_byte", (b"\xd3BLK", b" \xd3BLK"))
90+
def test_read_invalid_padding(padding_byte):
91+
with gen_blocks(padding=1, padding_byte=padding_byte) as (fd, check):
92+
with pytest.raises(ValueError, match="buffer is smaller than requested size"):
8493
check(read_blocks(fd))
8594

8695

changes/1918.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow non-null bytes before the first block.

0 commit comments

Comments
 (0)