Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 35 additions & 48 deletions asdf/_block/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import weakref

from asdf import constants
from asdf.exceptions import AsdfBlockIndexWarning, AsdfWarning
from asdf.exceptions import AsdfBlockIndexWarning, AsdfWarning, DelimiterNotFoundError

from . import io as bio
from .exceptions import BlockIndexError
Expand Down Expand Up @@ -122,55 +122,42 @@ def _read_blocks_serially(fd, memmap=False, lazy_load=False, validate_checksums=
For parameter and return value descriptions see `read_blocks`.
"""
blocks = []
buff = b""
magic_len = len(constants.BLOCK_MAGIC)
while True:
# the expectation is that this will begin PRIOR to the block magic
# read 4 bytes
if not after_magic:
buff += fd.read(magic_len - len(buff))
if len(buff) == 0:
# we are done, there are no more blocks and no index
break
elif len(buff) < magic_len:
# we have less than magic_len bytes, this is likely an error
# in the input file/bytes
if all([b == 0 for b in buff]):
# if these are all 0, assume this was a 'truncated' file
# so don't issue a warning
break
# if these are non-0 bytes issue a warning that the file
# is likely corrupt
msg = f"Read invalid bytes {buff!r} after blocks, your file might be corrupt"
warnings.warn(msg, AsdfWarning)
break

if buff == constants.INDEX_HEADER[:magic_len]:
# we hit the block index, which is not useful here
break

if after_magic or buff == constants.BLOCK_MAGIC:
# this is another block
offset, header, data_offset, data = bio.read_block(fd, memmap=memmap, lazy_load=lazy_load)
blocks.append(
ReadBlock(
offset, fd, memmap, lazy_load, validate_checksums, header=header, data_offset=data_offset, data=data
)

if not after_magic:
# seek until the first magic is found
try:
fd.seek_until(b"(" + constants.BLOCK_MAGIC + b")", magic_len)
except DelimiterNotFoundError:
return blocks
after_magic = True

buff = constants.BLOCK_MAGIC
while buff == constants.BLOCK_MAGIC:
# read the block
offset, header, data_offset, data = bio.read_block(fd, memmap=memmap, lazy_load=lazy_load)
blocks.append(
ReadBlock(
offset, fd, memmap, lazy_load, validate_checksums, header=header, data_offset=data_offset, data=data
)
if blocks[-1].header["flags"] & constants.BLOCK_FLAG_STREAMED:
# a file can only have 1 streamed block and it must be at the end so we
# can stop looking for more blocks
break
buff = b""
after_magic = False
else:
if len(blocks) or buff[0] != 0:
# if this is not the first block or we haven't found any
# blocks and the first byte is non-zero
msg = f"Invalid bytes while reading blocks {buff}"
raise OSError(msg)
# this is the first block, allow empty bytes before block
buff = buff.strip(b"\0")
)
if blocks[-1].header["flags"] & constants.BLOCK_FLAG_STREAMED:
# a file can only have 1 streamed block and it must be at the end so we
# can stop looking for more blocks
return blocks

# check for the next block
buff = fd.read(magic_len)

# check remaining bytes
if buff == constants.INDEX_HEADER[: len(buff)]:
# remaining bytes are the start of the block index
return blocks
if buff == b"\0" * len(buff):
# remaining bytes are null
return blocks
msg = f"Read invalid bytes {buff!r} after blocks, your file might be corrupt"
warnings.warn(msg, AsdfWarning)
return blocks


Expand Down
15 changes: 12 additions & 3 deletions asdf/_tests/_block/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,18 @@ def test_read(tmp_path, lazy_load, memmap, with_index, validate_checksums, paddi
assert r[0].cached_data is r[0].cached_data


def test_read_invalid_padding():
with gen_blocks(padding=1, padding_byte=b"\1") as (fd, check):
with pytest.raises(OSError, match="Invalid bytes.*"):
@pytest.mark.parametrize("padding", (1, 4, 7))
@pytest.mark.parametrize("padding_byte", (b"\1", b"\0", b" ", b"\xd3", b"B", b"L", b"K", b"\xd3BL"))
def test_read_valid_padding(padding, padding_byte):
    """Test that reader allows padding bytes before the first block.

    Covers null and non-null padding, including bytes that are prefixes of
    the block magic (presumably ``\\xd3BLK`` — TODO confirm against
    ``constants.BLOCK_MAGIC``) but never the complete magic, so the reader
    must skip past them and still find every real block.
    """
    with gen_blocks(padding=padding, padding_byte=padding_byte) as (fd, check):
        # check() asserts the blocks read back match what gen_blocks wrote
        check(read_blocks(fd))


@pytest.mark.parametrize("padding_byte", (b"\xd3BLK", b" \xd3BLK"))
def test_read_invalid_padding(padding_byte):
    """Test that padding containing the full block magic breaks reading.

    When the padding itself ends with the complete magic sequence
    (presumably ``\\xd3BLK`` — TODO confirm against ``constants.BLOCK_MAGIC``),
    the reader treats the padding as the start of a block and attempts to
    parse a block header from bytes that are not one; with ``padding=1``
    this reads past the available data and is expected to raise ValueError.
    """
    with gen_blocks(padding=1, padding_byte=padding_byte) as (fd, check):
        with pytest.raises(ValueError, match="buffer is smaller than requested size"):
            check(read_blocks(fd))


Expand Down
1 change: 1 addition & 0 deletions changes/1918.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow non-null bytes before the first block.
Loading