|
2 | 2 | import weakref |
3 | 3 |
|
4 | 4 | from asdf import constants |
5 | | -from asdf.exceptions import AsdfBlockIndexWarning, AsdfWarning |
| 5 | +from asdf.exceptions import AsdfBlockIndexWarning, AsdfWarning, DelimiterNotFoundError |
6 | 6 |
|
7 | 7 | from . import io as bio |
8 | 8 | from .exceptions import BlockIndexError |
@@ -122,55 +122,42 @@ def _read_blocks_serially(fd, memmap=False, lazy_load=False, validate_checksums= |
122 | 122 | For parameter and return value descriptions see `read_blocks`. |
123 | 123 | """ |
124 | 124 | blocks = [] |
125 | | - buff = b"" |
126 | 125 | magic_len = len(constants.BLOCK_MAGIC) |
127 | | - while True: |
128 | | - # the expectation is that this will begin PRIOR to the block magic |
129 | | - # read 4 bytes |
130 | | - if not after_magic: |
131 | | - buff += fd.read(magic_len - len(buff)) |
132 | | - if len(buff) == 0: |
133 | | - # we are done, there are no more blocks and no index |
134 | | - break |
135 | | - elif len(buff) < magic_len: |
136 | | - # we have less than magic_len bytes, this is likely an error |
137 | | - # in the input file/bytes |
138 | | - if all([b == 0 for b in buff]): |
139 | | - # if these are all 0, assume this was a 'truncated' file |
140 | | - # so don't issue a warning |
141 | | - break |
142 | | - # if these are non-0 bytes issue a warning that the file |
143 | | - # is likely corrupt |
144 | | - msg = f"Read invalid bytes {buff!r} after blocks, your file might be corrupt" |
145 | | - warnings.warn(msg, AsdfWarning) |
146 | | - break |
147 | | - |
148 | | - if buff == constants.INDEX_HEADER[:magic_len]: |
149 | | - # we hit the block index, which is not useful here |
150 | | - break |
151 | | - |
152 | | - if after_magic or buff == constants.BLOCK_MAGIC: |
153 | | - # this is another block |
154 | | - offset, header, data_offset, data = bio.read_block(fd, memmap=memmap, lazy_load=lazy_load) |
155 | | - blocks.append( |
156 | | - ReadBlock( |
157 | | - offset, fd, memmap, lazy_load, validate_checksums, header=header, data_offset=data_offset, data=data |
158 | | - ) |
| 126 | + |
| 127 | + if not after_magic: |
| 128 | + # seek until the first magic is found |
| 129 | + try: |
| 130 | + fd.seek_until(b"(" + constants.BLOCK_MAGIC + b")", magic_len) |
| 131 | + except DelimiterNotFoundError: |
| 132 | + return blocks |
| 133 | + after_magic = True |
| 134 | + |
| 135 | + buff = constants.BLOCK_MAGIC |
| 136 | + while buff == constants.BLOCK_MAGIC: |
| 137 | + # read the block |
| 138 | + offset, header, data_offset, data = bio.read_block(fd, memmap=memmap, lazy_load=lazy_load) |
| 139 | + blocks.append( |
| 140 | + ReadBlock( |
| 141 | + offset, fd, memmap, lazy_load, validate_checksums, header=header, data_offset=data_offset, data=data |
159 | 142 | ) |
160 | | - if blocks[-1].header["flags"] & constants.BLOCK_FLAG_STREAMED: |
161 | | - # a file can only have 1 streamed block and it must be at the end so we |
162 | | - # can stop looking for more blocks |
163 | | - break |
164 | | - buff = b"" |
165 | | - after_magic = False |
166 | | - else: |
167 | | - if len(blocks) or buff[0] != 0: |
168 | | - # if this is not the first block or we haven't found any |
169 | | - # blocks and the first byte is non-zero |
170 | | - msg = f"Invalid bytes while reading blocks {buff}" |
171 | | - raise OSError(msg) |
172 | | - # this is the first block, allow empty bytes before block |
173 | | - buff = buff.strip(b"\0") |
| 143 | + ) |
| 144 | + if blocks[-1].header["flags"] & constants.BLOCK_FLAG_STREAMED: |
| 145 | + # a file can only have 1 streamed block and it must be at the end so we |
| 146 | + # can stop looking for more blocks |
| 147 | + return blocks |
| 148 | + |
| 149 | + # check for the next block |
| 150 | + buff = fd.read(magic_len) |
| 151 | + |
| 152 | + # check remaining bytes |
| 153 | + if buff == constants.INDEX_HEADER[: len(buff)]: |
| 154 | + # remaining bytes are the start of the block index |
| 155 | + return blocks |
| 156 | + if buff == b"\0" * len(buff): |
| 157 | + # remaining bytes are null |
| 158 | + return blocks |
| 159 | + msg = f"Read invalid bytes {buff!r} after blocks, your file might be corrupt" |
| 160 | + warnings.warn(msg, AsdfWarning) |
174 | 161 | return blocks |
175 | 162 |
|
176 | 163 |
|
|
0 commit comments