Skip to content

Commit 416ac12

Browse files
authored
Merge pull request #2018 from sydduckworth/block-checksum
Support not writing checksums
2 parents 86a120c + b49153d commit 416ac12

11 files changed

Lines changed: 113 additions & 34 deletions

File tree

asdf/_asdf.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ def _tree_finalizer(tagged_tree):
768768
padding = util.calculate_padding(fd.tell(), pad_blocks, fd.block_size)
769769
fd.fast_forward(padding)
770770

771-
def _serial_write(self, fd, pad_blocks, include_block_index):
771+
def _serial_write(self, fd, pad_blocks, include_block_index, write_checksums):
772772
with self._blocks.write_context(fd):
773773
# prep a tree for writing
774774
tree = copy.copy(self._tree)
@@ -777,7 +777,7 @@ def _serial_write(self, fd, pad_blocks, include_block_index):
777777
tree["history"] = copy.deepcopy(self._tree["history"])
778778

779779
self._write_tree(tree, fd, pad_blocks)
780-
self._blocks.write(pad_blocks, include_block_index)
780+
self._blocks.write(pad_blocks, include_block_index, write_checksums)
781781

782782
def update(
783783
self,
@@ -787,6 +787,7 @@ def update(
787787
pad_blocks=False,
788788
include_block_index=True,
789789
version=None,
790+
write_checksums=True,
790791
):
791792
"""
792793
Update the file on disk in place.
@@ -839,6 +840,9 @@ def update(
839840
version : str, optional
840841
Update the ASDF core schemas version of this AsdfFile before
841842
writing.
843+
844+
write_checksums : bool, optional
845+
Compute and write block checksums to the file.
842846
"""
843847

844848
with config_context() as config:
@@ -876,7 +880,12 @@ def update(
876880

877881
def rewrite():
878882
self._fd.seek(0)
879-
self._serial_write(self._fd, pad_blocks, include_block_index)
883+
self._serial_write(
884+
self._fd,
885+
pad_blocks,
886+
include_block_index,
887+
write_checksums,
888+
)
880889
self._fd.truncate()
881890
if self._fd.can_memmap():
882891
self._fd.close_memmap()
@@ -901,7 +910,7 @@ def rewrite():
901910
new_tree_size = tree_fd.tell()
902911

903912
# update blocks
904-
self._blocks.update(new_tree_size, pad_blocks, include_block_index)
913+
self._blocks.update(new_tree_size, pad_blocks, include_block_index, write_checksums)
905914
end_of_file = self._fd.tell()
906915

907916
# now write the tree
@@ -925,6 +934,7 @@ def write_to(
925934
pad_blocks=False,
926935
include_block_index=True,
927936
version=None,
937+
write_checksums=True,
928938
):
929939
"""
930940
Write the ASDF file to the given file-like object.
@@ -987,6 +997,9 @@ def write_to(
987997
version : str, optional
988998
Update the ASDF core schemas version of this AsdfFile before
989999
writing.
1000+
1001+
write_checksums : bool, optional
1002+
Compute and write block checksums to the file.
9901003
"""
9911004
with config_context() as config:
9921005
if all_array_storage is not NotSet:
@@ -1002,7 +1015,7 @@ def write_to(
10021015

10031016
try:
10041017
with generic_io.get_file(fd, mode="w") as fd:
1005-
self._serial_write(fd, pad_blocks, include_block_index)
1018+
self._serial_write(fd, pad_blocks, include_block_index, write_checksums)
10061019
finally:
10071020
if version is not None:
10081021
self.version = previous_version

asdf/_block/io.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,9 @@ def callback():
275275
return offset, header, data_offset, data
276276

277277

278-
def generate_write_header(data, stream=False, compression_kwargs=None, padding=False, fs_block_size=1, **header_kwargs):
278+
def generate_write_header(
279+
data, stream=False, compression_kwargs=None, padding=False, fs_block_size=1, write_checksum=True, **header_kwargs
280+
):
279281
"""
280282
Generate a dict representation of an ASDF block header that can be
281283
used for writing a block.
@@ -307,6 +309,10 @@ def generate_write_header(data, stream=False, compression_kwargs=None, padding=F
307309
The filesystem block size. See the `asdf.util.calculate_padding`
308310
``block_size`` argument for more details.
309311
312+
write_checksum : bool, optional
313+
Compute and write the checksum of the block data.
314+
If disabled then the checksum field is set to 0.
315+
310316
**header_kwargs : dict, optional
311317
Block header settings that will be read, updated, and used
312318
to generate the binary block header representation by packing
@@ -353,7 +359,7 @@ def generate_write_header(data, stream=False, compression_kwargs=None, padding=F
353359
padding = util.calculate_padding(used_size, padding, fs_block_size)
354360
header_kwargs["allocated_size"] = header_kwargs.get("allocated_size", used_size + padding)
355361

356-
if stream:
362+
if stream or not write_checksum:
357363
header_kwargs["checksum"] = b"\0" * 16
358364
elif buff is not None:
359365
header_kwargs["checksum"] = calculate_block_checksum(buff.getbuffer())
@@ -370,7 +376,9 @@ def generate_write_header(data, stream=False, compression_kwargs=None, padding=F
370376
return header_kwargs, buff, padding_bytes
371377

372378

373-
def write_block(fd, data, offset=None, stream=False, compression_kwargs=None, padding=False, **header_kwargs):
379+
def write_block(
380+
fd, data, offset=None, stream=False, compression_kwargs=None, padding=False, write_checksum=True, **header_kwargs
381+
):
374382
"""
375383
Write an ASDF block.
376384
@@ -392,6 +400,10 @@ def write_block(fd, data, offset=None, stream=False, compression_kwargs=None, pa
392400
padding : bool, optional, default False
393401
Optionally pad the block data. See `generate_write_header`.
394402
403+
write_checksum : bool, optional
404+
Compute and write the checksum of the block data.
405+
If disabled then the checksum field is set to 0.
406+
395407
**header_kwargs : dict
396408
Block header settings. See `generate_write_header`.
397409
@@ -403,7 +415,7 @@ def write_block(fd, data, offset=None, stream=False, compression_kwargs=None, pa
403415
for writing.
404416
"""
405417
header_dict, buff, padding_bytes = generate_write_header(
406-
data, stream, compression_kwargs, padding, fd.block_size, **header_kwargs
418+
data, stream, compression_kwargs, padding, fd.block_size, write_checksum, **header_kwargs
407419
)
408420
header_bytes = BLOCK_HEADER.pack(**header_dict)
409421

asdf/_block/manager.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ def _clear_write(self):
364364
self._streamed_obj_keys = set()
365365
self._write_fd = None
366366

367-
def _write_external_blocks(self):
367+
def _write_external_blocks(self, write_checksums):
368368
from asdf import AsdfFile
369369

370370
if self._write_fd is None or self._write_fd.uri is None:
@@ -376,7 +376,7 @@ def _write_external_blocks(self):
376376
af = AsdfFile()
377377
with generic_io.get_file(uri, mode="w") as f:
378378
af.write_to(f, include_block_index=False)
379-
writer.write_blocks(f, [blk])
379+
writer.write_blocks(f, [blk], write_checksums=write_checksums)
380380

381381
def make_write_block(self, data, options, obj):
382382
"""
@@ -541,7 +541,7 @@ def write_context(self, fd, copy_options=True):
541541
yield
542542
self._clear_write()
543543

544-
def write(self, pad_blocks, include_block_index):
544+
def write(self, pad_blocks, include_block_index, write_checksums):
545545
"""
546546
Write blocks that were set up during the current
547547
`write_context`.
@@ -559,6 +559,9 @@ def write(self, pad_blocks, include_block_index):
559559
If a streamed_block is provided (or the file is not
560560
seekable) no block index will be written.
561561
562+
write_checksums : bool
563+
Compute and write checksums for each block.
564+
562565
Raises
563566
------
564567
OSError
@@ -574,11 +577,12 @@ def write(self, pad_blocks, include_block_index):
574577
pad_blocks,
575578
streamed_block=self._streamed_write_block,
576579
write_index=include_block_index,
580+
write_checksums=write_checksums,
577581
)
578582
if len(self._external_write_blocks):
579-
self._write_external_blocks()
583+
self._write_external_blocks(write_checksums=write_checksums)
580584

581-
def update(self, new_tree_size, pad_blocks, include_block_index):
585+
def update(self, new_tree_size, pad_blocks, include_block_index, write_checksums):
582586
"""
583587
Perform an update-in-place of ASDF blocks set up during
584588
a `write_context`.
@@ -596,11 +600,13 @@ def update(self, new_tree_size, pad_blocks, include_block_index):
596600
a number of padding bytes based off a ratio of the data
597601
size.
598602
599-
include_block_index : bool
603+
include_block_index : bool or None
600604
If True, include a block index at the end of the file.
601605
If a streamed_block is provided (or the file is not
602606
seekable) no block index will be written.
603607
608+
write_checksums : bool
609+
Compute and write block checksums to the file.
604610
605611
Raises
606612
------
@@ -626,7 +632,7 @@ def update(self, new_tree_size, pad_blocks, include_block_index):
626632
)
627633

628634
if len(self._external_write_blocks):
629-
self._write_external_blocks()
635+
self._write_external_blocks(write_checksums=write_checksums)
630636

631637
# do we have any blocks to write?
632638
if len(self._write_blocks) or self._streamed_write_block:
@@ -637,6 +643,7 @@ def update(self, new_tree_size, pad_blocks, include_block_index):
637643
pad_blocks,
638644
streamed_block=self._streamed_write_block,
639645
write_index=False, # don't write an index as we will modify the offsets
646+
write_checksums=write_checksums,
640647
)
641648
new_block_end = self._write_fd.tell()
642649

asdf/_block/writer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def data_bytes(self):
2929
return np.ndarray(0, np.uint8)
3030

3131

32-
def write_blocks(fd, blocks, padding=False, streamed_block=None, write_index=True):
32+
def write_blocks(fd, blocks, padding=False, streamed_block=None, write_index=True, write_checksums=True):
3333
"""
3434
Write a list of WriteBlocks to a file
3535
@@ -60,6 +60,9 @@ def write_blocks(fd, blocks, padding=False, streamed_block=None, write_index=Tru
6060
If a streamed_block is provided (or the file is not
6161
seekable) no block index will be written.
6262
63+
write_checksums : bool, optional
64+
Compute and write block checksums to the file.
65+
6366
Returns
6467
-------
6568
offsets : list of int
@@ -101,12 +104,13 @@ def tell():
101104
compression_kwargs=blk.compression_kwargs,
102105
padding=padding,
103106
compression=blk.compression,
107+
write_checksum=write_checksums,
104108
)
105109
)
106110
if streamed_block is not None:
107111
offsets.append(tell())
108112
fd.write(constants.BLOCK_MAGIC)
109-
headers.append(bio.write_block(fd, streamed_block.data_bytes, stream=True))
113+
headers.append(bio.write_block(fd, streamed_block.data_bytes, stream=True, write_checksum=write_checksums))
110114

111115
# os.pipe on windows returns a file-like object
112116
# that reports as seekable but tell always returns 0

asdf/_dump.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def dump(
1717
compression_kwargs=NotSet,
1818
pad_blocks=False,
1919
custom_schema=None,
20+
write_checksums=True,
2021
):
2122
"""
2223
Write a tree to an ASDF file.
@@ -54,6 +55,9 @@ def dump(
5455
validation pass. This can be used to ensure that particular ASDF
5556
files follow custom conventions beyond those enforced by the
5657
specification.
58+
59+
write_checksums : bool, optional
60+
Compute and write block checksums to the file.
5761
"""
5862
AsdfFile(tree, custom_schema=custom_schema, extensions=extensions).write_to(
5963
fp,
@@ -62,6 +66,7 @@ def dump(
6266
all_array_compression=all_array_compression,
6367
compression_kwargs=compression_kwargs,
6468
pad_blocks=pad_blocks,
69+
write_checksums=write_checksums,
6570
)
6671

6772

@@ -75,6 +80,7 @@ def dumps(
7580
compression_kwargs=NotSet,
7681
pad_blocks=False,
7782
custom_schema=None,
83+
write_checksums=True,
7884
):
7985
"""
8086
Write tree to a string.
@@ -110,6 +116,9 @@ def dumps(
110116
files follow custom conventions beyond those enforced by the
111117
specification.
112118
119+
write_checksums : bool, optional
120+
Compute and write block checksums to the file.
121+
113122
Returns
114123
-------
115124
str
@@ -126,6 +135,7 @@ def dumps(
126135
compression_kwargs=compression_kwargs,
127136
pad_blocks=pad_blocks,
128137
custom_schema=custom_schema,
138+
write_checksums=write_checksums,
129139
)
130140
return buff.getvalue()
131141

asdf/_tests/_block/test_io.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ def test_checksum(tmp_path):
1818
# check that when written, a block generates the correct checksum
1919
path = tmp_path / "test"
2020
with generic_io.get_file(path, mode="w") as fd:
21-
bio.write_block(fd, my_array.view(dtype="uint8"))
21+
# check that no warnings are raised when writing checksums without compression
22+
bio.write_block(fd, my_array.view(dtype="uint8"), write_checksum=True)
2223
with generic_io.get_file(path, mode="r") as fd:
2324
_, header, _, _ = bio.read_block(fd, True)
2425
assert header["checksum"] == target_checksum

asdf/_tests/_block/test_manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def test_write_no_uri(tmp_path):
5858
with asdf.open(fn) as af:
5959
m = af._blocks
6060
with pytest.raises(ValueError, match=r"Can't write external blocks.*"):
61-
m._write_external_blocks()
61+
m._write_external_blocks(write_checksums=False)
6262

6363

6464
def test_write_outside_context(tmp_path):
@@ -67,7 +67,7 @@ def test_write_outside_context(tmp_path):
6767
with asdf.open(fn) as af:
6868
m = af._blocks
6969
with pytest.raises(OSError, match=r"write called outside of valid write_context"):
70-
m.write(False, False)
70+
m.write(pad_blocks=False, include_block_index=False, write_checksums=False)
7171

7272

7373
def test_update_outside_context(tmp_path):
@@ -76,7 +76,7 @@ def test_update_outside_context(tmp_path):
7676
with asdf.open(fn) as af:
7777
m = af._blocks
7878
with pytest.raises(OSError, match=r"update called outside of valid write_context"):
79-
m.update(0, False, False)
79+
m.update(0, pad_blocks=False, include_block_index=False, write_checksums=False)
8080

8181

8282
def test_input_compression(tmp_path):

0 commit comments

Comments
 (0)