From 96e733a8e0b5f5c5d05e7b5ed817fc5232bd709d Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 11 Aug 2025 15:32:15 -0400 Subject: [PATCH 01/11] drop _open_impl and _open_asdf --- asdf/_asdf.py | 267 ++++++++++++++++++++------------------------------ 1 file changed, 106 insertions(+), 161 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index bd3646af9..80a2e5b26 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -792,154 +792,6 @@ def _find_asdf_version_in_comments(cls, comments): return None - @classmethod - def _open_asdf( - cls, - self, - fd, - validate_checksums=False, - extensions=None, - lazy_tree=NotSet, - _get_yaml_content=False, - _force_raw_types=False, - strict_extension_check=False, - ignore_missing_extensions=False, - ): - """Attempt to populate AsdfFile data from file-like object""" - - if strict_extension_check and ignore_missing_extensions: - msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options" - raise ValueError(msg) - - with config_context() as cfg: - # validate_checksums (unlike memmap and lazy_load) is provided - # here instead of in __init__ - self._blocks._validate_checksums = validate_checksums - self._mode = fd.mode - self._fd = fd - if self._fd._uri: - self._blocks._uri = self._fd._uri - # The filename is currently only used for tracing warning information - self._fname = self._fd._uri if self._fd._uri else "" - try: - header_line = fd.read_until(b"\r?\n", 2, "newline", include=True) - except DelimiterNotFoundError as e: - msg = "Does not appear to be a ASDF file." - raise ValueError(msg) from e - self._file_format_version = cls._parse_header_line(header_line) - - self._comments = cls._read_comment_section(fd) - - version = cls._find_asdf_version_in_comments(self._comments) - if version is not None: - self.version = version - else: - # If no ASDF_STANDARD comment is found... - self.version = versioning.AsdfVersion("1.0.0") - - # Now that version is set for good, we can add any additional - # extensions, which may have narrow ASDF Standard version - # requirements. - if extensions: - self.extensions = extensions - - yaml_token = fd.read(4) - tree = None - if yaml_token == b"%YAM": - reader = fd.reader_until( - constants.YAML_END_MARKER_REGEX, - 7, - "End of YAML marker", - include=True, - initial_content=yaml_token, - ) - - # For testing: just return the raw YAML content - if _get_yaml_content: - yaml_content = reader.read() - fd.close() - return yaml_content - - # We parse the YAML content into basic data structures - # now, but we don't do anything special with it until - # after the blocks have been read - tree = yamlutil.load_tree(reader) - self._blocks.read(fd) - elif yaml_token == constants.BLOCK_MAGIC: - # this file has only blocks and we're already read the first block magic - self._blocks.read(fd, after_magic=True) - elif yaml_token != b"": - msg = "ASDF file appears to contain garbage after header." - raise OSError(msg) - - if tree is None: - # At this point the tree should be tagged, but we want it to be - # tagged with the core/asdf version appropriate to this file's - # ASDF Standard version. We're using custom_tree_to_tagged_tree - # to select the correct tag for us. - tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), self) - - if self.version <= versioning.FILL_DEFAULTS_MAX_VERSION and get_config().legacy_fill_schema_defaults: - schema.fill_defaults(tree, self, reading=True) - - if get_config().validate_on_read: - try: - self._validate(tree, reading=True) - except ValidationError: - self.close() - raise - - if lazy_tree is NotSet: - lazy_tree = cfg.lazy_tree - if lazy_tree and not _force_raw_types: - obj = AsdfObject() - obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(self)) - tree = obj - else: - tree = yamlutil.tagged_tree_to_custom_tree(tree, self, _force_raw_types) - - if not (ignore_missing_extensions or _force_raw_types): - self._check_extensions(tree, strict=strict_extension_check) - - self._tree = tree - - return self - - @classmethod - def _open_impl( - cls, - self, - fd, - uri=None, - mode="r", - validate_checksums=False, - extensions=None, - lazy_tree=NotSet, - _get_yaml_content=False, - _force_raw_types=False, - strict_extension_check=False, - ignore_missing_extensions=False, - ): - """Attempt to open file-like object as an AsdfFile""" - close_on_fail = isinstance(fd, (str, pathlib.Path)) - generic_file = generic_io.get_file(fd, mode=mode, uri=uri) - try: - return cls._open_asdf( - self, - generic_file, - validate_checksums=validate_checksums, - extensions=extensions, - lazy_tree=lazy_tree, - _get_yaml_content=_get_yaml_content, - _force_raw_types=_force_raw_types, - strict_extension_check=strict_extension_check, - ignore_missing_extensions=ignore_missing_extensions, - ) - except Exception: - if close_on_fail: - generic_file.close() - raise - def _write_tree(self, tree, fd, pad_blocks): fd.write(constants.ASDF_MAGIC) fd.write(b" ") @@ -1616,16 +1468,109 @@ def open_asdf( custom_schema=custom_schema, ) - return AsdfFile._open_impl( - instance, - fd, - uri=uri, - mode=mode, - validate_checksums=validate_checksums, - extensions=extensions, - lazy_tree=lazy_tree, - _get_yaml_content=_get_yaml_content, - _force_raw_types=_force_raw_types, - strict_extension_check=strict_extension_check, - ignore_missing_extensions=ignore_missing_extensions, - ) + close_on_fail = isinstance(fd, (str, pathlib.Path)) + generic_file = generic_io.get_file(fd, mode=mode, uri=uri) + try: + if strict_extension_check and ignore_missing_extensions: + msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options" + raise ValueError(msg) + + with config_context() as cfg: + # validate_checksums (unlike memmap and lazy_load) is provided + # here instead of in __init__ + instance._blocks._validate_checksums = validate_checksums + instance._mode = generic_file.mode + instance._fd = generic_file + if instance._fd._uri: + instance._blocks._uri = instance._fd._uri + # The filename is currently only used for tracing warning information + instance._fname = instance._fd._uri if instance._fd._uri else "" + try: + header_line = generic_file.read_until(b"\r?\n", 2, "newline", include=True) + except DelimiterNotFoundError as e: + msg = "Does not appear to be a ASDF file." + raise ValueError(msg) from e + + instance._file_format_version = AsdfFile._parse_header_line(header_line) + + instance._comments = AsdfFile._read_comment_section(generic_file) + + version = AsdfFile._find_asdf_version_in_comments(instance._comments) + if version is not None: + instance.version = version + else: + # If no ASDF_STANDARD comment is found... + instance.version = versioning.AsdfVersion("1.0.0") + + # Now that version is set for good, we can add any additional + # extensions, which may have narrow ASDF Standard version + # requirements. + if extensions: + instance.extensions = extensions + + yaml_token = generic_file.read(4) + tree = None + if yaml_token == b"%YAM": + reader = generic_file.reader_until( + constants.YAML_END_MARKER_REGEX, + 7, + "End of YAML marker", + include=True, + initial_content=yaml_token, + ) + + # For testing: just return the raw YAML content + if _get_yaml_content: + yaml_content = reader.read() + generic_file.close() + return yaml_content + + # We parse the YAML content into basic data structures + # now, but we don't do anything special with it until + # after the blocks have been read + tree = yamlutil.load_tree(reader) + instance._blocks.read(generic_file) + elif yaml_token == constants.BLOCK_MAGIC: + # this file has only blocks and we're already read the first block magic + instance._blocks.read(generic_file, after_magic=True) + elif yaml_token != b"": + msg = "ASDF file appears to contain garbage after header." + raise OSError(msg) + + if tree is None: + # At this point the tree should be tagged, but we want it to be + # tagged with the core/asdf version appropriate to this file's + # ASDF Standard version. We're using custom_tree_to_tagged_tree + # to select the correct tag for us. + tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), instance) + + if instance.version <= versioning.FILL_DEFAULTS_MAX_VERSION and get_config().legacy_fill_schema_defaults: + schema.fill_defaults(tree, instance, reading=True) + + if get_config().validate_on_read: + try: + instance._validate(tree, reading=True) + except ValidationError: + instance.close() + raise + + if lazy_tree is NotSet: + lazy_tree = cfg.lazy_tree + if lazy_tree and not _force_raw_types: + obj = AsdfObject() + obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(instance)) + tree = obj + else: + tree = yamlutil.tagged_tree_to_custom_tree(tree, instance, _force_raw_types) + + if not (ignore_missing_extensions or _force_raw_types): + instance._check_extensions(tree, strict=strict_extension_check) + + instance._tree = tree + + return instance + + except Exception: + if close_on_fail: + generic_file.close() + raise From 73dc8ed09b50c0a8680dea94a310aa3024319554 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 11 Aug 2025 15:45:43 -0400 Subject: [PATCH 02/11] split some low level calls into a private submodule --- asdf/_asdf.py | 69 +++--------------------------------------- asdf/_commands/edit.py | 6 ++-- asdf/_io.py | 66 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 68 deletions(-) create mode 100644 asdf/_io.py diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 80a2e5b26..9522c171b 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -11,9 +11,9 @@ from . import _compression as mcompression from . import _display as display +from . import _io, constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil from . import _node_info as node_info from . import _version as version -from . import constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil from ._block.manager import Manager as BlockManager from ._helpers import validate_version from .config import config_context, get_config @@ -731,67 +731,6 @@ def get_array_save_base(self, arr): """ return self._blocks._get_array_save_base(arr) - @classmethod - def _parse_header_line(cls, line): - """ - Parses the header line in a ASDF file to obtain the ASDF version. - """ - parts = line.split() - if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: - msg = "Does not appear to be a ASDF file." - raise ValueError(msg) - - try: - version = versioning.AsdfVersion(parts[1].decode("ascii")) - except ValueError as err: - msg = f"Unparsable version in ASDF file: {parts[1]}" - raise ValueError(msg) from err - - if version != versioning._FILE_FORMAT_VERSION: - msg = f"Unsupported ASDF file format version {version}" - raise ValueError(msg) - - return version - - @classmethod - def _read_comment_section(cls, fd): - """ - Reads the comment section, between the header line and the - Tree or first block. - """ - content = fd.read_until( - b"(%YAML)|(" + constants.BLOCK_MAGIC + b")", - 5, - "start of content", - include=False, - exception=False, - ) - - comments = [] - - lines = content.splitlines() - for line in lines: - if not line.startswith(b"#"): - msg = "Invalid content between header and tree" - raise ValueError(msg) - comments.append(line[1:].strip()) - - return comments - - @classmethod - def _find_asdf_version_in_comments(cls, comments): - for comment in comments: - parts = comment.split() - if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: - try: - version = versioning.AsdfVersion(parts[1].decode("ascii")) - except ValueError: - pass - else: - return version - - return None - def _write_tree(self, tree, fd, pad_blocks): fd.write(constants.ASDF_MAGIC) fd.write(b" ") @@ -1491,11 +1430,11 @@ def open_asdf( msg = "Does not appear to be a ASDF file." raise ValueError(msg) from e - instance._file_format_version = AsdfFile._parse_header_line(header_line) + instance._file_format_version = _io.parse_header_line(header_line) - instance._comments = AsdfFile._read_comment_section(generic_file) + instance._comments = _io.read_comment_section(generic_file) - version = AsdfFile._find_asdf_version_in_comments(instance._comments) + version = _io.find_asdf_version_in_comments(instance._comments) if version is not None: instance.version = version else: diff --git a/asdf/_commands/edit.py b/asdf/_commands/edit.py index 04e64336e..bcf2c3fd5 100644 --- a/asdf/_commands/edit.py +++ b/asdf/_commands/edit.py @@ -15,7 +15,7 @@ import yaml -from asdf import constants, generic_io, schema, util +from asdf import _io, constants, generic_io, schema, util from asdf._asdf import AsdfFile from asdf._block import io as bio from asdf._block.exceptions import BlockIndexError @@ -337,8 +337,8 @@ def parse_asdf_version(content): asdf.versioning.AsdfVersion ASDF Standard version """ - comments = AsdfFile._read_comment_section(generic_io.get_file(io.BytesIO(content))) - return AsdfFile._find_asdf_version_in_comments(comments) + comments = _io.read_comment_section(generic_io.get_file(io.BytesIO(content))) + return _io.find_asdf_version_in_comments(comments) def parse_yaml_version(content): diff --git a/asdf/_io.py b/asdf/_io.py new file mode 100644 index 000000000..1ac5b6342 --- /dev/null +++ b/asdf/_io.py @@ -0,0 +1,66 @@ +""" +Low-level input/output routines for AsdfFile instances +""" + +from . import constants, versioning + + +def parse_header_line(line): + """ + Parses the header line in a ASDF file to obtain the ASDF version. + """ + parts = line.split() + if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: + msg = "Does not appear to be a ASDF file." + raise ValueError(msg) + + try: + version = versioning.AsdfVersion(parts[1].decode("ascii")) + except ValueError as err: + msg = f"Unparsable version in ASDF file: {parts[1]}" + raise ValueError(msg) from err + + if version != versioning._FILE_FORMAT_VERSION: + msg = f"Unsupported ASDF file format version {version}" + raise ValueError(msg) + + return version + + +def read_comment_section(fd): + """ + Reads the comment section, between the header line and the + Tree or first block. + """ + content = fd.read_until( + b"(%YAML)|(" + constants.BLOCK_MAGIC + b")", + 5, + "start of content", + include=False, + exception=False, + ) + + comments = [] + + lines = content.splitlines() + for line in lines: + if not line.startswith(b"#"): + msg = "Invalid content between header and tree" + raise ValueError(msg) + comments.append(line[1:].strip()) + + return comments + + +def find_asdf_version_in_comments(comments): + for comment in comments: + parts = comment.split() + if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: + try: + version = versioning.AsdfVersion(parts[1].decode("ascii")) + except ValueError: + pass + else: + return version + + return None From d7a677750a4587b32a0b9bb2cec546536499cc4e Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 11 Aug 2025 15:53:53 -0400 Subject: [PATCH 03/11] pull out get asdf library info function --- asdf/_asdf.py | 20 ++------------------ asdf/_io.py | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 9522c171b..6c896cc3b 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -13,7 +13,6 @@ from . import _display as display from . import _io, constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil from . import _node_info as node_info -from . import _version as version from ._block.manager import Manager as BlockManager from ._helpers import validate_version from .config import config_context, get_config @@ -30,21 +29,6 @@ from .util import NotSet -def _get_asdf_library_info(): - """ - Get information about asdf to include in the asdf_library entry - in the Tree. - """ - return Software( - { - "name": "asdf", - "version": version.version, - "homepage": "http://github.com/asdf-format/asdf", - "author": "The ASDF Developers", - }, - ) - - class AsdfFile: """ The main class that represents an ASDF file object. @@ -794,7 +778,7 @@ def _serial_write(self, fd, pad_blocks, include_block_index): try: # prep a tree for a writing tree = copy.copy(self._tree) - tree["asdf_library"] = _get_asdf_library_info() + tree["asdf_library"] = _io.get_asdf_library_info() if "history" in self._tree: tree["history"] = copy.deepcopy(self._tree["history"]) @@ -917,7 +901,7 @@ def rewrite(): self._pre_write(fd) try: - self._tree["asdf_library"] = _get_asdf_library_info() + self._tree["asdf_library"] = _io.get_asdf_library_info() # prepare block manager for writing with self._blocks.write_context(self._fd, copy_options=False): diff --git a/asdf/_io.py b/asdf/_io.py index 1ac5b6342..6468c170d 100644 --- a/asdf/_io.py +++ b/asdf/_io.py @@ -2,7 +2,23 @@ Low-level input/output routines for AsdfFile instances """ -from . import constants, versioning +from . import _version, constants, versioning +from .tags.core import Software + + +def get_asdf_library_info(): + """ + Get information about asdf to include in the asdf_library entry + in the Tree. + """ + return Software( + { + "name": "asdf", + "version": _version.version, + "homepage": "http://github.com/asdf-format/asdf", + "author": "The ASDF Developers", + }, + ) def parse_header_line(line): From d575c64a6d8cc0bd685ee59d11de3b2d5fd1d3a6 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 11 Aug 2025 15:56:16 -0400 Subject: [PATCH 04/11] remove unused _pre_write _post_write --- asdf/_asdf.py | 79 +++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 6c896cc3b..5c8fed5ca 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -766,26 +766,16 @@ def _tree_finalizer(tagged_tree): padding = util.calculate_padding(fd.tell(), pad_blocks, fd.block_size) fd.fast_forward(padding) - def _pre_write(self, fd): - pass - - def _post_write(self, fd): - pass - def _serial_write(self, fd, pad_blocks, include_block_index): with self._blocks.write_context(fd): - self._pre_write(fd) - try: - # prep a tree for a writing - tree = copy.copy(self._tree) - tree["asdf_library"] = _io.get_asdf_library_info() - if "history" in self._tree: - tree["history"] = copy.deepcopy(self._tree["history"]) - - self._write_tree(tree, fd, pad_blocks) - self._blocks.write(pad_blocks, include_block_index) - finally: - self._post_write(fd) + # prep a tree for a writing + tree = copy.copy(self._tree) + tree["asdf_library"] = _io.get_asdf_library_info() + if "history" in self._tree: + tree["history"] = copy.deepcopy(self._tree["history"]) + + self._write_tree(tree, fd, pad_blocks) + self._blocks.write(pad_blocks, include_block_index) def update( self, @@ -899,35 +889,30 @@ def rewrite(): rewrite() return - self._pre_write(fd) - try: - self._tree["asdf_library"] = _io.get_asdf_library_info() - - # prepare block manager for writing - with self._blocks.write_context(self._fd, copy_options=False): - # write out tree to temporary buffer - tree_fd = generic_io.get_file(io.BytesIO(), mode="rw") - self._write_tree(self._tree, tree_fd, False) - new_tree_size = tree_fd.tell() - - # update blocks - self._blocks.update(new_tree_size, pad_blocks, include_block_index) - end_of_file = self._fd.tell() - - # now write the tree - self._fd.seek(0) - tree_fd.seek(0) - self._fd.write(tree_fd.read()) - self._fd.flush() - - # close memmap to trigger arrays to reload themselves - self._fd.seek(end_of_file) - self._fd.truncate() - if self._fd.can_memmap(): - self._fd.close_memmap() - - finally: - self._post_write(fd) + self._tree["asdf_library"] = _io.get_asdf_library_info() + + # prepare block manager for writing + with self._blocks.write_context(self._fd, copy_options=False): + # write out tree to temporary buffer + tree_fd = generic_io.get_file(io.BytesIO(), mode="rw") + self._write_tree(self._tree, tree_fd, False) + new_tree_size = tree_fd.tell() + + # update blocks + self._blocks.update(new_tree_size, pad_blocks, include_block_index) + end_of_file = self._fd.tell() + + # now write the tree + self._fd.seek(0) + tree_fd.seek(0) + self._fd.write(tree_fd.read()) + self._fd.flush() + + # close memmap to trigger arrays to reload themselves + self._fd.seek(end_of_file) + self._fd.truncate() + if self._fd.can_memmap(): + self._fd.close_memmap() def write_to( self, From 07d0019b65b83e54d1f662c6378151e8ef64cc6a Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 10:33:30 -0400 Subject: [PATCH 05/11] WIP: simplify open --- asdf/_asdf.py | 159 ++++++++++++++--------------------------------- asdf/_io.py | 79 ++++++++++++++++++++++- asdf/yamlutil.py | 1 + 3 files changed, 124 insertions(+), 115 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 5c8fed5ca..f0ba1cfd4 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -2,7 +2,6 @@ import datetime import io import os -import pathlib import time import warnings import weakref @@ -20,8 +19,6 @@ AsdfManifestURIMismatchWarning, AsdfPackageVersionWarning, AsdfWarning, - DelimiterNotFoundError, - ValidationError, ) from .extension import Extension, ExtensionProxy, _serialization_context, get_cached_extension_manager from .search import AsdfSearchResult @@ -1278,7 +1275,6 @@ def open_asdf( custom_schema=None, strict_extension_check=False, ignore_missing_extensions=False, - _get_yaml_content=False, ): """ Open an existing ASDF file. @@ -1356,19 +1352,14 @@ def open_asdf( The new AsdfFile object. """ - if mode is not None and mode not in ["r", "rw"]: - msg = f"Unrecognized asdf mode '{mode}'. Must be either 'r' or 'rw'" + if strict_extension_check and ignore_missing_extensions: + msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options" raise ValueError(msg) - if mode is None: - if isinstance(fd, io.IOBase): - mode = "rw" if fd.writable() else "r" - elif isinstance(fd, generic_io.GenericFile): - mode = fd.mode - else: - # This is the safest assumption for the default fallback - mode = "r" + if lazy_tree is NotSet: + lazy_tree = get_config().lazy_tree + # TODO pass in extensions? instance = AsdfFile( ignore_unrecognized_tag=ignore_unrecognized_tag, memmap=memmap, @@ -1376,109 +1367,53 @@ def open_asdf( custom_schema=custom_schema, ) - close_on_fail = isinstance(fd, (str, pathlib.Path)) - generic_file = generic_io.get_file(fd, mode=mode, uri=uri) - try: - if strict_extension_check and ignore_missing_extensions: - msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options" - raise ValueError(msg) - - with config_context() as cfg: - # validate_checksums (unlike memmap and lazy_load) is provided - # here instead of in __init__ - instance._blocks._validate_checksums = validate_checksums - instance._mode = generic_file.mode - instance._fd = generic_file - if instance._fd._uri: - instance._blocks._uri = instance._fd._uri - # The filename is currently only used for tracing warning information - instance._fname = instance._fd._uri if instance._fd._uri else "" - try: - header_line = generic_file.read_until(b"\r?\n", 2, "newline", include=True) - except DelimiterNotFoundError as e: - msg = "Does not appear to be a ASDF file." - raise ValueError(msg) from e + with _io.maybe_close(fd, mode, uri) as generic_file: + file_format_version, comments, tree, blocks = _io.open_asdf( + generic_file, uri, mode, lazy_load, memmap, validate_checksums + ) - instance._file_format_version = _io.parse_header_line(header_line) + instance._blocks = blocks - instance._comments = _io.read_comment_section(generic_file) + instance._mode = generic_file.mode + instance._fd = generic_file + instance._fname = instance._fd._uri if instance._fd._uri else "" + instance._file_format_version = file_format_version + instance._comments = comments - version = _io.find_asdf_version_in_comments(instance._comments) - if version is not None: - instance.version = version - else: - # If no ASDF_STANDARD comment is found... - instance.version = versioning.AsdfVersion("1.0.0") - - # Now that version is set for good, we can add any additional - # extensions, which may have narrow ASDF Standard version - # requirements. - if extensions: - instance.extensions = extensions - - yaml_token = generic_file.read(4) - tree = None - if yaml_token == b"%YAM": - reader = generic_file.reader_until( - constants.YAML_END_MARKER_REGEX, - 7, - "End of YAML marker", - include=True, - initial_content=yaml_token, - ) + instance.version = _io.find_asdf_version_in_comments(comments, versioning.AsdfVersion("1.0.0")) - # For testing: just return the raw YAML content - if _get_yaml_content: - yaml_content = reader.read() - generic_file.close() - return yaml_content - - # We parse the YAML content into basic data structures - # now, but we don't do anything special with it until - # after the blocks have been read - tree = yamlutil.load_tree(reader) - instance._blocks.read(generic_file) - elif yaml_token == constants.BLOCK_MAGIC: - # this file has only blocks and we're already read the first block magic - instance._blocks.read(generic_file, after_magic=True) - elif yaml_token != b"": - msg = "ASDF file appears to contain garbage after header." - raise OSError(msg) - - if tree is None: - # At this point the tree should be tagged, but we want it to be - # tagged with the core/asdf version appropriate to this file's - # ASDF Standard version. We're using custom_tree_to_tagged_tree - # to select the correct tag for us. - tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), instance) - - if instance.version <= versioning.FILL_DEFAULTS_MAX_VERSION and get_config().legacy_fill_schema_defaults: - schema.fill_defaults(tree, instance, reading=True) - - if get_config().validate_on_read: - try: - instance._validate(tree, reading=True) - except ValidationError: - instance.close() - raise - - if lazy_tree is NotSet: - lazy_tree = cfg.lazy_tree - if lazy_tree and not _force_raw_types: - obj = AsdfObject() - obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(instance)) - tree = obj - else: - tree = yamlutil.tagged_tree_to_custom_tree(tree, instance, _force_raw_types) + # TODO pass in extensions above? + if extensions: + instance.extensions = extensions - if not (ignore_missing_extensions or _force_raw_types): - instance._check_extensions(tree, strict=strict_extension_check) + if tree is None: + # TODO ugh + # At this point the tree should be tagged, but we want it to be + # tagged with the core/asdf version appropriate to this file's + # ASDF Standard version. We're using custom_tree_to_tagged_tree + # to select the correct tag for us. + tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), instance) + + # possibly fill defaults + if instance.version <= versioning.FILL_DEFAULTS_MAX_VERSION and get_config().legacy_fill_schema_defaults: + schema.fill_defaults(tree, instance, reading=True) + + # validate + if get_config().validate_on_read: + instance._validate(tree, reading=True) + + # lazy tree? + if lazy_tree and not _force_raw_types: + obj = AsdfObject() + obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(instance)) + tree = obj + else: + tree = yamlutil.tagged_tree_to_custom_tree(tree, instance, _force_raw_types) - instance._tree = tree + # check extensions + if not (ignore_missing_extensions or _force_raw_types): + instance._check_extensions(tree, strict=strict_extension_check) - return instance + instance._tree = tree - except Exception: - if close_on_fail: - generic_file.close() - raise + return instance diff --git a/asdf/_io.py b/asdf/_io.py index 6468c170d..82099efda 100644 --- a/asdf/_io.py +++ b/asdf/_io.py @@ -2,7 +2,13 @@ Low-level input/output routines for AsdfFile instances """ -from . import _version, constants, versioning +import contextlib +import io +import pathlib + +from . import _version, constants, generic_io, versioning, yamlutil +from ._block.manager import Manager as BlockManager +from .exceptions import DelimiterNotFoundError from .tags.core import Software @@ -43,6 +49,16 @@ def parse_header_line(line): return version +def read_header_line(gf): + try: + header_line = gf.read_until(b"\r?\n", 2, "newline", include=True) + except DelimiterNotFoundError as e: + msg = "Does not appear to be a ASDF file." + raise ValueError(msg) from e + + return parse_header_line(header_line) + + def read_comment_section(fd): """ Reads the comment section, between the header line and the @@ -68,7 +84,7 @@ def read_comment_section(fd): return comments -def find_asdf_version_in_comments(comments): +def find_asdf_version_in_comments(comments, default=None): for comment in comments: parts = comment.split() if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: @@ -79,4 +95,61 @@ def find_asdf_version_in_comments(comments): else: return version - return None + return default + + +@contextlib.contextmanager +def maybe_close(fd, mode=None, uri=None): + if mode not in [None, "r", "rw"]: + msg = f"Unrecognized asdf mode '{mode}'. Must be either 'r' or 'rw'" + raise ValueError(msg) + + if isinstance(fd, (str, pathlib.Path)): + mode = mode or "r" + generic_file = generic_io.get_file(fd, mode=mode, uri=uri) + try: + yield generic_file + except Exception as e: + generic_file.close() + raise e + elif isinstance(fd, generic_io.GenericFile): + yield fd + else: + if mode is None: + # infer from fd + if isinstance(fd, io.IOBase): + mode = "rw" if fd.writable() else "r" + else: + mode = "r" + yield generic_io.get_file(fd, mode=mode, uri=uri) + + +def read_tree_and_blocks(gf, lazy_load, memmap, validate_checksums): + token = gf.read(4) + tree = None + blocks = BlockManager(uri=gf.uri, lazy_load=lazy_load, memmap=memmap, validate_checksums=validate_checksums) + if token == b"%YAM": + reader = gf.reader_until( + constants.YAML_END_MARKER_REGEX, + 7, + "End of YAML marker", + include=True, + initial_content=token, + ) + tree = yamlutil.load_tree(reader) + blocks.read(gf, after_magic=False) + elif token == constants.BLOCK_MAGIC: + blocks.read(gf, after_magic=True) + elif token != b"": + msg = "ASDF file appears to contain garbage after header." + raise OSError(msg) + + return tree, blocks + + +def open_asdf(fd, uri=None, mode=None, lazy_load=True, memmap=False, validate_checksums=False): + with maybe_close(fd, mode, uri) as generic_file: + file_format_version = read_header_line(generic_file) + comments = read_comment_section(generic_file) + tree, blocks = read_tree_and_blocks(generic_file, lazy_load, memmap, validate_checksums) + return file_format_version, comments, tree, blocks diff --git a/asdf/yamlutil.py b/asdf/yamlutil.py index c1143814c..a240df183 100644 --- a/asdf/yamlutil.py +++ b/asdf/yamlutil.py @@ -330,6 +330,7 @@ def tagged_tree_to_custom_tree(tree, ctx, force_raw_types=False, _serialization_ Convert a tree containing only basic data types, annotated with tags, to a tree containing custom data types. """ + # TODO deprecate force_raw_types if _serialization_context is None: _serialization_context = ctx._create_serialization_context(BlockAccess.READ) From 47f7f989f368b1fadc3c03b046974af566460047 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 10:35:26 -0400 Subject: [PATCH 06/11] provide extensions early --- asdf/_asdf.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index f0ba1cfd4..5d6d5b156 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -1359,12 +1359,12 @@ def open_asdf( if lazy_tree is NotSet: lazy_tree = get_config().lazy_tree - # TODO pass in extensions? instance = AsdfFile( ignore_unrecognized_tag=ignore_unrecognized_tag, memmap=memmap, lazy_load=lazy_load, custom_schema=custom_schema, + extensions=extensions, ) with _io.maybe_close(fd, mode, uri) as generic_file: @@ -1382,10 +1382,6 @@ def open_asdf( instance.version = _io.find_asdf_version_in_comments(comments, versioning.AsdfVersion("1.0.0")) - # TODO pass in extensions above? - if extensions: - instance.extensions = extensions - if tree is None: # TODO ugh # At this point the tree should be tagged, but we want it to be From 600cf357a441161fa7e6b9f5e0fa25bbd766ea5a Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 10:52:34 -0400 Subject: [PATCH 07/11] cleanup comments --- asdf/_asdf.py | 2 -- asdf/yamlutil.py | 1 - 2 files changed, 3 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 5d6d5b156..8829a8fbb 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -1373,7 +1373,6 @@ def open_asdf( ) instance._blocks = blocks - instance._mode = generic_file.mode instance._fd = generic_file instance._fname = instance._fd._uri if instance._fd._uri else "" @@ -1383,7 +1382,6 @@ def open_asdf( instance.version = _io.find_asdf_version_in_comments(comments, versioning.AsdfVersion("1.0.0")) if tree is None: - # TODO ugh # At this point the tree should be tagged, but we want it to be # tagged with the core/asdf version appropriate to this file's # ASDF Standard version. We're using custom_tree_to_tagged_tree diff --git a/asdf/yamlutil.py b/asdf/yamlutil.py index a240df183..c1143814c 100644 --- a/asdf/yamlutil.py +++ b/asdf/yamlutil.py @@ -330,7 +330,6 @@ def tagged_tree_to_custom_tree(tree, ctx, force_raw_types=False, _serialization_ Convert a tree containing only basic data types, annotated with tags, to a tree containing custom data types. """ - # TODO deprecate force_raw_types if _serialization_context is None: _serialization_context = ctx._create_serialization_context(BlockAccess.READ) From 603892c98765afc9b0d20b8c26c6720cd5188374 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 13:38:06 -0400 Subject: [PATCH 08/11] make _fname dynamic --- asdf/_asdf.py | 5 +---- asdf/_tests/test_api.py | 12 ++++-------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 8829a8fbb..ce05825d4 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -85,8 +85,6 @@ def __init__( files follow custom conventions beyond those enforced by the standard. """ - self._fname = "" - # make a new AsdfVersion instance here so files don't share the same instance self._file_format_version = versioning.AsdfVersion(versioning._FILE_FORMAT_VERSION) @@ -251,7 +249,7 @@ def _check_extensions(self, tree, strict=False): installed = ext break - filename = f"'{self._fname}' " if self._fname else "" + filename = f"'{self._fd._uri}'" if (self._fd is not None and self._fd._uri) else "" if extension.extension_uri is not None: extension_description = f"URI '{extension.extension_uri}'" else: @@ -1375,7 +1373,6 @@ def open_asdf( instance._blocks = blocks instance._mode = generic_file.mode instance._fd = generic_file - instance._fname = instance._fd._uri if instance._fd._uri else "" instance._file_format_version = file_format_version instance._comments = comments diff --git a/asdf/_tests/test_api.py b/asdf/_tests/test_api.py index 6c849db0c..d5ef56701 100644 --- a/asdf/_tests/test_api.py +++ b/asdf/_tests/test_api.py @@ -302,8 +302,6 @@ class FooExtension: config.add_extension(proxy) af = asdf.AsdfFile() - af._fname = "test.asdf" - tree = { "history": { "extensions": [ @@ -316,10 +314,10 @@ class FooExtension: } if warns: - with pytest.warns(AsdfPackageVersionWarning, match=r"File 'test.asdf' was created with"): + with pytest.warns(AsdfPackageVersionWarning, match=r"File was created with"): af._check_extensions(tree) - with pytest.raises(RuntimeError, match=r"^File 'test.asdf' was created with"): + with pytest.raises(RuntimeError, match=r"^File was created with"): af._check_extensions(tree, strict=True) else: @@ -351,8 +349,6 @@ class FooExtension: config.add_resource_mapping(mapping) af = asdf.AsdfFile() - af._fname = "test.asdf" - tree = { "history": { "extensions": [ @@ -366,10 +362,10 @@ class FooExtension: } if warns: - with pytest.warns(AsdfPackageVersionWarning, match=r"File 'test.asdf' was created with"): + with pytest.warns(AsdfPackageVersionWarning, match=r"File was created with"): af._check_extensions(tree) - with pytest.raises(RuntimeError, match=r"^File 'test.asdf' was created with"): + with pytest.raises(RuntimeError, match=r"^File was created with"): af._check_extensions(tree, strict=True) else: From 26e8d46912660677d311fca2daaf1c415351aa86 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 14:39:54 -0400 Subject: [PATCH 09/11] simplify pytest plugin example handling --- pytest_asdf/plugin.py | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/pytest_asdf/plugin.py b/pytest_asdf/plugin.py index d1f5b5dee..7cbb51c82 100644 --- a/pytest_asdf/plugin.py +++ b/pytest_asdf/plugin.py @@ -1,4 +1,3 @@ -import io import os import pathlib from dataclasses import dataclass @@ -199,43 +198,26 @@ def from_parent( return result def runtest(self): - from asdf import AsdfFile, _block, generic_io + import asdf + from asdf import _block, generic_io from asdf.testing import helpers # Make sure that the examples in the schema files (and thus the # ASDF standard document) are valid. buff = helpers.yaml_to_asdf("example: " + self.example.example.strip(), version=self.example.version) - ff = AsdfFile( - uri=pathlib.Path(self.filename).expanduser().absolute().as_uri(), - ignore_unrecognized_tag=self.ignore_unrecognized_tag, - ) - - # Fake an external file - ff2 = AsdfFile({"data": np.empty((1024 * 1024 * 8), dtype=np.uint8)}) - - ff._external_asdf_by_uri[ - (pathlib.Path(self.filename).expanduser().absolute().parent / "external.asdf").as_uri() - ] = ff2 - + # add a few blocks + # make a block and write it out to the above generated buffer/example wb = _block.writer.WriteBlock(np.zeros(1024 * 1024 * 8, dtype=np.uint8)) with generic_io.get_file(buff, mode="rw") as f: f.seek(0, 2) _block.writer.write_blocks(f, [wb, wb], streamed_block=wb) f.seek(0) - try: - ff._open_impl(ff, buff, mode="rw") - except Exception: - print(f"Example: {self.example.description}\n From file: {self.filename}") - raise + af = asdf.loads(buff.getvalue()) - # Just test we can write it out. A roundtrip test - # wouldn't always yield the correct result, so those have - # to be covered by "real" unit tests. if b"external.asdf" not in buff.getvalue(): - buff = io.BytesIO() - ff.write_to(buff) + asdf.dumps(af) def reportinfo(self): return self.fspath, 0, "" From ac2c7f7ae2cba9be4b28fca12fd89156afc3d8f4 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 15:15:45 -0400 Subject: [PATCH 10/11] further simplify example checking --- pytest_asdf/plugin.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/pytest_asdf/plugin.py b/pytest_asdf/plugin.py index 7cbb51c82..c9f47c127 100644 --- a/pytest_asdf/plugin.py +++ b/pytest_asdf/plugin.py @@ -2,7 +2,6 @@ import pathlib from dataclasses import dataclass -import numpy as np import pytest import yaml @@ -199,25 +198,13 @@ def from_parent( def runtest(self): import asdf - from asdf import _block, generic_io - from asdf.testing import helpers + from asdf.testing.helpers import yaml_to_asdf - # Make sure that the examples in the schema files (and thus the - # ASDF standard document) are valid. - buff = helpers.yaml_to_asdf("example: " + self.example.example.strip(), version=self.example.version) - - # add a few blocks - # make a block and write it out to the above generated buffer/example - wb = _block.writer.WriteBlock(np.zeros(1024 * 1024 * 8, dtype=np.uint8)) - with generic_io.get_file(buff, mode="rw") as f: - f.seek(0, 2) - _block.writer.write_blocks(f, [wb, wb], streamed_block=wb) - f.seek(0) - - af = asdf.loads(buff.getvalue()) - - if b"external.asdf" not in buff.getvalue(): - asdf.dumps(af) + # check the example is valid + buff = yaml_to_asdf("example: " + self.example.example.strip(), version=self.example.version) + tagged_tree = asdf.util.load_yaml(buff, tagged=True) + instance = asdf.AsdfFile(version=self.example.version) + asdf.schema.validate(tagged_tree, instance, reading=True) def reportinfo(self): return self.fspath, 0, "" From 3bb0535345aa5ce6e789283d81b5362f4e3f2655 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 12 Aug 2025 15:34:09 -0400 Subject: [PATCH 11/11] add changelog --- changes/1955.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/1955.bugfix.rst diff --git a/changes/1955.bugfix.rst b/changes/1955.bugfix.rst new file mode 100644 index 000000000..c28e87ebc --- /dev/null +++ b/changes/1955.bugfix.rst @@ -0,0 +1 @@ +Fix pytest-asdf example testing to validate example without requiring round tripping.