diff --git a/asdf/_asdf.py b/asdf/_asdf.py index bd3646af9..ce05825d4 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -2,7 +2,6 @@ import datetime import io import os -import pathlib import time import warnings import weakref @@ -11,9 +10,8 @@ from . import _compression as mcompression from . import _display as display +from . import _io, constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil from . import _node_info as node_info -from . import _version as version -from . import constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil from ._block.manager import Manager as BlockManager from ._helpers import validate_version from .config import config_context, get_config @@ -21,8 +19,6 @@ AsdfManifestURIMismatchWarning, AsdfPackageVersionWarning, AsdfWarning, - DelimiterNotFoundError, - ValidationError, ) from .extension import Extension, ExtensionProxy, _serialization_context, get_cached_extension_manager from .search import AsdfSearchResult @@ -30,21 +26,6 @@ from .util import NotSet -def _get_asdf_library_info(): - """ - Get information about asdf to include in the asdf_library entry - in the Tree. - """ - return Software( - { - "name": "asdf", - "version": version.version, - "homepage": "http://github.com/asdf-format/asdf", - "author": "The ASDF Developers", - }, - ) - - class AsdfFile: """ The main class that represents an ASDF file object. @@ -104,8 +85,6 @@ def __init__( files follow custom conventions beyond those enforced by the standard. """ - self._fname = "" - # make a new AsdfVersion instance here so files don't share the same instance self._file_format_version = versioning.AsdfVersion(versioning._FILE_FORMAT_VERSION) @@ -270,7 +249,7 @@ def _check_extensions(self, tree, strict=False): installed = ext break - filename = f"'{self._fname}' " if self._fname else "" + filename = f"'{self._fd._uri}'" if (self._fd is not None and self._fd._uri) else "" if extension.extension_uri is not None: extension_description = f"URI '{extension.extension_uri}'" else: @@ -731,215 +710,6 @@ def get_array_save_base(self, arr): """ return self._blocks._get_array_save_base(arr) - @classmethod - def _parse_header_line(cls, line): - """ - Parses the header line in a ASDF file to obtain the ASDF version. - """ - parts = line.split() - if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: - msg = "Does not appear to be a ASDF file." - raise ValueError(msg) - - try: - version = versioning.AsdfVersion(parts[1].decode("ascii")) - except ValueError as err: - msg = f"Unparsable version in ASDF file: {parts[1]}" - raise ValueError(msg) from err - - if version != versioning._FILE_FORMAT_VERSION: - msg = f"Unsupported ASDF file format version {version}" - raise ValueError(msg) - - return version - - @classmethod - def _read_comment_section(cls, fd): - """ - Reads the comment section, between the header line and the - Tree or first block. - """ - content = fd.read_until( - b"(%YAML)|(" + constants.BLOCK_MAGIC + b")", - 5, - "start of content", - include=False, - exception=False, - ) - - comments = [] - - lines = content.splitlines() - for line in lines: - if not line.startswith(b"#"): - msg = "Invalid content between header and tree" - raise ValueError(msg) - comments.append(line[1:].strip()) - - return comments - - @classmethod - def _find_asdf_version_in_comments(cls, comments): - for comment in comments: - parts = comment.split() - if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: - try: - version = versioning.AsdfVersion(parts[1].decode("ascii")) - except ValueError: - pass - else: - return version - - return None - - @classmethod - def _open_asdf( - cls, - self, - fd, - validate_checksums=False, - extensions=None, - lazy_tree=NotSet, - _get_yaml_content=False, - _force_raw_types=False, - strict_extension_check=False, - ignore_missing_extensions=False, - ): - """Attempt to populate AsdfFile data from file-like object""" - - if strict_extension_check and ignore_missing_extensions: - msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options" - raise ValueError(msg) - - with config_context() as cfg: - # validate_checksums (unlike memmap and lazy_load) is provided - # here instead of in __init__ - self._blocks._validate_checksums = validate_checksums - self._mode = fd.mode - self._fd = fd - if self._fd._uri: - self._blocks._uri = self._fd._uri - # The filename is currently only used for tracing warning information - self._fname = self._fd._uri if self._fd._uri else "" - try: - header_line = fd.read_until(b"\r?\n", 2, "newline", include=True) - except DelimiterNotFoundError as e: - msg = "Does not appear to be a ASDF file." - raise ValueError(msg) from e - self._file_format_version = cls._parse_header_line(header_line) - - self._comments = cls._read_comment_section(fd) - - version = cls._find_asdf_version_in_comments(self._comments) - if version is not None: - self.version = version - else: - # If no ASDF_STANDARD comment is found... - self.version = versioning.AsdfVersion("1.0.0") - - # Now that version is set for good, we can add any additional - # extensions, which may have narrow ASDF Standard version - # requirements. - if extensions: - self.extensions = extensions - - yaml_token = fd.read(4) - tree = None - if yaml_token == b"%YAM": - reader = fd.reader_until( - constants.YAML_END_MARKER_REGEX, - 7, - "End of YAML marker", - include=True, - initial_content=yaml_token, - ) - - # For testing: just return the raw YAML content - if _get_yaml_content: - yaml_content = reader.read() - fd.close() - return yaml_content - - # We parse the YAML content into basic data structures - # now, but we don't do anything special with it until - # after the blocks have been read - tree = yamlutil.load_tree(reader) - self._blocks.read(fd) - elif yaml_token == constants.BLOCK_MAGIC: - # this file has only blocks and we're already read the first block magic - self._blocks.read(fd, after_magic=True) - elif yaml_token != b"": - msg = "ASDF file appears to contain garbage after header." - raise OSError(msg) - - if tree is None: - # At this point the tree should be tagged, but we want it to be - # tagged with the core/asdf version appropriate to this file's - # ASDF Standard version. We're using custom_tree_to_tagged_tree - # to select the correct tag for us. - tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), self) - - if self.version <= versioning.FILL_DEFAULTS_MAX_VERSION and get_config().legacy_fill_schema_defaults: - schema.fill_defaults(tree, self, reading=True) - - if get_config().validate_on_read: - try: - self._validate(tree, reading=True) - except ValidationError: - self.close() - raise - - if lazy_tree is NotSet: - lazy_tree = cfg.lazy_tree - if lazy_tree and not _force_raw_types: - obj = AsdfObject() - obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(self)) - tree = obj - else: - tree = yamlutil.tagged_tree_to_custom_tree(tree, self, _force_raw_types) - - if not (ignore_missing_extensions or _force_raw_types): - self._check_extensions(tree, strict=strict_extension_check) - - self._tree = tree - - return self - - @classmethod - def _open_impl( - cls, - self, - fd, - uri=None, - mode="r", - validate_checksums=False, - extensions=None, - lazy_tree=NotSet, - _get_yaml_content=False, - _force_raw_types=False, - strict_extension_check=False, - ignore_missing_extensions=False, - ): - """Attempt to open file-like object as an AsdfFile""" - close_on_fail = isinstance(fd, (str, pathlib.Path)) - generic_file = generic_io.get_file(fd, mode=mode, uri=uri) - try: - return cls._open_asdf( - self, - generic_file, - validate_checksums=validate_checksums, - extensions=extensions, - lazy_tree=lazy_tree, - _get_yaml_content=_get_yaml_content, - _force_raw_types=_force_raw_types, - strict_extension_check=strict_extension_check, - ignore_missing_extensions=ignore_missing_extensions, - ) - except Exception: - if close_on_fail: - generic_file.close() - raise - def _write_tree(self, tree, fd, pad_blocks): fd.write(constants.ASDF_MAGIC) fd.write(b" ") @@ -991,26 +761,16 @@ def _tree_finalizer(tagged_tree): padding = util.calculate_padding(fd.tell(), pad_blocks, fd.block_size) fd.fast_forward(padding) - def _pre_write(self, fd): - pass - - def _post_write(self, fd): - pass - def _serial_write(self, fd, pad_blocks, include_block_index): with self._blocks.write_context(fd): - self._pre_write(fd) - try: - # prep a tree for a writing - tree = copy.copy(self._tree) - tree["asdf_library"] = _get_asdf_library_info() - if "history" in self._tree: - tree["history"] = copy.deepcopy(self._tree["history"]) - - self._write_tree(tree, fd, pad_blocks) - self._blocks.write(pad_blocks, include_block_index) - finally: - self._post_write(fd) + # prep a tree for a writing + tree = copy.copy(self._tree) + tree["asdf_library"] = _io.get_asdf_library_info() + if "history" in self._tree: + tree["history"] = copy.deepcopy(self._tree["history"]) + + self._write_tree(tree, fd, pad_blocks) + self._blocks.write(pad_blocks, include_block_index) def update( self, @@ -1124,35 +884,30 @@ def rewrite(): rewrite() return - self._pre_write(fd) - try: - self._tree["asdf_library"] = _get_asdf_library_info() + self._tree["asdf_library"] = _io.get_asdf_library_info() - # prepare block manager for writing - with self._blocks.write_context(self._fd, copy_options=False): - # write out tree to temporary buffer - tree_fd = generic_io.get_file(io.BytesIO(), mode="rw") - self._write_tree(self._tree, tree_fd, False) - new_tree_size = tree_fd.tell() + # prepare block manager for writing + with self._blocks.write_context(self._fd, copy_options=False): + # write out tree to temporary buffer + tree_fd = generic_io.get_file(io.BytesIO(), mode="rw") + self._write_tree(self._tree, tree_fd, False) + new_tree_size = tree_fd.tell() - # update blocks - self._blocks.update(new_tree_size, pad_blocks, include_block_index) - end_of_file = self._fd.tell() + # update blocks + self._blocks.update(new_tree_size, pad_blocks, include_block_index) + end_of_file = self._fd.tell() - # now write the tree - self._fd.seek(0) - tree_fd.seek(0) - self._fd.write(tree_fd.read()) - self._fd.flush() + # now write the tree + self._fd.seek(0) + tree_fd.seek(0) + self._fd.write(tree_fd.read()) + self._fd.flush() - # close memmap to trigger arrays to reload themselves - self._fd.seek(end_of_file) - self._fd.truncate() - if self._fd.can_memmap(): - self._fd.close_memmap() - - finally: - self._post_write(fd) + # close memmap to trigger arrays to reload themselves + self._fd.seek(end_of_file) + self._fd.truncate() + if self._fd.can_memmap(): + self._fd.close_memmap() def write_to( self, @@ -1518,7 +1273,6 @@ def open_asdf( custom_schema=None, strict_extension_check=False, ignore_missing_extensions=False, - _get_yaml_content=False, ): """ Open an existing ASDF file. @@ -1596,36 +1350,61 @@ def open_asdf( The new AsdfFile object. """ - if mode is not None and mode not in ["r", "rw"]: - msg = f"Unrecognized asdf mode '{mode}'. Must be either 'r' or 'rw'" + if strict_extension_check and ignore_missing_extensions: + msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options" raise ValueError(msg) - if mode is None: - if isinstance(fd, io.IOBase): - mode = "rw" if fd.writable() else "r" - elif isinstance(fd, generic_io.GenericFile): - mode = fd.mode - else: - # This is the safest assumption for the default fallback - mode = "r" + if lazy_tree is NotSet: + lazy_tree = get_config().lazy_tree instance = AsdfFile( ignore_unrecognized_tag=ignore_unrecognized_tag, memmap=memmap, lazy_load=lazy_load, custom_schema=custom_schema, - ) - - return AsdfFile._open_impl( - instance, - fd, - uri=uri, - mode=mode, - validate_checksums=validate_checksums, extensions=extensions, - lazy_tree=lazy_tree, - _get_yaml_content=_get_yaml_content, - _force_raw_types=_force_raw_types, - strict_extension_check=strict_extension_check, - ignore_missing_extensions=ignore_missing_extensions, ) + + with _io.maybe_close(fd, mode, uri) as generic_file: + file_format_version, comments, tree, blocks = _io.open_asdf( + generic_file, uri, mode, lazy_load, memmap, validate_checksums + ) + + instance._blocks = blocks + instance._mode = generic_file.mode + instance._fd = generic_file + instance._file_format_version = file_format_version + instance._comments = comments + + instance.version = _io.find_asdf_version_in_comments(comments, versioning.AsdfVersion("1.0.0")) + + if tree is None: + # At this point the tree should be tagged, but we want it to be + # tagged with the core/asdf version appropriate to this file's + # ASDF Standard version. We're using custom_tree_to_tagged_tree + # to select the correct tag for us. + tree = yamlutil.custom_tree_to_tagged_tree(AsdfObject(), instance) + + # possibly fill defaults + if instance.version <= versioning.FILL_DEFAULTS_MAX_VERSION and get_config().legacy_fill_schema_defaults: + schema.fill_defaults(tree, instance, reading=True) + + # validate + if get_config().validate_on_read: + instance._validate(tree, reading=True) + + # lazy tree? + if lazy_tree and not _force_raw_types: + obj = AsdfObject() + obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(instance)) + tree = obj + else: + tree = yamlutil.tagged_tree_to_custom_tree(tree, instance, _force_raw_types) + + # check extensions + if not (ignore_missing_extensions or _force_raw_types): + instance._check_extensions(tree, strict=strict_extension_check) + + instance._tree = tree + + return instance diff --git a/asdf/_commands/edit.py b/asdf/_commands/edit.py index 04e64336e..bcf2c3fd5 100644 --- a/asdf/_commands/edit.py +++ b/asdf/_commands/edit.py @@ -15,7 +15,7 @@ import yaml -from asdf import constants, generic_io, schema, util +from asdf import _io, constants, generic_io, schema, util from asdf._asdf import AsdfFile from asdf._block import io as bio from asdf._block.exceptions import BlockIndexError @@ -337,8 +337,8 @@ def parse_asdf_version(content): asdf.versioning.AsdfVersion ASDF Standard version """ - comments = AsdfFile._read_comment_section(generic_io.get_file(io.BytesIO(content))) - return AsdfFile._find_asdf_version_in_comments(comments) + comments = _io.read_comment_section(generic_io.get_file(io.BytesIO(content))) + return _io.find_asdf_version_in_comments(comments) def parse_yaml_version(content): diff --git a/asdf/_io.py b/asdf/_io.py new file mode 100644 index 000000000..82099efda --- /dev/null +++ b/asdf/_io.py @@ -0,0 +1,155 @@ +""" +Low-level input/output routines for AsdfFile instances +""" + +import contextlib +import io +import pathlib + +from . import _version, constants, generic_io, versioning, yamlutil +from ._block.manager import Manager as BlockManager +from .exceptions import DelimiterNotFoundError +from .tags.core import Software + + +def get_asdf_library_info(): + """ + Get information about asdf to include in the asdf_library entry + in the Tree. + """ + return Software( + { + "name": "asdf", + "version": _version.version, + "homepage": "http://github.com/asdf-format/asdf", + "author": "The ASDF Developers", + }, + ) + + +def parse_header_line(line): + """ + Parses the header line in a ASDF file to obtain the ASDF version. + """ + parts = line.split() + if len(parts) != 2 or parts[0] != constants.ASDF_MAGIC: + msg = "Does not appear to be a ASDF file." + raise ValueError(msg) + + try: + version = versioning.AsdfVersion(parts[1].decode("ascii")) + except ValueError as err: + msg = f"Unparsable version in ASDF file: {parts[1]}" + raise ValueError(msg) from err + + if version != versioning._FILE_FORMAT_VERSION: + msg = f"Unsupported ASDF file format version {version}" + raise ValueError(msg) + + return version + + +def read_header_line(gf): + try: + header_line = gf.read_until(b"\r?\n", 2, "newline", include=True) + except DelimiterNotFoundError as e: + msg = "Does not appear to be a ASDF file." + raise ValueError(msg) from e + + return parse_header_line(header_line) + + +def read_comment_section(fd): + """ + Reads the comment section, between the header line and the + Tree or first block. + """ + content = fd.read_until( + b"(%YAML)|(" + constants.BLOCK_MAGIC + b")", + 5, + "start of content", + include=False, + exception=False, + ) + + comments = [] + + lines = content.splitlines() + for line in lines: + if not line.startswith(b"#"): + msg = "Invalid content between header and tree" + raise ValueError(msg) + comments.append(line[1:].strip()) + + return comments + + +def find_asdf_version_in_comments(comments, default=None): + for comment in comments: + parts = comment.split() + if len(parts) == 2 and parts[0] == constants.ASDF_STANDARD_COMMENT: + try: + version = versioning.AsdfVersion(parts[1].decode("ascii")) + except ValueError: + pass + else: + return version + + return default + + +@contextlib.contextmanager +def maybe_close(fd, mode=None, uri=None): + if mode not in [None, "r", "rw"]: + msg = f"Unrecognized asdf mode '{mode}'. Must be either 'r' or 'rw'" + raise ValueError(msg) + + if isinstance(fd, (str, pathlib.Path)): + mode = mode or "r" + generic_file = generic_io.get_file(fd, mode=mode, uri=uri) + try: + yield generic_file + except Exception as e: + generic_file.close() + raise e + elif isinstance(fd, generic_io.GenericFile): + yield fd + else: + if mode is None: + # infer from fd + if isinstance(fd, io.IOBase): + mode = "rw" if fd.writable() else "r" + else: + mode = "r" + yield generic_io.get_file(fd, mode=mode, uri=uri) + + +def read_tree_and_blocks(gf, lazy_load, memmap, validate_checksums): + token = gf.read(4) + tree = None + blocks = BlockManager(uri=gf.uri, lazy_load=lazy_load, memmap=memmap, validate_checksums=validate_checksums) + if token == b"%YAM": + reader = gf.reader_until( + constants.YAML_END_MARKER_REGEX, + 7, + "End of YAML marker", + include=True, + initial_content=token, + ) + tree = yamlutil.load_tree(reader) + blocks.read(gf, after_magic=False) + elif token == constants.BLOCK_MAGIC: + blocks.read(gf, after_magic=True) + elif token != b"": + msg = "ASDF file appears to contain garbage after header." + raise OSError(msg) + + return tree, blocks + + +def open_asdf(fd, uri=None, mode=None, lazy_load=True, memmap=False, validate_checksums=False): + with maybe_close(fd, mode, uri) as generic_file: + file_format_version = read_header_line(generic_file) + comments = read_comment_section(generic_file) + tree, blocks = read_tree_and_blocks(generic_file, lazy_load, memmap, validate_checksums) + return file_format_version, comments, tree, blocks diff --git a/asdf/_tests/test_api.py b/asdf/_tests/test_api.py index 6c849db0c..d5ef56701 100644 --- a/asdf/_tests/test_api.py +++ b/asdf/_tests/test_api.py @@ -302,8 +302,6 @@ class FooExtension: config.add_extension(proxy) af = asdf.AsdfFile() - af._fname = "test.asdf" - tree = { "history": { "extensions": [ @@ -316,10 +314,10 @@ class FooExtension: } if warns: - with pytest.warns(AsdfPackageVersionWarning, match=r"File 'test.asdf' was created with"): + with pytest.warns(AsdfPackageVersionWarning, match=r"File was created with"): af._check_extensions(tree) - with pytest.raises(RuntimeError, match=r"^File 'test.asdf' was created with"): + with pytest.raises(RuntimeError, match=r"^File was created with"): af._check_extensions(tree, strict=True) else: @@ -351,8 +349,6 @@ class FooExtension: config.add_resource_mapping(mapping) af = asdf.AsdfFile() - af._fname = "test.asdf" - tree = { "history": { "extensions": [ @@ -366,10 +362,10 @@ class FooExtension: } if warns: - with pytest.warns(AsdfPackageVersionWarning, match=r"File 'test.asdf' was created with"): + with pytest.warns(AsdfPackageVersionWarning, match=r"File was created with"): af._check_extensions(tree) - with pytest.raises(RuntimeError, match=r"^File 'test.asdf' was created with"): + with pytest.raises(RuntimeError, match=r"^File was created with"): af._check_extensions(tree, strict=True) else: diff --git a/changes/1955.bugfix.rst b/changes/1955.bugfix.rst new file mode 100644 index 000000000..c28e87ebc --- /dev/null +++ b/changes/1955.bugfix.rst @@ -0,0 +1 @@ +Fix pytest-asdf example testing to validate example without requiring round tripping. diff --git a/pytest_asdf/plugin.py b/pytest_asdf/plugin.py index d1f5b5dee..c9f47c127 100644 --- a/pytest_asdf/plugin.py +++ b/pytest_asdf/plugin.py @@ -1,9 +1,7 @@ -import io import os import pathlib from dataclasses import dataclass -import numpy as np import pytest import yaml @@ -199,43 +197,14 @@ def from_parent( return result def runtest(self): - from asdf import AsdfFile, _block, generic_io - from asdf.testing import helpers - - # Make sure that the examples in the schema files (and thus the - # ASDF standard document) are valid. - buff = helpers.yaml_to_asdf("example: " + self.example.example.strip(), version=self.example.version) - - ff = AsdfFile( - uri=pathlib.Path(self.filename).expanduser().absolute().as_uri(), - ignore_unrecognized_tag=self.ignore_unrecognized_tag, - ) - - # Fake an external file - ff2 = AsdfFile({"data": np.empty((1024 * 1024 * 8), dtype=np.uint8)}) - - ff._external_asdf_by_uri[ - (pathlib.Path(self.filename).expanduser().absolute().parent / "external.asdf").as_uri() - ] = ff2 - - wb = _block.writer.WriteBlock(np.zeros(1024 * 1024 * 8, dtype=np.uint8)) - with generic_io.get_file(buff, mode="rw") as f: - f.seek(0, 2) - _block.writer.write_blocks(f, [wb, wb], streamed_block=wb) - f.seek(0) - - try: - ff._open_impl(ff, buff, mode="rw") - except Exception: - print(f"Example: {self.example.description}\n From file: {self.filename}") - raise - - # Just test we can write it out. A roundtrip test - # wouldn't always yield the correct result, so those have - # to be covered by "real" unit tests. - if b"external.asdf" not in buff.getvalue(): - buff = io.BytesIO() - ff.write_to(buff) + import asdf + from asdf.testing.helpers import yaml_to_asdf + + # check the example is valid + buff = yaml_to_asdf("example: " + self.example.example.strip(), version=self.example.version) + tagged_tree = asdf.util.load_yaml(buff, tagged=True) + instance = asdf.AsdfFile(version=self.example.version) + asdf.schema.validate(tagged_tree, instance, reading=True) def reportinfo(self): return self.fspath, 0, ""