diff --git a/README.rst b/README.rst index 8f15323..855874a 100644 --- a/README.rst +++ b/README.rst @@ -89,6 +89,37 @@ Usage ... print(contents) {'type': 'string', 'default': '1.0'} +Identity and equality +##################### + +Two ``SchemaPath`` instances are equal if they have the same ``parts`` +*and* point to the same ``SchemaAccessor``. ``SchemaAccessor`` identity +is per-resource-handle: same wrapped dict (by reference), same +``base_uri``, and same internal resolver instance. In practice: + +* Paths derived from the *same* accessor compare equal as expected: + + .. code-block:: python + + >>> accessor = SchemaAccessor.from_schema(d) + >>> SchemaPath(accessor) / "properties" == SchemaPath(accessor) / "properties" + True + +* Paths from *separate* ``from_dict`` or ``from_schema`` calls do **not** + compare equal even with identical arguments, because each call builds + its own accessor: + + .. code-block:: python + + >>> SchemaPath.from_dict(d) == SchemaPath.from_dict(d) + False + +* ``SchemaAccessor`` is hashable, so accessors and paths can be used as + set members and dict keys. + +This is also why the "build one accessor, reuse it" pattern below +matters: it is both a caching optimisation and the contract you need +for path equality to behave the way you expect. Resolved cache ############## @@ -117,8 +148,7 @@ it. .. code-block:: python - >>> from jsonschema_path import SchemaPath - >>> from jsonschema_path.accessors import SchemaAccessor + >>> from jsonschema_path import SchemaAccessor, SchemaPath >>> # Construct the accessor once, with caching enabled. >>> accessor = SchemaAccessor.from_schema(d, resolved_cache_maxsize=128) diff --git a/jsonschema_path/accessors.py b/jsonschema_path/accessors.py index 78c3d54..19567a0 100644 --- a/jsonschema_path/accessors.py +++ b/jsonschema_path/accessors.py @@ -28,6 +28,22 @@ class SchemaAccessor(LookupAccessor): + """Resource handle binding a schema document to its resolver. + + Identity contract: a `SchemaAccessor` is its own identity token, + discriminated by the wrapped node (by reference) and the + `_path_resolver` instance (by reference). Both are set in + `__init__` and never reassigned, so equality and hash are stable + for the accessor's lifetime even though the inner registry + evolves as `$ref`s are resolved. + + Consequence: two `from_schema(doc, ...)` calls produce non-equal + accessors even with identical arguments, because each call builds + its own `_path_resolver`. Build one accessor per schema document + and reuse it across all derived `SchemaPath`s — see "Identity and + equality" and "Recommended usage" in the README. + """ + def __init__( self, schema: Schema, @@ -46,6 +62,31 @@ def __init__( maxsize=resolved_cache_maxsize ) + def __eq__(self, other: object) -> Any: + if not isinstance(other, SchemaAccessor): + return NotImplemented + # See the class docstring for the identity contract. Both + # discriminators are reference-stable: `_node` is the + # constructor argument and `_path_resolver` is constructed + # once in `__init__` and never reassigned (only its inner + # `resolver` field is swapped when the registry evolves). + return ( + type(self) is type(other) + and self._node is other._node + and self._path_resolver is other._path_resolver + ) + + def __hash__(self) -> int: + # Reference-stable inputs only — does not depend on the schema + # dict being hashable or on the mutating registry. + return hash( + ( + type(self), + id(self._node), + id(self._path_resolver), + ) + ) + @classmethod def from_schema( cls, diff --git a/poetry.lock b/poetry.lock index 3670741..f068b9a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -763,14 +763,14 @@ testing = ["docopt", "pytest"] [[package]] name = "pathable" -version = "0.5.0" +version = "0.6.0" description = "Object-oriented paths" optional = false python-versions = "<4.0,>=3.10" groups = ["main"] files = [ - {file = "pathable-0.5.0-py3-none-any.whl", hash = "sha256:646e3d09491a6351a0c82632a09c02cdf70a252e73196b36d8a15ba0a114f0a6"}, - {file = "pathable-0.5.0.tar.gz", hash = "sha256:d81938348a1cacb525e7c75166270644782c0fb9c8cecc16be033e71427e0ef1"}, + {file = "pathable-0.6.0-py3-none-any.whl", hash = "sha256:82c4ca6c98c502ad12e0d4e9779b6210afee93c38990988c8c5d1b49bdcdf566"}, + {file = "pathable-0.6.0.tar.gz", hash = "sha256:6404b8b82aef5ff0fd478934137128b99b12212ba35afdde5525ca4f8388ea58"}, ] [[package]] @@ -1596,4 +1596,4 @@ requests = ["requests"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<4.0.0" -content-hash = "362139b13eba376dc9852d5f21f7ffeece9e918c2773cbe04feb78780dcdf1d4" +content-hash = "c5d97114b2c1e7bbf99c37320647a097c7de60b803a48d45bc9fb180522a871a" diff --git a/pyproject.toml b/pyproject.toml index bc33e13..5154571 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ classifiers = [ [tool.poetry.dependencies] attrs = ">=22.2.0" -pathable = "^0.5.0" +pathable = "^0.6.0" python = ">=3.10,<4.0.0" PyYAML = ">=5.1" requests = {version = "^2.31.0", optional = true} diff --git a/tests/unit/test_accessors.py b/tests/unit/test_accessors.py index e714264..995e95c 100644 --- a/tests/unit/test_accessors.py +++ b/tests/unit/test_accessors.py @@ -2,9 +2,14 @@ from unittest.mock import patch import pytest +from referencing import Registry +from referencing.jsonschema import DRAFT202012 +from jsonschema_path import SchemaPath from jsonschema_path.accessors import SchemaAccessor +from jsonschema_path.handlers import default_handlers from jsonschema_path.nodes import SchemaNode +from jsonschema_path.retrievers import SchemaRetriever class TestSchemaAccessorOpen: @@ -387,3 +392,131 @@ def test_prefix_cache_rebound_avoids_redundant_retrieval(self): calls = sorted(c.args[0] for c in retrieve.call_args_list) assert calls == ["x://later", "x://one", "x://primer"] + + +class TestSchemaAccessorIdentity: + """Locks in the per-resource-handle identity model. + + SchemaAccessor identity is the accessor instance itself, with + discrimination on `_node` (by reference) and `_path_resolver` + (by reference) — not a value tuple of its inputs. This forces the + recommended lifecycle: construct one SchemaAccessor per schema + document and reuse it across all derived SchemaPaths. + """ + + def test_same_instance_compares_equal_and_hashes_equal(self): + accessor = SchemaAccessor.from_schema({"a": 1}) + + assert accessor == accessor + assert hash(accessor) == hash(accessor) + + def test_accessor_is_hashable(self): + accessor = SchemaAccessor.from_schema({"a": 1}) + + # Would raise TypeError before this PR (defining __eq__ + # without __hash__ silently makes instances unhashable). + assert hash(accessor) == hash(accessor) + {accessor} # constructable as a set element + + def test_distinct_from_schema_calls_not_equal(self): + # Each from_schema() call builds its own _path_resolver, so + # the resulting accessors are distinct resource handles even + # with identical arguments. This is the "reuse the accessor" + # assertion: callers must hold onto the accessor instance, + # not reconstruct it on demand. + doc = {"a": 1} + + acc1 = SchemaAccessor.from_schema(doc) + acc2 = SchemaAccessor.from_schema(doc) + + assert acc1 != acc2 + # Hashes are allowed to collide but are very unlikely to here. + + def test_distinct_dicts_not_equal(self): + # Value-equal but distinct dict objects are distinct resources: + # `_node is other._node` is false. Included for clarity even + # though the `_path_resolver` check would also catch it. + acc1 = SchemaAccessor.from_schema({"a": 1}) + acc2 = SchemaAccessor.from_schema({"a": 1}) + + assert acc1 != acc2 + + def test_different_base_uri_not_equal(self): + # Same schema dict by reference, different base_uri → different + # resources, because $ref resolution differs. + doc = {"a": 1} + + acc1 = SchemaAccessor.from_schema(doc, base_uri="https://a/") + acc2 = SchemaAccessor.from_schema(doc, base_uri="https://b/") + + assert acc1 != acc2 + + def test_path_equality_follows_accessor_equality(self): + accessor = SchemaAccessor.from_schema({"a": {"b": 1}}) + + p1 = SchemaPath(accessor) / "a" + p2 = SchemaPath(accessor) / "a" + + # Same accessor instance + same parts → equal paths and + # equal hashes (delegated to pathable's AccessorPath identity). + assert p1 == p2 + assert hash(p1) == hash(p2) + + def test_path_inequality_across_distinct_accessors(self): + doc = {"a": {"b": 1}} + acc1 = SchemaAccessor.from_schema(doc) + acc2 = SchemaAccessor.from_schema(doc) + + p1 = SchemaPath(acc1) / "a" + p2 = SchemaPath(acc2) / "a" + + # Distinct accessor instances → distinct resources → unequal + # paths even though parts and underlying dict reference match. + assert p1 != p2 + + def test_resolved_cache_shared_when_accessor_reused(self): + # Two paths over the same accessor hit the same resolved cache. + # If a future refactor reintroduces per-path caching, this test + # fails because the second .get_resolved would return a fresh + # object instead of the cached one. + accessor = SchemaAccessor.from_schema( + {"a": {"b": 1}}, + resolved_cache_maxsize=8, + ) + + p1 = SchemaPath(accessor) / "a" / "b" + p2 = SchemaPath(accessor) / "a" / "b" + + with p1.resolve() as r1: + with p2.resolve() as r2: + assert r1 is r2 + + def test_hash_stable_across_registry_evolution(self): + # The inner `_path_resolver.resolver` is reassigned when the + # registry evolves (see CachedPathResolver._sync_registry). + # The accessor's hash must NOT read through that swappable + # field; otherwise set/dict membership corrupts mid-life. + # This is a tripwire: if a future refactor reintroduces a + # derived field (e.g. base_uri via the resolver) into + # __hash__, this test fails. + + accessor = SchemaAccessor.from_schema({"a": 1}) + + h_before = hash(accessor) + bucket = {accessor} + + # Force a registry swap by handing the path resolver a fresh + # registry instance. `_sync_registry` will rebind + # `_path_resolver.resolver`, but `_path_resolver` itself stays + # the same instance. + path_resolver = accessor._path_resolver + retriever = SchemaRetriever(default_handlers, DRAFT202012) + new_registry: Registry = Registry(retrieve=retriever) # type: ignore + new_registry = new_registry.with_resource( + "", DRAFT202012.create_resource({"a": 1}) + ) + changed = path_resolver._sync_registry(new_registry) + + assert changed, "registry swap precondition not met" + assert hash(accessor) == h_before + assert accessor in bucket