Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,37 @@ Usage
... print(contents)
{'type': 'string', 'default': '1.0'}

Identity and equality
#####################

Two ``SchemaPath`` instances are equal if they have the same ``parts``
*and* point to the same ``SchemaAccessor``. ``SchemaAccessor`` identity
is per-resource-handle: same wrapped dict (by reference), same
``base_uri``, and same internal resolver instance. In practice:

* Paths derived from the *same* accessor compare equal as expected:

.. code-block:: python

>>> accessor = SchemaAccessor.from_schema(d)
>>> SchemaPath(accessor) / "properties" == SchemaPath(accessor) / "properties"
True

* Paths from *separate* ``from_dict`` or ``from_schema`` calls do **not**
compare equal even with identical arguments, because each call builds
its own accessor:

.. code-block:: python

>>> SchemaPath.from_dict(d) == SchemaPath.from_dict(d)
False

* ``SchemaAccessor`` is hashable, so accessors and paths can be used as
set members and dict keys.

This is also why the "build one accessor, reuse it" pattern below
matters: it is both a caching optimisation and the contract you need
for path equality to behave the way you expect.

Resolved cache
##############
Expand Down Expand Up @@ -117,8 +148,7 @@ it.

.. code-block:: python

>>> from jsonschema_path import SchemaPath
>>> from jsonschema_path.accessors import SchemaAccessor
>>> from jsonschema_path import SchemaAccessor, SchemaPath

>>> # Construct the accessor once, with caching enabled.
>>> accessor = SchemaAccessor.from_schema(d, resolved_cache_maxsize=128)
Expand Down
41 changes: 41 additions & 0 deletions jsonschema_path/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,22 @@


class SchemaAccessor(LookupAccessor):
"""Resource handle binding a schema document to its resolver.

Identity contract: a `SchemaAccessor` is its own identity token,
discriminated by the wrapped node (by reference) and the
`_path_resolver` instance (by reference). Both are set in
`__init__` and never reassigned, so equality and hash are stable
for the accessor's lifetime even though the inner registry
evolves as `$ref`s are resolved.

Consequence: two `from_schema(doc, ...)` calls produce non-equal
accessors even with identical arguments, because each call builds
its own `_path_resolver`. Build one accessor per schema document
and reuse it across all derived `SchemaPath`s — see "Identity and
equality" and "Recommended usage" in the README.
"""

def __init__(
self,
schema: Schema,
Expand All @@ -46,6 +62,31 @@ def __init__(
maxsize=resolved_cache_maxsize
)

def __eq__(self, other: object) -> Any:
if not isinstance(other, SchemaAccessor):
return NotImplemented
# See the class docstring for the identity contract. Both
# discriminators are reference-stable: `_node` is the
# constructor argument and `_path_resolver` is constructed
# once in `__init__` and never reassigned (only its inner
# `resolver` field is swapped when the registry evolves).
return (
type(self) is type(other)
and self._node is other._node
and self._path_resolver is other._path_resolver
)

def __hash__(self) -> int:
# Reference-stable inputs only — does not depend on the schema
# dict being hashable or on the mutating registry.
return hash(
(
type(self),
id(self._node),
id(self._path_resolver),
)
)

@classmethod
def from_schema(
cls,
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ classifiers = [

[tool.poetry.dependencies]
attrs = ">=22.2.0"
pathable = "^0.5.0"
pathable = "^0.6.0"
python = ">=3.10,<4.0.0"
PyYAML = ">=5.1"
requests = {version = "^2.31.0", optional = true}
Expand Down
133 changes: 133 additions & 0 deletions tests/unit/test_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
from unittest.mock import patch

import pytest
from referencing import Registry
from referencing.jsonschema import DRAFT202012

from jsonschema_path import SchemaPath
from jsonschema_path.accessors import SchemaAccessor
from jsonschema_path.handlers import default_handlers
from jsonschema_path.nodes import SchemaNode
from jsonschema_path.retrievers import SchemaRetriever


class TestSchemaAccessorOpen:
Expand Down Expand Up @@ -387,3 +392,131 @@ def test_prefix_cache_rebound_avoids_redundant_retrieval(self):

calls = sorted(c.args[0] for c in retrieve.call_args_list)
assert calls == ["x://later", "x://one", "x://primer"]


class TestSchemaAccessorIdentity:
"""Locks in the per-resource-handle identity model.

SchemaAccessor identity is the accessor instance itself, with
discrimination on `_node` (by reference) and `_path_resolver`
(by reference) — not a value tuple of its inputs. This forces the
recommended lifecycle: construct one SchemaAccessor per schema
document and reuse it across all derived SchemaPaths.
"""

def test_same_instance_compares_equal_and_hashes_equal(self):
accessor = SchemaAccessor.from_schema({"a": 1})

assert accessor == accessor
assert hash(accessor) == hash(accessor)

def test_accessor_is_hashable(self):
accessor = SchemaAccessor.from_schema({"a": 1})

# Would raise TypeError before this PR (defining __eq__
# without __hash__ silently makes instances unhashable).
assert hash(accessor) == hash(accessor)
{accessor} # constructable as a set element

def test_distinct_from_schema_calls_not_equal(self):
# Each from_schema() call builds its own _path_resolver, so
# the resulting accessors are distinct resource handles even
# with identical arguments. This is the "reuse the accessor"
# assertion: callers must hold onto the accessor instance,
# not reconstruct it on demand.
doc = {"a": 1}

acc1 = SchemaAccessor.from_schema(doc)
acc2 = SchemaAccessor.from_schema(doc)

assert acc1 != acc2
# Hashes are allowed to collide but are very unlikely to here.

def test_distinct_dicts_not_equal(self):
# Value-equal but distinct dict objects are distinct resources:
# `_node is other._node` is false. Included for clarity even
# though the `_path_resolver` check would also catch it.
acc1 = SchemaAccessor.from_schema({"a": 1})
acc2 = SchemaAccessor.from_schema({"a": 1})

assert acc1 != acc2

def test_different_base_uri_not_equal(self):
# Same schema dict by reference, different base_uri → different
# resources, because $ref resolution differs.
doc = {"a": 1}

acc1 = SchemaAccessor.from_schema(doc, base_uri="https://a/")
acc2 = SchemaAccessor.from_schema(doc, base_uri="https://b/")

assert acc1 != acc2

def test_path_equality_follows_accessor_equality(self):
accessor = SchemaAccessor.from_schema({"a": {"b": 1}})

p1 = SchemaPath(accessor) / "a"
p2 = SchemaPath(accessor) / "a"

# Same accessor instance + same parts → equal paths and
# equal hashes (delegated to pathable's AccessorPath identity).
assert p1 == p2
assert hash(p1) == hash(p2)

def test_path_inequality_across_distinct_accessors(self):
doc = {"a": {"b": 1}}
acc1 = SchemaAccessor.from_schema(doc)
acc2 = SchemaAccessor.from_schema(doc)

p1 = SchemaPath(acc1) / "a"
p2 = SchemaPath(acc2) / "a"

# Distinct accessor instances → distinct resources → unequal
# paths even though parts and underlying dict reference match.
assert p1 != p2

def test_resolved_cache_shared_when_accessor_reused(self):
# Two paths over the same accessor hit the same resolved cache.
# If a future refactor reintroduces per-path caching, this test
# fails because the second .get_resolved would return a fresh
# object instead of the cached one.
accessor = SchemaAccessor.from_schema(
{"a": {"b": 1}},
resolved_cache_maxsize=8,
)

p1 = SchemaPath(accessor) / "a" / "b"
p2 = SchemaPath(accessor) / "a" / "b"

with p1.resolve() as r1:
with p2.resolve() as r2:
assert r1 is r2

def test_hash_stable_across_registry_evolution(self):
# The inner `_path_resolver.resolver` is reassigned when the
# registry evolves (see CachedPathResolver._sync_registry).
# The accessor's hash must NOT read through that swappable
# field; otherwise set/dict membership corrupts mid-life.
# This is a tripwire: if a future refactor reintroduces a
# derived field (e.g. base_uri via the resolver) into
# __hash__, this test fails.

accessor = SchemaAccessor.from_schema({"a": 1})

h_before = hash(accessor)
bucket = {accessor}

# Force a registry swap by handing the path resolver a fresh
# registry instance. `_sync_registry` will rebind
# `_path_resolver.resolver`, but `_path_resolver` itself stays
# the same instance.
path_resolver = accessor._path_resolver
retriever = SchemaRetriever(default_handlers, DRAFT202012)
new_registry: Registry = Registry(retrieve=retriever) # type: ignore
new_registry = new_registry.with_resource(
"", DRAFT202012.create_resource({"a": 1})
)
changed = path_resolver._sync_registry(new_registry)

assert changed, "registry swap precondition not met"
assert hash(accessor) == h_before
assert accessor in bucket
Loading