Skip to content

Commit c5bf1e8

Browse files
committed
Per-accessor identity
1 parent 146f750 commit c5bf1e8

5 files changed

Lines changed: 181 additions & 7 deletions

File tree

README.rst

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,37 @@ Usage
8989
... print(contents)
9090
{'type': 'string', 'default': '1.0'}
9191
92+
Identity and equality
93+
#####################
94+
95+
Two ``SchemaPath`` instances are equal if they have the same ``parts``
96+
*and* point to the same ``SchemaAccessor``. ``SchemaAccessor`` identity
97+
is per-resource-handle: same wrapped dict (by reference), same
98+
``base_uri``, and same internal resolver instance. In practice:
99+
100+
* Paths derived from the *same* accessor compare equal as expected:
101+
102+
.. code-block:: python
103+
104+
>>> accessor = SchemaAccessor.from_schema(d)
105+
>>> SchemaPath(accessor) / "properties" == SchemaPath(accessor) / "properties"
106+
True
107+
108+
* Paths built via *separate* ``SchemaPath.from_dict`` or ``SchemaAccessor.from_schema`` calls do **not**
109+
compare equal even with identical arguments, because each call builds
110+
its own accessor:
111+
112+
.. code-block:: python
113+
114+
>>> SchemaPath.from_dict(d) == SchemaPath.from_dict(d)
115+
False
116+
117+
* ``SchemaAccessor`` is hashable, so accessors and paths can be used as
118+
set members and dict keys.
119+
120+
This is also why the "build one accessor, reuse it" pattern below
121+
matters: it is both a caching optimisation and the contract you need
122+
for path equality to behave the way you expect.
92123

93124
Resolved cache
94125
##############
@@ -117,8 +148,7 @@ it.
117148

118149
.. code-block:: python
119150
120-
>>> from jsonschema_path import SchemaPath
121-
>>> from jsonschema_path.accessors import SchemaAccessor
151+
>>> from jsonschema_path import SchemaAccessor, SchemaPath
122152
123153
>>> # Construct the accessor once, with caching enabled.
124154
>>> accessor = SchemaAccessor.from_schema(d, resolved_cache_maxsize=128)

jsonschema_path/accessors.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,47 @@ def __init__(
4646
maxsize=resolved_cache_maxsize
4747
)
4848

49+
def __eq__(self, other: object) -> bool:
    """Return whether ``other`` is the same resource handle as ``self``.

    SchemaAccessor identity is the resource handle itself, not a value
    tuple. Two SchemaAccessors are equal only when they are of the same
    exact type, wrap the same dict (by reference), share the same
    ``base_uri``, and share the same ``_path_resolver`` instance.

    The ``_path_resolver`` encodes the specification, handlers, and
    registry configuration: it is constructed once in ``__init__`` and
    never reassigned (only its inner ``resolver`` field is swapped when
    the registry evolves), so comparing it by ``is`` gives a stable
    identity token without depending on the mutating registry.

    Consequence: ``SchemaAccessor.from_schema(doc, ...)`` called twice
    produces non-equal accessors even with identical arguments, because
    each call builds its own ``_path_resolver``. Build one accessor per
    schema and reuse it across all derived ``SchemaPath``s — see
    "Recommended usage" in the README.

    Returns ``NotImplemented`` when ``other`` is not a
    ``SchemaAccessor``, so Python can try the reflected comparison.
    """
    if not isinstance(other, SchemaAccessor):
        return NotImplemented
    return (
        # Exact type match: a subclass instance is never equal to a
        # base-class instance, even when all identity tokens agree.
        type(self) is type(other)
        and self._node is other._node
        and self.base_uri == other.base_uri
        and self._path_resolver is other._path_resolver
    )
75+
76+
def __hash__(self) -> int:
    """Hash the accessor on the same identity tokens ``__eq__`` compares.

    The tokens are the exact type, the identity of the wrapped node,
    ``base_uri`` (set once at construction), and the identity of
    ``_path_resolver``. The result is stable for the accessor's lifetime
    and does not depend on the schema dict being hashable or on the
    mutating registry.
    """
    identity_tokens = (
        type(self),
        id(self._node),
        self.base_uri,
        id(self._path_resolver),
    )
    return hash(identity_tokens)
89+
4990
@classmethod
5091
def from_schema(
5192
cls,

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ classifiers = [
4242

4343
[tool.poetry.dependencies]
4444
attrs = ">=22.2.0"
45-
pathable = "^0.5.0"
45+
pathable = "^0.6.0"
4646
python = ">=3.10,<4.0.0"
4747
PyYAML = ">=5.1"
4848
requests = {version = "^2.31.0", optional = true}

tests/unit/test_accessors.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,3 +387,106 @@ def test_prefix_cache_rebound_avoids_redundant_retrieval(self):
387387

388388
calls = sorted(c.args[0] for c in retrieve.call_args_list)
389389
assert calls == ["x://later", "x://one", "x://primer"]
390+
391+
392+
class TestSchemaAccessorIdentity:
    """Locks in the per-resource-handle identity model.

    SchemaAccessor identity is the accessor instance itself (with
    discrimination on node, base_uri, and `_path_resolver` instance),
    not a value tuple of its inputs. This forces the recommended
    lifecycle: construct one SchemaAccessor per schema document and
    reuse it across all derived SchemaPaths.
    """

    def test_same_instance_compares_equal_and_hashes_equal(self):
        accessor = SchemaAccessor.from_schema({"a": 1})

        assert accessor == accessor
        assert hash(accessor) == hash(accessor)

    def test_accessor_is_hashable(self):
        accessor = SchemaAccessor.from_schema({"a": 1})

        # A class that defines __eq__ without __hash__ is unhashable, so
        # hash() would raise TypeError if __hash__ were ever dropped.
        assert hash(accessor) == hash(accessor)
        # Exercise real container use rather than a bare `{accessor}`
        # expression, which would assert nothing about lookups.
        assert accessor in {accessor}
        assert {accessor: "value"}[accessor] == "value"

    def test_distinct_from_schema_calls_not_equal(self):
        # Each from_schema() call builds its own _path_resolver, so
        # the resulting accessors are distinct resource handles even
        # with identical arguments. This is the "reuse the accessor"
        # assertion: callers must hold onto the accessor instance,
        # not reconstruct it on demand.
        doc = {"a": 1}

        acc1 = SchemaAccessor.from_schema(doc)
        acc2 = SchemaAccessor.from_schema(doc)

        assert acc1 != acc2
        # Deliberately no hash assertion: unequal objects are allowed to
        # share a hash, even though a collision is very unlikely here.

    def test_distinct_dicts_not_equal(self):
        # Inherited from LookupAccessor: value-equal but distinct dicts
        # are distinct resources. Included for clarity.
        # NOTE: each from_schema() call also builds its own
        # _path_resolver, so this test alone cannot detect a regression
        # that drops the node-identity comparison.
        acc1 = SchemaAccessor.from_schema({"a": 1})
        acc2 = SchemaAccessor.from_schema({"a": 1})

        assert acc1 != acc2

    def test_different_base_uri_not_equal(self):
        # Same schema dict by reference, different base_uri → different
        # resources, because $ref resolution differs.
        # NOTE: each from_schema() call also builds its own
        # _path_resolver, so this test alone cannot detect a regression
        # that drops the base_uri comparison.
        doc = {"a": 1}

        acc1 = SchemaAccessor.from_schema(doc, base_uri="https://a/")
        acc2 = SchemaAccessor.from_schema(doc, base_uri="https://b/")

        assert acc1 != acc2

    def test_path_equality_follows_accessor_equality(self):
        from jsonschema_path import SchemaPath

        accessor = SchemaAccessor.from_schema({"a": {"b": 1}})

        p1 = SchemaPath(accessor) / "a"
        p2 = SchemaPath(accessor) / "a"

        # Same accessor instance + same parts → equal paths and
        # equal hashes (delegated to pathable's AccessorPath identity).
        assert p1 == p2
        assert hash(p1) == hash(p2)

    def test_path_inequality_across_distinct_accessors(self):
        from jsonschema_path import SchemaPath

        doc = {"a": {"b": 1}}
        acc1 = SchemaAccessor.from_schema(doc)
        acc2 = SchemaAccessor.from_schema(doc)

        p1 = SchemaPath(acc1) / "a"
        p2 = SchemaPath(acc2) / "a"

        # Distinct accessor instances → distinct resources → unequal
        # paths even though parts and underlying dict reference match.
        assert p1 != p2

    def test_resolved_cache_shared_when_accessor_reused(self):
        # Two paths over the same accessor hit the same resolved cache.
        # If a future refactor reintroduces per-path caching, this test
        # fails because the second .get_resolved would return a fresh
        # object instead of the cached one.
        from jsonschema_path import SchemaPath

        accessor = SchemaAccessor.from_schema(
            {"a": {"b": 1}},
            resolved_cache_maxsize=8,
        )

        p1 = SchemaPath(accessor) / "a" / "b"
        p2 = SchemaPath(accessor) / "a" / "b"

        # A single `with` enters and exits the two contexts in the same
        # order as the nested form.
        with p1.resolve() as r1, p2.resolve() as r2:
            assert r1 is r2

0 commit comments

Comments
 (0)