Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,29 @@
# Changelog

## 1.0.0b63

### Fixed

- ``_process_index`` now falls back to the correct handler for built-in
Plone indexes when they are missing from the ``IndexRegistry`` —
previously the miss fell through to ``_handle_field`` which emits
``idx->>'name'``, bypassing the dedicated typed columns and their
indexes. On aaf-6 prod this produced 4-9 second seq-scans over a
450k-row ``object_state`` table for every folder-listing query,
saturating the worker pool and causing intermittent Varnish
backend-fetch errors. Resolution happens via a new
``_builtin_index_type(name)`` helper that combines two sources: the
three Plone-native specials (``path``, ``effectiveRange``,
``SearchableText``) hardcoded because their SQL lives inside the
handler, and every ``TEXT[]``-typed ``ExtraIdxColumn`` (derived at
dispatch time — currently ``allowedRolesAndUsers`` and
``object_provides``, extensible via
``register_extra_idx_column``). The correct handler then uses
``path`` / ``allowed_roles`` / ``object_provides`` /
``searchable_text`` columns and the DateRangeIndex composite
clause. Explicit registry entries still win so addons can override
behavior. Closes #154.

## 1.0.0b62

### Fixed
Expand Down
73 changes: 65 additions & 8 deletions src/plone/pgcatalog/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,50 @@ def _bool_to_lower_str(value):
return str(value)


#: Plone-native indexes with dedicated pgcatalog handling that isn't
#: expressible via ``ExtraIdxColumn`` — their SQL lives inside the
#: handler. Mapped to ``(IndexType, idx_key)``. ``TEXT[]``-typed
#: ExtraIdxColumn keyword indexes (``allowedRolesAndUsers``,
#: ``object_provides``, …) are added automatically by
#: ``_builtin_index_type`` — no need to list them here. See #154.
_SPECIAL_BUILTIN_INDEX_TYPES: dict[str, tuple[IndexType, str | None]] = {
"path": (IndexType.PATH, None),
"effectiveRange": (IndexType.DATE_RANGE, None),
"SearchableText": (IndexType.TEXT, None),
}


def _builtin_index_type(name):
"""Return ``(IndexType, idx_key)`` for a built-in Plone index, or None.

Resolves in two stages:

1. Plone-native specials hardcoded in ``_SPECIAL_BUILTIN_INDEX_TYPES``
(``path``, ``effectiveRange``, ``SearchableText``) — dedicated
typed columns with handler-specific SQL.
2. ``TEXT[]``-typed ``ExtraIdxColumn`` entries — every such column
represents a KEYWORD-shaped index backed by a dedicated GIN
column (``allowedRolesAndUsers`` → ``allowed_roles``,
``object_provides`` → ``object_provides``, …). Derived from
the extra-columns registry so additional entries get the
fallback treatment automatically.

Used by ``_QueryBuilder._process_index`` when the main
``IndexRegistry`` doesn't know the name — falling through to
``_handle_field`` in that case would bypass the dedicated column
index and trigger seq-scans.
"""
special = _SPECIAL_BUILTIN_INDEX_TYPES.get(name)
if special is not None:
return special
from plone.pgcatalog.columns import get_extra_idx_columns

for col in get_extra_idx_columns():
if col.column_type == "TEXT[]" and col.idx_key == name:
return (IndexType.KEYWORD, name)
return None


def _is_numeric_range(values):
"""Return True iff every value is numeric (``int`` or ``float``).

Expand Down Expand Up @@ -274,18 +318,31 @@ def _process_index(self, name, raw):

registry = get_registry()
entry = registry.get(name)
if entry is None:
# Fall back to simple JSONB field query for unregistered indexes
# (e.g. Language, TranslationGroup from plone.app.multilingual).
validate_identifier(name)
if entry is not None:
idx_type, idx_key, _source_attrs = entry
spec = _normalize_query(raw)
handler = getattr(self, self._HANDLERS[idx_type])
handler(name, idx_key, spec)
return

# Registry miss. For built-in indexes with dedicated typed
# columns / SQL, route to the correct handler anyway — the
# generic ``_handle_field`` fallback would emit
# ``idx->>'name'`` and trigger seq-scans (#154).
builtin = _builtin_index_type(name)
if builtin is not None:
idx_type, idx_key = builtin
spec = _normalize_query(raw)
self._handle_field(name, name, spec)
handler = getattr(self, self._HANDLERS[idx_type])
handler(name, idx_key, spec)
return

idx_type, idx_key, _source_attrs = entry
# Truly custom / unknown index — fall back to simple JSONB
# field query (e.g. Language, TranslationGroup from
# plone.app.multilingual).
validate_identifier(name)
spec = _normalize_query(raw)
handler = getattr(self, self._HANDLERS[idx_type])
handler(name, idx_key, spec)
self._handle_field(name, name, spec)

# -- FieldIndex / GopipIndex --------------------------------------------

Expand Down
155 changes: 155 additions & 0 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,3 +1163,158 @@ def test_number_conversion(self):
def test_none_conversion(self):
"""Test None converts to 'None'."""
assert _bool_to_lower_str(None) == "None"


# ---------------------------------------------------------------------------
# Built-in index fallback dispatch (#154)
# ---------------------------------------------------------------------------


class TestBuiltinIndexFallbackDispatch:
"""Regression for #154: `path`, `allowedRolesAndUsers`, `effectiveRange`,
and `SearchableText` are core Plone indexes backed by dedicated typed
columns in the pgcatalog schema. When they are missing from the
IndexRegistry (fresh install, sync-from-catalog gap, broken ZCatalog
state), `_process_index` used to fall through to `_handle_field` which
emits `idx->>'name'` — bypassing the dedicated column and its index.

On aaf-6 prod that produced ~9-second seq-scans on 450k-row
object_state tables. Built-in indexes must always route to the
correct handler even without a registry entry.
"""

def _unregister(self, names):
"""Temporarily remove *names* from the registry, returning a
teardown callable that re-registers them.
"""
from plone.pgcatalog.columns import get_registry

registry = get_registry()
saved = {}
for name in names:
entry = registry.get(name)
if entry is not None:
saved[name] = entry
del registry._indexes[name]

def restore():
for name, entry in saved.items():
idx_type, idx_key, source_attrs = entry
registry.register(name, idx_type, idx_key, source_attrs)

return restore

def test_path_unregistered_hits_typed_column(self):
"""`{path: [...]}` must use the dedicated `path` column, not
`idx->>'path'`. The `path` column has an index; `idx->>'path'`
triggers a seq-scan.
"""
restore = self._unregister(["path"])
try:
qr = build_query({"path": {"query": "/plone/folder", "depth": -1}})
assert "idx->>'path'" not in qr["where"]
# _handle_path emits `path = 'x' OR path LIKE 'x/%'` for
# subtree / depth=-1.
assert "path" in qr["where"]
finally:
restore()

def test_allowedRolesAndUsers_unregistered_hits_typed_column(self):
"""`{allowedRolesAndUsers: [...]}` must use `allowed_roles && %s::text[]`
(GIN-indexed column), not `idx->>'allowedRolesAndUsers' = ANY(%s)`.
"""
restore = self._unregister(["allowedRolesAndUsers"])
try:
qr = build_query(
{
"allowedRolesAndUsers": {
"query": ["Anonymous"],
"operator": "or",
}
}
)
assert "allowed_roles" in qr["where"]
assert "&&" in qr["where"]
assert "idx->>'allowedRolesAndUsers'" not in qr["where"]
finally:
restore()

def test_effectiveRange_unregistered_hits_date_range_handler(self):
"""`{effectiveRange: <datetime>}` must use the composite effective
<= now AND (expires >= now OR expires IS NULL) clause, not a
`idx->>'effectiveRange' = <timestamp>` scalar equality.
"""
from datetime import datetime
from datetime import UTC

restore = self._unregister(["effectiveRange"])
try:
now = datetime(2026, 6, 15, tzinfo=UTC)
qr = build_query({"effectiveRange": now})
where = qr["where"]
# Composite shape from _handle_date_range.
assert "idx->>'effective'" in where
assert "idx->>'expires'" in where
assert "IS NULL" in where
assert "idx->>'effectiveRange'" not in where
finally:
restore()

def test_searchable_text_unregistered_hits_text_handler(self):
"""`{SearchableText: "..."}` must use the dedicated `searchable_text`
tsvector column (via `_handle_text`), not `idx->>'SearchableText' = ...`.
"""
restore = self._unregister(["SearchableText"])
try:
qr = build_query({"SearchableText": "plone"})
# _handle_text emits a `@@` tsquery match against
# searchable_text.
assert "searchable_text" in qr["where"]
assert "@@" in qr["where"]
assert "idx->>'SearchableText'" not in qr["where"]
finally:
restore()

def test_explicit_registry_entry_still_wins(self):
"""Regression guard — the built-in fallback kicks in only when
the registry misses. An explicit registration (even a weird
one) takes precedence so addons can override behavior.
"""
qr = build_query({"path": {"query": "/plone/folder", "depth": -1}})
# With registry populated via `populated_registry` fixture,
# path is (PATH, None) → same typed-column path as the
# fallback would produce. The test here asserts the query
# still works correctly on the hot path.
assert "path" in qr["where"]

def test_unknown_non_builtin_still_falls_through_to_field(self):
"""Regression guard — only known built-in names (the three
Plone specials plus any ``TEXT[]``-typed ``ExtraIdxColumn``
idx-keys) get the fallback treatment. Any other unregistered
name keeps the existing ``_handle_field`` behavior for
addon-defined custom indexes (e.g. Language, TranslationGroup).
"""
qr = build_query({"my_addon_field": "some_value"})
assert "idx->>'my_addon_field' =" in qr["where"]

def test_object_provides_unregistered_hits_typed_column(self):
"""``object_provides`` is a ``TEXT[]`` ExtraIdxColumn, so the
built-in fallback picks it up via derivation from
``get_extra_idx_columns()`` — without needing a hardcoded
entry. Proves the derivation works end-to-end.
"""
restore = self._unregister(["object_provides"])
try:
qr = build_query(
{
"object_provides": {
"query": ["some.iface.IMark"],
"operator": "or",
}
}
)
assert "object_provides" in qr["where"]
assert "&&" in qr["where"]
assert "idx->>'object_provides'" not in qr["where"]
finally:
restore()
Loading