Skip to content

Commit 3f9a918

Browse files
authored
Merge pull request #155 from bluedynamics/fix/builtin-index-fallback-dispatch
fix(query): built-in index dispatch fallback — no more idx->> seq-scans (#154)
2 parents 800adfc + 111ae89 commit 3f9a918

3 files changed

Lines changed: 244 additions & 8 deletions

File tree

CHANGES.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,29 @@
11
# Changelog
22

3+
## 1.0.0b63
4+
5+
### Fixed
6+
7+
- ``_process_index`` now falls back to the correct handler for built-in
8+
Plone indexes when they are missing from the ``IndexRegistry``
9+
previously the miss fell through to ``_handle_field`` which emits
10+
``idx->>'name'``, bypassing the dedicated typed columns and their
11+
indexes. On aaf-6 prod this produced 4-9 second seq-scans over a
12+
450k-row ``object_state`` table for every folder-listing query,
13+
saturating the worker pool and causing intermittent Varnish
14+
backend-fetch errors. Resolution happens via a new
15+
``_builtin_index_type(name)`` helper that combines two sources: the
16+
three Plone-native specials (``path``, ``effectiveRange``,
17+
``SearchableText``) hardcoded because their SQL lives inside the
18+
handler, and every ``TEXT[]``-typed ``ExtraIdxColumn`` (derived at
19+
dispatch time — currently ``allowedRolesAndUsers`` and
20+
``object_provides``, extensible via
21+
``register_extra_idx_column``). The correct handler then uses
22+
``path`` / ``allowed_roles`` / ``object_provides`` /
23+
``searchable_text`` columns and the DateRangeIndex composite
24+
clause. Explicit registry entries still win so addons can override
25+
behavior. Closes #154.
26+
327
## 1.0.0b62
428

529
### Fixed

src/plone/pgcatalog/query.py

Lines changed: 65 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,50 @@ def _bool_to_lower_str(value):
7777
return str(value)
7878

7979

80+
#: Plone-native indexes with dedicated pgcatalog handling that isn't
81+
#: expressible via ``ExtraIdxColumn`` — their SQL lives inside the
82+
#: handler. Mapped to ``(IndexType, idx_key)``. ``TEXT[]``-typed
83+
#: ExtraIdxColumn keyword indexes (``allowedRolesAndUsers``,
84+
#: ``object_provides``, …) are added automatically by
85+
#: ``_builtin_index_type`` — no need to list them here. See #154.
86+
_SPECIAL_BUILTIN_INDEX_TYPES: dict[str, tuple[IndexType, str | None]] = {
87+
"path": (IndexType.PATH, None),
88+
"effectiveRange": (IndexType.DATE_RANGE, None),
89+
"SearchableText": (IndexType.TEXT, None),
90+
}
91+
92+
93+
def _builtin_index_type(name):
94+
"""Return ``(IndexType, idx_key)`` for a built-in Plone index, or None.
95+
96+
Resolves in two stages:
97+
98+
1. Plone-native specials hardcoded in ``_SPECIAL_BUILTIN_INDEX_TYPES``
99+
(``path``, ``effectiveRange``, ``SearchableText``) — dedicated
100+
typed columns with handler-specific SQL.
101+
2. ``TEXT[]``-typed ``ExtraIdxColumn`` entries — every such column
102+
represents a KEYWORD-shaped index backed by a dedicated GIN
103+
column (``allowedRolesAndUsers`` → ``allowed_roles``,
104+
``object_provides`` → ``object_provides``, …). Derived from
105+
the extra-columns registry so additional entries get the
106+
fallback treatment automatically.
107+
108+
Used by ``_QueryBuilder._process_index`` when the main
109+
``IndexRegistry`` doesn't know the name — falling through to
110+
``_handle_field`` in that case would bypass the dedicated column
111+
index and trigger seq-scans.
112+
"""
113+
special = _SPECIAL_BUILTIN_INDEX_TYPES.get(name)
114+
if special is not None:
115+
return special
116+
from plone.pgcatalog.columns import get_extra_idx_columns
117+
118+
for col in get_extra_idx_columns():
119+
if col.column_type == "TEXT[]" and col.idx_key == name:
120+
return (IndexType.KEYWORD, name)
121+
return None
122+
123+
80124
def _is_numeric_range(values):
81125
"""Return True iff every value is numeric (``int`` or ``float``).
82126
@@ -274,18 +318,31 @@ def _process_index(self, name, raw):
274318

275319
registry = get_registry()
276320
entry = registry.get(name)
277-
if entry is None:
278-
# Fall back to simple JSONB field query for unregistered indexes
279-
# (e.g. Language, TranslationGroup from plone.app.multilingual).
280-
validate_identifier(name)
321+
if entry is not None:
322+
idx_type, idx_key, _source_attrs = entry
323+
spec = _normalize_query(raw)
324+
handler = getattr(self, self._HANDLERS[idx_type])
325+
handler(name, idx_key, spec)
326+
return
327+
328+
# Registry miss. For built-in indexes with dedicated typed
329+
# columns / SQL, route to the correct handler anyway — the
330+
# generic ``_handle_field`` fallback would emit
331+
# ``idx->>'name'`` and trigger seq-scans (#154).
332+
builtin = _builtin_index_type(name)
333+
if builtin is not None:
334+
idx_type, idx_key = builtin
281335
spec = _normalize_query(raw)
282-
self._handle_field(name, name, spec)
336+
handler = getattr(self, self._HANDLERS[idx_type])
337+
handler(name, idx_key, spec)
283338
return
284339

285-
idx_type, idx_key, _source_attrs = entry
340+
# Truly custom / unknown index — fall back to simple JSONB
341+
# field query (e.g. Language, TranslationGroup from
342+
# plone.app.multilingual).
343+
validate_identifier(name)
286344
spec = _normalize_query(raw)
287-
handler = getattr(self, self._HANDLERS[idx_type])
288-
handler(name, idx_key, spec)
345+
self._handle_field(name, name, spec)
289346

290347
# -- FieldIndex / GopipIndex --------------------------------------------
291348

tests/test_query.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,3 +1163,158 @@ def test_number_conversion(self):
11631163
def test_none_conversion(self):
11641164
"""Test None converts to 'None'."""
11651165
assert _bool_to_lower_str(None) == "None"
1166+
1167+
1168+
# ---------------------------------------------------------------------------
1169+
# Built-in index fallback dispatch (#154)
1170+
# ---------------------------------------------------------------------------
1171+
1172+
1173+
class TestBuiltinIndexFallbackDispatch:
1174+
"""Regression for #154: `path`, `allowedRolesAndUsers`, `effectiveRange`,
1175+
and `SearchableText` are core Plone indexes backed by dedicated typed
1176+
columns in the pgcatalog schema. When they are missing from the
1177+
IndexRegistry (fresh install, sync-from-catalog gap, broken ZCatalog
1178+
state), `_process_index` used to fall through to `_handle_field` which
1179+
emits `idx->>'name'` — bypassing the dedicated column and its index.
1180+
1181+
On aaf-6 prod that produced ~9-second seq-scans on 450k-row
1182+
object_state tables. Built-in indexes must always route to the
1183+
correct handler even without a registry entry.
1184+
"""
1185+
1186+
def _unregister(self, names):
1187+
"""Temporarily remove *names* from the registry, returning a
1188+
teardown callable that re-registers them.
1189+
"""
1190+
from plone.pgcatalog.columns import get_registry
1191+
1192+
registry = get_registry()
1193+
saved = {}
1194+
for name in names:
1195+
entry = registry.get(name)
1196+
if entry is not None:
1197+
saved[name] = entry
1198+
del registry._indexes[name]
1199+
1200+
def restore():
1201+
for name, entry in saved.items():
1202+
idx_type, idx_key, source_attrs = entry
1203+
registry.register(name, idx_type, idx_key, source_attrs)
1204+
1205+
return restore
1206+
1207+
def test_path_unregistered_hits_typed_column(self):
1208+
"""`{path: [...]}` must use the dedicated `path` column, not
1209+
`idx->>'path'`. The `path` column has an index; `idx->>'path'`
1210+
triggers a seq-scan.
1211+
"""
1212+
restore = self._unregister(["path"])
1213+
try:
1214+
qr = build_query({"path": {"query": "/plone/folder", "depth": -1}})
1215+
assert "idx->>'path'" not in qr["where"]
1216+
# _handle_path emits `path = 'x' OR path LIKE 'x/%'` for
1217+
# subtree / depth=-1.
1218+
assert "path" in qr["where"]
1219+
finally:
1220+
restore()
1221+
1222+
def test_allowedRolesAndUsers_unregistered_hits_typed_column(self):
1223+
"""`{allowedRolesAndUsers: [...]}` must use `allowed_roles && %s::text[]`
1224+
(GIN-indexed column), not `idx->>'allowedRolesAndUsers' = ANY(%s)`.
1225+
"""
1226+
restore = self._unregister(["allowedRolesAndUsers"])
1227+
try:
1228+
qr = build_query(
1229+
{
1230+
"allowedRolesAndUsers": {
1231+
"query": ["Anonymous"],
1232+
"operator": "or",
1233+
}
1234+
}
1235+
)
1236+
assert "allowed_roles" in qr["where"]
1237+
assert "&&" in qr["where"]
1238+
assert "idx->>'allowedRolesAndUsers'" not in qr["where"]
1239+
finally:
1240+
restore()
1241+
1242+
def test_effectiveRange_unregistered_hits_date_range_handler(self):
1243+
"""`{effectiveRange: <datetime>}` must use the composite effective
1244+
<= now AND (expires >= now OR expires IS NULL) clause, not a
1245+
`idx->>'effectiveRange' = <timestamp>` scalar equality.
1246+
"""
1247+
from datetime import datetime
1248+
from datetime import UTC
1249+
1250+
restore = self._unregister(["effectiveRange"])
1251+
try:
1252+
now = datetime(2026, 6, 15, tzinfo=UTC)
1253+
qr = build_query({"effectiveRange": now})
1254+
where = qr["where"]
1255+
# Composite shape from _handle_date_range.
1256+
assert "idx->>'effective'" in where
1257+
assert "idx->>'expires'" in where
1258+
assert "IS NULL" in where
1259+
assert "idx->>'effectiveRange'" not in where
1260+
finally:
1261+
restore()
1262+
1263+
def test_searchable_text_unregistered_hits_text_handler(self):
1264+
"""`{SearchableText: "..."}` must use the dedicated `searchable_text`
1265+
tsvector column (via `_handle_text`), not `idx->>'SearchableText' = ...`.
1266+
"""
1267+
restore = self._unregister(["SearchableText"])
1268+
try:
1269+
qr = build_query({"SearchableText": "plone"})
1270+
# _handle_text emits a `@@` tsquery match against
1271+
# searchable_text.
1272+
assert "searchable_text" in qr["where"]
1273+
assert "@@" in qr["where"]
1274+
assert "idx->>'SearchableText'" not in qr["where"]
1275+
finally:
1276+
restore()
1277+
1278+
def test_explicit_registry_entry_still_wins(self):
1279+
"""Regression guard — the built-in fallback kicks in only when
1280+
the registry misses. An explicit registration (even a weird
1281+
one) takes precedence so addons can override behavior.
1282+
"""
1283+
qr = build_query({"path": {"query": "/plone/folder", "depth": -1}})
1284+
# With registry populated via `populated_registry` fixture,
1285+
# path is (PATH, None) → same typed-column path as the
1286+
# fallback would produce. The test here asserts the query
1287+
# still works correctly on the hot path.
1288+
assert "path" in qr["where"]
1289+
1290+
def test_unknown_non_builtin_still_falls_through_to_field(self):
1291+
"""Regression guard — only known built-in names (the three
1292+
Plone specials plus any ``TEXT[]``-typed ``ExtraIdxColumn``
1293+
idx-keys) get the fallback treatment. Any other unregistered
1294+
name keeps the existing ``_handle_field`` behavior for
1295+
addon-defined custom indexes (e.g. Language, TranslationGroup).
1296+
"""
1297+
qr = build_query({"my_addon_field": "some_value"})
1298+
assert "idx->>'my_addon_field' =" in qr["where"]
1299+
1300+
def test_object_provides_unregistered_hits_typed_column(self):
1301+
"""``object_provides`` is a ``TEXT[]`` ExtraIdxColumn, so the
1302+
built-in fallback picks it up via derivation from
1303+
``get_extra_idx_columns()`` — without needing a hardcoded
1304+
entry. Proves the derivation works end-to-end.
1305+
"""
1306+
restore = self._unregister(["object_provides"])
1307+
try:
1308+
qr = build_query(
1309+
{
1310+
"object_provides": {
1311+
"query": ["some.iface.IMark"],
1312+
"operator": "or",
1313+
}
1314+
}
1315+
)
1316+
assert "object_provides" in qr["where"]
1317+
assert "&&" in qr["where"]
1318+
assert "idx->>'object_provides'" not in qr["where"]
1319+
finally:
1320+
restore()

0 commit comments

Comments
 (0)