Skip to content

Commit a055a10

Browse files
committed
fix(cypher): make #1273 whole-row boundary route-independent (#1391)
1 parent f1aef21 commit a055a10

2 files changed

Lines changed: 69 additions & 41 deletions

File tree

graphistry/compute/gfql/cypher/lowering.py

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3202,6 +3202,36 @@ def _query_requires_general_lowering_for_connected_join(
32023202
)
32033203

32043204

3205+
def _reject_unsupported_multi_alias_whole_row_cross_alias_where(
3206+
query: CypherQuery,
3207+
*,
3208+
merged_match: Optional[MatchClause],
3209+
alias_targets: Mapping[str, ASTObject],
3210+
) -> None:
3211+
if (
3212+
merged_match is None
3213+
or query.where is None
3214+
or _cartesian_node_only_patterns(merged_match) is not None
3215+
or _match_relationship_count(merged_match) == 0
3216+
or len({item.expression.text for item in query.return_.items if item.expression.text in alias_targets}) <= 1
3217+
):
3218+
return
3219+
if any(
3220+
isinstance(predicate.left, PropertyRef)
3221+
and isinstance(predicate.right, PropertyRef)
3222+
and predicate.left.alias != predicate.right.alias
3223+
and predicate.left.alias in alias_targets
3224+
and predicate.right.alias in alias_targets
3225+
for predicate in query.where.predicates
3226+
if not isinstance(predicate, WherePatternPredicate)
3227+
):
3228+
raise _unsupported(
3229+
"Cypher row lowering currently supports one MATCH source alias at a time; for remaining multi-source residuals see issue #1273",
3230+
field=query.return_.kind,
3231+
value=[item.expression.text for item in query.return_.items],
3232+
line=query.return_.span.line,
3233+
column=query.return_.span.column,
3234+
)
32053235
def _query_has_aggregate_stage(
32063236
query: CypherQuery,
32073237
*,
@@ -3215,8 +3245,6 @@ def _query_has_aggregate_stage(
32153245
if _post_aggregate_expr_plan(item, params=params, alias_targets={}) is not None:
32163246
return True
32173247
return False
3218-
3219-
32203248
def _binding_row_aliases_for_match(
32213249
clause: Optional[MatchClause],
32223250
*,
@@ -6481,10 +6509,6 @@ def _lower_general_row_projection(
64816509
alias_targets = _alias_target(lowered.query) if query.match is not None else {}
64826510
merged_match = _merged_match_clause(query)
64836511
relationship_count = _match_relationship_count(merged_match) if merged_match is not None else 0
6484-
if query.match is not None and _cartesian_node_only_patterns(query.match) is None and query.where is not None and relationship_count > 0:
6485-
whole_row_return_aliases = sum(1 for item in query.return_.items if item.expression.text in alias_targets)
6486-
if whole_row_return_aliases > 1 and any(not isinstance(predicate, WherePatternPredicate) and isinstance(predicate.right, PropertyRef) for predicate in query.where.predicates):
6487-
raise _unsupported("Cypher row lowering currently supports one MATCH source alias at a time; for remaining multi-source residuals see issue #1273", field=query.return_.kind, value=[item.expression.text for item in query.return_.items], line=query.return_.span.line, column=query.return_.span.column)
64886512
aggregate_specs: List[_AggregateSpec] = []
64896513
non_aggregate_items: List[ReturnItem] = []
64906514
post_aggregate_items: List[_PostAggregateExprPlan] = []
@@ -7377,8 +7401,7 @@ def rewrite_text(expr: ExpressionText, field: str) -> ExpressionText:
73777401
bare_collected.update(bare)
73787402
return rewritten
73797403

7380-
# Rewrite trailing MATCH expressions (node/edge property maps via WHERE
7381-
# comes through reentry_wheres / clause.where).
7404+
# Rewrite trailing MATCH/WHERE expressions.
73827405
rewritten_reentry_matches = tuple(
73837406
_rewrite_reentry_match_clause(clause, rewrite_expr=rewrite_text)
73847407
for clause in query.reentry_matches
@@ -7387,13 +7410,7 @@ def rewrite_text(expr: ExpressionText, field: str) -> ExpressionText:
73877410
where_clause if where_clause is None else _rewrite_where_clause_and_resync(where_clause, rewrite_text, "where")
73887411
for where_clause in query.reentry_wheres
73897412
)
7390-
# Slice 4.3d.1 (#1256): Drop bare-identifier projection items that simply
7391-
# forward a secondary alias (`WITH a, x, y, collect(...)` pattern in IC3
7392-
# multi-stage chains). Their property carries already live as hidden columns
7393-
# on the reentry-source's row table; the bare items are pure forwarding
7394-
# noise. Without this pre-clean the bare-ref scanner inside
7395-
# `_collect_secondary_property_refs` would fail-fast on what is in fact a
7396-
# forwarding pattern, blocking IC3 even after #1248 admits the prefix WITH.
7413+
# Drop bare secondary-alias forwarding items before property-ref rewriting.
73977414
secondary_forwarding_re = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
73987415
from graphistry.compute.gfql.cypher.reentry import compiletime as _reentry_compiletime
73997416

@@ -7432,8 +7449,7 @@ def rewrite_text(expr: ExpressionText, field: str) -> ExpressionText:
74327449
span=query.return_.span,
74337450
)
74347451

7435-
# Synthesize prefix WITH items: drop the secondaries, append S.X AS hidden
7436-
# for each unique referenced (S, X) pair.
7452+
# Synthesize prefix WITH items: drop secondary whole-row carries, append hidden S.X.
74377453
new_items: List[ReturnItem] = []
74387454
secondary_drop_indices = {idx for idx, _item in secondary_items}
74397455
for idx, item in enumerate(prefix_stage.clause.items):
@@ -7455,28 +7471,8 @@ def rewrite_text(expr: ExpressionText, field: str) -> ExpressionText:
74557471
clause=replace(prefix_stage.clause, items=tuple(new_items)),
74567472
)
74577473

7458-
# Slice 4.3d.2 (#1256): forward hidden carry columns through every downstream
7459-
# WITH stage so each recursive bounded-reentry compile sees them as scalar
7460-
# carries. Without this, the hidden column survives the first boundary (it
7461-
# is in the prefix) but the subsequent WITH/RETURN scope-narrowing drops it,
7462-
# and references like `RETURN x.id AS xid` (rewritten to a bare hidden
7463-
# identifier) fail at the inner compile's alias resolution.
7464-
#
7465-
# Interaction with DISTINCT in downstream stages: appending the carry as a
7466-
# bare item makes it a participant in DISTINCT key sets. For multi-alias
7467-
# carry semantics this is what callers want — DISTINCT over `(friend, x.id)`
7468-
# is the desired behavior when `x.id` is referenced downstream. Multi-row
7469-
# `x` cases that could observably mutate row count are blocked upstream by
7470-
# the pre-existing `unique carried node rows` failfast in `gfql_unified.py`.
7471-
#
7472-
# Aggregate guard (W2-IMPORTANT-1): if any downstream WITH stage contains
7473-
# an aggregate function call, refuse to forward the carry through it. The
7474-
# alternative — silently appending the hidden alias next to `count(*)` — can
7475-
# produce a wrong NULL value in the projected column when the trailing MATCH
7476-
# has no relationship to trigger the existing aggregate failfast. Better to
7477-
# raise a scoped #1256 error pointing at the gap than to risk silent wrong
7478-
# results. The relationship-pattern aggregate path is also covered by an
7479-
# earlier failfast; this guard is a single tighter rule that subsumes both.
7474+
# Forward hidden carry columns through downstream WITH stages, but refuse
7475+
# aggregate stages to avoid wrong NULL carry behavior (#1256).
74807476
if refs_collected and rewritten_with_stages_tail:
74817477
for stage in rewritten_with_stages_tail:
74827478
for item in stage.clause.items:
@@ -8535,6 +8531,12 @@ def _attach_graph_context(result: CompiledCypherQuery) -> CompiledCypherQuery:
85358531
if merged_match is not None
85368532
else LoweredCypherMatch(query=[], where=[])
85378533
)
8534+
alias_targets = _alias_target(lowered.query) if query.match is not None else {}
8535+
_reject_unsupported_multi_alias_whole_row_cross_alias_where(
8536+
query,
8537+
merged_match=merged_match,
8538+
alias_targets=alias_targets,
8539+
)
85388540

85398541
def _lower_general() -> CompiledCypherQuery:
85408542
return _lower_general_row_projection(
@@ -8547,7 +8549,6 @@ def _lower_general() -> CompiledCypherQuery:
85478549
)
85488550

85498551
if query.with_stages:
8550-
alias_targets = _alias_target(lowered.query)
85518552
binding_row_aliases = _binding_row_aliases_for_match(query.match, alias_targets=alias_targets)
85528553
binding_row_aliases = _apply_bound_scope_membership(
85538554
binding_row_aliases,
@@ -8625,7 +8626,6 @@ def _lower_general() -> CompiledCypherQuery:
86258626
))
86268627

86278628
if merged_match is not None and not query.unwinds:
8628-
alias_targets = _alias_target(lowered.query)
86298629
binding_row_aliases = _binding_row_aliases_for_match(query.match, alias_targets=alias_targets)
86308630
binding_row_aliases = _apply_bound_scope_membership(
86318631
binding_row_aliases,

graphistry/tests/compute/gfql/cypher/test_lowering.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11381,6 +11381,34 @@ def test_multi_alias_connected_whole_row_return_with_cross_alias_where_still_fai
1138111381
assert "#1273" in exc_info.value.message
1138211382

1138311383

11384+
def test_multi_alias_connected_cross_alias_where_scalar_projection_remains_supported() -> None:
11385+
g = _mk_graph(
11386+
pd.DataFrame(
11387+
{
11388+
"id": ["n1", "n2", "x1", "x2"],
11389+
"animal": ["cat", "dog", "cat", "wolf"],
11390+
}
11391+
),
11392+
pd.DataFrame({"s": ["n1", "n2"], "d": ["x1", "x2"], "type": ["R", "R"]}),
11393+
)
11394+
result = g.gfql("MATCH (n)-[rel]->(x) WHERE n.animal = x.animal RETURN n.id AS n_id, x.id AS x_id")
11395+
assert result._nodes.to_dict(orient="records") == [{"n_id": "n1", "x_id": "x1"}]
11396+
11397+
11398+
def test_multi_alias_connected_cross_alias_where_single_whole_row_projection_remains_supported() -> None:
11399+
g = _mk_graph(
11400+
pd.DataFrame(
11401+
{
11402+
"id": ["n1", "n2", "x1", "x2"],
11403+
"animal": ["cat", "dog", "cat", "wolf"],
11404+
}
11405+
),
11406+
pd.DataFrame({"s": ["n1", "n2"], "d": ["x1", "x2"], "type": ["R", "R"]}),
11407+
)
11408+
result = g.gfql("MATCH (n)-[rel]->(x) WHERE n.animal = x.animal RETURN n, x.id AS x_id")
11409+
assert result._nodes.to_dict(orient="records") == [{"n": "({animal: 'cat'})", "x_id": "x1"}]
11410+
11411+
1138411412
def test_compile_cypher_tracks_seeded_top_level_row_query() -> None:
1138511413
compiled = _compile_query("UNWIND [1, 2, 3] AS x RETURN x ORDER BY x DESC LIMIT 2")
1138611414

0 commit comments

Comments
 (0)