Skip to content

Commit 2dfdb57

Browse files
committed
feat(gfql): support cypher multihop relationships
1 parent cfc3ce2 commit 2dfdb57

7 files changed

Lines changed: 246 additions & 57 deletions

File tree

graphistry/compute/chain.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,41 @@ def apply_output_slice(op: ASTObject, op_label: ASTObject, df):
432432
mapped_vals = out_df[id].map(hop_map)
433433
out_df[hc] = out_df[hc].where(out_df[hc].notna(), mapped_vals)
434434

435+
if hop_cols:
436+
hop_col = hop_cols[0]
437+
for idx, (op, _g_step) in enumerate(steps):
438+
if op._name is None or not isinstance(op, ASTNode) or op._name not in out_df.columns or idx == 0:
439+
continue
440+
prev_op, _ = steps[idx - 1]
441+
if not isinstance(prev_op, ASTEdge):
442+
continue
443+
min_hop = (
444+
prev_op.output_min_hops
445+
if prev_op.output_min_hops is not None
446+
else (
447+
prev_op.min_hops
448+
if prev_op.min_hops is not None
449+
else (prev_op.hops if prev_op.hops is not None else 1)
450+
)
451+
)
452+
max_hop = (
453+
prev_op.output_max_hops
454+
if prev_op.output_max_hops is not None
455+
else (
456+
prev_op.max_hops
457+
if prev_op.max_hops is not None
458+
else prev_op.hops
459+
)
460+
)
461+
if prev_op.to_fixed_point:
462+
max_hop = None
463+
label_mask = out_df[op._name].fillna(False).astype(bool)
464+
if min_hop > 1:
465+
label_mask = label_mask & out_df[hop_col].notna() & (out_df[hop_col] >= min_hop)
466+
if max_hop is not None:
467+
label_mask = label_mask & out_df[hop_col].notna() & (out_df[hop_col] <= max_hop)
468+
out_df[op._name] = label_mask
469+
435470
cols = list(out_df.columns)
436471
for c in cols:
437472
if c.endswith('_x'):

graphistry/compute/gfql/cypher/ast.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class RelationshipPattern:
6464
types: Tuple[str, ...]
6565
properties: Tuple[PropertyEntry, ...]
6666
span: SourceSpan
67+
min_hops: Optional[int] = None
68+
max_hops: Optional[int] = None
69+
to_fixed_point: bool = False
6770

6871

6972
PatternElement = Union[NodePattern, RelationshipPattern]

graphistry/compute/gfql/cypher/lowering.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,11 +2230,19 @@ def _lower_relationship(
22302230
line=relationship.span.line,
22312231
column=relationship.span.column,
22322232
)
2233+
edge_kwargs = {
2234+
"edge_match": edge_match,
2235+
"hops": None if (relationship.min_hops is not None or relationship.max_hops is not None or relationship.to_fixed_point) else 1,
2236+
"min_hops": relationship.min_hops,
2237+
"max_hops": relationship.max_hops,
2238+
"to_fixed_point": relationship.to_fixed_point,
2239+
"name": relationship.variable,
2240+
}
22332241
if relationship.direction == "forward":
2234-
return cast(ASTObject, e_forward(edge_match=edge_match, name=relationship.variable))
2242+
return cast(ASTObject, e_forward(**edge_kwargs))
22352243
if relationship.direction == "reverse":
2236-
return cast(ASTObject, e_reverse(edge_match=edge_match, name=relationship.variable))
2237-
return cast(ASTObject, e_undirected(edge_match=edge_match, name=relationship.variable))
2244+
return cast(ASTObject, e_reverse(**edge_kwargs))
2245+
return cast(ASTObject, e_undirected(**edge_kwargs))
22382246

22392247

22402248
def _pattern_line_column(pattern: Sequence[PatternElement], clause: MatchClause) -> Tuple[int, int]:
@@ -2257,6 +2265,9 @@ def _reverse_relationship_pattern(relationship: RelationshipPattern) -> Relation
22572265
types=relationship.types,
22582266
properties=relationship.properties,
22592267
span=relationship.span,
2268+
min_hops=relationship.min_hops,
2269+
max_hops=relationship.max_hops,
2270+
to_fixed_point=relationship.to_fixed_point,
22602271
)
22612272

22622273

graphistry/compute/gfql/cypher/parser.py

Lines changed: 55 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,18 @@
7676
| rel_undirected_simple
7777
| rel_bidirectional_simple
7878
79-
rel_forward: "-" "[" variable? rel_types? properties? "]" "->"
80-
rel_reverse: "<-" "[" variable? rel_types? properties? "]" "-"
81-
rel_undirected: "-" "[" variable? rel_types? properties? "]" "-"
79+
rel_forward: "-" "[" variable? rel_types? rel_range? properties? "]" "->"
80+
rel_reverse: "<-" "[" variable? rel_types? rel_range? properties? "]" "-"
81+
rel_undirected: "-" "[" variable? rel_types? rel_range? properties? "]" "-"
8282
rel_forward_simple: REL_FWD_SIMPLE
8383
rel_reverse_simple: REL_REV_SIMPLE
8484
rel_undirected_simple: REL_UNDIR_SIMPLE
8585
rel_bidirectional_simple: REL_BIDIR_SIMPLE
8686
8787
rel_types: ":" LABEL_NAME ("|" ":"? LABEL_NAME)*
88+
rel_range: "*" INT ".." INT -> rel_range_bounded
89+
| "*" INT -> rel_range_exact
90+
| "*" -> rel_range_fixed
8891
8992
variable: NAME
9093
@@ -389,46 +392,13 @@ def _to_unsupported(message: str, *, line: Optional[int] = None, column: Optiona
389392
)
390393

391394

392-
_VARIABLE_REL_PATTERN_RE = re.compile(
393-
r"(?:<-\s*\[[^\]\n]*\*[^\]\n]*\]\s*-)|(?:-\s*\[[^\]\n]*\*[^\]\n]*\]\s*->)|(?:-\s*\[[^\]\n]*\*[^\]\n]*\]\s*-)"
394-
)
395-
396-
397395
def _line_and_column_from_offset(source: str, offset: int) -> Tuple[int, int]:
    """Convert a 0-based character offset into a 1-based ``(line, column)`` pair.

    Args:
        source: The full text the offset refers to.
        offset: 0-based index into ``source``. Values outside
            ``[0, len(source)]`` are clamped to the nearest end of the text.

    Returns:
        ``(line, column)``, both starting at 1. Lines are delimited by ``"\\n"``.
    """
    # str.count / str.rfind interpret a negative ``end`` as relative to the end
    # of the string, which would report the wrong line and a non-positive
    # column. Clamp first so the result always points inside the text.
    offset = max(0, min(offset, len(source)))
    line = source.count("\n", 0, offset) + 1
    last_newline = source.rfind("\n", 0, offset)
    # rfind yields -1 when no newline precedes the offset, so the subtraction
    # also covers the first-line case (offset - (-1) == offset + 1).
    column = offset - last_newline
    return line, column
402400

403401

404-
def _find_variable_length_relationship_pattern(source: str) -> Optional[Tuple[str, int, int]]:
405-
in_single_quote = False
406-
escape = False
407-
segment_start = 0
408-
for idx, ch in enumerate(source):
409-
if in_single_quote:
410-
if escape:
411-
escape = False
412-
elif ch == "\\":
413-
escape = True
414-
elif ch == "'":
415-
in_single_quote = False
416-
segment_start = idx + 1
417-
continue
418-
if ch == "'":
419-
match = _VARIABLE_REL_PATTERN_RE.search(source, segment_start, idx)
420-
if match is not None:
421-
line, column = _line_and_column_from_offset(source, match.start())
422-
return match.group(0), line, column
423-
in_single_quote = True
424-
continue
425-
match = _VARIABLE_REL_PATTERN_RE.search(source, segment_start)
426-
if match is None:
427-
return None
428-
line, column = _line_and_column_from_offset(source, match.start())
429-
return match.group(0), line, column
430-
431-
432402
def _build_transformer(source: str) -> _TransformerLike:
433403
_, Transformer, _, v_args = _lark_imports()
434404
op_map = {
@@ -523,9 +493,16 @@ def _relationship(
523493
variable: Optional[str] = None
524494
rel_types: Tuple[str, ...] = ()
525495
properties: Tuple[PropertyEntry, ...] = ()
496+
min_hops: Optional[int] = None
497+
max_hops: Optional[int] = None
498+
to_fixed_point = False
526499
for item in items:
527500
if isinstance(item, str):
528501
variable = item
502+
elif isinstance(item, dict):
503+
min_hops = cast(Optional[int], item.get("min_hops"))
504+
max_hops = cast(Optional[int], item.get("max_hops"))
505+
to_fixed_point = bool(item.get("to_fixed_point", False))
529506
elif isinstance(item, tuple) and all(isinstance(v, str) for v in item):
530507
rel_types = cast(Tuple[str, ...], item)
531508
elif isinstance(item, tuple):
@@ -536,8 +513,50 @@ def _relationship(
536513
types=rel_types,
537514
properties=properties,
538515
span=_span_from_meta(meta),
516+
min_hops=min_hops,
517+
max_hops=max_hops,
518+
to_fixed_point=to_fixed_point,
539519
)
540520

521+
def _rel_hops(self, meta: Any, token: Any) -> int:
    """Parse one bound of a relationship hop range into a positive integer.

    Args:
        meta: Parse metadata of the enclosing range rule; ``meta.line`` and
            ``meta.column`` are attached to any error raised here.
        token: The bound as matched by the grammar; it is stringified before
            being parsed as an integer.

    Returns:
        The bound as an ``int`` greater than zero.

    Raises:
        The error built by ``_to_syntax_error`` if the token text is not an
        integer; the error built by ``_to_unsupported`` if the value is zero
        or negative.
    """
    try:
        value = int(str(token))
    except ValueError as exc:
        # int() on a str can only fail with ValueError; catching anything
        # broader would relabel unrelated failures as Cypher syntax errors.
        raise _to_syntax_error("Invalid relationship range bound", line=meta.line, column=meta.column) from exc
    if value <= 0:
        raise _to_unsupported(
            "Cypher zero-hop relationship ranges are not yet supported in the current GFQL Cypher compiler",
            line=meta.line,
            column=meta.column,
            field="match",
            value=self._slice(_span_from_meta(meta)),
        )
    return value
535+
536+
def rel_range_exact(self, meta: Any, items: Sequence[Any]) -> dict[str, Any]:
    """Handle the ``*N`` range form: the same bound is used as minimum and maximum.

    Returns a dict with keys ``min_hops``, ``max_hops`` and ``to_fixed_point``.
    Raises the error built by ``_to_syntax_error`` unless exactly one bound
    was supplied.
    """
    if len(items) == 1:
        exact = self._rel_hops(meta, items[0])
        return {"min_hops": exact, "max_hops": exact, "to_fixed_point": False}
    raise _to_syntax_error("Invalid relationship range", line=meta.line, column=meta.column)
541+
542+
def rel_range_bounded(self, meta: Any, items: Sequence[Any]) -> dict[str, Any]:
    """Handle the ``*N..M`` range form.

    Both bounds are validated through ``_rel_hops`` (lower first, then upper).
    Returns a dict with keys ``min_hops``, ``max_hops`` and ``to_fixed_point``.
    Raises the error built by ``_to_syntax_error`` unless exactly two bounds
    were supplied, and the one built by ``_to_unsupported`` when the lower
    bound exceeds the upper bound.
    """
    if len(items) != 2:
        raise _to_syntax_error("Invalid relationship range", line=meta.line, column=meta.column)
    lower_token, upper_token = items[0], items[1]
    lower = self._rel_hops(meta, lower_token)
    upper = self._rel_hops(meta, upper_token)
    if lower <= upper:
        return {"min_hops": lower, "max_hops": upper, "to_fixed_point": False}
    raise _to_unsupported(
        "Cypher relationship ranges require lower bound <= upper bound",
        line=meta.line,
        column=meta.column,
        field="match",
        value=self._slice(_span_from_meta(meta)),
    )
556+
557+
def rel_range_fixed(self, meta: Any, _items: Sequence[Any]) -> dict[str, Any]:
    """Handle the bare ``*`` range form: no bounds, ``to_fixed_point`` set.

    Neither ``meta`` nor ``_items`` is consulted; the result is constant.
    """
    unbounded: dict[str, Any] = dict(min_hops=None, max_hops=None, to_fixed_point=True)
    return unbounded
559+
541560
def rel_forward(self, meta: Any, items: Sequence[Any]) -> RelationshipPattern:
    """Build the relationship pattern for the ``rel_forward`` rule.

    Delegates to ``_relationship`` with ``direction="forward"``.
    """
    pattern = self._relationship(meta, items, direction="forward")
    return pattern
543562

@@ -1261,16 +1280,6 @@ def parse_cypher(query: str) -> Union[CypherQuery, CypherUnionQuery]:
12611280
"""
12621281
if not isinstance(query, str) or query.strip() == "":
12631282
raise _to_syntax_error("Cypher query must be a non-empty string")
1264-
variable_length_pattern = _find_variable_length_relationship_pattern(query)
1265-
if variable_length_pattern is not None:
1266-
pattern_text, line, column = variable_length_pattern
1267-
raise _to_unsupported(
1268-
"Cypher variable-length relationship patterns are not yet supported in the current GFQL Cypher compiler",
1269-
line=line,
1270-
column=column,
1271-
field="match",
1272-
value=pattern_text,
1273-
)
12741283

12751284
parser = _parser()
12761285
transformer = _build_transformer(query)

graphistry/tests/compute/gfql/cypher/test_lowering.py

Lines changed: 104 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,36 @@ def test_lower_match_clause_relationship_direction(query: str, edge_type: type)
114114
assert isinstance(ops[1], edge_type)
115115

116116

117+
@pytest.mark.parametrize(
    "query,edge_type,min_hops,max_hops,to_fixed_point,edge_match",
    [
        ("MATCH (a)-[*2]->(b) RETURN b", ASTEdgeForward, 2, 2, False, None),
        ("MATCH (a)<-[*3]-(b) RETURN b", ASTEdgeReverse, 3, 3, False, None),
        ("MATCH (a)-[:R*1..4]-(b) RETURN b", ASTEdgeUndirected, 1, 4, False, {"type": "R"}),
        ("MATCH (a)-[*]->(b) RETURN b", ASTEdgeForward, None, None, True, None),
    ],
)
def test_lower_match_clause_variable_length_relationships(
    query: str,
    edge_type: type,
    min_hops: int | None,
    max_hops: int | None,
    to_fixed_point: bool,
    edge_match: dict[str, object] | None,
) -> None:
    """Each range form must lower to an edge op carrying the expected hop settings.

    The second lowered op (index 1) is the relationship; its class, hop bounds,
    fixed-point flag and edge filter are compared against the parametrized
    expectations.
    """
    match_clause = _parse_query(query).match
    assert match_clause is not None

    rel_op = lower_match_clause(match_clause)[1]

    assert isinstance(rel_op, edge_type)
    assert rel_op.min_hops == min_hops
    assert rel_op.max_hops == max_hops
    assert rel_op.to_fixed_point is to_fixed_point
    assert rel_op.edge_match == edge_match
146+
117147
def test_lower_match_clause_relationship_type_alternation_uses_is_in_predicate() -> None:
118148
parsed = _parse_query("MATCH (n)-[r:KNOWS|HATES]->(x) RETURN r")
119149
assert parsed.match is not None
@@ -2331,23 +2361,89 @@ def test_string_cypher_failfast_rejects_graph_backed_unwind_after_with_as_valida
23312361
assert "UNWIND after WITH/RETURN" in exc_info.value.message
23322362

23332363

2334-
def test_string_cypher_failfast_rejects_variable_length_relationship_patterns_as_validation_error() -> None:
2364+
def test_string_cypher_executes_exact_multihop_relationship_pattern() -> None:
    """An exact ``*2`` pattern starting at node ``a`` is expected to return only ``c``."""
    nodes_df = pd.DataFrame({"id": ["a", "b", "c", "d", "e", "f"]})
    edges_df = pd.DataFrame(
        {
            "s": ["a", "b", "c", "d", "e"],
            "d": ["b", "c", "d", "e", "f"],
            "type": ["R", "R", "R", "R", "R"],
        }
    )
    graph = _mk_graph(nodes_df, edges_df)

    query = "MATCH (a {id: 'a'})-[*2]->(b) RETURN b.id AS id ORDER BY id"
    rows = graph.gfql(query)._nodes.to_dict(orient="records")

    assert rows == [{"id": "c"}]
2379+
2380+
2381+
def test_string_cypher_executes_bounded_multihop_relationship_pattern() -> None:
    """A typed ``:R*1..3`` pattern from ``a`` is expected to return ``b``, ``c`` and ``d``.

    The fixture also contains one edge of type ``S`` (a to e); ``e`` must not
    appear in the result.
    """
    nodes_df = pd.DataFrame({"id": ["a", "b", "c", "d", "e"]})
    edges_df = pd.DataFrame(
        {
            "s": ["a", "b", "c", "a"],
            "d": ["b", "c", "d", "e"],
            "type": ["R", "R", "R", "S"],
        }
    )
    graph = _mk_graph(nodes_df, edges_df)

    query = "MATCH (a {id: 'a'})-[:R*1..3]->(b) RETURN b.id AS id ORDER BY id"
    rows = graph.gfql(query)._nodes.to_dict(orient="records")

    assert rows == [{"id": "b"}, {"id": "c"}, {"id": "d"}]
2396+
2397+
2398+
def test_string_cypher_executes_fixed_point_relationship_pattern() -> None:
    """An unbounded, untyped ``*`` pattern from ``a`` is expected to return ``b`` through ``e``."""
    nodes_df = pd.DataFrame({"id": ["a", "b", "c", "d", "e"]})
    edges_df = pd.DataFrame(
        {
            "s": ["a", "b", "c", "a"],
            "d": ["b", "c", "d", "e"],
            "type": ["R", "R", "R", "S"],
        }
    )
    graph = _mk_graph(nodes_df, edges_df)

    query = "MATCH (a {id: 'a'})-[*]->(b) RETURN b.id AS id ORDER BY id"
    rows = graph.gfql(query)._nodes.to_dict(orient="records")

    assert rows == [{"id": "b"}, {"id": "c"}, {"id": "d"}, {"id": "e"}]
2413+
2414+
2415+
def test_string_cypher_executes_reverse_multihop_relationship_pattern() -> None:
    """A reverse ``<-[*2]-`` pattern anchored at ``c`` is expected to return only ``a``."""
    nodes_df = pd.DataFrame({"id": ["a", "b", "c", "d"]})
    edges_df = pd.DataFrame(
        {
            "s": ["a", "b", "c"],
            "d": ["b", "c", "d"],
            "type": ["R", "R", "R"],
        }
    )
    graph = _mk_graph(nodes_df, edges_df)

    query = "MATCH (a {id: 'c'})<-[*2]-(b) RETURN b.id AS id ORDER BY id"
    rows = graph.gfql(query)._nodes.to_dict(orient="records")

    assert rows == [{"id": "a"}]
2430+
2431+
2432+
def test_string_cypher_executes_undirected_multihop_relationship_pattern() -> None:
    """An undirected ``:R*1..2`` pattern from ``a`` is expected to return ``b`` and ``c``."""
    nodes_df = pd.DataFrame({"id": ["a", "b", "c", "d"]})
    edges_df = pd.DataFrame(
        {
            "s": ["a", "b", "c"],
            "d": ["b", "c", "d"],
            "type": ["R", "R", "R"],
        }
    )
    graph = _mk_graph(nodes_df, edges_df)

    query = "MATCH (a {id: 'a'})-[:R*1..2]-(b) RETURN b.id AS id ORDER BY id"
    rows = graph.gfql(query)._nodes.to_dict(orient="records")

    assert rows == [{"id": "b"}, {"id": "c"}]
23512447

23522448

23532449
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)