@@ -619,6 +619,54 @@ def _mk_issue_1000_ic6_minimal_graph_cudf() -> _CypherTestGraph:
619619 return _mk_cudf_graph(graph._nodes, graph._edges)
620620
621621
def _mk_issue_1396_tag_cooccurrence_join_aggregation_graph() -> _CypherTestGraph:
    """Build the issue-1396 tag co-occurrence fixture from pandas frames.

    Node rows are laid out as three tags, then three persons, then three
    posts; only the tag rows carry a ``name``.  Edges, in row order:

    * ``KNOWS``: person 4398046511333 -> persons 2 and 3.
    * ``HAS_CREATOR``: posts 9001 and 9002 -> person 2, post 9003 -> person 3.
    * ``HAS_TAG``: every post -> tag 501; posts 9001 and 9002 -> tag 502;
      post 9003 -> tag 503.

    Returns:
        The graph produced by ``_mk_graph`` for the node and edge frames.
    """
    start_person = 4398046511333
    tag_ids = [501, 502, 503]
    person_ids = [start_person, 2, 3]
    post_ids = [9001, 9002, 9003]

    # One boolean label column per node kind, aligned with the id ordering
    # tags | persons | posts.
    node_frame = pd.DataFrame(
        {
            "id": tag_ids + person_ids + post_ids,
            "label__Tag": [True] * 3 + [False] * 6,
            "label__Person": [False] * 3 + [True] * 3 + [False] * 3,
            "label__Post": [False] * 6 + [True] * 3,
            "name": ["Carl_Gustaf_Emil_Mannerheim", "Alpha", "Beta"] + [None] * 6,
        }
    )

    # Source/destination lists are concatenated per relationship type so the
    # row order stays KNOWS, HAS_CREATOR, HAS_TAG.
    edge_sources = [start_person, start_person] + post_ids + post_ids + post_ids
    edge_targets = [2, 3] + [2, 2, 3] + [501, 501, 501] + [502, 502, 503]
    edge_types = ["KNOWS"] * 2 + ["HAS_CREATOR"] * 3 + ["HAS_TAG"] * 6
    edge_frame = pd.DataFrame(
        {
            "s": edge_sources,
            "d": edge_targets,
            "type": edge_types,
        }
    )

    return _mk_graph(node_frame, edge_frame)
663+
664+
def _mk_issue_1396_tag_cooccurrence_join_aggregation_graph_cudf() -> _CypherTestGraph:
    """Return the issue-1396 fixture rebuilt through ``_mk_cudf_graph``.

    The node and edge tables of the base fixture are handed to
    ``_mk_cudf_graph`` unchanged.
    """
    base_graph = _mk_issue_1396_tag_cooccurrence_join_aggregation_graph()
    node_table, edge_table = base_graph._nodes, base_graph._edges
    return _mk_cudf_graph(node_table, edge_table)
668+
669+
622670def _prefix_scalar_reentry_query(
623671 *,
624672 tag_name: str = "topic",
@@ -10734,6 +10782,35 @@ def test_string_cypher_executes_issue_1000_ic6_exact_runtime_minimal_on_cudf() -
1073410782 ]
1073510783
1073610784
def test_issue_1396_tag_cooccurrence_join_aggregation_counts() -> None:
    """Issue-1000 IC6 query on the issue-1396 fixture yields per-tag post counts.

    The result must be exactly two rows, in order: ``Alpha`` with two posts
    and ``Beta`` with one.
    """
    fixture = _mk_issue_1396_tag_cooccurrence_join_aggregation_graph()
    result = fixture.gfql(_issue_1000_ic6_query(), params=_issue_1000_ic6_params())

    actual_rows = result._nodes.to_dict(orient="records")
    expected_rows = [
        {"tagName": "Alpha", "postCount": 2},
        {"tagName": "Beta", "postCount": 1},
    ]
    assert actual_rows == expected_rows
10796+
10797+
def test_issue_1396_tag_cooccurrence_join_aggregation_counts_on_cudf() -> None:
    """cuDF variant of the issue-1396 tag co-occurrence count check.

    Skipped when ``cudf`` cannot be imported.  Besides the row values, the
    result node table's type must come from a ``cudf`` module.
    """
    pytest.importorskip("cudf")

    fixture = _mk_issue_1396_tag_cooccurrence_join_aggregation_graph_cudf()
    result = fixture.gfql(
        _issue_1000_ic6_query(),
        params=_issue_1000_ic6_params(),
        engine="cudf",
    )

    # The output must stay on the cuDF side rather than fall back to pandas.
    node_table_module = type(result._nodes).__module__
    assert node_table_module.startswith("cudf")

    actual_rows = result._nodes.to_pandas().to_dict(orient="records")
    expected_rows = [
        {"tagName": "Alpha", "postCount": 2},
        {"tagName": "Beta", "postCount": 1},
    ]
    assert actual_rows == expected_rows
10812+
10813+
1073710814def test_string_cypher_executes_scalar_only_prefix_with_match_reentry() -> None:
1073810815 query = _prefix_scalar_reentry_query(order_by="id")
1073910816
0 commit comments