From 0c017e3868832f48f2dded0c3ec3b05b5ae1c0a0 Mon Sep 17 00:00:00 2001 From: Sam Pennington Date: Sun, 17 May 2026 09:01:44 -0400 Subject: [PATCH 1/2] fix(insights): coerce nullable string args of array-returning functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit splitByChar (and the other string-splitting functions) return Array(...). Given a Nullable string argument — e.g. a breakdown on splitByChar('@', properties.$distinct_id) — ClickHouse would produce Nullable(Array(...)), which it rejects, failing the whole insight query. The printer now wraps a nullable string argument of these functions in ifNull(arg, '') so the result type stays a plain Array. Co-Authored-By: Claude Opus 4.7 (1M context) --- posthog/hogql/printer/clickhouse.py | 28 ++++++++++++++++++++++ posthog/hogql/printer/test/test_printer.py | 6 +++++ 2 files changed, 34 insertions(+) diff --git a/posthog/hogql/printer/clickhouse.py b/posthog/hogql/printer/clickhouse.py index eafc2f037a86..fd714781d869 100644 --- a/posthog/hogql/printer/clickhouse.py +++ b/posthog/hogql/printer/clickhouse.py @@ -71,6 +71,22 @@ def team_id_guard_for_table(table_type: ast.TableOrSelectType, context: HogQLCon ) +# String functions that return an Array — a Nullable string arg would produce an +# illegal Nullable(Array(...)) type, so their nullable string args are coerced. +_ARRAY_RETURNING_STRING_FUNCTIONS = frozenset( + { + "splitByChar", + "splitByString", + "splitByRegexp", + "splitByWhitespace", + "splitByNonAlpha", + "alphaTokens", + "extractAllGroups", + "ngrams", + "tokens", + } +) + # In non-nullable materialized columns, these values are treated as NULL MAT_COL_NULL_SENTINELS = ["", "null"] @@ -155,6 +171,18 @@ def _render_function_call(self, node: ast.Call, func_meta) -> str: args.append(f"ifNull({self.visit(arg)}, '')") else: args.append(f"ifNull(toString({self.visit(arg)}), '')") + elif node.name in _ARRAY_RETURNING_STRING_FUNCTIONS: + # These return Array(...). A Nullable string argument would make the + # result Nullable(Array(...)), which ClickHouse rejects, so coerce + # nullable string args to a non-nullable empty string. + args = [] + for arg in node_args: + visited = self.visit(arg) + arg_type = arg.type.resolve_constant_type(self.context) if arg.type is not None else None + if isinstance(arg_type, ast.StringType) and arg_type.nullable: + args.append(f"ifNull({visited}, '')") + else: + args.append(visited) else: args = [self.visit(arg) for arg in node_args] diff --git a/posthog/hogql/printer/test/test_printer.py b/posthog/hogql/printer/test/test_printer.py index 898e986604fc..5418c7cdd99f 100644 --- a/posthog/hogql/printer/test/test_printer.py +++ b/posthog/hogql/printer/test/test_printer.py @@ -4813,6 +4813,12 @@ def test_can_call_parametric_function(self): ) assert query_response.results == [(6,)] + def test_split_function_on_nullable_property(self): + # splitByChar on a nullable property would produce an illegal Nullable(Array) + query = parse_select("SELECT splitByChar('@', properties.email) FROM events") + query_response = execute_hogql_query(team=self.team, query=query) + assert query_response.results is not None + class TestPostgresPrinter(BaseTest): maxDiff = None From 6a3fad625f31132f9c10f01fec69916071d54d6d Mon Sep 17 00:00:00 2001 From: Sam Pennington Date: Sun, 17 May 2026 15:51:16 -0400 Subject: [PATCH 2/2] chore(insights): address review feedback - add extractAll/extractAllGroupsHorizontal/extractAllGroupsVertical/extractGroups to coerced functions - exclude StringArrayType (not StringJSONType) from nullable string coercion - parameterize the nullable-arg test across all array-returning functions, assert ifNull is emitted - add a non-nullable-arg case asserting no spurious ifNull Co-Authored-By: Claude Opus 4.7 (1M context) --- posthog/hogql/printer/clickhouse.py | 19 +++++++++--- posthog/hogql/printer/test/test_printer.py | 34 +++++++++++++++++++--- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/posthog/hogql/printer/clickhouse.py b/posthog/hogql/printer/clickhouse.py index fd714781d869..0ecb9b4ccf6b 100644 --- a/posthog/hogql/printer/clickhouse.py +++ b/posthog/hogql/printer/clickhouse.py @@ -81,7 +81,11 @@ def team_id_guard_for_table(table_type: ast.TableOrSelectType, context: HogQLCon "splitByWhitespace", "splitByNonAlpha", "alphaTokens", + "extractAll", "extractAllGroups", + "extractAllGroupsHorizontal", + "extractAllGroupsVertical", + "extractGroups", "ngrams", "tokens", } @@ -172,14 +176,21 @@ def _render_function_call(self, node: ast.Call, func_meta) -> str: else: args.append(f"ifNull(toString({self.visit(arg)}), '')") elif node.name in _ARRAY_RETURNING_STRING_FUNCTIONS: - # These return Array(...). A Nullable string argument would make the - # result Nullable(Array(...)), which ClickHouse rejects, so coerce - # nullable string args to a non-nullable empty string. + # A Nullable string arg would make the result Nullable(Array(...)), + # which ClickHouse rejects, so coerce nullable string args to a + # non-nullable empty string. Coercion is position-agnostic on + # purpose — a nullable separator would be just as illegal. args = [] for arg in node_args: visited = self.visit(arg) arg_type = arg.type.resolve_constant_type(self.context) if arg.type is not None else None - if isinstance(arg_type, ast.StringType) and arg_type.nullable: + # StringArrayType is a StringType subclass but resolves to an + # Array — coercing it to a bare '' would be invalid, so exclude it. + if ( + isinstance(arg_type, ast.StringType) + and not isinstance(arg_type, ast.StringArrayType) + and arg_type.nullable + ): args.append(f"ifNull({visited}, '')") else: args.append(visited) diff --git a/posthog/hogql/printer/test/test_printer.py b/posthog/hogql/printer/test/test_printer.py index 5418c7cdd99f..d328a0f30ffe 100644 --- a/posthog/hogql/printer/test/test_printer.py +++ b/posthog/hogql/printer/test/test_printer.py @@ -4813,11 +4813,37 @@ def test_can_call_parametric_function(self): ) assert query_response.results == [(6,)] - def test_split_function_on_nullable_property(self): - # splitByChar on a nullable property would produce an illegal Nullable(Array) - query = parse_select("SELECT splitByChar('@', properties.email) FROM events") + @parameterized.expand( + [ + ("splitByChar", "splitByChar('@', properties.email)"), + ("splitByString", "splitByString('@@', properties.email)"), + ("splitByRegexp", "splitByRegexp('[^a-z]+', properties.email)"), + ("splitByWhitespace", "splitByWhitespace(properties.email)"), + ("splitByNonAlpha", "splitByNonAlpha(properties.email)"), + ("alphaTokens", "alphaTokens(properties.email)"), + ("ngrams", "ngrams(properties.email, 3)"), + ("tokens", "tokens(properties.email)"), + ("extractAll", "extractAll(properties.email, '[a-z]+')"), + ("extractAllGroups", "extractAllGroups(properties.email, '([a-z]+)')"), + ("extractAllGroupsHorizontal", "extractAllGroupsHorizontal(properties.email, '([a-z]+)')"), + ("extractAllGroupsVertical", "extractAllGroupsVertical(properties.email, '([a-z]+)')"), + ("extractGroups", "extractGroups(properties.email, '([a-z]+)')"), + ] + ) + def test_array_returning_string_function_on_nullable_property(self, _name, func_call): + # A nullable string arg to an array-returning function would produce an + # illegal Nullable(Array(...)) — ClickHouse rejects it (exception_code=43). + query = parse_select(f"SELECT {func_call} FROM events") + query_response = execute_hogql_query(team=self.team, query=query) + assert query_response.clickhouse is not None + assert "ifNull(" in query_response.clickhouse + + def test_array_returning_string_function_no_spurious_ifnull_on_constant(self): + # A non-nullable (constant) string arg must not be wrapped in ifNull. + query = parse_select("SELECT splitByChar('@', 'a@b') FROM events") query_response = execute_hogql_query(team=self.team, query=query) - assert query_response.results is not None + assert query_response.clickhouse is not None + assert "ifNull(" not in query_response.clickhouse class TestPostgresPrinter(BaseTest):