Skip to content

Commit bd542a5

Browse files
sampennington and claude committed
fix(insights): coerce aggregation_target type in multi-source funnels
A funnel that combines series from multiple source tables (e.g. a regular events step plus a data-warehouse step) builds one subquery per source and combines them with UNION ALL. Each source resolves aggregation_target to its own type — person_id is a UUID, a warehouse column is often a string — and UNION ALL has no common type for UUID and String, failing the whole query with NO_COMMON_TYPE.

Coerce every branch's aggregation_target to a string when more than one source query is unioned. Single-source funnels (the common case) are unchanged, so the person UUID actor id is preserved there.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c206fa4 commit bd542a5

3 files changed

Lines changed: 49 additions & 5 deletions

File tree

posthog/hogql_queries/insights/funnels/funnel_event_query.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,15 @@ def to_query(self, skip_entity_filter=False, skip_step_filter=False) -> ast.Sele
159159
if len(queries) == 1:
160160
return queries[0]
161161

162+
# Multiple source tables are combined with UNION ALL, which needs a common
163+
# column type. Different sources resolve aggregation_target to different
164+
# types (e.g. a person_id UUID and a warehouse string column), so coerce
165+
# every branch's aggregation_target to a string.
166+
for query in queries:
167+
for select_expr in query.select:
168+
if isinstance(select_expr, ast.Alias) and select_expr.alias == "aggregation_target":
169+
select_expr.expr = ast.Call(name="toString", args=[select_expr.expr])
170+
162171
# Take the field and alias names from the first query. UNION enforces identical column sets
163172
# across all selects, which makes this reliable.
164173
aliased_fields = alias_columns_in_select(queries[0].select, self.EVENT_TABLE_ALIAS)

posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_data_warehouse.ambr

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@
9898
e.step_1 AS step_1
9999
FROM
100100
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
101-
if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
101+
toString(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS aggregation_target,
102102
e.uuid AS uuid,
103103
e.`$session_id` AS `$session_id`,
104104
e.`$window_id` AS `$window_id`,
@@ -114,7 +114,7 @@
114114
HAVING ifNull(equals(tupleElement(argMax(tuple(person_distinct_id_overrides.is_deleted), person_distinct_id_overrides.version), 1), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
115115
WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC'))), in(e.event, tuple('$pageview'))), ifNull(equals(step_0, 1), 0))
116116
UNION ALL SELECT toTimeZone(e.created, 'UTC') AS timestamp,
117-
accurateCastOrNull(e.user_id, 'UUID') AS aggregation_target,
117+
toString(accurateCastOrNull(e.user_id, 'UUID')) AS aggregation_target,
118118
tupleElement(tuple(throwIf(isNull(e.uuid), 'Encountered a null value in posthog_test_test_table_1.uuid, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.uuid), reinterpretAsUUID(hex(MD5(concat('posthog_test_test_table_1_0_', ifNull(toString(e.uuid), ''))))))), 2) AS uuid,
119119
NULL AS `$session_id`,
120120
NULL AS `$window_id`,
@@ -372,7 +372,7 @@
372372
e.step_2 AS step_2
373373
FROM
374374
(SELECT toTimeZone(e.created_date, 'UTC') AS timestamp,
375-
coalesce(e.converted_opportunity_id, e.id) AS aggregation_target,
375+
toString(coalesce(e.converted_opportunity_id, e.id)) AS aggregation_target,
376376
tupleElement(tuple(throwIf(isNull(e.id), 'Encountered a null value in posthog_test_salesforce_lead.id, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.id), reinterpretAsUUID(hex(MD5(concat('posthog_test_salesforce_lead_0_', ifNull(toString(e.id), ''))))))), 2) AS uuid,
377377
NULL AS `$session_id`,
378378
NULL AS `$window_id`,
@@ -382,7 +382,7 @@
382382
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql_queries.insights.funnels.funnel_data_warehouse/posthog_test_salesforce_lead/*.csv', 'object_storage_root_user', 'object_storage_root_password', 'CSVWithNames', '`id` String, `created_date` DateTime64(3, \'UTC\'), `converted_opportunity_id` Nullable(String)') AS e
383383
WHERE and(and(greaterOrEquals(timestamp, toDateTime64('2024-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('2024-06-30 23:59:59.999999', 6, 'UTC'))), ifNull(equals(step_0, 1), 0))
384384
UNION ALL SELECT toTimeZone(e.created_date, 'UTC') AS timestamp,
385-
e.id AS aggregation_target,
385+
toString(e.id) AS aggregation_target,
386386
tupleElement(tuple(throwIf(isNull(e.id), 'Encountered a null value in posthog_test_salesforce_opportunity.id, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.id), reinterpretAsUUID(hex(MD5(concat('posthog_test_salesforce_opportunity_1_', ifNull(toString(e.id), ''))))))), 2) AS uuid,
387387
NULL AS `$session_id`,
388388
NULL AS `$window_id`,
@@ -392,7 +392,7 @@
392392
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql_queries.insights.funnels.funnel_data_warehouse/posthog_test_salesforce_opportunity/*.csv', 'object_storage_root_user', 'object_storage_root_password', 'CSVWithNames', '`id` String, `close_date` Nullable(Date), `created_date` DateTime64(3, \'UTC\')') AS e
393393
WHERE and(and(greaterOrEquals(timestamp, toDateTime64('2024-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('2024-06-30 23:59:59.999999', 6, 'UTC'))), ifNull(equals(step_1, 1), 0))
394394
UNION ALL SELECT e.close_date AS timestamp,
395-
e.id AS aggregation_target,
395+
toString(e.id) AS aggregation_target,
396396
tupleElement(tuple(throwIf(isNull(e.id), 'Encountered a null value in posthog_test_salesforce_opportunity.id, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.id), reinterpretAsUUID(hex(MD5(concat('posthog_test_salesforce_opportunity_2_', ifNull(toString(e.id), ''))))))), 2) AS uuid,
397397
NULL AS `$session_id`,
398398
NULL AS `$window_id`,

posthog/hogql_queries/insights/funnels/test/test_funnel_data_warehouse.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,41 @@ def test_funnels_data_warehouse_and_regular_nodes(self):
261261
assert results[0]["count"] == 2
262262
assert results[1]["count"] == 2
263263

264+
def test_funnels_data_warehouse_and_regular_nodes_string_aggregation_target(self):
265+
# A mixed funnel where the warehouse series aggregates by a plain string
266+
# column (not cast to UUID) must not fail the UNION ALL with NO_COMMON_TYPE
267+
# against the events series' person_id UUID.
268+
table_name = self.setup_data_warehouse()
269+
with freeze_time("2025-11-07"):
270+
_create_person(
271+
distinct_ids=["person1"],
272+
team_id=self.team.pk,
273+
uuid="bc53b62b-7cc4-b3b8-0688-c6ee3dfb8539",
274+
)
275+
journeys_for(
276+
{"person1": [{"event": "$pageview", "timestamp": datetime(2025, 11, 1, 0, 0, 0)}]},
277+
self.team,
278+
create_people=False,
279+
)
280+
281+
funnels_query = FunnelsQuery(
282+
kind="FunnelsQuery",
283+
dateRange=DateRange(date_from="2025-11-01"),
284+
series=[
285+
EventsNode(event="$pageview"),
286+
FunnelsDataWarehouseNode(
287+
id=table_name,
288+
table_name=table_name,
289+
id_field="uuid",
290+
aggregation_target_field="user_id",
291+
timestamp_field="created",
292+
),
293+
],
294+
)
295+
296+
response = FunnelsQueryRunner(query=funnels_query, team=self.team, just_summarize=True).calculate()
297+
assert response.results is not None
298+
264299
@snapshot_clickhouse_queries
265300
def test_funnels_data_warehouse_non_uuid_id_column(self):
266301
table_name = self.setup_data_warehouse()

0 commit comments

Comments (0)