Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions posthog/hogql_queries/insights/funnels/funnel_event_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@ def to_query(self, skip_entity_filter=False, skip_step_filter=False) -> ast.Sele
if len(queries) == 1:
return queries[0]

# Multiple source tables are combined with UNION ALL, which needs a common
# column type. Different sources resolve aggregation_target to different
# types (e.g. a person_id UUID and a warehouse string column), so coerce
# every branch's aggregation_target to a string.
for query in queries:
for select_expr in query.select:
if isinstance(select_expr, ast.Alias) and select_expr.alias == "aggregation_target":
select_expr.expr = ast.Call(name="toString", args=[select_expr.expr])

# Take the field and alias names from the first query. UNION enforces identical column sets
# across all selects, which makes this reliable.
aliased_fields = alias_columns_in_select(queries[0].select, self.EVENT_TABLE_ALIAS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
e.step_1 AS step_1
FROM
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
toString(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS aggregation_target,
e.uuid AS uuid,
e.`$session_id` AS `$session_id`,
e.`$window_id` AS `$window_id`,
Expand All @@ -114,7 +114,7 @@
HAVING ifNull(equals(tupleElement(argMax(tuple(person_distinct_id_overrides.is_deleted), person_distinct_id_overrides.version), 1), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC'))), in(e.event, tuple('$pageview'))), ifNull(equals(step_0, 1), 0))
UNION ALL SELECT toTimeZone(e.created, 'UTC') AS timestamp,
accurateCastOrNull(e.user_id, 'UUID') AS aggregation_target,
toString(accurateCastOrNull(e.user_id, 'UUID')) AS aggregation_target,
tupleElement(tuple(throwIf(isNull(e.uuid), 'Encountered a null value in posthog_test_test_table_1.uuid, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.uuid), reinterpretAsUUID(hex(MD5(concat('posthog_test_test_table_1_0_', ifNull(toString(e.uuid), ''))))))), 2) AS uuid,
NULL AS `$session_id`,
NULL AS `$window_id`,
Expand Down Expand Up @@ -145,6 +145,168 @@
use_hive_partitioning=0
'''
# ---
# name: TestFunnelDataWarehouse.test_funnels_data_warehouse_and_regular_nodes_string_aggregation_target
'''
SELECT sum(step_1) AS step_1,
sum(step_2) AS step_2,
arrayMap(x -> if(isNaN(x), NULL, x), [avgArrayOrNull(step_1_conversion_times)])[1] AS step_1_average_conversion_time,
arrayMap(x -> if(isNaN(x), NULL, x), [medianArrayOrNull(step_1_conversion_times)])[1] AS step_1_median_conversion_time,
groupArray(row_number) AS row_number,
final_prop AS final_prop
FROM
(SELECT countIf(ifNull(notEquals(bitAnd(steps_bitfield, 1), 0), 1)) AS step_1,
countIf(ifNull(notEquals(bitAnd(steps_bitfield, 2), 0), 1)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, '', arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
[''] AS prop,
arrayJoin(aggregate_funnel(2, 1209600, 'first_touch', 'ordered', prop, [], arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
af_tuple.1 AS step_reached,
plus(af_tuple.1, 1) AS steps,
af_tuple.2 AS breakdown,
af_tuple.3 AS timings,
af_tuple.5 AS steps_bitfield,
aggregation_target AS aggregation_target
FROM
(SELECT e.timestamp AS timestamp,
e.aggregation_target AS aggregation_target,
e.uuid AS uuid,
e.`$session_id` AS `$session_id`,
e.`$window_id` AS `$window_id`,
e.step_0 AS step_0,
e.step_1 AS step_1
FROM
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
toString(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS aggregation_target,
e.uuid AS uuid,
e.`$session_id` AS `$session_id`,
e.`$window_id` AS `$window_id`,
if(equals(e.event, '$pageview'), 1, 0) AS step_0,
0 AS step_1
FROM events AS e
LEFT OUTER JOIN
(SELECT tupleElement(argMax(tuple(person_distinct_id_overrides.person_id), person_distinct_id_overrides.version), 1) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 99999)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(tupleElement(argMax(tuple(person_distinct_id_overrides.is_deleted), person_distinct_id_overrides.version), 1), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC'))), in(e.event, tuple('$pageview'))), ifNull(equals(step_0, 1), 0))
UNION ALL SELECT toTimeZone(e.created, 'UTC') AS timestamp,
toString(e.user_id) AS aggregation_target,
tupleElement(tuple(throwIf(isNull(e.uuid), 'Encountered a null value in posthog_test_test_table_1.uuid, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.uuid), reinterpretAsUUID(hex(MD5(concat('posthog_test_test_table_1_0_', ifNull(toString(e.uuid), ''))))))), 2) AS uuid,
NULL AS `$session_id`,
NULL AS `$window_id`,
0 AS step_0,
if(1, 1, 0) AS step_1
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql_queries.insights.funnels.funnel_data_warehouse/posthog_test_test_table_1/*.csv', 'object_storage_root_user', 'object_storage_root_password', 'CSVWithNames', '`id` Int64, `uuid` String, `created` DateTime64(3, \'UTC\'), `user_id` String, `event_name` String, `id_decimal` Decimal(18, 2), `properties` Nullable(String), `id_with_nulls` Nullable(Int64)') AS e
WHERE and(and(greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC'))), ifNull(equals(step_1, 1), 0))) AS e)
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
GROUP BY breakdown
ORDER BY step_2 DESC,
step_1 DESC)
GROUP BY final_prop
ORDER BY step_2 DESC,
step_1 DESC
LIMIT 100 SETTINGS join_algorithm='auto',
format_csv_allow_double_quotes=1,
readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=23622320128,
enable_analyzer=1,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1,
use_hive_partitioning=0
'''
# ---
# name: TestFunnelDataWarehouse.test_funnels_data_warehouse_and_regular_nodes_string_aggregation_target_actors
'''
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, '', arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
[''] AS prop,
arrayJoin(aggregate_funnel(2, 1209600, 'first_touch', 'ordered', prop, [], arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
af_tuple.1 AS step_reached,
plus(af_tuple.1, 1) AS steps,
af_tuple.2 AS breakdown,
af_tuple.3 AS timings,
af_tuple.5 AS steps_bitfield,
aggregation_target AS aggregation_target
FROM
(SELECT e.timestamp AS timestamp,
e.aggregation_target AS aggregation_target,
e.uuid AS uuid,
e.`$session_id` AS `$session_id`,
e.`$window_id` AS `$window_id`,
e.step_0 AS step_0,
e.step_1 AS step_1
FROM
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
toString(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS aggregation_target,
e.uuid AS uuid,
e.`$session_id` AS `$session_id`,
e.`$window_id` AS `$window_id`,
if(equals(e.event, '$pageview'), 1, 0) AS step_0,
0 AS step_1
FROM events AS e
LEFT OUTER JOIN
(SELECT tupleElement(argMax(tuple(person_distinct_id_overrides.person_id), person_distinct_id_overrides.version), 1) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 99999)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(tupleElement(argMax(tuple(person_distinct_id_overrides.is_deleted), person_distinct_id_overrides.version), 1), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(e.timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC'))), in(e.event, tuple('$pageview'))), ifNull(equals(step_0, 1), 0))
UNION ALL SELECT toTimeZone(e.created, 'UTC') AS timestamp,
toString(e.user_id) AS aggregation_target,
tupleElement(tuple(throwIf(isNull(e.uuid), 'Encountered a null value in posthog_test_test_table_1.uuid, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.uuid), reinterpretAsUUID(hex(MD5(concat('posthog_test_test_table_1_0_', ifNull(toString(e.uuid), ''))))))), 2) AS uuid,
NULL AS `$session_id`,
NULL AS `$window_id`,
0 AS step_0,
if(1, 1, 0) AS step_1
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql_queries.insights.funnels.funnel_data_warehouse/posthog_test_test_table_1/*.csv', 'object_storage_root_user', 'object_storage_root_password', 'CSVWithNames', '`id` Int64, `uuid` String, `created` DateTime64(3, \'UTC\'), `user_id` String, `event_name` String, `id_decimal` Decimal(18, 2), `properties` Nullable(String), `id_with_nulls` Nullable(Int64)') AS e
WHERE and(and(greaterOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('explicit_redacted_timestamp', 6, 'UTC'))), ifNull(equals(step_1, 1), 0))) AS e)
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE bitTest(steps_bitfield, 1)
ORDER BY aggregation_target ASC SETTINGS join_algorithm='auto') AS source
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
format_csv_allow_double_quotes=1,
readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0,
enable_analyzer=1,
transform_null_in=1,
optimize_min_equality_disjunction_chain_length=4294967295,
allow_experimental_join_condition=1,
use_hive_partitioning=0
'''
# ---
# name: TestFunnelDataWarehouse.test_funnels_data_warehouse_non_uuid_id_column
'''
SELECT sum(step_1) AS step_1,
Expand Down Expand Up @@ -372,7 +534,7 @@
e.step_2 AS step_2
FROM
(SELECT toTimeZone(e.created_date, 'UTC') AS timestamp,
coalesce(e.converted_opportunity_id, e.id) AS aggregation_target,
toString(coalesce(e.converted_opportunity_id, e.id)) AS aggregation_target,
tupleElement(tuple(throwIf(isNull(e.id), 'Encountered a null value in posthog_test_salesforce_lead.id, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.id), reinterpretAsUUID(hex(MD5(concat('posthog_test_salesforce_lead_0_', ifNull(toString(e.id), ''))))))), 2) AS uuid,
NULL AS `$session_id`,
NULL AS `$window_id`,
Expand All @@ -382,7 +544,7 @@
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql_queries.insights.funnels.funnel_data_warehouse/posthog_test_salesforce_lead/*.csv', 'object_storage_root_user', 'object_storage_root_password', 'CSVWithNames', '`id` String, `created_date` DateTime64(3, \'UTC\'), `converted_opportunity_id` Nullable(String)') AS e
WHERE and(and(greaterOrEquals(timestamp, toDateTime64('2024-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('2024-06-30 23:59:59.999999', 6, 'UTC'))), ifNull(equals(step_0, 1), 0))
UNION ALL SELECT toTimeZone(e.created_date, 'UTC') AS timestamp,
e.id AS aggregation_target,
toString(e.id) AS aggregation_target,
tupleElement(tuple(throwIf(isNull(e.id), 'Encountered a null value in posthog_test_salesforce_opportunity.id, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.id), reinterpretAsUUID(hex(MD5(concat('posthog_test_salesforce_opportunity_1_', ifNull(toString(e.id), ''))))))), 2) AS uuid,
NULL AS `$session_id`,
NULL AS `$window_id`,
Expand All @@ -392,7 +554,7 @@
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql_queries.insights.funnels.funnel_data_warehouse/posthog_test_salesforce_opportunity/*.csv', 'object_storage_root_user', 'object_storage_root_password', 'CSVWithNames', '`id` String, `close_date` Nullable(Date), `created_date` DateTime64(3, \'UTC\')') AS e
WHERE and(and(greaterOrEquals(timestamp, toDateTime64('2024-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(timestamp, toDateTime64('2024-06-30 23:59:59.999999', 6, 'UTC'))), ifNull(equals(step_1, 1), 0))
UNION ALL SELECT e.close_date AS timestamp,
e.id AS aggregation_target,
toString(e.id) AS aggregation_target,
tupleElement(tuple(throwIf(isNull(e.id), 'Encountered a null value in posthog_test_salesforce_opportunity.id, but a non-null value is required. Please ensure this column contains no null values, or add a filter to exclude rows with null values.'), toUUIDOrDefault(toString(e.id), reinterpretAsUUID(hex(MD5(concat('posthog_test_salesforce_opportunity_2_', ifNull(toString(e.id), ''))))))), 2) AS uuid,
NULL AS `$session_id`,
NULL AS `$window_id`,
Expand Down
Loading
Loading