Skip to content

Commit 9eddb15

Browse files
Merge pull request #208 from dbt-labs/let-there-be-median
Let There Be Median
2 parents 1f1100c + 3591522 commit 9eddb15

29 files changed

Lines changed: 518 additions & 163 deletions
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
kind: Features
2+
body: Adding median
3+
time: 2023-01-09T14:55:30.09271-06:00
4+
custom:
5+
Author: callum-mcdata
6+
Issue: "180"
7+
PR: "208"

integration_tests/dbt_project.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ version: "1.0.0"
66
config-version: 2
77

88
# This setting configures which "profile" dbt uses for this project.
9-
profile: "dbt_metrics_integration_tests_postgres"
9+
profile: "dbt_metrics_integration_tests_bigquery"
1010

1111
model-paths: ["models"]
1212
analysis-paths: ["analyses"]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
version: 2
2+
metrics:
3+
- name: base_median_metric
4+
model: ref('fact_orders')
5+
label: Total Discount ($)
6+
timestamp: order_date
7+
time_grains: [day, week, month, all_time]
8+
calculation_method: median
9+
expression: discount_total
10+
dimensions:
11+
- had_discount
12+
- order_country
13+
14+
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
select *
2+
from
3+
{{ metrics.calculate(metric('base_median_metric'),
4+
grain='month',
5+
dimensions=['had_discount'])
6+
}}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
select *
2+
from
3+
{{ metrics.calculate(metric('base_median_metric'))
4+
}}

macros/calculate.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
{#- Need this here, since the actual ref is nested within loops/conditions: -#}
88
-- depends on: {{ ref(var('dbt_metrics_calendar_model', 'dbt_metrics_default_calendar')) }}
99

10-
{# ############
10+
{#- ############
1111
VARIABLE SETTING - Creating the metric tree and making sure metric list is a list!
1212
############ -#}
1313

macros/get_metric_sql.sql

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,10 @@ metrics there are -#}
4848
start_date=start_date,
4949
end_date=end_date)
5050
}}
51-
5251
{#- Next we check if it is a composite metric or single metric by checking the length of the list -#}
5352
{#- This filter forms the basis of how we construct the SQL -#}
54-
5553
{#- If composite, we begin by looping through each of the metric names that make
5654
up the composite metric. -#}
57-
5855
{%- for metric_name in metric_tree["parent_set"] -%}
5956

6057
{{ metrics.build_metric_sql(

macros/sql_gen/build_metric_sql.sql

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{%- macro build_metric_sql(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, dimensions_provided, total_dimension_count) %}
2-
2+
33
{%- set treat_null_values_as_zero = metric_dictionary.get("config").get("treat_null_values_as_zero", True) -%}
44
{#- This is the SQL Gen part - we've broken each component out into individual macros -#}
55
{#- We broke this out so it can loop for composite metrics -#}
@@ -28,7 +28,6 @@
2828

2929
{%- endif -%}
3030

31-
3231
{{ metrics.gen_spine_time_cte(
3332
metric_name=metric_dictionary.name,
3433
grain=grain,
@@ -53,4 +52,4 @@
5352
treat_null_values_as_zero=treat_null_values_as_zero
5453
)}}
5554

56-
{% endmacro -%}
55+
{%- endmacro -%}

macros/sql_gen/gen_aggregate_cte.sql

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
and THEN aggregating, we are instead aggregating from the beginning and then
1010
joining downstream for performance. Additionally, we're using a subquery instead
1111
of a CTE, which was significantly more performant during our testing. -#}
12+
{#- #}
1213
select
1314
1415
{%- if grain %}
@@ -37,12 +38,11 @@
3738

3839
{%- if grain %}
3940
{{ bool_or('metric_date_day is not null') }} as has_data,
40-
{% endif %}
41+
{%- endif %}
4142

4243
{#- This line performs the relevant aggregation by calling the
4344
gen_primary_metric_aggregate macro. Take a look at that one if you're curious -#}
4445
{{ metrics.gen_primary_metric_aggregate(metric_dictionary.calculation_method, 'property_to_aggregate') }} as {{ metric_dictionary.name }}
45-
4646
from ({{ metrics.gen_base_query(
4747
metric_dictionary=metric_dictionary,
4848
grain=grain,
@@ -57,11 +57,9 @@
5757
) as base_query
5858
5959
where 1=1
60-
61-
{% if metric_dictionary.window is not none and grain %}
60+
{%- if metric_dictionary.window is not none and grain %}
6261
and date_{{grain}} = window_filter_date
63-
{% endif %}
64-
62+
{%- endif %}
6563
{{ metrics.gen_group_by(grain, dimensions, calendar_dimensions, relevant_periods) }}
6664
6765
)

macros/sql_gen/gen_base_query.sql

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@
33
{% endmacro %}
44

55
{% macro default__gen_base_query(metric_dictionary, grain, dimensions, secondary_calculations, start_date, end_date, calendar_tbl, relevant_periods, calendar_dimensions, total_dimension_count) %}
6-
76
{# This is the "base" CTE which selects the fields we need to correctly
8-
calculate the metric. #}
7+
calculate the metric. -#}
98
select
10-
11-
{% if grain %}
9+
{% if grain -%}
1210
cast(base_model.{{metric_dictionary.timestamp}} as date) as metric_date_day,
1311
calendar_table.date_{{ grain }} as date_{{grain}},
1412
calendar_table.date_day as window_filter_date,
@@ -17,36 +15,23 @@
1715
calendar_table.date_{{ period }},
1816
{% endfor -%}
1917
{%- endif -%}
20-
{%- endif -%}
21-
22-
{% for dim in dimensions %}
18+
{%- endif %}
19+
{#- -#}
20+
{%- for dim in dimensions -%}
2321
base_model.{{ dim }},
24-
{% endfor %}
25-
26-
{% for calendar_dim in calendar_dimensions %}
22+
{%- endfor -%}
23+
{%- for calendar_dim in calendar_dimensions -%}
2724
calendar_table.{{ calendar_dim }},
28-
{% endfor %}
29-
30-
31-
{%- if metric_dictionary.expression and metric_dictionary.expression | replace('*', '') | trim != '' %}
32-
33-
({{ metric_dictionary.expression }}) as property_to_aggregate
34-
{%- elif metric_dictionary.calculation_method == 'count' -%}
35-
{# We use 1 as the property to aggregate in count so that it matches count(*) #}
36-
1 as property_to_aggregate
37-
{%- else -%}
38-
{%- do exceptions.raise_compiler_error("Expression to aggregate is required for non-count aggregation in metric `" ~ metric_dictionary.name ~ "`") -%}
39-
{%- endif %}
40-
41-
25+
{%- endfor -%}
26+
{{ metrics.gen_property_to_aggregate(metric_dictionary, grain, dimensions, calendar_dimensions) }}
4227
from {{ metric_dictionary.metric_model }} base_model
43-
44-
{% if grain or calendar_dimensions|length > 0 %}
28+
{# -#}
29+
{%- if grain or calendar_dimensions|length > 0 -%}
4530
{{ metrics.gen_calendar_table_join(metric_dictionary, calendar_tbl) }}
46-
{% endif %}
47-
31+
{%- endif -%}
32+
{# #}
4833
where 1=1
49-
34+
{#- -#}
5035
{{ metrics.gen_filters(metric_dictionary, start_date, end_date) }}
5136

5237
{%- endmacro -%}

0 commit comments

Comments
 (0)