Skip to content

Commit 6ab5a90

Browse files
committed
QuantileTransformer working on BigQuery, clarify that it's untested on Redshift
1 parent 3600fc9 commit 6ab5a90

File tree

4 files changed

+47
-6
lines changed

4 files changed

+47
-6
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The macros are:
1616
| [MinMaxScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html#sklearn.preprocessing.MinMaxScaler) | min_max_scaler | Y | Y | Y | [![example](images/min_max_scaler.png)](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#minmaxscaler) |
1717
| [Normalizer](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html#sklearn.preprocessing.Normalizer) | normalizer | Y | Y | Y | [![example](images/normalizer.png)](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#normalizer) |
1818
| [OneHotEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html#sklearn.preprocessing.OneHotEncoder) | one_hot_encoder | Y | Y | Y | ![example](images/one_hot_encoder.gif) |
19-
| [QuantileTransformer](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html#sklearn.preprocessing.QuantileTransformer) | quantile_transformer | Y | N | Y | [![example](images/quantile_transformer.png)](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#quantiletransformer-uniform-output) |
19+
| [QuantileTransformer](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html#sklearn.preprocessing.QuantileTransformer) | quantile_transformer | Y | Y | N | [![example](images/quantile_transformer.png)](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#quantiletransformer-uniform-output) |
2020
| [RobustScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html#sklearn.preprocessing.RobustScaler) | robust_scaler | Y | Y | Y | [![example](images/robust_scaler.png)](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#robustscaler) |
2121
| [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html#sklearn.preprocessing.StandardScaler) | standard_scaler | Y | Y | Y | [![example](images/standard_scaler.png)](https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#standardscaler) |
2222

dbt_project.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: 'dbt_ml_preprocessing'
2-
version: '0.3.0'
2+
version: '0.4.0'
33

44
require-dbt-version: ">=0.15.1"
55

integration_tests/macros/quantile_transformer_model_macro.sql

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,17 @@ with data as (
88
select * from data
99
{% endmacro %}
1010

11+
-- BigQuery has a quantile_transformer implementation, so this model selects the
-- macro's actual output for `col_to_transform` rather than an empty placeholder result
{% macro bigquery__quantile_transformer_model_macro() %}
with transformed as (

    {{ dbt_ml_preprocessing.quantile_transformer(ref('data_quantile_transformer'), 'col_to_transform') }}

)
select * from transformed
{% endmacro %}
20+
1121
-- For adapters without a quantile_transformer implementation, return zero rows
-- so that the test is forced to pass
{% macro default__quantile_transformer_model_macro() %}
-- the derived table carries an alias because Postgres-family engines reject an unaliased subquery in FROM
select 1 as empty_result
from (select 1) as empty_source
where 1 = 2
{% endmacro %}

macros/quantile_transformer.sql

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,42 @@ coalesce(y1 + ((x-x1)/(x2-x1)) * (y2-y1),0) as {{ source_column }}_transformed
3333
from linear_interpolation_variables
3434
{% endmacro %}
3535

36+
{#
    BigQuery implementation of quantile_transformer.

    Renders a query that maps every non-null value of `source_column` to a value
    between 0 and 1 by linear interpolation between the column's quantile values.

    Args:
        source_table: relation to read from.
        source_column: name of the column to transform.
        n_quantiles: number of quantile points to compute; must be at least 2,
            otherwise a compiler error is raised.
        output_distribution: accepted but not referenced in this macro body.
        subsample: accepted but not referenced in this macro body.
        include_columns: select-list fragment emitted verbatim ahead of the
            transformed column.

    Output columns: `include_columns` followed by `<source_column>_transformed`.
    Rows where `source_column` is null are filtered out.
#}
{% macro bigquery__quantile_transformer(source_table,source_column,n_quantiles,output_distribution,subsample,include_columns) %}
{# n_quantiles = 1 would divide by zero below and n_quantiles = 0 would render an empty CTE #}
{% if n_quantiles < 2 %}
    {% do exceptions.raise_compiler_error("The `quantile_transformer` macro requires n_quantiles to be at least 2, got " ~ n_quantiles) %}
{% endif %}
with quantile_values as(
    -- one row per quantile: the quantile (0..1) and the column value found at that quantile
    {% for quantile_index in range(n_quantiles) %}
    {% set quantile_fraction = quantile_index / (n_quantiles-1) %}
    select distinct {{ quantile_fraction }} as quantile,percentile_cont({{ source_column }},{{ quantile_fraction }}) over() as quantile_value from {{ source_table }}
    {% if not loop.last %} union all {% endif %}
    {% endfor %}
),
-- fold all quantiles and quantile values into a single row, an array of structs that we can safely cross join on
quantile_values_array as(
    select array_agg(struct (quantile, quantile_value)) as quantile_values from quantile_values
),
-- prepare to apply linear interpolation formula:
-- (x1, y1) is the closest quantile point strictly below x, (x2, y2) the closest one at or above x
linear_interpolation_variables as(
    select
        {{ include_columns }},
        a.{{ source_column }} as x,
        (select max(b.quantile) from unnest(quantile_values) b where b.quantile_value<a.{{ source_column }}) as y1,
        (select min(b.quantile) from unnest(quantile_values) b where b.quantile_value>=a.{{ source_column }}) as y2,
        (select max(b.quantile_value) from unnest(quantile_values) b where b.quantile_value<a.{{ source_column }}) as x1,
        (select min(b.quantile_value) from unnest(quantile_values) b where b.quantile_value>=a.{{ source_column }}) as x2
    from {{ source_table }} a
    -- quantile_values_array holds exactly one row, so this attaches the array to every source row
    cross join quantile_values_array
    where a.{{ source_column }} is not null
)
select
    {{ include_columns }},
    -- x1 < x <= x2 by construction, so the divisor is never zero;
    -- when no quantile value lies below x, x1/y1 are null and the result falls back to 0
    coalesce(y1 + ((x-x1)/(x2-x1)) * (y2-y1),0) as {{ source_column }}_transformed
from linear_interpolation_variables
{% endmacro %}
67+
3668
{#
    Fallback used when no adapter-specific quantile_transformer macro exists.
    It always aborts compilation with an explanatory error; the arguments are
    accepted but never used.
#}
{% macro default__quantile_transformer(source_table,source_column,n_quantiles,output_distribution,subsample,include_columns) %}

{% set unsupported_message %}
The `quantile_transformer` macro is only supported on Snowflake and BigQuery at this time. It should work on other DBs, it just requires some rework.
{% endset %}
{{ exceptions.raise_compiler_error(unsupported_message) }}
{% endmacro %}

0 commit comments

Comments
 (0)