-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathschema.yml
More file actions
221 lines (219 loc) · 9.2 KB
/
schema.yml
File metadata and controls
221 lines (219 loc) · 9.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
models:
- name: pinval.comp
description: '{{ doc("table_pinval_comp") }}'
columns:
- name: assessment_year
description: '{{ doc("column_pinval_comps_assessment_year") }}'
data_tests:
- not_null:
name: pinval_comp_assessment_year_not_null
- name: run_id
description: '{{ doc("column_pinval_comps_run_id") }}'
data_tests:
- count_is_consistent:
name: pinval_comp_run_id_one_per_assessment_year
group_column: assessment_year
min_value: 1
max_value: 1
data_tests:
- unique_combination_of_columns:
name: pinval_comp_unique_by_assessment_year_pin_card_comp_num
combination_of_columns:
- assessment_year
- pin
- card
- comp_num
- name: pinval.assessment_card
description: '{{ doc("table_pinval_assessment_card") }}'
columns:
- name: assessment_year
description: '{{ doc("column_pinval_assessment_card_assessment_year") }}'
data_tests:
- not_null:
name: pinval_assessment_card_assessment_year_not_null
- name: char_class
description: '{{ doc("column_pinval_char_class") }}'
data_tests:
- not_null:
name: pinval_assessment_card_char_class_not_null
config:
where: reason_report_ineligible = 'non_regression_class'
- name: char_class_desc
description: '{{ doc("column_pinval_char_class_desc") }}'
- name: combined_bldg_sf
description: '{{ doc("column_pinval_combined_bldg_sf")}}'
data_tests:
- not_null:
name: pinval_assessment_card_combined_bldg_sf_not_null_when_is_parcel_small_multicard
config:
where: is_report_eligible
- name: is_frankencard
description: '{{ doc("column_pinval_is_frankencard") }}'
data_tests:
- count_is_consistent:
name: pinval_assessment_card_is_frankencard_contains_both_values_when_small_multicard
group_column: run_id, meta_pin
min_value: 2
max_value: 2
config:
where: is_parcel_small_multicard
- accepted_values:
name: pinval_assessment_card_is_frankencard_false_when_not_small_multicard
values:
- FALSE
quote: false
config:
where: not is_parcel_small_multicard
- name: is_report_eligible
description: '{{ doc("column_pinval_is_report_eligible") }}'
data_tests:
- not_null:
name: pinval_assessment_card_is_report_eligible_not_null
- name: meta_card_num
description: '{{ doc("column_pinval_meta_card_num") }}'
data_tests:
- not_null:
name: pinval_assessment_card_meta_card_num_not_null_when_is_report_eligible
config:
where: is_report_eligible
- is_null:
name: pinval_assessment_card_meta_card_num_null_when_reason_report_ineligible_is_missing_card
config:
where: reason_report_ineligible = 'missing_card'
- name: meta_nbhd_code
description: Assessor neighborhood code. Guaranteed to never be null.
data_tests:
- not_null:
name: pinval_assessment_card_meta_nbhd_code_not_null
- name: meta_pin
description: '{{ doc("shared_column_pin") }}'
data_tests:
- not_null:
name: pinval_assessment_card_meta_pin_not_null
- name: meta_pin_num_cards
description: '{{ doc("shared_column_pin_num_cards") }}'
data_tests:
- not_null:
name: pinval_assessment_card_meta_pin_num_cards_not_null
config:
where: is_report_eligible
- name: meta_township_code
description: '{{ doc("shared_column_township_code") }}'
data_tests:
- not_null:
name: pinval_assessment_card_meta_township_code_not_null
- name: meta_township_name
description: '{{ doc("shared_column_township_name") }}'
data_tests:
- not_null:
name: pinval_assessment_card_meta_township_name_not_null
- name: meta_triad_name
description: '{{ doc("shared_column_triad_name") }}'
data_tests:
- not_null:
name: pinval_assessment_card_meta_triad_name_not_null
- name: pin_class
description: '{{ doc("column_pinval_pin_class") }}'
- name: reason_report_ineligible
description: '{{ doc("column_pinval_reason_report_ineligible") }}'
data_tests:
- not_null:
name: pinval_assessment_card_reason_report_ineligible_is_not_null_when_not_report_eligible
config:
where: NOT is_report_eligible
- is_null:
name: pinval_assessment_card_reason_report_ineligible_is_null_when_is_report_eligible
config:
where: is_report_eligible
# There are a handful of known PINs that had the wrong class
# at modeling time, and so got a model value even though it
# should not have
- not_accepted_values:
name: pinval_assessment_card_reason_report_ineligible_not_non_tri_for_tri
values:
- non_tri
config:
where: LOWER(meta_triad_name) = LOWER(assessment_triad_name)
- not_accepted_values:
name: pinval_assessment_card_reason_report_ineligible_not_unknown
values:
- unknown
- name: run_id
description: '{{ doc("column_pinval_assessment_card_run_id") }}'
data_tests:
- not_null:
name: pinval_assessment_card_run_id_not_null_when_report_eligible
config:
where: is_report_eligible
# Test that a random SHAP column is not null to confirm that the join to
# the SHAP table works correctly. We choose char_bldg_sf because it is
# very unlikely to be removed from future models
- name: shap_char_bldg_sf
data_tests:
- not_null:
name: pinval_assessment_card_shap_char_bldg_sf_not_null
config:
where: is_report_eligible
- name: shap_run_id
description: '{{ doc("column_pinval_assessment_card_shap_run_id") }}'
data_tests:
- not_null:
name: pinval_assessment_card_shap_run_id_not_null_when_report_eligible
config:
where: is_report_eligible
data_tests:
- expression_is_true:
name: pinval_assessment_card_contains_a_column_for_all_predictors
# Test that every `model_predictor_all_name` array for every row is a
# subset of the columns in the table. This ensures that we never face
# a situation where we accidentally forget to update columns to add
# new model predictors, which might cause empty characteristics in
# the rendered reports.
#
# This test query is a bit complicated, but basically we're
# using `ARRAY_EXCEPT()` to perform a set difference between each
# `model_predictor_all_name` array on the left side, and the columns
# in the table on the right side. The `CARDINALITY(...) = 0` expression
# ensures that we return any rows where the length of the set
# difference is >0.
#
# It'd be nice to use the `model` Relation that is the first argument
# to the `expression_is_true` generic test instead of relying on
# `ref()` to extract the database and table name, but since this
# expression gets evaluated at compile time, it does not point to
# the correct model if we try to reference `model`
expression: |
CARDINALITY(
ARRAY_EXCEPT(
model_predictor_all_name,
(
SELECT ARRAY_AGG(column_name)
FROM information_schema.columns
WHERE table_schema = '{{ ref("pinval.assessment_card").schema }}'
AND table_name = '{{ ref("pinval.assessment_card").identifier }}'
)
)
) = 0
config:
where: model_predictor_all_name IS NOT NULL
- expression_is_true:
name: pinval_assessment_card_predictors_match_pinval_vars_dict
# Test that every `model_predictor_all_name` array for every row is a
# subset of the labels in the `pinval.vars_dict` seed. This ensures
# that we never forget to update the seed when predictors change
expression: |
CARDINALITY(
ARRAY_EXCEPT(
model_predictor_all_name,
(
SELECT ARRAY_AGG(code)
FROM {{ ref("pinval.vars_dict") }}
)
)
) = 0
- unique_combination_of_columns:
name: pinval_assessment_card_unique_assessment_year_meta_pin_meta_card_num
combination_of_columns:
- assessment_year
- meta_pin
- meta_card_num