ccao-data · jeancochrane · Jul 11, 2025 · Jul 9, 2025 · Jul 9, 2025 · Jul 9, 2025
@@ -1,3 +1,24 @@
+## char_class
+
+{% docs column_pinval_char_class %}
+The class for the card or parcel that this row represents.
+
+If a row represents a card that was part of the model assessment set, then
+this column will be the card class that we used as a predictor in the model.
+If a row instead represents a parcel that was _not_ part of the assessment set,
+then this column will be the parcel class, which we use in the
+`reason_report_ineligible` column to explain why we excluded the parcel from
+the assessment set.
+
+This column will never be null.
+{% enddocs %}
+
+## char_class_desc
+
+{% docs column_pinval_char_class_desc %}
+A short description explaining the code contained in `char_class`.
+{% enddocs %}
+
 ## is_report_eligible
 
 {% docs column_pinval_is_report_eligible %}
@@ -6,7 +27,7 @@ When `TRUE`, this PIN is eligible for a PINVAL report for the given model run
 
 ## meta_card_num
 
-{% docs meta_card_num %}
+{% docs column_pinval_meta_card_num %}
 The card number for the card.
 
 There are two cases in which this column might be null:
@@ -22,86 +43,6 @@ There are two cases in which this column might be null:
 
 {% enddocs %}
 
-## model_run_id
-
-{% docs column_pinval_model_run_id %}
-Run ID for the model run associated with this card and its values.
-
-Prefer this column to `run_id`, which comes from `model.assessment_card`,
-because `run_id` will be null if the parcel is ineligible for a report
-for this model run. In contrast, this column will never be null.
-
-In the case of a parcel that is ineligible for a PINVAL report, the presence
-of a value for this column might seem confusing because that parcel wasn't
-actually valued in the model run. However, since this table requires a row
-for every parcel in every eligible model run in order to compute parcel
-eligibility in the `is_report_eligible` and `reason_report_ineligible` columns,
-we add parcels to model runs that considered them ineligible. As a result,
-every model run in this table should have a row for every parcel in its data
-year, regardless of whether that parcel was actually part of the model run.
-{% enddocs %}
-
-## parcel_class
-
-{% docs column_pinval_parcel_class %}
-The class for the parcel that this card is associated with.
-
-This field is different from `char_class`, which comes from
-`model.assessment_card` and represents the card class. Card classes do not
-necessarily match the class of the parcel that the card is associated with.
-This field will also always be present even if `char_class` is null, because
-this field comes from `default.vw_pin_universe` which contains PINs that
-are not present in the assessment set due to not being a residential
-regression class.
-{% enddocs %}
-
-## parcel_class_description
-
-{% docs column_pinval_parcel_class_description %}
-The short description for the card's parcel class.
-
-See `parcel_class` for details on the difference between parcel classes and
-card classes in the context of this view.
-{% enddocs %}
-
-## parcel_township_code
-
-{% docs column_pinval_parcel_township_code %}
-Township code for the card's parcel.
-
-See `parcel_class` for details on the difference between parcel classes and
-card classes in the context of this view.
-{% enddocs %}
-
-## parcel_township_name
-
-{% docs column_pinval_parcel_township_name %}
-Township name for the card's parcel.
-
-See `parcel_class` for details on the difference between parcel classes and
-card classes in the context of this view.
-{% enddocs %}
-
-## parcel_triad_name
-
-{% docs column_pinval_parcel_triad_name %}
-Triad name for the card's parcel.
-
-See `parcel_class` for details on the difference between parcel classes and
-card classes in the context of this view.
-{% enddocs %}
-
-## pin
-
-{% docs column_pinval_pin %}
-The card's parcel identification number (PIN).
-
-In general, you should prefer this column to `meta_pin` when querying from this
-table, since `meta_pin` comes from `model.assessment_card` and will be null for
-PINs that the res model does not value. You can safely use `meta_pin` when
-filtering by `is_report_eligible = TRUE`, however.
-{% enddocs %}
-
 ## reason_report_ineligible
 
 {% docs column_pinval_reason_report_ineligible %}
@@ -128,3 +69,18 @@ Possible values for this variable are:
   the case when `is_report_eligible` is `TRUE`, and our data integrity
   tests check to make sure this is true
 {% enddocs %}
+
+## run_id
+
+{% docs column_pinval_run_id %}
+Run ID for the model run associated with this card and its values.
+
+In the case of a parcel that is ineligible for a PINVAL report, the presence
+of a value for this column might seem confusing because that parcel wasn't
+actually valued in the model run. However, since this table requires a row
+for every parcel in every eligible model run in order to compute parcel
+eligibility in the `is_report_eligible` and `reason_report_ineligible` columns,
+we add parcels to model runs that considered them ineligible. As a result,
+every model run in this table should have a row for every parcel in its data
+year, regardless of whether that parcel was actually part of the model run.
+{% enddocs %}
@@ -11,29 +11,7 @@ do not have reports. Use the `is_report_eligible` column to filter for
 report-eligible PINs, and use the `reason_report_ineligible` column to explain
 missing reports.
 
-In general, parcel-level attributes that come from `default.vw_pin_universe`
-should have a `parcel_` prefix and should always be non-null. Card-level
-attributes that come from `model.assessment_card` should have one of the
-standard modeling prefixes and may be null. Standard modeling prefixes include:
-
-- `meta_`
-- `pred_`
-- `char_`
-- `loc_`
-- `prox_`
-- `acs5_`
-- `other_`
-- `time_`
-- `ind_`
-- `lag_`
-- `ccao_`
-- `shp_`
-- `flag_`
-
-See the docs for `model.assessment_card` for a detailed list of modeling
-attributes.
-
-**Primary Key**: `run_id`, `pin`, `meta_card_num`
+**Primary Key**: `run_id`, `meta_pin`, `meta_card_num`
 {% enddocs %}
 
 # vw_comp

@@ -30,21 +30,20 @@ school_districts AS (
 )
 
 SELECT
-    -- Select PIN from `default.vw_pin_universe` so that we always have a PIN
-    -- even if no row exists in `model.assesssment_card`, in which case
-    -- `meta_pin` will be null
-    uni.pin,
-    -- Use the `parcel_` prefix to mark attributes that come from
-    -- `default.vw_pin_universe` (except `pin` above, since `parcel_pin` would
-    -- be redundant). We use these attributes when explaining to end users
-    -- why a PIN is ineligible for a report, so we want to query these attrs
-    -- even when they duplicate attrs in `model.assessment_card` because we
-    -- need to be sure they will always be non-null
-    uni.township_code AS parcel_township_code,
-    uni.township_name AS parcel_township_name,
-    LOWER(uni.triad_name) AS parcel_triad_name,
-    uni.class AS parcel_class,
-    pin_cd.class_desc AS parcel_class_description,
+    -- For essential attributes like PIN and class, fall back to values from
+    -- `default.vw_pin_universe` when no row exists in `model.assessment_card`
+    -- so we can ensure a row for every card regardless of whether it was
+    -- included in the assessment set for a given model run. We need these
+    -- essential attrs even when parcels aren't in the assessment set in order
+    -- to generate detailed descriptions for why those parcels don't have
+    -- reports
+    COALESCE(ac.meta_pin, uni.pin) AS meta_pin,
+    ac.meta_card_num,
+    COALESCE(ac.township_code, uni.township_code) AS meta_township_code,
+    uni.township_name AS meta_township_name,
+    LOWER(uni.triad_name) AS meta_triad_name,
+    COALESCE(ac.char_class, uni.class) AS char_class,
+    COALESCE(card_cd.class_desc, pin_cd.class_desc) AS char_class_desc,
     -- Three possible reasons we would decline to build a PINVAL report for a
     -- PIN:
     --
@@ -93,8 +92,110 @@ SELECT
             THEN NULL
         ELSE 'unknown'
     END AS reason_report_ineligible,
-    ac.*,
+    -- Select all predictors from `model.assessment_card`. Unfortunately we
+    -- have to add predictors to this list manually whenever we add them to
+    -- the model, but we have a data integrity test on this table that should
+    -- alert us if we ever fall out of sync with the model.
+    --
+    -- Never remove predictors from this list, only add them. Outdated
+    -- predictors are most likely necessary to support reports for prior
+    -- assessment years
+    ac.meta_nbhd_code,
+    ac.meta_sale_count_past_n_years,
+    ac.char_yrblt,
+    ac.char_air,
+    ac.char_apts,
+    ac.char_attic_fnsh,
+    ac.char_attic_type,
+    ac.char_beds,
+    ac.char_bldg_sf,
+    ac.char_bsmt,
+    ac.char_bsmt_fin,
+    ac.char_ext_wall,
+    ac.char_fbath,
+    ac.char_frpl,
+    ac.char_gar1_att,
+    ac.char_gar1_cnst,
+    ac.char_gar1_size,
+    ac.char_hbath,
+    ac.char_land_sf,
+    ac.char_heat,
+    ac.char_ncu,
+    ac.char_porch,
+    ac.char_roof_cnst,
+    ac.char_rooms,
+    ac.char_tp_dsgn,
+    ac.char_type_resd,
+    ac.char_recent_renovation,
+    ac.loc_longitude,
+    ac.loc_latitude,
+    ac.loc_census_tract_geoid,
+    ac.loc_env_flood_fs_factor,
+    ac.loc_school_elementary_district_geoid,
+    ac.loc_school_secondary_district_geoid,
+    ac.loc_access_cmap_walk_nta_score,
+    ac.loc_access_cmap_walk_total_score,
+    ac.loc_tax_municipality_name,
+    ac.prox_num_pin_in_half_mile,
+    ac.prox_num_bus_stop_in_half_mile,
+    ac.prox_num_foreclosure_per_1000_pin_past_5_years,
+    ac.prox_avg_school_rating_in_half_mile,
+    ac.prox_airport_dnl_total,
+    ac.prox_nearest_bike_trail_dist_ft,
+    ac.prox_nearest_cemetery_dist_ft,
+    ac.prox_nearest_cta_route_dist_ft,
+    ac.prox_nearest_cta_stop_dist_ft,
+    ac.prox_nearest_hospital_dist_ft,
+    ac.prox_lake_michigan_dist_ft,
+    ac.prox_nearest_metra_route_dist_ft,
+    ac.prox_nearest_metra_stop_dist_ft,
+    ac.prox_nearest_park_dist_ft,
+    ac.prox_nearest_railroad_dist_ft,
+    ac.prox_nearest_university_dist_ft,
+    ac.prox_nearest_vacant_land_dist_ft,
+    ac.prox_nearest_water_dist_ft,
+    ac.prox_nearest_golf_course_dist_ft,
+    ac.prox_nearest_road_highway_dist_ft,
+    ac.prox_nearest_road_arterial_dist_ft,
+    ac.prox_nearest_road_collector_dist_ft,
+    ac.prox_nearest_road_arterial_daily_traffic,
+    ac.prox_nearest_road_collector_daily_traffic,
+    ac.prox_nearest_new_construction_dist_ft,
+    ac.prox_nearest_stadium_dist_ft,
+    ac.acs5_percent_age_children,
+    ac.acs5_percent_age_senior,
+    ac.acs5_median_age_total,
+    ac.acs5_percent_household_family_married,
+    ac.acs5_percent_household_nonfamily_alone,
+    ac.acs5_percent_education_high_school,
+    ac.acs5_percent_education_bachelor,
+    ac.acs5_percent_education_graduate,
+    ac.acs5_percent_income_below_poverty_level,
+    ac.acs5_median_income_household_past_year,
+    ac.acs5_median_income_per_capita_past_year,
+    ac.acs5_percent_income_household_received_snap_past_year,
+    ac.acs5_percent_employment_unemployed,
+    ac.acs5_median_household_total_occupied_year_built,
+    ac.acs5_median_household_renter_occupied_gross_rent,
+    ac.acs5_percent_household_owner_occupied,
+    ac.other_tax_bill_rate,
+    ac.time_sale_year,
+    ac.time_sale_day,
+    ac.time_sale_quarter_of_year,
+    ac.time_sale_month_of_year,
+    ac.time_sale_day_of_year,
+    ac.time_sale_day_of_month,
+    ac.time_sale_day_of_week,
+    ac.time_sale_post_covid,
+    ac.shp_parcel_centroid_dist_ft_sd,
+    ac.shp_parcel_edge_len_ft_sd,
+    ac.shp_parcel_interior_angle_sd,
+    ac.shp_parcel_mrr_area_ratio,
+    ac.shp_parcel_mrr_side_ratio,
+    ac.shp_parcel_num_vertices,
+    ac.pred_card_initial_fmv,
     ap.pred_pin_final_fmv_round,
+    -- Pull some additional parcel-level info from `model.assessment_pin`
     CAST(
         ROUND(
             ac.pred_card_initial_fmv / NULLIF(ac.char_bldg_sf, 0), 0
@@ -103,6 +204,8 @@ SELECT
         AS pred_card_initial_fmv_per_sqft,
     ap.loc_property_address AS property_address,
     CAST(ap.meta_pin_num_cards AS INTEGER) AS ap_meta_pin_num_cards,
+    -- Format some card-level predictors to make them more interpretable to
+    -- non-technical users
     CONCAT(CAST(ac.char_class AS VARCHAR), ': ', card_cd.class_desc)
         AS char_class_detailed,
     COALESCE(
@@ -126,7 +229,12 @@ SELECT
     END AS combined_bldg_sf,
     elem_sd.name AS school_elementary_district_name,
     sec_sd.name AS school_secondary_district_name,
-    run.run_id AS model_run_id,
+    -- Pull model run metadata from `model.metadata` and `model.final_model`.
+    -- This metadata will be duplicated across all cards in a model run, but
+    -- that's fine because this table is only ever intended to be used to
+    -- extract individual rows and use those rows as the basis for PINVAL
+    -- reports, in which case row-level duplication is useful
+    run.run_id,
     run.model_predictor_all_name,
     run.assessment_triad AS assessment_triad_name,
     run.assessment_year,
@@ -136,6 +244,9 @@ SELECT
 -- PIN is valid but not in assessment_card
 FROM {{ ref('default.vw_pin_universe') }} AS uni
 INNER JOIN runs_to_include AS run
+-- We use prior year characteristics for model predictors, so we need to
+-- pull parcel information based on the model's data year, not its
+-- assessment year
     ON uni.year = run.assessment_data_year
 LEFT JOIN {{ source('model', 'assessment_card') }} AS ac
     ON run.run_id = ac.run_id

@@ -21,6 +21,7 @@ pivoted_comp AS (
         SELECT
             pin,
             card,
+            year,
             {{ i }} AS comp_num,
             comp_pin_{{ i }} AS comp_pin,
             comp_score_{{ i }} AS comp_score,
@@ -38,7 +39,7 @@ school_districts AS (
         geoid,
         year,
         MAX(name) AS name
-    FROM spatial.school_district
+    FROM {{ source('spatial', 'school_district') }}
     WHERE geoid IS NOT NULL
     GROUP BY geoid, year
 ),
@@ -57,7 +58,12 @@ sale_years AS (
 )
 
 SELECT
-    pc.*,
+    pc.pin,
+    pc.card,
+    pc.comp_num,
+    pc.comp_pin,
+    pc.comp_score,
+    pc.comp_document_num,
     COALESCE(pc.pin = pc.comp_pin, FALSE) AS is_subject_pin_sale,
     CASE
         WHEN train.ind_pin_is_multicard = TRUE THEN 'Subject card'
@@ -82,8 +88,12 @@ SELECT
             || CAST(sy.max_year AS VARCHAR)
     END AS sale_year_range
 FROM pivoted_comp AS pc
-LEFT JOIN {{ source('model', 'pinval_test_training_data') }} AS train
-    ON pc.comp_pin = train.meta_pin
+LEFT JOIN {{ ref('model.training_data') }} AS train
+-- Join on year rather than run ID because `model.training_data` is
+-- guaranteed to be unique by year but may have a different run ID
+-- than the comps run
+    ON pc.year = train.assessment_year
+    AND pc.comp_pin = train.meta_pin
     AND pc.comp_document_num = train.meta_sale_document_num
 LEFT JOIN school_districts AS elem_sd
     ON train.loc_school_elementary_district_geoid = elem_sd.geoid