|
| 1 | +/** |
| 2 | + * # google_bigquery_syndicated_dataset |
| 3 | + * |
| 4 | + * Creates a BigQuery dataset configured for syndication to Mozilla Data Platform |
| 5 | + * infrastructure (mozdata and data-shared projects). This module is meant to |
| 6 | + * simplify the steps in [Importing Data from OLTP Databases to BigQuery via Federated Queries](https://mozilla-hub.atlassian.net/wiki/spaces/IP/pages/473727279/Importing+Data+from+OLTP+Databases+to+BigQuery+via+Federated+Queries) |
| 7 | + * |
| 8 | + * This module abstracts away the syndication boilerplate: |
| 9 | + * - Resolves syndication service accounts via workgroup |
| 10 | + * - Looks up the org custom role for syndication |
| 11 | + * - Auto-discovers whether syndicated datasets exist in data platform projects |
| 12 | + * - Adds dataset authorizations only when targets exist |
| 13 | + * |
| 14 | + * ## Target Inference |
| 15 | + * |
| 16 | + * The `syndicated_dataset_id` (defaults to `dataset_id`) determines targets: |
| 17 | + * - Does NOT end in `_syndicate` → user-facing → both mozdata and data-shared |
| 18 | + * - Ends in `_syndicate` → data-shared only |
| 19 | + * - Eventually the syndication datasets themselves will be inferred from bqetl metadata available to all MozCloud tenant infrastructure |
| 20 | + * |
| 21 | + * ## State propagation |
| 22 | + * |
| 23 | + * While this module reduces the number of PRs required to set up syndication, it will not automatically |
| 24 | + * propagate those changes. You still need to follow the steps at |
| 25 | + * https://mozilla-hub.atlassian.net/wiki/spaces/SRE/pages/27924945/Atlantis+-+Terraform+Automation#Invoking-Atlantis-without-terraform-changes |
| 26 | + * in order to authorize datasets on the tenant infra side. Eventually policy-as-code and drift |
| 27 | + * detection automation will make these manual steps unnecessary. |
| 28 | + * |
| 29 | + */ |
| 30 | + |
| 31 | +locals { |
| 32 | + # Target realm and dataset ID, each defaulting to this module's own realm / dataset_id |
| 33 | + target_realm = coalesce(var.target_realm, var.realm) |
| 34 | + syndicated_dataset_id = coalesce(var.syndicated_dataset_id, var.dataset_id) |
| 35 | + # Dataset IDs without the "_syndicate" suffix are treated as user-facing |
| 36 | + is_user_facing = !endswith(local.syndicated_dataset_id, "_syndicate") |
| 37 | + # Environment segment of the remote-state prefix: "prod" for the prod realm, "stage" otherwise |
| 38 | + target_env = local.target_realm != "prod" ? "stage" : "prod" |
| 39 | + # Candidate targets; data-shared is always kept, mozdata only for user-facing datasets |
| 40 | + target_config = { |
| 41 | + for target_name, target_cfg in { |
| 42 | + data-shared = { |
| 43 | + project_ids = { prod = "moz-fx-data-shared-prod", nonprod = "moz-fx-data-shar-nonprod-efed" } |
| 44 | + state_path = "bigquery-new" |
| 45 | + } |
| 46 | + mozdata = { |
| 47 | + project_ids = { prod = "mozdata", nonprod = "mozdata-nonprod" } |
| 48 | + state_path = "bigquery" |
| 49 | + } |
| 50 | + } : target_name => target_cfg |
| 51 | + if target_name == "data-shared" || local.is_user_facing |
| 52 | + } |
| 53 | + |
| 54 | + # Resolve each target's project ID and remote-state prefix for the selected realm |
| 55 | + targets = { |
| 56 | + for target_name, target_cfg in local.target_config : target_name => { |
| 57 | + project_id = target_cfg.project_ids[local.target_realm] |
| 58 | + state_prefix = "projects/${target_name}/${local.target_realm}/envs/${local.target_env}/${target_cfg.state_path}" |
| 59 | + } |
| 60 | + } |
| 61 | +} |
| 62 | + |
| 63 | +# Read each syndication target's Terraform state so dataset existence can be checked |
| 64 | +data "terraform_remote_state" "syndication_target" { |
| 65 | + for_each = local.targets |
| 66 | + backend = "gcs" |
| 67 | + |
| 68 | + # The state bucket is named after the target project; the prefix is computed in local.targets |
| 69 | + config = { |
| 70 | + prefix = each.value.state_prefix |
| 71 | + bucket = "${each.value.project_id}-tf" |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +locals { |
| 76 | + # One authorized-dataset entry per target whose remote state lists the syndicated |
| 77 | + # dataset ID among the values of its `syndicate_datasets` output |
| 78 | + syndication_dataset_access = [ |
| 79 | + for target_name, target in local.targets : { |
| 80 | + dataset_id = local.syndicated_dataset_id |
| 81 | + project_id = target.project_id |
| 82 | + } if contains( |
| 83 | + values(data.terraform_remote_state.syndication_target[target_name].outputs.syndicate_datasets), |
| 84 | + local.syndicated_dataset_id |
| 85 | + ) |
| 86 | + ] |
| 87 | +} |
| 88 | + |
| 89 | +data "terraform_remote_state" "org" { |
| 90 | + # Org-level state; its outputs supply the custom syndication role ID used for dataset access below |
| 91 | + backend = "gcs" |
| 92 | + config = { |
| 93 | + prefix = "projects/org" |
| 94 | + bucket = "moz-fx-platform-mgmt-global-tf" |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +# Resolves the service accounts that perform syndication from the given workgroup IDs. |
| 99 | +# These are currently Jenkins accounts, with plans to move to Airflow: https://mozilla-hub.atlassian.net/browse/SVCSE-3005 |
| 100 | +module "syndication_workgroup" { |
| 101 | + source = "github.com/mozilla/terraform-modules//mozilla_workgroup?ref=main" |
| 102 | + ids = var.syndication_workgroup_ids |
| 103 | + # TODO: remove the remote-state settings below once SVCSE-4008 is complete |
| 104 | + terraform_remote_state_prefix = "projects/data-shared/global/access-groups" |
| 105 | + terraform_remote_state_bucket = "moz-fx-data-terraform-state-global" |
| 106 | +} |
| 107 | + |
| 108 | +resource "google_bigquery_dataset" "dataset" { |
| 109 | + # Managed here only when var.create_dataset is true; otherwise the separate access resources apply |
| 110 | + count = var.create_dataset ? 1 : 0 |
| 111 | + dataset_id = var.dataset_id |
| 112 | + friendly_name = var.friendly_name |
| 113 | + description = var.description |
| 114 | + location = var.location |
| 115 | + labels = var.labels |
| 116 | + max_time_travel_hours = var.max_time_travel_hours |
| 117 | + delete_contents_on_destroy = var.delete_contents_on_destroy |
| 118 | + default_table_expiration_ms = var.default_table_expiration_ms |
| 119 | + default_partition_expiration_ms = var.default_partition_expiration_ms |
| 120 | + |
| 121 | + # Keep the OWNER grant for projectOwners unless the caller opts out |
| 122 | + dynamic "access" { |
| 123 | + for_each = var.disable_project_owners_access ? [] : [1] |
| 124 | + content { |
| 125 | + special_group = "projectOwners" |
| 126 | + role = "OWNER" |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + # Caller-supplied role grants: entries with a role that are neither dataset nor view authorizations |
| 131 | + dynamic "access" { |
| 132 | + for_each = [for entry in var.access : entry if !(entry.role == null || entry.dataset != null || entry.view != null)] |
| 133 | + content { |
| 134 | + role = access.value.role |
| 135 | + iam_member = access.value.iam_member |
| 136 | + domain = access.value.domain |
| 137 | + special_group = access.value.special_group |
| 138 | + group_by_email = access.value.group_by_email |
| 139 | + user_by_email = access.value.user_by_email |
| 140 | + } |
| 141 | + } |
| 142 | + |
| 143 | + # Caller-supplied authorized datasets (separate from the syndication targets) |
| 144 | + dynamic "access" { |
| 145 | + for_each = [for entry in var.access : entry if entry.dataset != null] |
| 146 | + content { |
| 147 | + dataset { |
| 148 | + target_types = access.value.dataset.target_types |
| 149 | + dataset { |
| 150 | + dataset_id = access.value.dataset.dataset.dataset_id |
| 151 | + project_id = access.value.dataset.dataset.project_id |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + } |
| 156 | + |
| 157 | + # Caller-supplied authorized views |
| 158 | + dynamic "access" { |
| 159 | + for_each = [for entry in var.access : entry if entry.view != null] |
| 160 | + content { |
| 161 | + view { |
| 162 | + table_id = access.value.view.table_id |
| 163 | + dataset_id = access.value.view.dataset_id |
| 164 | + project_id = access.value.view.project_id |
| 165 | + } |
| 166 | + } |
| 167 | + } |
| 168 | + |
| 169 | + # Grant the org's syndication role to every service account resolved by the workgroup module |
| 170 | + dynamic "access" { |
| 171 | + for_each = module.syndication_workgroup.service_accounts |
| 172 | + content { |
| 173 | + user_by_email = access.value |
| 174 | + role = data.terraform_remote_state.org.outputs.bigquery_jobs_manage_syndicate_dataset_role_id |
| 175 | + } |
| 176 | + } |
| 177 | + |
| 178 | + # Authorize views in each syndication target dataset found via remote state |
| 179 | + dynamic "access" { |
| 180 | + for_each = local.syndication_dataset_access |
| 181 | + content { |
| 182 | + dataset { |
| 183 | + target_types = ["VIEWS"] |
| 184 | + dataset { |
| 185 | + dataset_id = access.value.dataset_id |
| 186 | + project_id = access.value.project_id |
| 187 | + } |
| 188 | + } |
| 189 | + } |
| 190 | + } |
| 191 | +} |
| 192 | + |
| 193 | +# Non-authoritative syndication role grants for externally-managed datasets (var.create_dataset = false) |
| 194 | +resource "google_bigquery_dataset_access" "syndication_role" { |
| 195 | + # toset() collapses a service account that is listed more than once, which would otherwise fail |
| 196 | + # the plan with a duplicate-key error; the set is empty when this module creates the dataset itself. |
| 197 | + for_each = toset([for sa in module.syndication_workgroup.service_accounts : sa if !var.create_dataset]) |
| 198 | + |
| 199 | + dataset_id = var.dataset_id |
| 200 | + role = data.terraform_remote_state.org.outputs.bigquery_jobs_manage_syndicate_dataset_role_id |
| 201 | + user_by_email = each.value |
| 202 | +} |
| 203 | + |
| 204 | +resource "google_bigquery_dataset_access" "syndicated_authorization" { |
| 205 | + # Externally-managed datasets only: one authorized-dataset grant per discovered target, |
| 206 | + # keyed by "<project_id>/<dataset_id>" |
| 207 | + for_each = var.create_dataset ? {} : { |
| 208 | + for grant in local.syndication_dataset_access : "${grant.project_id}/${grant.dataset_id}" => grant |
| 209 | + } |
| 210 | + dataset_id = var.dataset_id |
| 211 | + dataset { |
| 212 | + target_types = ["VIEWS"] |
| 213 | + dataset { |
| 214 | + dataset_id = each.value.dataset_id |
| 215 | + project_id = each.value.project_id |
| 216 | + } |
| 217 | + } |
| 218 | +} |
0 commit comments