|
| 1 | +#!/usr/bin/env bash |
| 2 | +# Copyright 2019 Google Inc. |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
# limitations under the License.
| 15 | + |
| 16 | +# please make sure you have owner permission in your project |
| 17 | +set -x |
| 18 | +# export some env variables |
| 19 | +export PROJECT_ID=$(gcloud config get-value project) |
| 20 | +export DATA_STORAGE_BUCKET=${PROJECT_ID}-demo-data |
| 21 | +export TEK=$(openssl rand -base64 32) |
| 22 | +export KEY_RING_NAME=demo-key-ring |
| 23 | +export KEY_NAME=demo-key |
| 24 | +export KEK_FILE_NAME=kek.json |
| 25 | +export PROJECT_NUMBER=$(gcloud projects list --filter=${PROJECT_ID} --format="value(PROJECT_NUMBER)") |
| 26 | +export SERVICE_ACCOUNT_NAME=demo-service-account |
| 27 | +export REGION=us-central1 |
| 28 | +export BQ_DATASET_NAME=demo_dataset |
# Enable the required APIs in a single call.
# 'gcloud services enable' expects full service names; the bare aliases
# 'bigquery', 'storage_component' and 'dataflow' are legacy
# service-management identifiers and are rejected by current gcloud.
gcloud services enable \
  dlp.googleapis.com \
  cloudkms.googleapis.com \
  bigquery.googleapis.com \
  storage-component.googleapis.com \
  dataflow.googleapis.com \
  cloudbuild.googleapis.com
# Create the BigQuery dataset. Tables are generated dynamically by the
# Dataflow pipeline, so only the dataset is provisioned here.
bq --location=US mk -d --description "De-Identified PII Dataset" "${BQ_DATASET_NAME}"

# Create a data bucket to store the PII data.
gsutil mb -c standard -l "${REGION}" "gs://${DATA_STORAGE_BUCKET}"

# Grant the Cloud Build service account the additional KMS access it needs
# to create and use the key ring / key from the build steps.
CLOUD_BUILD_SA="${PROJECT_NUMBER}@cloudbuild.gserviceaccount.com"
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
  --member "serviceAccount:${CLOUD_BUILD_SA}" \
  --role roles/cloudkms.cryptoKeyEncrypter
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
  --member "serviceAccount:${CLOUD_BUILD_SA}" \
  --role roles/cloudkms.admin
# Trigger the first Cloud Build script to create the KEK (wrapped TEK).
# The substitutions string is quoted so the embedded command substitution
# cannot be word-split by the shell.
gcloud builds submit . --config dlp-demo-part-1-crypto-key.yaml \
  --substitutions "_GCS_BUCKET_NAME=gs://${DATA_STORAGE_BUCKET},_KEY_RING_NAME=${KEY_RING_NAME},_KEY_NAME=${KEY_NAME},_TEK=${TEK},_KEK=${KEK_FILE_NAME},_API_KEY=$(gcloud auth print-access-token)"

# DLP requires a service account to be used for the API calls.
SERVICE_ACCOUNT_EMAIL="${SERVICE_ACCOUNT_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
gcloud iam service-accounts create "${SERVICE_ACCOUNT_NAME}" --display-name "DLP Demo Service Account"
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
  --member "serviceAccount:${SERVICE_ACCOUNT_EMAIL}" \
  --role roles/editor
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
  --member "serviceAccount:${SERVICE_ACCOUNT_EMAIL}" \
  --role roles/storage.admin
# Create a key for the service account and switch gcloud to it.
# NOTE(review): the key file lands unencrypted in the working directory —
# fine for a throwaway demo project only.
gcloud iam service-accounts keys create --iam-account "${SERVICE_ACCOUNT_EMAIL}" demo_key.json --user-output-enabled
gcloud auth activate-service-account --key-file demo_key.json
# Trigger the Cloud Build script that creates the DLP inspect/de-identify
# templates.
gcloud builds submit . --config dlp-demo-part-2-dlp-template.yaml \
  --substitutions "_KEK_CONFIG_FILE=gs://${DATA_STORAGE_BUCKET}/${KEK_FILE_NAME},_GCS_BUCKET_NAME=gs://${DATA_STORAGE_BUCKET},_API_KEY=$(gcloud auth print-access-token)"

# Download the template JSON files and parse the template names with jq.
gsutil cp "gs://${DATA_STORAGE_BUCKET}/deid-template.json" .
gsutil cp "gs://${DATA_STORAGE_BUCKET}/inspect-template.json" .
export DEID_TEMPLATE_NAME
DEID_TEMPLATE_NAME=$(jq -r '.name' deid-template.json)
export INSPECT_TEMPLATE_NAME
INSPECT_TEMPLATE_NAME=$(jq -r '.name' inspect-template.json)

# Trigger the Dataflow pipeline from the Google-provided template.
# BUGFIX: --region is a flag of 'gcloud dataflow jobs run', not a template
# parameter; the original glued '--region=us-central1' into --parameters,
# which the template does not understand.
export jobId="demo-dlp-deid-pipeline-$(date +%Y%m%d-%H%M%S)"
gcloud dataflow jobs run "${jobId}" \
  --region "${REGION}" \
  --gcs-location gs://dataflow-templates/latest/Stream_DLP_GCS_Text_to_BigQuery \
  --parameters "inputFilePattern=gs://${DATA_STORAGE_BUCKET}/CCRecords_1564602825.csv,dlpProjectId=${PROJECT_ID},deidentifyTemplateName=${DEID_TEMPLATE_NAME},inspectTemplateName=${INSPECT_TEMPLATE_NAME},datasetName=${BQ_DATASET_NAME},batchSize=500"
| 61 | + |
| 62 | + |
0 commit comments