Skip to content

Commit 92d4dff

Browse files
committed
gaarf-wf: [workflow] Fix: unified views were created per batch instead of for all accounts
gaarf-wf: [workflow] added cid_ignore argument Change-Id: I32404b02d811aaecb825da87a72a3cbb439f92d7
1 parent 1e09126 commit 92d4dff

File tree

6 files changed

+81
-33
lines changed

6 files changed

+81
-33
lines changed

gcp/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Please notice the escaping of quotes for job's argument.
8888
* `dataset` - BigQuery dataset id for writing results of ads queries (required)
8989
* `bq_dataset_location` - BigQuery dataset location, e.g. "europe", by default "us" (optional)
9090
* `cid` - Ads customer id, either an MCC or a child account, without dashes, or a comma-separated list of CIDs (required)
91+
* `cid_ignore` - Ads customer id (or a comma-separated list of ids) to exclude from the resulting list of accounts (optional)
9192
* `customer_ids_query` - a path to a file with GAQL query that refines for which accounts to execute scripts (optional)
9293
* `customer_ids_batchsize` - a batch size for customer ids (cids); if not specified, accounts will be processed in batches of 1000 (see gaarf-getcids CF)
9394
* `customer_ids_offset` - an offset in the resulting list of accounts, for when you need to implement external batching, i.e. execute the workflow only for a subset of accounts from the specified seed account(s). It differs from internal batching, where accounts are processed in batches to work around the maximum steps limitation of Cloud Workflows (100K runtime steps).

gcp/functions/build/src/gaarf-getcids.js

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gcp/functions/build/src/gaarf-getcids.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gcp/functions/src/gaarf-getcids.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,15 @@ async function main_getcids_unsafe(
3939
await logger.info('Ads API config', ads_config_wo_token);
4040

4141
let customerIds = parseCustomerIds(<string>req.query.customer_id, adsConfig);
42-
42+
let customerIdsIgnore: Array<string> = [];
43+
if (req.query.customer_ids_ignore) {
44+
const customer_ids_ignore = <string>req.query.customer_ids_ignore;
45+
if (customer_ids_ignore.includes(',')) {
46+
customerIdsIgnore = customer_ids_ignore.split(',');
47+
} else {
48+
customerIdsIgnore = [customer_ids_ignore];
49+
}
50+
}
4351
if (!customerIds || customerIds.length === 0) {
4452
throw new Error(
4553
"Customer id is not specified in either 'customer_id' query argument or google-ads.yaml"
@@ -67,9 +75,7 @@ async function main_getcids_unsafe(
6775
customerIds,
6876
customer_ids_query
6977
);
70-
await logger.info(
71-
`Loaded ${customerIds.length} accounts`
72-
);
78+
await logger.info(`Loaded ${customerIds.length} accounts`);
7379
}
7480
customerIds = customerIds || [];
7581
customerIds.sort();
@@ -97,6 +103,8 @@ async function main_getcids_unsafe(
97103
}
98104
}
99105

106+
customerIds = customerIds.filter(cid => customerIdsIgnore.indexOf(cid) < 0);
107+
100108
if (req.query.flatten) {
101109
res.json(customerIds);
102110
res.end();

gcp/workflow/workflow-ads.yaml

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ main:
1414
- macros: ${map.get(args, "macros")}
1515
- bq_writer_options: ${map.get(args, "bq_writer_options")}
1616
- concurrency_limit: ${default(map.get(args, "concurrency_limit"), 20)}
17-
- disable_strict_views: ${default(map.get(args,"disable_strict_views"), false)}
1817

1918
# get CF 'gaarf' function's URL
2019
- get_function:
@@ -29,18 +28,9 @@ main:
2928
#args:
3029
# name: ${"projects/" + project + "/locations/" + location + "/functions/" + cloud_function}
3130
#result: function
32-
# get CF 'gaarf-bq-view' function's URL
33-
- get_function_view:
34-
call: http.get
35-
args:
36-
url: ${"https://cloudfunctions.googleapis.com/v2/projects/" + project + "/locations/" + location + "/functions/" + cloud_function + "-bq-view"}
37-
auth:
38-
type: OAuth2
39-
result: function_view
4031
- initialize_functions_uris:
4132
assign:
4233
- gaarf_main_cf_uri: ${function.body.serviceConfig.uri}
43-
- gaarf_bqview_cf_uri: ${function_view.body.serviceConfig.uri}
4434

4535
- iterate_over_scripts:
4636
parallel:
@@ -91,22 +81,6 @@ main:
9181
bq_dataset_location: ${bq_dataset_location}
9282
ads_config_path: ${ads_config_path}
9383
bq_writer_options: ${bq_writer_options}
94-
# create a view in BQ to combine all account tables into a single view
95-
- call_create_view_cf:
96-
call: http.post
97-
args:
98-
url: ${gaarf_bqview_cf_uri}
99-
timeout: 1800
100-
query:
101-
project_id: ${project}
102-
dataset: ${bq_dataset}
103-
dataset_location: ${bq_dataset_location}
104-
script_path: ${script_item}
105-
body:
106-
accounts: ${if(disable_strict_views, [], accounts)}
107-
auth:
108-
type: OIDC
109-
result: create_view_response
11084

11185
executeAdsQuery:
11286
params: [cf_uri, script_path, account, macros, project, bq_dataset, bq_dataset_location, ads_config_path, bq_writer_options, is_constant: false]

gcp/workflow/workflow.yaml

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ main:
3030
queries_path: ${args.ads_queries_path}
3131
ads_config_path: ${args.ads_config_path}
3232
cid: ${args.cid}
33+
cid_ignore: ${map.get(args, "cid_ignore")}
3334
customer_ids_query: ${map.get(args, "customer_ids_query")}
3435
customer_ids_offset: ${map.get(args, "customer_ids_offset")}
3536
customer_ids_batchsize: ${map.get(args, "customer_ids_batchsize")}
@@ -69,7 +70,7 @@ main:
6970
return: ${accounts}
7071

7172
runAdsQueries:
72-
params: [project, location, function_name, gcs_bucket, queries_path, ads_config_path, cid, customer_ids_query, customer_ids_offset, customer_ids_batchsize, bq_dataset, bq_dataset_location, macros, bq_writer_options, concurrency_limit, workflow_ads_id, disable_strict_views]
73+
params: [project, location, function_name, gcs_bucket, queries_path, ads_config_path, cid, cid_ignore, customer_ids_query, customer_ids_offset, customer_ids_batchsize, bq_dataset, bq_dataset_location, macros, bq_writer_options, concurrency_limit, workflow_ads_id, disable_strict_views]
7374
# NOTE: currently it's assumed that CF's project is the same as project for BQ datasets
7475
steps:
7576
# get CF 'gaarf-getcids' function's URL
@@ -80,6 +81,14 @@ runAdsQueries:
8081
auth:
8182
type: OAuth2
8283
result: function_cids
84+
# get CF 'gaarf-bq-view' function's URL
85+
- get_function_view:
86+
call: http.get
87+
args:
88+
url: ${"https://cloudfunctions.googleapis.com/v2/projects/" + project + "/locations/" + location + "/functions/" + function_name + "-bq-view"}
89+
auth:
90+
type: OAuth2
91+
result: function_view
8392

8493
#call 'gaarf-getcids' CF to get a list of customer ids for further processing
8594
- call_gaarf_cids_cf:
@@ -90,6 +99,7 @@ runAdsQueries:
9099
query:
91100
ads_config_path: ${ads_config_path}
92101
customer_id: ${cid}
102+
customer_ids_ignore: ${default(cid_ignore, "")}
93103
customer_ids_query: ${default(customer_ids_query, "")}
94104
customer_ids_offset: ${default(customer_ids_offset, "")}
95105
customer_ids_batchsize: ${default(customer_ids_batchsize, "")}
@@ -172,9 +182,53 @@ runAdsQueries:
172182
macros: ${macros}
173183
bq_writer_options: ${bq_writer_options}
174184
concurrency_limit: ${concurrency_limit}
175-
disable_strict_views: ${disable_strict_views}
176185
result: execution_result
177186

187+
# collect account ids from `accounts`, where they are split into batches, into a flattened list
188+
- initialize_accounts_flatten:
189+
assign:
190+
- accounts_flatten: []
191+
- flatten_accounts:
192+
for:
193+
value: accounts_batch
194+
in: ${accounts}
195+
steps:
196+
- loop_over_batch:
197+
for:
198+
value: account
199+
in: ${accounts_batch}
200+
steps:
201+
- append:
202+
assign:
203+
- accounts_flatten: ${list.concat(accounts_flatten, account)}
204+
205+
# for each script (excluding constants) create a unified view
206+
- create_views:
207+
for:
208+
value: script_item
209+
in: ${scripts}
210+
steps:
211+
- check_for_constant:
212+
switch:
213+
- condition: ${text.match_regex(script_item, "_constant")}
214+
next: continue
215+
next: call_create_view_cf
216+
- call_create_view_cf:
217+
call: http.post
218+
args:
219+
url: ${function_view.body.serviceConfig.uri}
220+
timeout: 1800
221+
query:
222+
project_id: ${project}
223+
dataset: ${bq_dataset}
224+
dataset_location: ${bq_dataset_location}
225+
script_path: ${script_item}
226+
body:
227+
accounts: ${if(disable_strict_views, [], accounts_flatten)}
228+
auth:
229+
type: OIDC
230+
result: create_view_response
231+
178232
- return_result:
179233
return: ${accounts}
180234

0 commit comments

Comments
 (0)