Skip to content

Commit

Permalink
gaarf-wf: [workflow] Fix: unified views were created per batch instea…
Browse files Browse the repository at this point in the history
…d of for all accounts

gaarf-wf: [workflow] added cid_ignore argument

Change-Id: I32404b02d811aaecb825da87a72a3cbb439f92d7
  • Loading branch information
evil-shrike committed Jul 18, 2024
1 parent 1e09126 commit 92d4dff
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 33 deletions.
1 change: 1 addition & 0 deletions gcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Please notice the escaping of quotes for job's argument.
* `dataset` - BigQuery dataset id for writing results of ads queries (required)
* `bq_dataset_location` - BigQuery dataset location, e.g. "europe", by default "us" (optional)
* `cid` - Ads customer id without dashes, either an MCC or a child account, or a comma-separated list of CIDs (required)
* `cid_ignore` - Ads customer id (or a comma-separated list of ids) to exclude from the resulting list of accounts (optional)
* `customer_ids_query` - a path to a file with GAQL query that refines for which accounts to execute scripts (optional)
* `customer_ids_batchsize` - a batch size for customer ids (cids); if not specified, accounts will be processed in batches of 1000 (see gaarf-getcids CF)
* `customer_ids_offset` - an offset in the resulting list of accounts, for when you need to implement external batching - i.e. execute the workflow only for a subset of accounts from the specified seed account(s). It differs from internal batching, where accounts are processed in batches to work around the maximum steps limitation of Cloud Workflows (100K runtime steps).
Expand Down
11 changes: 11 additions & 0 deletions gcp/functions/build/src/gaarf-getcids.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion gcp/functions/build/src/gaarf-getcids.js.map

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 12 additions & 4 deletions gcp/functions/src/gaarf-getcids.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,15 @@ async function main_getcids_unsafe(
await logger.info('Ads API config', ads_config_wo_token);

let customerIds = parseCustomerIds(<string>req.query.customer_id, adsConfig);

let customerIdsIgnore: Array<string> = [];
if (req.query.customer_ids_ignore) {
const customer_ids_ignore = <string>req.query.customer_ids_ignore;
if (customer_ids_ignore.includes(',')) {
customerIdsIgnore = customer_ids_ignore.split(',');
} else {
customerIdsIgnore = [customer_ids_ignore];
}
}
if (!customerIds || customerIds.length === 0) {
throw new Error(
"Customer id is not specified in either 'customer_id' query argument or google-ads.yaml"
Expand Down Expand Up @@ -67,9 +75,7 @@ async function main_getcids_unsafe(
customerIds,
customer_ids_query
);
await logger.info(
`Loaded ${customerIds.length} accounts`
);
await logger.info(`Loaded ${customerIds.length} accounts`);
}
customerIds = customerIds || [];
customerIds.sort();
Expand Down Expand Up @@ -97,6 +103,8 @@ async function main_getcids_unsafe(
}
}

customerIds = customerIds.filter(cid => customerIdsIgnore.indexOf(cid) < 0);

if (req.query.flatten) {
res.json(customerIds);
res.end();
Expand Down
26 changes: 0 additions & 26 deletions gcp/workflow/workflow-ads.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ main:
- macros: ${map.get(args, "macros")}
- bq_writer_options: ${map.get(args, "bq_writer_options")}
- concurrency_limit: ${default(map.get(args, "concurrency_limit"), 20)}
- disable_strict_views: ${default(map.get(args,"disable_strict_views"), false)}

# get CF 'gaarf' function's URL
- get_function:
Expand All @@ -29,18 +28,9 @@ main:
#args:
# name: ${"projects/" + project + "/locations/" + location + "/functions/" + cloud_function}
#result: function
# get CF 'gaarf-bq-view' function's URL
- get_function_view:
call: http.get
args:
url: ${"https://cloudfunctions.googleapis.com/v2/projects/" + project + "/locations/" + location + "/functions/" + cloud_function + "-bq-view"}
auth:
type: OAuth2
result: function_view
- initialize_functions_uris:
assign:
- gaarf_main_cf_uri: ${function.body.serviceConfig.uri}
- gaarf_bqview_cf_uri: ${function_view.body.serviceConfig.uri}

- iterate_over_scripts:
parallel:
Expand Down Expand Up @@ -91,22 +81,6 @@ main:
bq_dataset_location: ${bq_dataset_location}
ads_config_path: ${ads_config_path}
bq_writer_options: ${bq_writer_options}
# create a view in BQ to combine all account tables into a single view
- call_create_view_cf:
call: http.post
args:
url: ${gaarf_bqview_cf_uri}
timeout: 1800
query:
project_id: ${project}
dataset: ${bq_dataset}
dataset_location: ${bq_dataset_location}
script_path: ${script_item}
body:
accounts: ${if(disable_strict_views, [], accounts)}
auth:
type: OIDC
result: create_view_response

executeAdsQuery:
params: [cf_uri, script_path, account, macros, project, bq_dataset, bq_dataset_location, ads_config_path, bq_writer_options, is_constant: false]
Expand Down
58 changes: 56 additions & 2 deletions gcp/workflow/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ main:
queries_path: ${args.ads_queries_path}
ads_config_path: ${args.ads_config_path}
cid: ${args.cid}
cid_ignore: ${map.get(args, "cid_ignore")}
customer_ids_query: ${map.get(args, "customer_ids_query")}
customer_ids_offset: ${map.get(args, "customer_ids_offset")}
customer_ids_batchsize: ${map.get(args, "customer_ids_batchsize")}
Expand Down Expand Up @@ -69,7 +70,7 @@ main:
return: ${accounts}

runAdsQueries:
params: [project, location, function_name, gcs_bucket, queries_path, ads_config_path, cid, customer_ids_query, customer_ids_offset, customer_ids_batchsize, bq_dataset, bq_dataset_location, macros, bq_writer_options, concurrency_limit, workflow_ads_id, disable_strict_views]
params: [project, location, function_name, gcs_bucket, queries_path, ads_config_path, cid, cid_ignore, customer_ids_query, customer_ids_offset, customer_ids_batchsize, bq_dataset, bq_dataset_location, macros, bq_writer_options, concurrency_limit, workflow_ads_id, disable_strict_views]
# NOTE: currently it's assumed that CF's project is the same as project for BQ datasets
steps:
# get CF 'gaarf-getcids' function's URL
Expand All @@ -80,6 +81,14 @@ runAdsQueries:
auth:
type: OAuth2
result: function_cids
# get CF 'gaarf-bq-view' function's URL
- get_function_view:
call: http.get
args:
url: ${"https://cloudfunctions.googleapis.com/v2/projects/" + project + "/locations/" + location + "/functions/" + function_name + "-bq-view"}
auth:
type: OAuth2
result: function_view

#call 'gaarf-getcids' CF to get a list of customer ids for further processing
- call_gaarf_cids_cf:
Expand All @@ -90,6 +99,7 @@ runAdsQueries:
query:
ads_config_path: ${ads_config_path}
customer_id: ${cid}
customer_ids_ignore: ${default(cid_ignore, "")}
customer_ids_query: ${default(customer_ids_query, "")}
customer_ids_offset: ${default(customer_ids_offset, "")}
customer_ids_batchsize: ${default(customer_ids_batchsize, "")}
Expand Down Expand Up @@ -172,9 +182,53 @@ runAdsQueries:
macros: ${macros}
bq_writer_options: ${bq_writer_options}
concurrency_limit: ${concurrency_limit}
disable_strict_views: ${disable_strict_views}
result: execution_result

# collect account ids from `accounts` (where they are split into batches) into a flattened list
- initialize_accounts_flatten:
assign:
- accounts_flatten: []
- flatten_accounts:
for:
value: accounts_batch
in: ${accounts}
steps:
- loop_over_batch:
for:
value: account
in: ${accounts_batch}
steps:
- append:
assign:
- accounts_flatten: ${list.concat(accounts_flatten, account)}

# for each script (excluding constants) create a unified view
- create_views:
for:
value: script_item
in: ${scripts}
steps:
- check_for_constant:
switch:
- condition: ${text.match_regex(script_item, "_constant")}
next: continue
next: call_create_view_cf
- call_create_view_cf:
call: http.post
args:
url: ${function_view.body.serviceConfig.uri}
timeout: 1800
query:
project_id: ${project}
dataset: ${bq_dataset}
dataset_location: ${bq_dataset_location}
script_path: ${script_item}
body:
accounts: ${if(disable_strict_views, [], accounts_flatten)}
auth:
type: OIDC
result: create_view_response

- return_result:
return: ${accounts}

Expand Down

0 comments on commit 92d4dff

Please sign in to comment.