
Commit 4c80faf

Merge pull request #40 from teamdatatonic/refactor/train_pipeline
Update training pipeline
2 parents fbc608f + 07fb10b commit 4c80faf

35 files changed (+759 / -1047 lines)

Makefile

Lines changed: 32 additions & 24 deletions

@@ -15,59 +15,56 @@
 -include env.sh
 export
 
-
-help: ## Display this help screen
+help: ## Display this help screen.
 	@grep -h -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
-
-pre-commit: ## Runs the pre-commit checks over entire repo
-	cd pipelines && \
-	poetry run pre-commit run --all-files
 
 env ?= dev
 AUTO_APPROVE_FLAG :=
-deploy: ## Deploy the Terraform infrastructure to your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env=<dev|test|prod> (default = dev)
+deploy: ## Deploy infrastructure to your project. Optionally set env=<dev|test|prod> (default = dev).
 	@if [ "$(auto-approve)" = "true" ]; then \
 		AUTO_APPROVE_FLAG="-auto-approve"; \
 	fi; \
 	cd terraform/envs/$(env) && \
 	terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \
 	terraform apply -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}' $$AUTO_APPROVE_FLAG
 
-undeploy: ## DESTROY the Terraform infrastructure in your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env=<dev|test|prod> (default = dev)
+undeploy: ## DESTROY the infrastructure in your project. Optionally set env=<dev|test|prod> (default = dev).
 	@if [ "$(auto-approve)" = "true" ]; then \
 		AUTO_APPROVE_FLAG="-auto-approve"; \
 	fi; \
 	cd terraform/envs/$(env) && \
 	terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \
 	terraform destroy -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}' $$AUTO_APPROVE_FLAG
 
-install: ## Set up local environment for Python development on pipelines
+install: ## Set up local Python environment for development.
 	@cd pipelines && \
 	poetry install --with dev && \
 	cd ../components && \
-	poetry install --with dev
+	poetry install --with dev && \
+	cd ../model && \
+	poetry install
 
-compile: ## Compile the pipeline to pipeline.yaml. Must specify pipeline=<training|prediction>
+compile: ## Compile pipeline. Must set pipeline=<training|prediction>.
 	@cd pipelines/src && \
-	poetry run kfp dsl compile --py pipelines/${pipeline}/pipeline.py --output pipelines/${pipeline}/pipeline.yaml --function pipeline
+	echo "Compiling $$pipeline pipeline" && \
+	poetry run kfp dsl compile --py pipelines/${pipeline}.py --output pipelines/${pipeline}.yaml --function pipeline
 
-targets ?= training serving
-build: ## Build and push training and/or serving container(s) image using Docker. Specify targets=<training serving> e.g. targets=training or targets="training serving" (default)
+images ?= training serving
+build: ## Build and push container(s). Set images=<training serving> e.g. images=training (default = training serving).
 	@cd model && \
-	for target in $$targets ; do \
-		echo "Building $$target image" && \
+	for image in $$images ; do \
+		echo "Building $$image image" && \
 		gcloud builds submit . \
 		--region=${VERTEX_LOCATION} \
 		--project=${VERTEX_PROJECT_ID} \
 		--gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source \
-		--substitutions=_DOCKER_TARGET=$$target,_DESTINATION_IMAGE_URI=${CONTAINER_IMAGE_REGISTRY}/$$target:${RESOURCE_SUFFIX} ; \
+		--substitutions=_DOCKER_TARGET=$$image,_DESTINATION_IMAGE_URI=${CONTAINER_IMAGE_REGISTRY}/$$image:${RESOURCE_SUFFIX} ; \
 	done
 
-
 compile ?= true
 build ?= true
 wait ?= false
-run: ## Run pipeline in sandbox environment. Must specify pipeline=<training|prediction>. Optionally specify wait=<true|false> (default = false). Set compile=false to skip recompiling the pipeline and set build=false to skip rebuilding container images
+run: ## Run pipeline. Must set pipeline=<training|prediction>. Optionally set wait=<true|false> (default = false), compile=<true|false> (default = true) to recompile pipeline, build=<true|false> (default = true) to rebuild container image(s), images=<training serving> (default = training serving) to set which images are rebuilt.
 	@if [ $(compile) = "true" ]; then \
 		$(MAKE) compile ; \
 	elif [ $(compile) != "false" ]; then \
@@ -81,19 +78,30 @@ run: ## Run pipeline in sandbox environment. Must specify pipeline=<training|pre
 		exit ; \
 	fi && \
 	cd pipelines/src && \
-	poetry run python -m pipelines.utils.trigger_pipeline --template_path=pipelines/${pipeline}/pipeline.yaml --display_name=${pipeline} --wait=${wait}
+	echo "Running $$pipeline pipeline" && \
+	poetry run python -m pipelines.utils.trigger_pipeline --template_path=pipelines/${pipeline}.yaml --display_name=${pipeline} --wait=${wait}
+
+training: ## Shortcut to run training pipeline. Rebuilds training and serving images. Supports same options as run.
+	$(MAKE) run pipeline=training images=training prediction
+
+prediction: ## Shortcut to run prediction pipeline. Doesn't rebuild images. Supports same options as run.
+	$(MAKE) run pipeline=prediction build=false
 
 components ?= true
-test: ## Run unit tests. Specify components=<true|false> to test scripts and optionally components
+test: ## Run unit tests for pipelines. Optionally set components=<true|false> (default = true) to test components package.
 	@if [ $(components) = "true" ]; then \
-		echo "Testing components" && \
+		echo "Running unit tests in components" && \
 		cd components && \
 		poetry run pytest && \
 		cd .. ; \
 	elif [ $(components) != "false" ]; then \
 		echo "ValueError: components must be either true or false" ; \
 		exit ; \
 	fi && \
-	echo "Testing scripts" && \
+	echo "Running unit tests in pipelines" && \
 	cd pipelines && \
-	poetry run python -m pytest tests/utils
+	poetry run python -m pytest
+
+pre-commit: ## Run pre-commit checks for pipelines.
+	cd pipelines && \
+	poetry run pre-commit run --all-files
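
For orientation, a few example invocations of the reworked targets, sketched from the help text above (the option values shown are illustrative):

```bash
# Compile, rebuild the container images and run the training pipeline
make training

# Run the prediction pipeline without rebuilding images (what the shortcut does)
make prediction

# Long form: run the training pipeline, rebuild only the training image, and wait
make run pipeline=training images=training wait=true
```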

README.md

Lines changed: 15 additions & 8 deletions

@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->
 
-# Vertex Pipelines End-to-end Samples
+# Vertex Pipelines End-to-End Samples
 
 _AKA "Vertex AI Turbo Templates"_
 
@@ -71,19 +71,19 @@ Before your CI/CD pipelines can deploy the infrastructure, you will need to set
 ```bash
 export DEV_PROJECT_ID=my-dev-gcp-project
 export DEV_LOCATION=europe-west2
-gsutil mb -l DEV_LOCATION -p DEV_PROJECT_ID --pap=enforced gs://DEV_PROJECT_ID-tfstate && \
-gsutil ubla set on gs://DEV_PROJECT_ID-tfstate
+gsutil mb -l $DEV_LOCATION -p $DEV_PROJECT_ID --pap=enforced gs://$DEV_PROJECT_ID-tfstate && \
+gsutil ubla set on gs://$DEV_PROJECT_ID-tfstate
 ```
 
 Enable APIs in admin project:
 
 ```bash
 export ADMIN_PROJECT_ID=my-admin-gcp-project
-gcloud services enable cloudresourcemanager.googleapis.com serviceusage.googleapis.com --project=ADMIN_PROJECT_ID
+gcloud services enable cloudresourcemanager.googleapis.com serviceusage.googleapis.com --project=$ADMIN_PROJECT_ID
 ```
 
 ```bash
-make deploy env=dev VERTEX_PROJECT_ID=<DEV PROJECT ID>
+make deploy env=dev
 ```
 
 More details about infrastructure is explained in [this README](docs/INFRASTRUCTURE.md).
@@ -117,10 +117,10 @@ You can modify this to suit your own use case.
 Build the training and serving container images and push them to Artifact Registry with:
 
 ```bash
-make build [ targets=training serving ]
+make build [ images=training serving ]
 ```
 
-Optionally specify the `targets` variable to only build one of the images.
+Optionally specify the `images` variable to only build one of the images.
 
 **Execute pipelines:** Vertex AI Pipelines uses KubeFlow to orchestrate your training steps, as such you'll need to:
 
@@ -136,10 +136,17 @@ make run pipeline=training [ wait=<true|false> ] [ build=<true|false> ] [ compil
 
 The command has the following true/false flags:
 
-- `build` - re-build containers for training & serving code (limit by setting targets=training to build only one of the containers)
+- `build` - re-build containers for training & serving code (limit by setting images=training to build only one of the containers)
 - `compile` - re-compile the pipeline to YAML
 - `wait` - run the pipeline (a-)sync
 
+**Shortcuts:** Use these commands which support the same options as `run` to run the training or prediction pipeline:
+
+```bash
+make training
+make prediction
+```
+
 ## Test
 
 Unit tests are performed using [pytest](https://docs.pytest.org).

cloudbuild/e2e-test.yaml

Lines changed: 1 addition & 0 deletions

@@ -51,6 +51,7 @@ steps:
   - ENABLE_PIPELINE_CACHING=${_TEST_ENABLE_PIPELINE_CACHING}
   - VERTEX_LOCATION=${_TEST_VERTEX_LOCATION}
   - VERTEX_PROJECT_ID=${_TEST_VERTEX_PROJECT_ID}
+  - BQ_LOCATION=${_TEST_BQ_LOCATION}
   - VERTEX_SA_EMAIL=${_TEST_VERTEX_SA_EMAIL}
   - VERTEX_CMEK_IDENTIFIER=${_TEST_VERTEX_CMEK_IDENTIFIER}
   - VERTEX_NETWORK=${_TEST_VERTEX_NETWORK}

cloudbuild/release.yaml

Lines changed: 2 additions & 2 deletions

@@ -47,12 +47,12 @@ steps:
 cd pipelines && \
 poetry run python -m pipelines.utils.upload_pipeline \
   --dest=https://${_VERTEX_LOCATION}-kfp.pkg.dev/$$proj/vertex-pipelines \
-  --yaml=src/pipelines/training/pipeline.yaml \
+  --yaml=src/pipelines/training.yaml \
   --tag=latest \
   --tag=${TAG_NAME} && \
 poetry run python -m pipelines.utils.upload_pipeline \
   --dest=https://${_VERTEX_LOCATION}-kfp.pkg.dev/$$proj/vertex-pipelines \
-  --yaml=src/pipelines/prediction/pipeline.yaml \
+  --yaml=src/pipelines/prediction.yaml \
   --tag=latest \
   --tag=${TAG_NAME}; \
 done
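
The new `--yaml` paths follow the flattened layout introduced in the Makefile's `compile` target, which now writes `pipelines/<pipeline>.yaml` next to `pipelines/<pipeline>.py` rather than into per-pipeline subdirectories. A sketch of the compile step that produces the file the release job uploads:

```bash
# Compile the training pipeline to YAML (mirrors the updated `compile` target)
cd pipelines/src
poetry run kfp dsl compile \
  --py pipelines/training.py \
  --output pipelines/training.yaml \
  --function pipeline
# upload_pipeline then refers to this file as src/pipelines/training.yaml
# relative to the pipelines/ directory.
```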

components/src/components/extract_table.py

Lines changed: 24 additions & 73 deletions

@@ -11,81 +11,32 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from kfp.dsl import Dataset, Output, ContainerSpec, container_component
 
-from kfp.dsl import Dataset, Output, component
 
-
-@component(
-    base_image="python:3.9",
-    packages_to_install=["google-cloud-bigquery==2.30.0"],
-)
+@container_component
 def extract_table(
-    bq_client_project_id: str,
-    source_project_id: str,
-    dataset_id: str,
-    table_name: str,
-    dataset: Output[Dataset],
-    destination_gcs_uri: str = None,
-    dataset_location: str = "EU",
-    extract_job_config: dict = None,
-    skip_if_exists: bool = True,
+    project: str,
+    location: str,
+    table: str,
+    data: Output[Dataset],
+    destination_format: str = "CSV",
+    compression: str = "NONE",
+    field_delimiter: str = ",",
+    print_header: str = "true",
 ):
-    """
-    Extract BQ table in GCS.
-    Args:
-        bq_client_project_id (str): project id that will be used by the bq client
-        source_project_id (str): project id from where BQ table will be extracted
-        dataset_id (str): dataset id from where BQ table will be extracted
-        table_name (str): table name (without project id and dataset id)
-        dataset (Output[Dataset]): output dataset artifact generated by the operation,
-            this parameter will be passed automatically by the orchestrator
-        dataset_location (str): bq dataset location. Defaults to "EU".
-        extract_job_config (dict): dict containing optional parameters
-            required by the bq extract operation. Defaults to None.
-            See available parameters here
-            https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.ExtractJobConfig.html # noqa
-        destination_gcs_uri (str): GCS URI to use for saving query results (optional).
-
-    Returns:
-        Outputs (NamedTuple (str, list)): Output dataset directory and its GCS uri.
-    """
-
-    import logging
-    from pathlib import Path
-    from google.cloud.exceptions import GoogleCloudError
-    from google.cloud import bigquery
-
-    # set uri of output dataset if destination_gcs_uri is provided
-    if destination_gcs_uri:
-        dataset.uri = destination_gcs_uri
-
-    logging.info(f"Checking if destination exists: {dataset.path}")
-    if Path(dataset.path).exists() and skip_if_exists:
-        logging.info("Destination already exists, skipping table extraction!")
-        return
-
-    full_table_id = f"{source_project_id}.{dataset_id}.{table_name}"
-    table = bigquery.table.Table(table_ref=full_table_id)
-
-    if extract_job_config is None:
-        extract_job_config = {}
-    job_config = bigquery.job.ExtractJobConfig(**extract_job_config)
-
-    logging.info(f"Extract table {table} to {dataset.uri}")
-    client = bigquery.client.Client(
-        project=bq_client_project_id, location=dataset_location
+    return ContainerSpec(
+        image="google/cloud-sdk:alpine",
+        command=["bq"],
+        args=[
+            "extract",
+            f"--project_id={project}",
+            f"--location={location}",
+            f"--destination_format={destination_format}",
+            f"--compression={compression}",
+            f"--field_delimiter={field_delimiter}",
+            f"--print_header={print_header}",
+            table,
+            data.uri,
+        ],
     )
-    extract_job = client.extract_table(
-        table,
-        dataset.uri,
-        job_config=job_config,
-    )
-
-    try:
-        result = extract_job.result()
-        logging.info("Table extracted, result: {}".format(result))
-    except GoogleCloudError as e:
-        logging.error(e)
-        logging.error(extract_job.error_result)
-        logging.error(extract_job.errors)
-        raise e
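
For context, a minimal sketch of how the slimmed-down `extract_table` container component might be wired into a KFP pipeline; the pipeline, import path and argument values below are illustrative assumptions, not part of this commit:

```python
# A minimal sketch (not from this commit) showing how the new container
# component might be called from a KFP pipeline.
from kfp import dsl

from components import extract_table  # assumed export from the components package


@dsl.pipeline(name="example-extract-pipeline")  # illustrative pipeline only
def example_pipeline(project: str, location: str, table: str):
    # The component shells out to `bq extract`; `table` is a fully qualified
    # BigQuery table ID and the extract lands at the output artifact's URI.
    extract = extract_table(
        project=project,
        location=location,
        table=table,
        destination_format="CSV",
    )
    # extract.outputs["data"] can then be passed to a downstream step.
```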

components/src/components/lookup_model.py

Lines changed: 9 additions & 18 deletions

@@ -22,26 +22,19 @@
 )
 def lookup_model(
     model_name: str,
-    project_location: str,
-    project_id: str,
+    location: str,
+    project: str,
     model: Output[Model],
-    order_models_by: str = "create_time desc",
     fail_on_model_not_found: bool = False,
 ) -> NamedTuple("Outputs", [("model_resource_name", str), ("training_dataset", dict)]):
     """
     Fetch a model given a model name (display name) and export to GCS.
 
     Args:
         model_name (str): display name of the model
-        project_location (str): location of the Google Cloud project
-        project_id (str): project id of the Google Cloud project
+        location (str): location of the Google Cloud project
+        project (str): project id of the Google Cloud project
         model (Output[Model]): a Vertex AI model
-        order_models_by (str): if multiple models are found based on the display name,
-            use a filter clause:
-            A comma-separated list of fields to order by, sorted in
-            ascending order. Use "desc" after a field name for descending.
-            Supported fields: `display_name`, `create_time`, `update_time`
-            Defaults to "create_time desc".
         fail_on_model_not_found (bool): if set to True, raise runtime error if
             model is not found
 
@@ -60,25 +53,23 @@ def lookup_model(
     logging.info(f"listing models with display name {model_name}")
     models = Model.list(
         filter=f'display_name="{model_name}"',
-        order_by=order_models_by,
-        location=project_location,
-        project=project_id,
+        location=location,
+        project=project,
     )
-    logging.info(f"found {len(models)} models")
+    logging.info(f"found {len(models)} model(s)")
 
     training_dataset = {}
     model_resource_name = ""
     if len(models) == 0:
         logging.error(
-            f"No model found with name {model_name}"
-            + f"(project: {project_id} location: {project_location})"
+            f"No model found with name {model_name} "
+            + f"(project: {project} location: {location})"
         )
         if fail_on_model_not_found:
             raise RuntimeError(f"Failed as model was not found")
     elif len(models) == 1:
         target_model = models[0]
         model_resource_name = target_model.resource_name
-        logging.info(f"choosing model by order ({order_models_by})")
         logging.info(f"model display name: {target_model.display_name}")
         logging.info(f"model resource name: {target_model.resource_name}")
         logging.info(f"model uri: {target_model.uri}")
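
Similarly, a hedged sketch of calling `lookup_model` with the renamed parameters; the surrounding pipeline, import path and values are assumptions for illustration, not part of this commit:

```python
# A hedged sketch (not from this commit) of calling lookup_model with the
# renamed parameters.
from kfp import dsl

from components import lookup_model  # assumed export from the components package


@dsl.pipeline(name="example-lookup-pipeline")  # illustrative pipeline only
def example_pipeline(project: str, location: str):
    # `project`/`location` replace the old `project_id`/`project_location`;
    # `order_models_by` is gone, so listing order is whatever Model.list returns.
    champion = lookup_model(
        model_name="my-model",
        project=project,
        location=location,
        fail_on_model_not_found=False,
    )
    # champion.outputs["model_resource_name"] is an empty string if no model exists.
```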
