diff --git a/.circleci/config.yml b/.circleci/config.yml index 5a7c497c..6fbb6fbb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -139,6 +139,7 @@ jobs: name: Test Code command: docker run app:build python3 -m pytest + build-job-dap-collector-ppa-prod: docker: - image: << pipeline.parameters.git-image >> @@ -155,6 +156,7 @@ jobs: name: Test Code command: docker run app:build python3 -m pytest + build-job-desktop-mobile-mau-2020: docker: - image: << pipeline.parameters.git-image >> @@ -345,6 +347,19 @@ jobs: command: docker run app:build pytest + build-job-wclouser-fxa-db-counts: + docker: + - image: << pipeline.parameters.git-image >> + steps: + - checkout + - compare-branch: + pattern: ^jobs/wclouser-fxa-db-counts/ + - setup_remote_docker: + version: << pipeline.parameters.docker-version >> + - run: + name: Build Docker image + command: docker build -t app:build jobs/wclouser-fxa-db-counts/ + build-job-webcompat-kb: docker: - image: << pipeline.parameters.git-image >> @@ -501,7 +516,6 @@ workflows: branches: only: main - job-fxci-taskcluster-export: jobs: - build-job-fxci-taskcluster-export @@ -620,6 +634,20 @@ workflows: branches: only: main + job-wclouser-fxa-db-counts: + jobs: + - build-job-wclouser-fxa-db-counts + - gcp-gcr/build-and-push-image: + context: data-eng-airflow-gcr + docker-context: jobs/wclouser-fxa-db-counts/ + path: jobs/wclouser-fxa-db-counts/ + image: wclouser-fxa-db-counts_docker_etl + requires: + - build-job-wclouser-fxa-db-counts + filters: + branches: + only: main + job-webcompat-kb: jobs: - build-job-webcompat-kb diff --git a/jobs/wclouser-fxa-db-counts/.dockerignore b/jobs/wclouser-fxa-db-counts/.dockerignore new file mode 100644 index 00000000..cff5d6ab --- /dev/null +++ b/jobs/wclouser-fxa-db-counts/.dockerignore @@ -0,0 +1,7 @@ +.ci_job.yaml +.ci_workflow.yaml +.DS_Store +*.pyc +.pytest_cache/ +__pycache__/ +venv/ diff --git a/jobs/wclouser-fxa-db-counts/.flake8 b/jobs/wclouser-fxa-db-counts/.flake8 new file mode 
# Image for the wclouser-fxa-db-counts job.
# Layout follows the Dockerflow container guidelines:
# https://github.com/mozilla-services/Dockerflow/blob/master/docs/building-container.md
FROM python:3.8

# MAINTAINER is deprecated; a LABEL carries the same metadata.
LABEL maintainer="Wil Clouser"

# NOTE: the ARG names are kept for build-arg compatibility, but GROUP_ID holds a
# *name* (used for both the group and the user) and USER_ID is used as both the
# numeric uid and the numeric gid.
ARG USER_ID="10001"
ARG GROUP_ID="app"
ARG HOME="/app"

ENV HOME=${HOME}
RUN groupadd --gid ${USER_ID} ${GROUP_ID} && \
    useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} --home-dir ${HOME} ${GROUP_ID}

WORKDIR ${HOME}

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip

# Dependencies are installed before the source is copied so this layer stays
# cached until requirements.txt itself changes.
COPY --chown=${USER_ID}:${GROUP_ID} requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the sources already owned by the runtime user instead of re-owning them
# in a later layer (a separate chown layer stores a second copy of every file).
COPY --chown=${USER_ID}:${GROUP_ID} . .

# Install the job package, then hand any files the build left behind in the
# application folder (build/, *.egg-info) to the runtime user in the same layer.
RUN pip install --no-cache-dir . && \
    chown -R ${USER_ID}:${GROUP_ID} ${HOME}

# Drop root
USER ${USER_ID}
+``` + +To run locally, install dependencies with: + +```sh +pip install -r requirements.txt +``` + +Run the script with + +```sh +python3 -m wclouser_fxa_db_counts.main +``` \ No newline at end of file diff --git a/jobs/wclouser-fxa-db-counts/ci_job.yaml b/jobs/wclouser-fxa-db-counts/ci_job.yaml new file mode 100644 index 00000000..6c555e1e --- /dev/null +++ b/jobs/wclouser-fxa-db-counts/ci_job.yaml @@ -0,0 +1,12 @@ +build-job-wclouser-fxa-db-counts: + docker: + - image: << pipeline.parameters.git-image >> + steps: + - checkout + - compare-branch: + pattern: ^jobs/wclouser-fxa-db-counts/ + - setup_remote_docker: + version: << pipeline.parameters.docker-version >> + - run: + name: Build Docker image + command: docker build -t app:build jobs/wclouser-fxa-db-counts/ \ No newline at end of file diff --git a/jobs/wclouser-fxa-db-counts/ci_workflow.yaml b/jobs/wclouser-fxa-db-counts/ci_workflow.yaml new file mode 100644 index 00000000..f2650ffc --- /dev/null +++ b/jobs/wclouser-fxa-db-counts/ci_workflow.yaml @@ -0,0 +1,13 @@ +job-wclouser-fxa-db-counts: + jobs: + - build-job-wclouser-fxa-db-counts + - gcp-gcr/build-and-push-image: + context: data-eng-airflow-gcr + docker-context: jobs/wclouser-fxa-db-counts/ + path: jobs/wclouser-fxa-db-counts/ + image: wclouser-fxa-db-counts_docker_etl + requires: + - build-job-wclouser-fxa-db-counts + filters: + branches: + only: main \ No newline at end of file diff --git a/jobs/wclouser-fxa-db-counts/requirements.txt b/jobs/wclouser-fxa-db-counts/requirements.txt new file mode 100644 index 00000000..8a001db5 --- /dev/null +++ b/jobs/wclouser-fxa-db-counts/requirements.txt @@ -0,0 +1,5 @@ +click==8.0.4 +pytest==6.0.2 +pytest-black==0.3.11 +pytest-flake8==1.0.6 +google-cloud-bigquery==3.26.0 diff --git a/jobs/wclouser-fxa-db-counts/setup.py b/jobs/wclouser-fxa-db-counts/setup.py new file mode 100644 index 00000000..e8cb4120 --- /dev/null +++ b/jobs/wclouser-fxa-db-counts/setup.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +from 
"""Record daily row counts of the FxA accounts database tables in BigQuery."""
from datetime import datetime

import click
from google.cloud import bigquery

# Dataset holding the exported accounts tables that are counted.
_SOURCE_DATASET = "moz-fx-data-shared-prod.accounts_db_external"

# Table that receives one (date, name, count) row per metric on every run.
_DESTINATION_TABLE = "mozdata.analysis.wclouser_fxa_health_db_counts"

# Each entry <name> is counted from `fxa_<name>_v1` and reported as <name>.
_COUNTED_TABLES = (
    "account_customers",
    "account_groups",
    "account_reset_tokens",
    "accounts",
    "carts",
    "device_commands",
    "devices",
    "email_bounces",
    "emails",
    "linked_accounts",
    "oauth_codes",
    "oauth_refresh_tokens",
    "oauth_tokens",
    "password_change_tokens",
    "password_forgot_tokens",
    "paypal_customers",
    "recovery_codes",
    "security_events",
    "sent_emails",
    "session_tokens",
    "signin_codes",
    "totp",
    "unblock_codes",
    "unverified_tokens",
    "verification_reminders",
)

# (metric name, boolean column of the emails table that must be FALSE)
_EMAIL_FLAG_METRICS = (
    ("accounts_with_secondary_emails", "isPrimary"),
    ("accounts_with_unverified_emails", "isVerified"),
)

# (metric name, providerId) -- see LinkedAccountProviderIds at
# https://github.com/mozilla/fxa/blob/main/packages/fxa-settings/src/lib/types.ts
_LINKED_PROVIDER_METRICS = (
    ("accounts_linked_to_google", 1),
    ("accounts_linked_to_apple", 2),
)


def _build_query():
    """Return one UNION ALL statement yielding (table_name, total_rows) rows.

    The statement contains, in order: a plain COUNT(*) per entry of
    _COUNTED_TABLES, a distinct-account count per entry of
    _EMAIL_FLAG_METRICS, and a linked-account count per entry of
    _LINKED_PROVIDER_METRICS.
    """
    selects = [
        f"SELECT '{name}' AS table_name, COUNT(*) AS total_rows "
        f"FROM `{_SOURCE_DATASET}.fxa_{name}_v1`"
        for name in _COUNTED_TABLES
    ]
    selects.extend(
        f'(SELECT "{metric}" AS table_name, '
        "COUNT(DISTINCT accounts.uid) AS total_rows "
        f"FROM `{_SOURCE_DATASET}.fxa_accounts_v1` accounts "
        f"JOIN `{_SOURCE_DATASET}.fxa_emails_v1` emails "
        "ON accounts.uid = emails.uid "
        f"WHERE emails.{flag} = FALSE)"
        for metric, flag in _EMAIL_FLAG_METRICS
    )
    selects.extend(
        f'(SELECT "{metric}" AS table_name, COUNT(uid) AS total_rows '
        f"FROM `{_SOURCE_DATASET}.fxa_linked_accounts_v1` "
        f"WHERE providerId={provider_id})"
        for metric, provider_id in _LINKED_PROVIDER_METRICS
    )
    return "\nUNION ALL\n".join(selects)


@click.command()
@click.option(
    "--bq_project_id",
    help="GCP BigQuery project id",
    show_default=True,
    default="moz-fx-fxa-prod",
)
def main(bq_project_id):
    """Count rows of the FxA accounts tables and store the counts in BigQuery.

    Runs the counting query in the BigQuery project given by --bq_project_id
    and streams one row per metric into the destination table.

    Raises:
        click.ClickException: if BigQuery reports errors for the insert, so
            the process exits non-zero instead of silently reporting success.
    """
    client = bigquery.Client(project=bq_project_id)
    query_job = client.query(_build_query())

    print("Running query:", query_job.job_id)

    results = query_job.result()

    # Computed once so that every row of a run carries the same date, even if
    # the run straddles midnight. Uses the local time zone of the process.
    snapshot_date = datetime.now().strftime("%Y-%m-%d")

    rows_to_insert = [
        {
            "date": snapshot_date,
            "name": row["table_name"],
            "count": row["total_rows"],
        }
        for row in results
    ]

    # insert_rows_json returns a list of per-row errors; empty means success.
    errors = client.insert_rows_json(_DESTINATION_TABLE, rows_to_insert)

    if errors:
        raise click.ClickException(f"Errors occurred while inserting rows: {errors}")

    print("Data inserted successfully.")


if __name__ == "__main__":
    main()