Skip to content

Commit 857818c

Browse files
committed
add wclouser_fxa_db_counts job. Fixes DSRE-1779
1 parent ff7c18b commit 857818c

File tree

11 files changed

+273
-1
lines changed

11 files changed

+273
-1
lines changed

Diff for: .circleci/config.yml

+29-1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ jobs:
139139
name: Test Code
140140
command: docker run app:build python3 -m pytest
141141

142+
142143
build-job-dap-collector-ppa-prod:
143144
docker:
144145
- image: << pipeline.parameters.git-image >>
@@ -155,6 +156,7 @@ jobs:
155156
name: Test Code
156157
command: docker run app:build python3 -m pytest
157158

159+
158160
build-job-desktop-mobile-mau-2020:
159161
docker:
160162
- image: << pipeline.parameters.git-image >>
@@ -345,6 +347,19 @@ jobs:
345347
command: docker run app:build pytest
346348

347349

350+
build-job-wclouser-fxa-db-counts:
351+
docker:
352+
- image: << pipeline.parameters.git-image >>
353+
steps:
354+
- checkout
355+
- compare-branch:
356+
pattern: ^jobs/wclouser-fxa-db-counts/
357+
- setup_remote_docker:
358+
version: << pipeline.parameters.docker-version >>
359+
- run:
360+
name: Build Docker image
361+
command: docker build -t app:build jobs/wclouser-fxa-db-counts/
362+
348363
build-job-webcompat-kb:
349364
docker:
350365
- image: << pipeline.parameters.git-image >>
@@ -501,7 +516,6 @@ workflows:
501516
branches:
502517
only: main
503518

504-
505519
job-fxci-taskcluster-export:
506520
jobs:
507521
- build-job-fxci-taskcluster-export
@@ -620,6 +634,20 @@ workflows:
620634
branches:
621635
only: main
622636

637+
job-wclouser-fxa-db-counts:
638+
jobs:
639+
- build-job-wclouser-fxa-db-counts
640+
- gcp-gcr/build-and-push-image:
641+
context: data-eng-airflow-gcr
642+
docker-context: jobs/wclouser-fxa-db-counts/
643+
path: jobs/wclouser-fxa-db-counts/
644+
image: wclouser-fxa-db-counts_docker_etl
645+
requires:
646+
- build-job-wclouser-fxa-db-counts
647+
filters:
648+
branches:
649+
only: main
650+
623651
job-webcompat-kb:
624652
jobs:
625653
- build-job-webcompat-kb

Diff for: jobs/wclouser-fxa-db-counts/.dockerignore

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
ci_job.yaml
2+
ci_workflow.yaml
3+
.DS_Store
4+
*.pyc
5+
.pytest_cache/
6+
__pycache__/
7+
venv/

Diff for: jobs/wclouser-fxa-db-counts/.flake8

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[flake8]
2+
max-line-length = 88

Diff for: jobs/wclouser-fxa-db-counts/.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.DS_Store
2+
*.pyc
3+
__pycache__/
4+
venv/

Diff for: jobs/wclouser-fxa-db-counts/Dockerfile

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
FROM python:3.8
# MAINTAINER is a deprecated instruction; a label carries the same metadata.
LABEL maintainer="Wil Clouser <[email protected]>"

# https://github.com/mozilla-services/Dockerflow/blob/master/docs/building-container.md
# NOTE: USER_ID is used below as both the uid and the gid, and GROUP_ID is used
# as both the group name and the user name. The ARG names are kept unchanged so
# existing --build-arg overrides continue to work.
ARG USER_ID="10001"
ARG GROUP_ID="app"
ARG HOME="/app"

ENV HOME=${HOME}
RUN groupadd --gid ${USER_ID} ${GROUP_ID} && \
    useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} --home-dir ${HOME} ${GROUP_ID}

WORKDIR ${HOME}

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip

# Install dependencies before copying the source so this layer is reused
# whenever only the application code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

RUN pip install --no-cache-dir .

# Drop root and change ownership of the application folder to the user
RUN chown -R ${USER_ID}:${GROUP_ID} ${HOME}
USER ${USER_ID}

Diff for: jobs/wclouser-fxa-db-counts/README.md

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# wclouser-fxa-db-counts
2+
3+
This is a simple job to enable us to identify trends within accounts data. E.g. "How many inactive accounts are there?"
4+
5+
## Usage
6+
7+
This script is intended to be run in a docker container.
8+
Build the docker image with:
9+
10+
```sh
11+
docker build -t wclouser_fxa_db_counts .
12+
```
13+
14+
To run locally, install dependencies with:
15+
16+
```sh
17+
pip install -r requirements.txt
18+
```
19+
20+
Run the script with:
21+
22+
```sh
23+
python3 -m wclouser_fxa_db_counts.main
24+
```

Diff for: jobs/wclouser-fxa-db-counts/ci_job.yaml

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
build-job-wclouser-fxa-db-counts:
2+
docker:
3+
- image: << pipeline.parameters.git-image >>
4+
steps:
5+
- checkout
6+
- compare-branch:
7+
pattern: ^jobs/wclouser-fxa-db-counts/
8+
- setup_remote_docker:
9+
version: << pipeline.parameters.docker-version >>
10+
- run:
11+
name: Build Docker image
12+
command: docker build -t app:build jobs/wclouser-fxa-db-counts/

Diff for: jobs/wclouser-fxa-db-counts/ci_workflow.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
job-wclouser-fxa-db-counts:
2+
jobs:
3+
- build-job-wclouser-fxa-db-counts
4+
- gcp-gcr/build-and-push-image:
5+
context: data-eng-airflow-gcr
6+
docker-context: jobs/wclouser-fxa-db-counts/
7+
path: jobs/wclouser-fxa-db-counts/
8+
image: wclouser-fxa-db-counts_docker_etl
9+
requires:
10+
- build-job-wclouser-fxa-db-counts
11+
filters:
12+
branches:
13+
only: main

Diff for: jobs/wclouser-fxa-db-counts/requirements.txt

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
click==8.0.4
2+
pytest==6.0.2
3+
pytest-black==0.3.11
4+
pytest-flake8==1.0.6
5+
google-cloud-bigquery==3.26.0

Diff for: jobs/wclouser-fxa-db-counts/setup.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/usr/bin/env python
2+
3+
from setuptools import find_namespace_packages, setup

# Read the long description with an explicit encoding and close the handle
# deterministically instead of leaking it until interpreter exit.
with open("README.md", encoding="utf-8") as readme_file:
    readme = readme_file.read()

setup(
    name="wclouser_fxa_db_counts",
    version="0.1.0",
    author="Wil Clouser <[email protected]>",
    # The template placeholder "docker_etl" matched no directory, so
    # `pip install .` installed an empty distribution. The README runs the job
    # as `python3 -m wclouser_fxa_db_counts.main`, so that is the package to
    # ship. find_namespace_packages is used so the package is discovered
    # whether or not it contains an __init__.py.
    packages=find_namespace_packages(
        include=["wclouser_fxa_db_counts", "wclouser_fxa_db_counts.*"]
    ),
    long_description=readme,
    long_description_content_type="text/markdown",
    include_package_data=True,
    license="MPL 2.0",
)
+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import click
2+
from datetime import datetime
3+
from google.cloud import bigquery
4+
5+
6+
# BigQuery dataset that exposes the external accounts database tables.
SOURCE_DATASET = "moz-fx-data-shared-prod.accounts_db_external"

# Table that receives one (date, name, count) row per metric on every run.
DESTINATION_TABLE = "mozdata.analysis.wclouser_fxa_health_db_counts"

# Tables whose plain row count is recorded. Each entry <name> is read from
# `<SOURCE_DATASET>.fxa_<name>_v1` and reported under the name <name>.
COUNTED_TABLES = (
    "account_customers",
    "account_groups",
    "account_reset_tokens",
    "accounts",
    "carts",
    "device_commands",
    "devices",
    "email_bounces",
    "emails",
    "linked_accounts",
    "oauth_codes",
    "oauth_refresh_tokens",
    "oauth_tokens",
    "password_change_tokens",
    "password_forgot_tokens",
    "paypal_customers",
    "recovery_codes",
    "security_events",
    "sent_emails",
    "session_tokens",
    "signin_codes",
    "totp",
    "unblock_codes",
    "unverified_tokens",
    "verification_reminders",
)

# Metrics that need more than a plain COUNT(*).
# providerId values (1 = Google, 2 = Apple): see LinkedAccountProviderIds at
# https://github.com/mozilla/fxa/blob/main/packages/fxa-settings/src/lib/types.ts
_DERIVED_COUNTS_SQL = """\
(SELECT
  "accounts_with_secondary_emails" AS table_name,
  COUNT(DISTINCT accounts.uid) AS total_rows
FROM
  `{dataset}.fxa_accounts_v1` accounts
JOIN
  `{dataset}.fxa_emails_v1` emails
ON
  accounts.uid = emails.uid
WHERE
  emails.isPrimary = FALSE
)
UNION ALL
(SELECT
  "accounts_with_unverified_emails" AS table_name,
  COUNT(DISTINCT accounts.uid) AS total_rows
FROM
  `{dataset}.fxa_accounts_v1` accounts
JOIN
  `{dataset}.fxa_emails_v1` emails
ON
  accounts.uid = emails.uid
WHERE
  emails.isVerified = FALSE
)
UNION ALL
(
SELECT
  "accounts_linked_to_google" AS table_name,
  COUNT(uid) AS total_rows
FROM
  `{dataset}.fxa_linked_accounts_v1`
WHERE
  providerId=1 -- Google
)
UNION ALL
(
SELECT
  "accounts_linked_to_apple" AS table_name,
  COUNT(uid) AS total_rows
FROM
  `{dataset}.fxa_linked_accounts_v1`
WHERE
  providerId=2 -- Apple
)
"""


def _build_query():
    """Return a single UNION ALL query yielding (table_name, total_rows) rows."""
    table_counts = [
        f"SELECT '{name}' AS table_name, COUNT(*) AS total_rows "
        f"FROM `{SOURCE_DATASET}.fxa_{name}_v1`"
        for name in COUNTED_TABLES
    ]
    derived_counts = _DERIVED_COUNTS_SQL.format(dataset=SOURCE_DATASET)
    return "\nUNION ALL\n".join(table_counts + [derived_counts])


@click.command()
@click.option(
    "--bq_project_id",
    help="GCP BigQuery project id",
    show_default=True,
    default="moz-fx-fxa-prod",
)
def main(bq_project_id):
    """Count rows in the accounts tables and append them to the counts table.

    Runs one query in the BigQuery project ``bq_project_id`` and inserts one
    row per metric (``date``, ``name``, ``count``) into DESTINATION_TABLE.

    Raises:
        click.ClickException: if BigQuery reports any row insertion error, so
            the process exits non-zero instead of reporting success.
    """
    client = bigquery.Client(project=bq_project_id)
    query_job = client.query(_build_query())

    print("Running query:", query_job.job_id)

    results = query_job.result()

    # Compute the date once so every row of a run carries the same value, even
    # if the run crosses midnight.
    # NOTE(review): this is the container's local date - confirm that is UTC.
    run_date = datetime.now().strftime("%Y-%m-%d")
    rows_to_insert = [
        {
            "date": run_date,
            "name": row["table_name"],
            "count": row["total_rows"],
        }
        for row in results
    ]

    # Insert rows into the destination table.
    errors = client.insert_rows_json(DESTINATION_TABLE, rows_to_insert)

    # A non-empty result means at least one row was rejected; fail the job
    # rather than only printing, so the scheduler sees the failure.
    if errors:
        raise click.ClickException(f"Errors occurred while inserting rows: {errors}")

    print("Data inserted successfully.")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)