Skip to content

Commit 72dfbd4

Browse files
author
CircleCI generate-sql job
committed
Auto-push due to change on main branch [ci skip]
1 parent d63abbf commit 72dfbd4

File tree

6 files changed

+367
-0
lines changed

6 files changed

+367
-0
lines changed

dags/bqetl_google_analytics_derived_ga4.py

+13
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,21 @@
103103
depends_on_past=False,
104104
)
105105

106+
ga_derived__www_site_page_metrics__v2 = bigquery_etl_query(
107+
task_id="ga_derived__www_site_page_metrics__v2",
108+
destination_table="www_site_page_metrics_v2",
109+
dataset_id="ga_derived",
110+
project_id="moz-fx-data-marketing-prod",
111+
112+
113+
date_partition_parameter="submission_date",
114+
depends_on_past=False,
115+
)
116+
106117
ga_derived__www_site_events_metrics__v2.set_upstream(ga_derived__www_site_hits__v2)
107118

108119
ga_derived__www_site_landing_page_metrics__v2.set_upstream(
109120
ga_derived__www_site_hits__v2
110121
)
122+
123+
ga_derived__www_site_page_metrics__v2.set_upstream(ga_derived__www_site_hits__v2)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
friendly_name: Www Site Page Metrics
2+
description: |-
3+
Please provide a description for the query
4+
owners: []
5+
labels: {}
6+
bigquery: null
7+
workgroup_access:
8+
- role: roles/bigquery.dataViewer
9+
members:
10+
- workgroup:mozilla-confidential
11+
references:
12+
view.sql:
13+
- moz-fx-data-marketing-prod.ga_derived.www_site_page_metrics_v2
14+
deprecated: false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATE OR REPLACE VIEW
2+
`moz-fx-data-marketing-prod.ga.www_site_page_metrics`
3+
AS
4+
SELECT
5+
*
6+
FROM
7+
`moz-fx-data-marketing-prod.ga_derived.www_site_page_metrics_v2`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
friendly_name: WWW Site Page Metrics V2
2+
description: |-
3+
Aggregated metrics per page on www.mozilla.org from Google Analytics 4
4+
owners:
5+
6+
labels:
7+
incremental: true
8+
owner1: kwindau
9+
dag: bqetl_google_analytics_derived_ga4
10+
scheduling:
11+
dag_name: bqetl_google_analytics_derived_ga4
12+
bigquery:
13+
time_partitioning:
14+
type: day
15+
field: date
16+
require_partition_filter: true
17+
expiration_days: null
18+
clustering:
19+
fields:
20+
- page_name
21+
- locale
22+
- country
23+
- medium
24+
workgroup_access:
25+
- role: roles/bigquery.dataViewer
26+
members:
27+
- workgroup:mozilla-confidential
28+
references: {}
29+
deprecated: false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
--first, calculate the next page view's start time relative to when the session started in seconds using lead
2+
WITH page_view_staging AS (
3+
SELECT
4+
*,
5+
LEAD(hit_time) OVER (
6+
PARTITION BY
7+
full_visitor_id,
8+
visit_start_time
9+
ORDER BY
10+
hit_time
11+
) AS next_pageview,
12+
FROM
13+
`moz-fx-data-marketing-prod.ga_derived.www_site_hits_v2`
14+
WHERE
15+
date = @submission_date
16+
AND hit_type = 'PAGE'
17+
),
18+
--now, subtract current page view start time from next page view start time to get time on current page
19+
page_views_only AS (
20+
SELECT
21+
a.date,
22+
a.page_path AS page,
23+
a.page_path_level1 AS locale,
24+
a.page_name,
25+
a.page_level_1,
26+
a.page_level_2,
27+
a.page_level_3,
28+
a.page_level_4,
29+
a.page_level_5,
30+
a.device_category,
31+
a.operating_system,
32+
a.language,
33+
a.browser,
34+
a.browser_version,
35+
a.country,
36+
a.source,
37+
a.medium,
38+
a.campaign,
39+
a.ad_content,
40+
SUM(a.next_pageview - a.hit_time) AS total_time_on_page
41+
FROM
42+
page_view_staging AS a
43+
GROUP BY
44+
a.date,
45+
a.page_path,
46+
a.page_path_level1,
47+
a.page_name,
48+
a.page_level_1,
49+
a.page_level_2,
50+
a.page_level_3,
51+
a.page_level_4,
52+
a.page_level_5,
53+
a.device_category,
54+
a.operating_system,
55+
a.language,
56+
a.browser,
57+
a.browser_version,
58+
a.country,
59+
a.source,
60+
a.medium,
61+
a.campaign,
62+
a.ad_content
63+
),
64+
all_events_staging AS (
65+
SELECT
66+
a.date,
67+
a.page_path AS page,
68+
a.page_path_level1 AS locale,
69+
a.page_name,
70+
a.page_level_1,
71+
a.page_level_2,
72+
a.page_level_3,
73+
a.page_level_4,
74+
a.page_level_5,
75+
a.device_category,
76+
a.operating_system,
77+
a.language,
78+
a.browser,
79+
a.browser_version,
80+
a.country,
81+
a.source,
82+
a.medium,
83+
a.campaign,
84+
a.ad_content,
85+
COUNTIF(a.event_name = 'page_view') AS pageviews,
86+
COUNT(
87+
DISTINCT(CASE WHEN a.event_name = 'page_view' THEN a.visit_identifier ELSE NULL END)
88+
) AS unique_pageviews,
89+
SUM(a.entrances) AS entrances,
90+
SUM(a.exits) AS exits,
91+
COUNTIF(event_name = 'page_view' AND is_exit IS FALSE) AS non_exit_pageviews,
92+
COUNTIF(hit_type = 'EVENT') AS total_events,
93+
COUNT(
94+
DISTINCT(CASE WHEN hit_type = 'EVENT' THEN visit_identifier ELSE NULL END)
95+
) AS unique_events,
96+
COUNT(
97+
DISTINCT(CASE WHEN single_page_session IS TRUE THEN visit_identifier ELSE NULL END)
98+
) AS single_page_sessions,
99+
COUNT(
100+
DISTINCT(
101+
CASE
102+
WHEN bounces = 1
103+
AND event_name = 'page_view'
104+
THEN visit_identifier
105+
ELSE NULL
106+
END
107+
)
108+
) AS bounces
109+
FROM
110+
`moz-fx-data-marketing-prod.ga_derived.www_site_hits_v2` AS a
111+
WHERE
112+
date = @submission_date
113+
GROUP BY
114+
a.date,
115+
a.page_path,
116+
a.page_path_level1,
117+
a.page_name,
118+
a.page_level_1,
119+
a.page_level_2,
120+
a.page_level_3,
121+
a.page_level_4,
122+
a.page_level_5,
123+
a.device_category,
124+
a.operating_system,
125+
a.language,
126+
a.browser,
127+
a.browser_version,
128+
a.country,
129+
a.source,
130+
a.medium,
131+
a.campaign,
132+
a.ad_content
133+
)
134+
--join it all together to get everything plus total time on each page
135+
SELECT
136+
a.date,
137+
a.page,
138+
a.locale,
139+
a.page_name,
140+
a.page_level_1,
141+
a.page_level_2,
142+
a.page_level_3,
143+
a.page_level_4,
144+
a.page_level_5,
145+
a.device_category,
146+
a.operating_system,
147+
a.language,
148+
a.browser,
149+
a.browser_version,
150+
a.country,
151+
a.source,
152+
a.medium,
153+
a.campaign,
154+
a.ad_content,
155+
a.pageviews,
156+
a.unique_pageviews,
157+
a.entrances,
158+
a.exits,
159+
a.non_exit_pageviews,
160+
b.total_time_on_page,
161+
a.total_events,
162+
a.unique_events,
163+
a.single_page_sessions,
164+
a.bounces,
165+
FROM
166+
all_events_staging a
167+
FULL OUTER JOIN
168+
page_views_only b
169+
ON a.date = b.date
170+
AND COALESCE(a.page, '') = COALESCE(b.page, '')
171+
AND COALESCE(a.locale, '') = COALESCE(b.locale, '')
172+
AND COALESCE(a.page_name, '') = COALESCE(b.page_name, '')
173+
AND COALESCE(a.page_level_1, '') = COALESCE(b.page_level_1, '')
174+
AND COALESCE(a.page_level_2, '') = COALESCE(b.page_level_2, '')
175+
AND COALESCE(a.page_level_3, '') = COALESCE(b.page_level_3, '')
176+
AND COALESCE(a.page_level_4, '') = COALESCE(b.page_level_4, '')
177+
AND COALESCE(a.page_level_5, '') = COALESCE(b.page_level_5, '')
178+
AND COALESCE(a.device_category, '') = COALESCE(b.device_category, '')
179+
AND COALESCE(a.operating_system, '') = COALESCE(b.operating_system, '')
180+
AND COALESCE(a.language, '') = COALESCE(b.language, '')
181+
AND COALESCE(a.browser, '') = COALESCE(b.browser, '')
182+
AND COALESCE(a.browser_version, '') = COALESCE(b.browser_version, '')
183+
AND COALESCE(a.country, '') = COALESCE(b.country, '')
184+
AND COALESCE(a.source, '') = COALESCE(b.source, '')
185+
AND COALESCE(a.medium, '') = COALESCE(b.medium, '')
186+
AND COALESCE(a.campaign, '') = COALESCE(b.campaign, '')
187+
AND COALESCE(a.ad_content, '') = COALESCE(b.ad_content, '')
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
fields:
2+
- mode: NULLABLE
3+
name: date
4+
type: DATE
5+
description: The date of the visit
6+
- mode: NULLABLE
7+
name: page
8+
type: STRING
9+
description: Page
10+
- mode: NULLABLE
11+
name: locale
12+
type: STRING
13+
description: Locale
14+
- mode: NULLABLE
15+
name: page_name
16+
type: STRING
17+
description: Page Name
18+
- mode: NULLABLE
19+
name: page_level_1
20+
type: STRING
21+
description: Page Level 1
22+
- mode: NULLABLE
23+
name: page_level_2
24+
type: STRING
25+
description: Page Level 2
26+
- mode: NULLABLE
27+
name: page_level_3
28+
type: STRING
29+
description: Page Level 3
30+
- mode: NULLABLE
31+
name: page_level_4
32+
type: STRING
33+
description: Page Level 4
34+
- mode: NULLABLE
35+
name: page_level_5
36+
type: STRING
37+
description: Page Level 5
38+
- mode: NULLABLE
39+
name: device_category
40+
type: STRING
41+
description: Device Category - The device category the visitor used to visit the site
42+
- mode: NULLABLE
43+
name: operating_system
44+
type: STRING
45+
description: Operating System - The device operating system that the visitor used to visit the site
46+
- mode: NULLABLE
47+
name: language
48+
type: STRING
49+
description: Language
50+
- mode: NULLABLE
51+
name: browser
52+
type: STRING
53+
description: Browser
54+
- mode: NULLABLE
55+
name: browser_version
56+
type: STRING
57+
description: Browser Version
58+
- mode: NULLABLE
59+
name: country
60+
type: STRING
61+
description: Country
62+
- mode: NULLABLE
63+
name: source
64+
type: STRING
65+
description: Source
66+
- mode: NULLABLE
67+
name: medium
68+
type: STRING
69+
description: Medium
70+
- mode: NULLABLE
71+
name: campaign
72+
type: STRING
73+
description: Campaign
74+
- mode: NULLABLE
75+
name: ad_content
76+
type: STRING
77+
description: Ad Content
78+
- mode: NULLABLE
79+
name: pageviews
80+
type: INT64
81+
description: Page Views
82+
- mode: NULLABLE
83+
name: unique_pageviews
84+
type: INT64
85+
description: Unique Page Views
86+
- mode: NULLABLE
87+
name: entrances
88+
type: INT64
89+
description: Entrances
90+
- mode: NULLABLE
91+
name: exits
92+
type: INT64
93+
description: Exits
94+
- mode: NULLABLE
95+
name: non_exit_pageviews
96+
type: INT64
97+
description: Non Exit Page Views
98+
- mode: NULLABLE
99+
name: total_time_on_page
100+
type: FLOAT64
101+
description: Total Time On Page
102+
- mode: NULLABLE
103+
name: total_events
104+
type: INT64
105+
description: Total Events
106+
- mode: NULLABLE
107+
name: unique_events
108+
type: INT64
109+
description: Unique Events
110+
- mode: NULLABLE
111+
name: single_page_sessions
112+
type: INT64
113+
description: Single Page Sessions
114+
- mode: NULLABLE
115+
name: bounces
116+
type: INT64
117+
description: Bounces

0 commit comments

Comments
 (0)