Skip to content

Commit 45fe25d

Browse files
authored
feat(DENG-9733): Create mozilla_org_derived.firefox_whatsnew_summary_v3 (#8276)
* feat(DENG-9733): Create firefox_whatsnew_summary_v3 & update view * feat(DENG-9733): Add missing column description & add edge case handling for odd Fx versions
1 parent e54a72c commit 45fe25d

File tree

4 files changed

+436
-2
lines changed

4 files changed

+436
-2
lines changed

sql/moz-fx-data-shared-prod/mozilla_org/firefox_whatsnew_summary/view.sql

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ CREATE OR REPLACE VIEW
22
`moz-fx-data-shared-prod.mozilla_org.firefox_whatsnew_summary`
33
AS
44
SELECT
5-
*
5+
wn.ga_client_id || "-" || wn.ga_session_id AS visit_identifier,
6+
s.engaged_session,
7+
wn.* EXCEPT (ga_client_id, ga_session_id)
68
FROM
7-
`moz-fx-data-shared-prod.mozilla_org_derived.firefox_whatsnew_summary_v2`
9+
`moz-fx-data-shared-prod.mozilla_org_derived.firefox_whatsnew_summary_v3` wn
10+
JOIN
11+
`moz-fx-data-shared-prod.mozilla_org_derived.ga_sessions_v3` s
12+
ON wn.ga_client_id = s.ga_client_id
13+
AND wn.ga_session_id = s.ga_session_id
14+
WHERE
15+
REGEXP_CONTAINS(s.ga_client_id || '-' || s.ga_session_id, r"^[0-9]+\.{1}[0-9]+\-{1}[0-9]+$")
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
friendly_name: Firefox Whats New Page Summary V3
2+
description: |-
3+
Each row represents a page view of a Firefox What's New Page
4+
owners:
5+
6+
labels:
7+
incremental: true
8+
owner1: kwindau
9+
scheduling:
10+
dag_name: bqetl_google_analytics_derived_ga4
11+
bigquery:
12+
time_partitioning:
13+
type: day
14+
field: event_date
15+
require_partition_filter: false
16+
expiration_days: null
17+
range_partitioning: null
18+
clustering:
19+
fields:
20+
- page_location_locale
21+
references: {}
22+
require_column_descriptions: true
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
-- Step 1: collect every page_view event on mozilla.org for the submission date.
-- Event parameters are stored as a repeated key/value record (event_params), so
-- each parameter needed downstream is extracted with a scalar subquery.
-- Every scalar subquery carries LIMIT 1: a scalar subquery that yields more than
-- one row fails the whole query at runtime, and a single malformed event with a
-- repeated key should not break the daily partition.
WITH all_page_view_events AS (
  SELECT
    *,
    -- The session id is stored in the parameter's int_value; expose it as STRING.
    CAST(
      (
        SELECT
          `value`
        FROM
          UNNEST(event_params)
        WHERE
          key = 'ga_session_id'
        LIMIT
          1
      ).int_value AS STRING
    ) AS ga_session_id,
    -- Full URL of the page that was viewed.
    (
      SELECT
        `value`
      FROM
        UNNEST(event_params)
      WHERE
        key = 'page_location'
      LIMIT
        1
    ).string_value AS page_location,
    -- Engagement time reported on the event, in milliseconds (per the key name).
    (
      SELECT
        `value`.int_value
      FROM
        UNNEST(event_params)
      WHERE
        key = 'engagement_time_msec'
      LIMIT
        1
    ) AS engagement_time_msec
  FROM
    `moz-fx-data-marketing-prod.analytics_313696158.events_*`
  WHERE
    -- One daily shard per run: the table suffix is the date formatted as YYYYMMDD.
    _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', @submission_date)
    AND event_name = 'page_view'
),
41+
-- Step 2: keep only the page views of the Firefox "what's new" pages and parse
-- the locale, version and query-string details out of the page URL.
--
-- URL parsing: the "https://www.mozilla.org" prefix is stripped, then the
-- remaining path is split on '/'. Because the path starts with '/', offset 0 is
-- the empty string, so for a path shaped like /<locale>/firefox/<version>/whatsnew/
--   offset 1 = locale, offset 2 = product ('firefox'), offset 3 = version.
-- The dots in the host are escaped so they only match a literal '.'.
whats_new_page_page_views AS (
  SELECT
    PARSE_DATE('%Y%m%d', event_date) AS event_date,
    user_pseudo_id AS ga_client_id,
    ga_session_id,
    platform,
    device.category AS device_category,
    device.operating_system AS operating_system,
    device.operating_system_version,
    geo.country AS geo_country,
    geo.city AS geo_city,
    geo.region AS geo_region,
    geo.sub_continent AS geo_sub_continent,
    geo.metro AS geo_metro,
    device.language AS device_language,
    device.web_info.browser AS device_browser,
    device.web_info.browser_version AS device_browser_version,
    event_params,
    -- traffic source = first-ever source that acquired the user_pseudo_id
    traffic_source.name AS traffic_source_name,
    traffic_source.medium AS traffic_source_medium,
    traffic_source.source AS traffic_source_source,
    -- collected traffic source = source for that event (can change from event to event)
    collected_traffic_source.manual_campaign_id AS cts_manual_campaign_id,
    collected_traffic_source.manual_campaign_name AS cts_manual_campaign_name,
    collected_traffic_source.manual_source AS cts_manual_source,
    collected_traffic_source.manual_medium AS cts_manual_medium,
    collected_traffic_source.manual_term AS cts_manual_term,
    collected_traffic_source.manual_content AS cts_manual_content,
    collected_traffic_source.manual_source_platform AS cts_manual_source_platform,
    collected_traffic_source.manual_creative_format AS cts_manual_creative_format,
    collected_traffic_source.manual_marketing_tactic AS cts_manual_marketing_tactic,
    -- the last source before starting their session
    session_traffic_source_last_click.manual_campaign.campaign_id AS session_src_manual_campaign_id,
    session_traffic_source_last_click.manual_campaign.campaign_name AS session_src_manual_campaign_name,
    session_traffic_source_last_click.manual_campaign.source AS session_src_manual_campaign_source,
    -- NOTE(review): this alias is spelled "sesssion" (three s's), unlike its
    -- siblings. It is kept as-is because renaming it would change the output
    -- column name; presumably the table schema uses the same spelling -- confirm
    -- before correcting it in both places.
    session_traffic_source_last_click.manual_campaign.medium AS sesssion_src_manual_campaign_medium,
    session_traffic_source_last_click.manual_campaign.term AS session_src_manual_campaign_term,
    session_traffic_source_last_click.manual_campaign.content AS session_src_manual_campaign_content,
    session_traffic_source_last_click.google_ads_campaign.customer_id AS session_src_customer_id,
    session_traffic_source_last_click.google_ads_campaign.account_name AS session_src_account_name,
    session_traffic_source_last_click.google_ads_campaign.campaign_id AS session_src_campaign_id,
    session_traffic_source_last_click.google_ads_campaign.campaign_name AS session_src_campaign_name,
    session_traffic_source_last_click.google_ads_campaign.ad_group_id AS session_src_ad_group_id,
    session_traffic_source_last_click.google_ads_campaign.ad_group_name AS session_src_ad_group_name,
    page_location,
    -- First path segment after the host, e.g. the locale.
    TRIM(
      SPLIT(REGEXP_REPLACE(page_location, r'^https://www\.mozilla\.org', ''), '/')[SAFE_OFFSET(1)],
      '/'
    ) AS page_location_locale,
    -- Third path segment after the host; fed to the version parser downstream.
    SPLIT(REGEXP_REPLACE(page_location, r'^https://www\.mozilla\.org', ''), '/')[
      SAFE_OFFSET(3)
    ] AS page_level_2,
    -- Query-string parameters carried on the what's new URL.
    REGEXP_EXTRACT(page_location, r'[?&]oldversion=([^&]+)') AS oldversion,
    REGEXP_EXTRACT(page_location, r'[?&]newversion=([^&]+)') AS newversion,
    engagement_time_msec
  FROM
    all_page_view_events
  WHERE
    LOWER(page_location) LIKE '%whatsnew%'
    -- Second path segment after the host must be the Firefox product section.
    AND LOWER(
      SPLIT(REGEXP_REPLACE(page_location, r'^https://www\.mozilla\.org', ''), '/')[SAFE_OFFSET(2)]
    ) = 'firefox'
)
107+
-- Final output: one row per what's new page view, plus parsed version details
-- for the version in the URL path and for the oldversion / newversion
-- query-string parameters.
SELECT
  page_views.*,
  mozfun.norm.browser_version_info(page_views.page_level_2) AS page_level_2_version_info,
  -- oldversion values starting with a long run of 9s are an edge case that
  -- breaks version parsing, so they are passed to the parser as NULL instead.
  mozfun.norm.browser_version_info(
    CASE
      WHEN page_views.oldversion LIKE '999999999999999999999999%'
        THEN NULL
      ELSE page_views.oldversion
    END
  ) AS old_version_version_info,
  mozfun.norm.browser_version_info(page_views.newversion) AS new_version_version_info
FROM
  whats_new_page_page_views AS page_views

0 commit comments

Comments
 (0)