Skip to content

Commit 8a50626

Browse files
phil-lee70Philip Lee
andauthored
feat(DEN-9914) Create hourly table for Onboarding data (#8584)
* create hourly table for onboarding data * change expiration date to only 7 days * change dag start_date to todays date --------- Co-authored-by: Philip Lee <[email protected]>
1 parent d6f0ce5 commit 8a50626

File tree

6 files changed

+381
-0
lines changed

6 files changed

+381
-0
lines changed

dags.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,26 @@ bqetl_messaging_system:
248248
tags:
249249
- impact/tier_3
250250

251+
bqetl_messaging_system_hourly:
252+
default_args:
253+
depends_on_past: false
254+
email:
255+
256+
257+
email_on_failure: true
258+
email_on_retry: false
259+
end_date: null
260+
261+
retries: 1
262+
retry_delay: 10m
263+
start_date: '2025-12-15'
264+
description: Hourly tables for onboarding reporting
265+
repo: bigquery-etl
266+
schedule_interval: hourly
267+
catchup: true
268+
tags:
269+
- impact/tier_1
270+
251271
bqetl_activity_stream:
252272
schedule_interval: 0 2 * * *
253273
description: |
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
friendly_name: Onboarding Hourly
2+
description: |-
3+
Contains onboarding-specific data extracted from messaging_system_v1 at an hourly cadence
4+
owners:
5+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- View over the derived hourly onboarding table. It selects every column of
-- `firefox_desktop_derived.onboarding_hourly_v2` unchanged.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.firefox_desktop.onboarding_hourly` AS
SELECT
  onboarding_hourly_v2.*
FROM
  `moz-fx-data-shared-prod.firefox_desktop_derived.onboarding_hourly_v2` AS onboarding_hourly_v2
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
friendly_name: Onboarding Hourly
2+
description: |-
3+
Contains onboarding-specific data extracted from messaging_system_v1 at an hourly cadence
4+
owners:
5+
6+
labels:
7+
incremental: true
8+
schedule: hourly
9+
owner1: phlee
10+
table_type: client_level
11+
dag: bqetl_messaging_system_hourly
12+
scheduling:
13+
dag_name: bqetl_messaging_system_hourly
14+
date_partition_parameter: null
15+
# We reprocess the same day every hour up until 1:00 the following day, to give
16+
# the live data time to come in
17+
destination_table: >-
18+
onboarding_hourly_v2${{
19+
(execution_date - macros.timedelta(hours=1)).strftime("%Y%m%d")
20+
}}
21+
parameters:
22+
- >-
23+
submission_date:DATE:{{
24+
(execution_date - macros.timedelta(hours=1)).strftime('%Y-%m-%d')
25+
}}
26+
bigquery:
27+
time_partitioning:
28+
type: day
29+
field: submission_timestamp
30+
require_partition_filter: true
31+
expiration_days: 7.0
32+
range_partitioning: null
33+
clustering:
34+
fields:
35+
- normalized_channel
36+
- sample_id
37+
require_column_descriptions: false
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
-- Builds one day of rows (the day given by @submission_date) from the live
-- messaging-system ping table, flattening selected ping metrics into
-- top-level columns.
WITH deduped_pings AS (
  -- Keep only pings whose ping type is NULL, and at most one row per
  -- (submission day, document_id): the one with the earliest
  -- submission_timestamp.
  SELECT
    *
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_live.messaging_system_v1`
  WHERE
    metrics.string.messaging_system_ping_type IS NULL
    AND DATE(submission_timestamp) = @submission_date
  QUALIFY
    ROW_NUMBER() OVER first_seen_per_document = 1
  WINDOW
    first_seen_per_document AS (
      PARTITION BY
        DATE(submission_timestamp),
        document_id
      ORDER BY
        submission_timestamp
    )
)
SELECT
  -- Column order is kept exactly as before.
  pings.submission_timestamp,
  pings.additional_properties,
  pings.metrics.string.messaging_system_addon_version AS addon_version,
  pings.metrics.uuid.messaging_system_client_id AS client_id,
  pings.document_id,
  -- Event fields reported by the messaging system.
  pings.metrics.string.messaging_system_event AS event,
  pings.metrics.text2.messaging_system_event_context AS event_context,
  pings.metrics.string.messaging_system_event_page AS event_page,
  pings.metrics.string.messaging_system_event_reason AS event_reason,
  pings.metrics.string.messaging_system_event_source AS event_source,
  pings.metrics.string.messaging_system_locale AS locale,
  pings.metrics.text2.messaging_system_message_id AS message_id,
  -- Ping-level metadata and normalized_* columns, passed through as-is.
  pings.metadata,
  pings.normalized_app_name,
  pings.normalized_channel,
  pings.normalized_country_code,
  pings.normalized_os,
  pings.normalized_os_version,
  pings.client_info.app_channel AS release_channel,
  pings.sample_id,
  pings.client_info.app_display_version AS version,
  pings.metrics.uuid.messaging_system_browser_session_id AS browser_session_id,
  pings.ping_info.experiments AS experiments,
  -- Attribution metrics are regrouped into a single STRUCT column.
  STRUCT(
    pings.metrics.string.messaging_system_attribution_campaign AS campaign,
    pings.metrics.string.messaging_system_attribution_content AS content,
    pings.metrics.string.messaging_system_attribution_experiment AS experiment,
    pings.metrics.string.messaging_system_attribution_medium AS medium,
    pings.metrics.string.messaging_system_attribution_source AS source,
    pings.metrics.string.messaging_system_attribution_ua AS ua,
    pings.metrics.string.messaging_system_attribution_variation AS variation,
    pings.metrics.string.messaging_system_attribution_dltoken AS dltoken,
    pings.metrics.string.messaging_system_attribution_dlsource AS dlsource,
    pings.metrics.string.messaging_system_attribution_msstoresignedin AS msstoresignedin
  ) AS attribution
FROM
  deduped_pings AS pings
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
fields:
2+
- description: Timestamp when the ping is received on the server side.
3+
name: submission_timestamp
4+
type: TIMESTAMP
5+
mode: NULLABLE
6+
- description: A JSON string containing any payload properties not present in the
7+
schema
8+
name: additional_properties
9+
type: STRING
10+
mode: NULLABLE
11+
- description: Addon Version
12+
name: addon_version
13+
type: STRING
14+
mode: NULLABLE
15+
- description: A unique identifier (UUID) for the client.
16+
name: client_id
17+
type: STRING
18+
mode: NULLABLE
19+
- description: The document ID specified in the URI when the client sent this message.
20+
name: document_id
21+
type: STRING
22+
mode: NULLABLE
23+
- name: event
24+
type: STRING
25+
mode: NULLABLE
26+
- description: A string that describes the context about this event
27+
name: event_context
28+
type: STRING
29+
mode: NULLABLE
30+
- description: The event_context's page. Almost always "about:welcome".
31+
name: event_page
32+
type: STRING
33+
mode: NULLABLE
34+
- description: The event_context's reason. Likely something like "welcome-window-closed"
35+
or "app-shut-down".
36+
name: event_reason
37+
type: STRING
38+
mode: NULLABLE
39+
- description: The event_context's source. Likely something like "primary_button".
40+
name: event_source
41+
type: STRING
42+
mode: NULLABLE
43+
- name: locale
44+
type: STRING
45+
mode: NULLABLE
46+
description: Set of language- and/or country-based preferences for a user interface.
47+
- name: message_id
48+
type: STRING
49+
mode: NULLABLE
50+
description: Message ID
51+
- name: metadata
52+
type: RECORD
53+
mode: NULLABLE
54+
fields:
55+
- description: Results of a geographic lookup based on the client's IP address
56+
name: geo
57+
type: RECORD
58+
mode: NULLABLE
59+
fields:
60+
- name: city
61+
type: STRING
62+
mode: NULLABLE
63+
- description: An ISO 3166-1 alpha-2 country code
64+
name: country
65+
type: STRING
66+
mode: NULLABLE
67+
- description: The specific geo database version used for this lookup
68+
name: db_version
69+
type: STRING
70+
mode: NULLABLE
71+
- description: First major country subdivision, typically a state, province, or
72+
county
73+
name: subdivision1
74+
type: STRING
75+
mode: NULLABLE
76+
- description: Second major country subdivision; not applicable for most countries
77+
name: subdivision2
78+
type: STRING
79+
mode: NULLABLE
80+
- name: header
81+
type: RECORD
82+
mode: NULLABLE
83+
fields:
84+
- description: Date HTTP header
85+
mode: NULLABLE
86+
name: date
87+
type: STRING
88+
- description: DNT (Do Not Track) HTTP header
89+
mode: NULLABLE
90+
name: dnt
91+
type: STRING
92+
- description: X-Debug-Id HTTP header
93+
mode: NULLABLE
94+
name: x_debug_id
95+
type: STRING
96+
- description: X-PingSender-Version HTTP header
97+
mode: NULLABLE
98+
name: x_pingsender_version
99+
type: STRING
100+
- description: X-Source-Tags HTTP header
101+
mode: NULLABLE
102+
name: x_source_tags
103+
type: STRING
104+
- description: X-Telemetry-Agent HTTP header
105+
mode: NULLABLE
106+
name: x_telemetry_agent
107+
type: STRING
108+
- description: X-Foxsec-IP-Reputation header
109+
mode: NULLABLE
110+
name: x_foxsec_ip_reputation
111+
type: STRING
112+
- description: X-LB-Tags HTTP header
113+
mode: NULLABLE
114+
name: x_lb_tags
115+
type: STRING
116+
- description: Results of ISP lookup based on the client's IP address
117+
name: isp
118+
type: RECORD
119+
mode: NULLABLE
120+
fields:
121+
- description: The specific geo ISP database version used for this lookup
122+
mode: NULLABLE
123+
name: db_version
124+
type: STRING
125+
- description: The name of the ISP associated with the client's IP address
126+
mode: NULLABLE
127+
name: name
128+
type: STRING
129+
- description: The name of a specific business entity associated with the client's
130+
IP address when available; otherwise the ISP name
131+
mode: NULLABLE
132+
name: organization
133+
type: STRING
134+
- description: Parsed components of the client's user agent string
135+
name: user_agent
136+
type: RECORD
137+
mode: NULLABLE
138+
fields:
139+
- name: browser
140+
type: STRING
141+
mode: NULLABLE
142+
- name: os
143+
type: STRING
144+
mode: NULLABLE
145+
- name: version
146+
type: STRING
147+
mode: NULLABLE
148+
- description: Set to "Other" if this message contained an unrecognized app name
149+
mode: NULLABLE
150+
name: normalized_app_name
151+
type: STRING
152+
- description: The normalized channel the application is being distributed on.
153+
mode: NULLABLE
154+
name: normalized_channel
155+
type: STRING
156+
- description: Code of the country in which the activity took place, as determined
157+
by the IP geolocation. Unknown or NULL values are normally stored as '??'.
158+
mode: NULLABLE
159+
name: normalized_country_code
160+
type: STRING
161+
- description: The normalized name of the operating system running at the client.
162+
mode: NULLABLE
163+
name: normalized_os
164+
type: STRING
165+
- name: normalized_os_version
166+
mode: NULLABLE
167+
type: STRING
168+
- name: release_channel
169+
mode: NULLABLE
170+
type: STRING
171+
- name: sample_id
172+
mode: NULLABLE
173+
type: INTEGER
174+
description: A number, 0-99, that samples by client_id and allows filtering data
175+
for analysis. It is a pipeline-generated artifact that should match between pings.
176+
- name: version
177+
mode: NULLABLE
178+
type: STRING
179+
description: User visible version string (e.g. "1.0.3") for the browser.
180+
- description: A mirror of the browser sessionId, as defined in
181+
https://github.com/mozilla-services/mozilla-pipeline-schemas/blob/main/schemas/telemetry/main/main.4.schema.json
182+
name: browser_session_id
183+
type: STRING
184+
mode: NULLABLE
185+
- name: experiments
186+
type: RECORD
187+
mode: REPEATED
188+
description: Experiment Information
189+
fields:
190+
- name: key
191+
type: STRING
192+
mode: NULLABLE
193+
- name: value
194+
type: RECORD
195+
mode: NULLABLE
196+
fields:
197+
- name: branch
198+
type: STRING
199+
mode: NULLABLE
200+
- name: extra
201+
type: RECORD
202+
mode: NULLABLE
203+
fields:
204+
- name: enrollment_id
205+
type: STRING
206+
mode: NULLABLE
207+
- name: type
208+
type: STRING
209+
mode: NULLABLE
210+
- name: attribution
211+
type: RECORD
212+
mode: NULLABLE
213+
fields:
214+
- description: Identifier of the particular campaign that led to the download of
215+
the product.
216+
name: campaign
217+
type: STRING
218+
mode: NULLABLE
219+
- description: Identifier to indicate the particular link within a campaign.
220+
name: content
221+
type: STRING
222+
mode: NULLABLE
223+
- description: Funnel experiment parameters, see bug 1567339
224+
name: experiment
225+
type: STRING
226+
mode: NULLABLE
227+
- description: Category of the source, such as 'organic' for a search engine.
228+
name: medium
229+
type: STRING
230+
mode: NULLABLE
231+
- description: Referring partner domain, when install happens via a known partner.
232+
name: source
233+
type: STRING
234+
mode: NULLABLE
235+
- description: Derived user agent, see bug 1595063
236+
name: ua
237+
type: STRING
238+
mode: NULLABLE
239+
- description: Funnel experiment parameters, see bug 1567339
240+
name: variation
241+
type: STRING
242+
mode: NULLABLE
243+
- description: Unique token created at Firefox download time, see bug 1757451
244+
name: dltoken
245+
type: STRING
246+
mode: NULLABLE
247+
- description: Identifier that indicates where installations of Firefox originate,
248+
see bug 1827238
249+
name: dlsource
250+
type: STRING
251+
mode: NULLABLE
252+
- description: Either the string "true" or the string "false" to indicate whether
253+
the attributed install came from the Microsoft store and, if so, whether the
254+
user was signed in at the time.
255+
name: msstoresignedin
256+
type: STRING
257+
mode: NULLABLE

0 commit comments

Comments
 (0)