-
Notifications
You must be signed in to change notification settings - Fork 311
Expand file tree
/
Copy pathreach-and-frequency.sql
More file actions
218 lines (203 loc) · 7 KB
/
reach-and-frequency.sql
File metadata and controls
218 lines (203 loc) · 7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
-- setup warehouse, database and schema
USE ROLE ACCOUNTADMIN;
CREATE WAREHOUSE R_F_DEMO_WH WITH WAREHOUSE_SIZE='XSmall' STATEMENT_TIMEOUT_IN_SECONDS=15 STATEMENT_QUEUED_TIMEOUT_IN_SECONDS=15;
USE WAREHOUSE R_F_DEMO_WH;
CREATE DATABASE R_F_DEMO;
CREATE SCHEMA R_F_DEMO.DEMO;
-- create table to hold impressions
use R_F_DEMO.DEMO;
create or replace table IMPRESSIONS(event_time TIMESTAMP_TZ,
campaign_id VARCHAR(10),
placement_id VARCHAR(10),
creative_id VARCHAR(10),
user_id VARCHAR(40));
-- generate test data
insert into IMPRESSIONS
select
dateadd(second, uniform(1, 2592000, random(1)), ('2022-09-01'::timestamp)) as event_time,
'174631' as campaign_id
,uniform(200001,202000,random(2))::varchar(10) as placement_id
,uniform(300001,301000,random(3))::varchar(10) as creative_id
,sha1(uniform(1, 3500000, random(4)))::varchar(40) as user_id
from table(generator(rowcount=>20000000));
-- verify data
select count(1) from IMPRESSIONS where campaign_id='174631';
select count(distinct(placement_id)) from IMPRESSIONS where campaign_id='174631';
select count(distinct(user_id)) from IMPRESSIONS where campaign_id='174631';
-- calculate cumulative impressions by day
WITH DAILY_IMPRESSIONS AS
(
SELECT TO_DATE(event_time) as day,
COUNT(1) as day_impressions
FROM
IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
select
day,
sum(day_impressions) over (order by day asc rows between unbounded preceding and current row) as CUMULATIVE_IMPRESSIONS
from DAILY_IMPRESSIONS
order by 1;
-- calculate cumulative uniques by day
WITH firstseen AS (
SELECT user_id, MIN(TO_DATE(event_time)) firstday
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
SELECT DISTINCT firstday as day, COUNT(user_id) OVER (ORDER BY firstday) daily_cumulative_uniques
FROM firstseen
ORDER BY 1;
-- show cumulativer impressions and uniques side-by-side to understand diminishing returns
WITH DAILY_CUMULATIVE_IMPRESSIONS AS (
WITH DAILY_IMPRESSIONS AS
(
SELECT TO_DATE(event_time) as day,
COUNT(1) as day_impressions
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
select
day,
sum(day_impressions) over (order by day asc rows between unbounded preceding and current row) as CUMULATIVE_IMPRESSIONS
from DAILY_IMPRESSIONS
),
DAILY_CUMULATIVE_UNIQUES AS
(
WITH firstseen AS
(
SELECT user_id, MIN(TO_DATE(event_time)) firstday
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
SELECT DISTINCT firstday as day, COUNT(user_id) OVER (ORDER BY firstday) CUMULATIVE_UNIQUES
FROM firstseen
ORDER BY 1
)
SELECT dci.DAY, dci.CUMULATIVE_IMPRESSIONS, dcu.CUMULATIVE_UNIQUES
FROM DAILY_CUMULATIVE_IMPRESSIONS dci
JOIN DAILY_CUMULATIVE_UNIQUES dcu on dcu.day=dci.day
ORDER BY 1;
-- show cumulative impressions and uniques side-by-side to understand diminishing returns, with minimum of 3 impressions
WITH DAILY_CUMULATIVE_IMPRESSIONS AS (
WITH DAILY_IMPRESSIONS AS
(
SELECT TO_DATE(event_time) as day,
COUNT(1) as day_impressions
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
select
day,
sum(day_impressions) over (order by day asc rows between unbounded preceding and current row) as CUMULATIVE_IMPRESSIONS
from DAILY_IMPRESSIONS
),
DAILY_CUMULATIVE_UNIQUES_MIN_3 AS
(
WITH firstseen AS
(
SELECT user_id, MIN(TO_DATE(event_time)) firstday
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
HAVING count(1) > 2
)
SELECT DISTINCT firstday as day, COUNT(user_id) OVER (ORDER BY firstday) CUMULATIVE_UNIQUES
FROM firstseen
ORDER BY 1
)
SELECT dci.DAY, dci.CUMULATIVE_IMPRESSIONS, dcu.CUMULATIVE_UNIQUES
FROM DAILY_CUMULATIVE_IMPRESSIONS dci
JOIN DAILY_CUMULATIVE_UNIQUES_MIN_3 dcu on dcu.day=dci.day
ORDER BY 1;
-- show cumulative impressions and uniques, comparing to minimum of 3 impressions
WITH DAILY_CUMULATIVE_IMPRESSIONS AS (
WITH DAILY_IMPRESSIONS AS
(
SELECT TO_DATE(event_time) as day,
COUNT(1) as day_impressions
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
select
day,
sum(day_impressions) over (order by day asc rows between unbounded preceding and current row) as CUMULATIVE_IMPRESSIONS
from DAILY_IMPRESSIONS
),
DAILY_CUMULATIVE_UNIQUES_MIN_3 AS
(
WITH firstseen AS
(
SELECT user_id, MIN(TO_DATE(event_time)) firstday
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
HAVING count(1) > 2
)
SELECT DISTINCT firstday as day, COUNT(user_id) OVER (ORDER BY firstday) CUMULATIVE_UNIQUES
FROM firstseen
ORDER BY 1
),
DAILY_CUMULATIVE_UNIQUES AS
(
WITH firstseen AS
(
SELECT user_id, MIN(TO_DATE(event_time)) firstday
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1
)
SELECT DISTINCT firstday as day, COUNT(user_id) OVER (ORDER BY firstday) CUMULATIVE_UNIQUES
FROM firstseen
ORDER BY 1
)
SELECT dci.DAY, dci.CUMULATIVE_IMPRESSIONS, dcu.CUMULATIVE_UNIQUES, dcu3.CUMULATIVE_UNIQUES as "UNIQUES MIN 3 IMPRESSIONS"
FROM DAILY_CUMULATIVE_IMPRESSIONS dci
JOIN DAILY_CUMULATIVE_UNIQUES dcu on dcu.day=dci.day
JOIN DAILY_CUMULATIVE_UNIQUES_MIN_3 dcu3 on dcu3.day=dci.day
ORDER BY 1;
-- basic frequency query
WITH impressions_by_user AS
(select user_id, count(1) as num_impressions
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1)
select avg(num_impressions)
from impressions_by_user;
-- see distributions of frequency
WITH impressions_by_user AS
(select user_id, count(1) as num_impressions
FROM IMPRESSIONS
WHERE campaign_id='174631'
GROUP BY 1)
select num_impressions, count(1) as num_users
from impressions_by_user
group by 1
order by 1;
-- incremental reach
-- only only both
select count(1), count(distinct user_id) from impressions where campaign_id='174631' and placement_id='201571';
select count(1), count(distinct user_id) from impressions where campaign_id='174631' and placement_id='200420';
select count(1), count(distinct user_id) from impressions where campaign_id='174631' and placement_id in ('200420','201571');
-- how many people does this placement have that the other doesnt
select count(distinct i.user_id)
from impressions i
where i.campaign_id='174631' and i.placement_id='201571'
AND i.user_id not in (select user_id from impressions i2 where i2.campaign_id='174631' and i2.placement_id='200420');
-- how many users does this placement have that no other placement does?
select count(distinct i.user_id)
from impressions i
where i.campaign_id='174631' and i.placement_id='201571'
AND i.user_id not in (select user_id from impressions i2 where i2.campaign_id='174631' and i2.placement_id<>'201571');
-- ordered placements with least unique people
select i.placement_id, count(distinct i.user_id)
from impressions i
where i.campaign_id='174631'
AND i.user_id not in (select user_id from impressions i2 where i2.campaign_id='174631' and i2.placement_id<>i.placement_id)
group by 1
order by 2 asc;