Skip to content

Commit cca0305

Browse files
authored
Fixed mismatch in visits and unique visits on stats page (#22588)
ref https://linear.app/ghost/issue/ANAL-96 - fixed mismatch in visits and unique visits - moved away from sessions-based materialized views (still hanging around) - bounce rate needs revisions. This doesn't quite fix everything, as bounce rate still looks off. It's much better than it was, and I'll follow up tomorrow/next week with the rest. We also need to clean up or consolidate the views, which I need to think about more before getting rid of what we have.
1 parent ea4d5d6 commit cca0305

20 files changed

+799
-416
lines changed

ghost/admin/app/utils/stats.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import moment from 'moment-timezone';
22

3-
export const TB_VERSION = 1;
3+
export const TB_VERSION = 3;
44

55
export const RANGE_OPTIONS = [
66
{name: 'Last 24 hours', value: 1},
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Data Source created from Pipe 'mv_hits'
2+
3+
SCHEMA >
4+
`site_uuid` LowCardinality(String),
5+
`timestamp` DateTime,
6+
`action` LowCardinality(String),
7+
`version` LowCardinality(String),
8+
`session_id` String,
9+
`member_uuid` String,
10+
`member_status` String,
11+
`post_uuid` String,
12+
`post_type` String,
13+
`location` String,
14+
`source` String,
15+
`pathname` String,
16+
`href` String,
17+
`device` String,
18+
`os` String,
19+
`browser` String
20+
21+
ENGINE "MergeTree"
22+
ENGINE_PARTITION_KEY "toYYYYMM(timestamp)"
23+
ENGINE_SORTING_KEY "site_uuid, timestamp, session_id"

ghost/web-analytics/datasources/_mv_pages.datasource

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
VERSION 1
2-
1+
VERSION 3
32
TAGS "v0"
43

54
SCHEMA >
@@ -17,6 +16,6 @@ SCHEMA >
1716
`visits` AggregateFunction(uniq, String),
1817
`pageviews` AggregateFunction(count)
1918

20-
ENGINE AggregatingMergeTree
21-
ENGINE_PARTITION_KEY toYYYYMM(date)
22-
ENGINE_SORTING_KEY site_uuid, date, device, os, browser, location, source, pathname, post_type, post_uuid
19+
ENGINE "AggregatingMergeTree"
20+
ENGINE_PARTITION_KEY "toYYYYMM(date)"
21+
ENGINE_SORTING_KEY "site_uuid, date, device, os, browser, location, source, pathname, post_type, post_uuid"

ghost/web-analytics/datasources/_mv_sessions.datasource

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
VERSION 1
2-
1+
VERSION 3
32
TAGS "v0"
43

54
SCHEMA >
@@ -19,6 +18,6 @@ SCHEMA >
1918
`latest_view` SimpleAggregateFunction(max, DateTime),
2019
`pageviews` AggregateFunction(count)
2120

22-
ENGINE AggregatingMergeTree
23-
ENGINE_PARTITION_KEY toYYYYMM(date)
24-
ENGINE_SORTING_KEY site_uuid, date, session_id
21+
ENGINE "AggregatingMergeTree"
22+
ENGINE_PARTITION_KEY "toYYYYMM(date)"
23+
ENGINE_SORTING_KEY "site_uuid, date, session_id"

ghost/web-analytics/datasources/_mv_sources.datasource

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
VERSION 1
2-
1+
VERSION 3
32
TAGS "v0"
43

54
SCHEMA >
@@ -18,6 +17,6 @@ SCHEMA >
1817
`post_type` SimpleAggregateFunction(any, String),
1918
`post_uuid` SimpleAggregateFunction(any, String)
2019

21-
ENGINE AggregatingMergeTree
22-
ENGINE_PARTITION_KEY toYYYYMM(date)
23-
ENGINE_SORTING_KEY site_uuid, date, session_id, device, os, browser, location, source, pathname
20+
ENGINE "AggregatingMergeTree"
21+
ENGINE_PARTITION_KEY "toYYYYMM(date)"
22+
ENGINE_SORTING_KEY "site_uuid, date, session_id, device, os, browser, location, source, pathname"

ghost/web-analytics/datasources/fixtures/analytics_events.ndjson

Lines changed: 31 additions & 31 deletions
Large diffs are not rendered by default.

ghost/web-analytics/pipes/_hits.incl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION 1
1+
VERSION 3
22

33
TAGS "v0"
44

ghost/web-analytics/pipes/_parsed_hits.incl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION 1
1+
VERSION 3
22

33
TAGS "v0"
44

Lines changed: 112 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -1,172 +1,135 @@
1-
VERSION 1
2-
1+
VERSION 3
32
TAGS "v0"
43

5-
INCLUDE "_parsed_hits.incl"
6-
74
NODE timeseries
85
SQL >
96

107
%
11-
{% set _single_day = defined(date_from) and day_diff(date_from, date_to) == 0 %}
12-
with
13-
{% if defined(date_from) %}
14-
toStartOfDay(
15-
toDate(
16-
{{
17-
Date(
18-
date_from,
19-
description="Starting day for filtering a date range",
20-
required=False,
8+
{% set _single_day = defined(date_from) and day_diff(date_from, date_to) == 0 %}
9+
with
10+
{% if defined(date_from) %}
11+
toStartOfDay(
12+
toDate(
13+
{{
14+
Date(
15+
date_from,
16+
description="Starting day for filtering a date range",
17+
required=False,
18+
)
19+
}}
20+
)
21+
) as start,
22+
{% else %} toStartOfDay(timestampAdd(today(), interval -7 day)) as start,
23+
{% end %}
24+
{% if defined(date_to) %}
25+
toStartOfDay(
26+
toDate(
27+
{{
28+
Date(
29+
date_to,
30+
description="Finishing day for filtering a date range",
31+
required=False,
32+
)
33+
}}
34+
)
35+
) as end
36+
{% else %} toStartOfDay(today()) as end
37+
{% end %}
38+
{% if _single_day %}
39+
select
40+
arrayJoin(
41+
arrayMap(
42+
x -> toDateTime(x),
43+
range(
44+
toUInt32(toDateTime(start)), toUInt32(timestampAdd(end, interval 1 day)), 3600
45+
)
2146
)
22-
}}
23-
)
24-
) as start,
25-
{% else %} toStartOfDay(timestampAdd(today(), interval -7 day)) as start,
26-
{% end %}
27-
{% if defined(date_to) %}
28-
toStartOfDay(
29-
toDate(
30-
{{
31-
Date(
32-
date_to,
33-
description="Finishing day for filtering a date range",
34-
required=False,
47+
) as date
48+
{% else %}
49+
select
50+
arrayJoin(
51+
arrayMap(
52+
x -> toDate(x),
53+
range(toUInt32(start), toUInt32(timestampAdd(end, interval 1 day)), 24 * 3600)
3554
)
36-
}}
37-
)
38-
) as end
39-
{% else %} toStartOfDay(today()) as end
40-
{% end %}
41-
{% if _single_day %}
42-
select
43-
arrayJoin(
44-
arrayMap(
45-
x -> toDateTime(x),
46-
range(
47-
toUInt32(toDateTime(start)), toUInt32(timestampAdd(end, interval 1 day)), 3600
48-
)
49-
)
50-
) as date
51-
{% else %}
52-
select
53-
arrayJoin(
54-
arrayMap(
55-
x -> toDate(x),
56-
range(toUInt32(start), toUInt32(timestampAdd(end, interval 1 day)), 24 * 3600)
57-
)
58-
) as date
59-
{% end %}
55+
) as date
56+
{% end %}
6057

6158

6259

6360
NODE pageviews
6461
SQL >
6562

6663
%
67-
{% if defined(date_from) and day_diff(date_from, date_to) == 0 %}
68-
select
69-
site_uuid,
70-
toStartOfHour(timestamp) as date,
71-
session_id,
72-
member_status,
73-
case
74-
when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
75-
then 'bot'
76-
when match(user_agent, 'android')
77-
then 'mobile-android'
78-
when match(user_agent, 'ipad|iphone|ipod')
79-
then 'mobile-ios'
80-
else 'desktop'
81-
END as device,
82-
case
83-
when match(user_agent, 'firefox')
84-
then 'firefox'
85-
when match(user_agent, 'chrome|crios')
86-
then 'chrome'
87-
when match(user_agent, 'opera')
88-
then 'opera'
89-
when match(user_agent, 'msie|trident')
90-
then 'ie'
91-
when match(user_agent, 'iphone|ipad|safari')
92-
then 'safari'
93-
else 'Unknown'
94-
END as browser,
95-
location,
96-
domainWithoutWWW(referrer) as source,
97-
pathname,
98-
case
99-
when match(user_agent, 'windows')
100-
then 'windows'
101-
when match(user_agent, 'mac')
102-
then 'macos'
103-
when match(user_agent, 'linux')
104-
then 'linux'
105-
when match(user_agent, 'android')
106-
then 'android'
107-
when match(user_agent, 'iphone|ipad|ipod')
108-
then 'ios'
109-
else 'Unknown'
110-
END as os,
111-
uniq(session_id) as visits,
112-
count() as pageviews,
113-
case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce,
114-
max(timestamp) as latest_view_aux,
115-
min(timestamp) as first_view_aux
116-
from parsed_hits
117-
where toDate(timestamp) = {{ Date(date_from) }}
118-
group by toStartOfHour(timestamp), session_id, site_uuid, member_status, device, browser, location, source, pathname, os
119-
{% else %}
120-
select
121-
site_uuid,
122-
date,
123-
member_status,
124-
device,
125-
browser,
126-
location,
127-
source,
128-
pathname,
129-
os,
130-
session_id,
131-
uniq(session_id) as visits,
132-
countMerge(pageviews) as pageviews,
133-
case when min(first_view) = max(latest_view) then 1 else 0 end as is_bounce,
134-
max(latest_view) as latest_view_aux,
135-
min(first_view) as first_view_aux
136-
from _mv_sessions
137-
where
138-
{% if defined(date_from) %} date >= {{ Date(date_from) }}
139-
{% else %} date >= timestampAdd(today(), interval -7 day)
140-
{% end %}
141-
{% if defined(date_to) %} and date <= {{ Date(date_to) }}
142-
{% else %} and date <= today()
64+
{% if defined(date_from) and day_diff(date_from, date_to) == 0 %}
65+
select
66+
site_uuid,
67+
toStartOfHour(timestamp) as date,
68+
session_id,
69+
member_status,
70+
device,
71+
browser,
72+
location,
73+
source,
74+
pathname,
75+
os,
76+
count() over (partition by session_id) as hits_per_session,
77+
count() as pageviews,
78+
max(timestamp) as latest_view_aux,
79+
min(timestamp) as first_view_aux
80+
from mv_hits
81+
where toDate(timestamp) = {{ Date(date_from) }}
82+
group by toStartOfHour(timestamp), session_id, site_uuid, member_status, device, browser, location, source, pathname, os
83+
{% else %}
84+
select
85+
site_uuid,
86+
toDate(timestamp) as date,
87+
member_status,
88+
device,
89+
browser,
90+
location,
91+
source,
92+
pathname,
93+
os,
94+
session_id,
95+
count() over (partition by session_id) as hits_per_session,
96+
count() as pageviews,
97+
max(timestamp) as latest_view_aux,
98+
min(timestamp) as first_view_aux
99+
from mv_hits
100+
where
101+
{% if defined(date_from) %} toDate(timestamp) >= {{ Date(date_from) }}
102+
{% else %} toDate(timestamp) >= timestampAdd(today(), interval -7 day)
103+
{% end %}
104+
{% if defined(date_to) %} and toDate(timestamp) <= {{ Date(date_to) }}
105+
{% else %} and toDate(timestamp) <= today()
106+
{% end %}
107+
group by date, session_id, site_uuid, member_status, device, browser, location, source, pathname, os
143108
{% end %}
144-
group by date, session_id, site_uuid, member_status, device, browser, location, source, pathname, os
145-
{% end %}
146109

147110

148111

149112
NODE data
150113
SQL >
151114

152115
%
153-
select
154-
date,
155-
uniq(session_id) as visits,
156-
sum(pageviews) as pageviews,
157-
sum(case when latest_view_aux = first_view_aux then 1 else 0 end) / visits as bounce_rate,
158-
avg(latest_view_aux - first_view_aux) as avg_session_sec
159-
from pageviews
160-
where
161-
site_uuid = {{String(site_uuid, 'mock_site_uuid', description="Tenant ID", required=True)}}
162-
{% if defined(member_status) %} and member_status IN {{ Array(member_status, "'undefined', 'free', 'paid'", description="Member status to filter on", required=False) }} {% end %}
163-
{% if defined(device) %} and device = {{ String(device, description="Device to filter on", required=False) }} {% end %}
164-
{% if defined(browser) %} and browser = {{ String(browser, description="Browser to filter on", required=False) }} {% end %}
165-
{% if defined(os) %} and os = {{ String(os, description="Operating system to filter on", required=False) }} {% end %}
166-
{% if defined(source) %} and source = {{ String(source, description="Source to filter on", required=False) }} {% end %}
167-
{% if defined(location) %} and location = {{ String(location, description="Location to filter on", required=False) }} {% end %}
168-
{% if defined(pathname) %} and pathname = {{ String(pathname, description="Pathname to filter on", required=False) }} {% end %}
169-
group by date
116+
select
117+
date,
118+
uniq(session_id) as visits,
119+
sum(pageviews) as pageviews,
120+
sum(case when hits_per_session = 1 then 1 else 0 end) / nullIf(uniq(session_id), 0) as bounce_rate,
121+
avg(latest_view_aux - first_view_aux) as avg_session_sec
122+
from pageviews
123+
where
124+
site_uuid = {{String(site_uuid, 'mock_site_uuid', description="Tenant ID", required=False)}}
125+
{% if defined(member_status) %} and member_status IN {{ Array(member_status, "'undefined', 'free', 'paid'", description="Member status to filter on", required=False) }} {% end %}
126+
{% if defined(device) %} and device = {{ String(device, description="Device to filter on", required=False) }} {% end %}
127+
{% if defined(browser) %} and browser = {{ String(browser, description="Browser to filter on", required=False) }} {% end %}
128+
{% if defined(os) %} and os = {{ String(os, description="Operating system to filter on", required=False) }} {% end %}
129+
{% if defined(source) %} and source = {{ String(source, description="Source to filter on", required=False) }} {% end %}
130+
{% if defined(location) %} and location = {{ String(location, description="Location to filter on", required=False) }} {% end %}
131+
{% if defined(pathname) %} and pathname = {{ String(pathname, description="Pathname to filter on", required=False) }} {% end %}
132+
group by date
170133

171134

172135

@@ -175,4 +138,6 @@ SQL >
175138

176139
select a.date, b.visits, b.pageviews, b.bounce_rate, b.avg_session_sec
177140
from timeseries a
178-
left join data b using date
141+
left join data b using date
142+
143+

0 commit comments

Comments
 (0)