forked from HTTPArchive/almanac.httparchive.org
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompressed_images_by_3p.sql
More file actions
81 lines (78 loc) · 1.64 KB
/
compressed_images_by_3p.sql
File metadata and controls
81 lines (78 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#standardSQL
# Compressed images (excluding SVG) by third parties
WITH requests AS (
SELECT
_TABLE_SUFFIX AS client,
pageid AS page,
url,
resp_content_encoding AS content_encoding,
type,
respBodySize AS size
FROM
`httparchive.summary_requests.2025_06_01_*`
WHERE
type = 'image' AND (
resp_content_encoding = 'gzip' OR
resp_content_encoding = 'br'
) AND NOT (
resp_content_type LIKE 'image/svg%' OR
ENDS_WITH(url, '.svg')
)
),
third_party AS (
SELECT
NET.HOST(domain) AS domain,
COUNT(DISTINCT page) AS page_usage
FROM
`httparchive.almanac.third_parties` tp
JOIN
requests r
ON NET.HOST(r.url) = NET.HOST(tp.domain)
WHERE
date = '2025-07-01' AND
category != 'hosting'
GROUP BY
domain
HAVING
page_usage >= 50
)
SELECT
client,
content_encoding,
domain,
size,
SUM(size) OVER (PARTITION BY client) AS total_size,
size / SUM(size) OVER (PARTITION BY client) AS pct_size,
num_requests,
total_requests,
pct_requests
FROM (
SELECT
client,
content_encoding,
domain,
COUNT(0) AS num_requests,
SUM(size) AS size,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total_requests,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_requests,
RANK() OVER (PARTITION BY client, type, content_encoding ORDER BY COUNT(0) DESC) AS domain_rank
FROM
requests
LEFT JOIN
third_party
ON
NET.HOST(requests.url) = NET.HOST(third_party.domain)
WHERE
domain IS NOT NULL
GROUP BY
client,
type,
content_encoding,
domain
)
WHERE
domain_rank <= 100
ORDER BY
client,
content_encoding,
size DESC