Skip to content
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
dae7f2a
Initialized with queries from 2024
jazlan01 Aug 27, 2025
771fd5f
Updated query for 2025 spec
jazlan01 Aug 27, 2025
0f0ed90
Updated date
jazlan01 Aug 27, 2025
a9da756
Temp changes to dates, 2025-06 instead of 2025-07
jazlan01 Aug 28, 2025
ee7b00e
Updated median number of third parties by rank query
jazlan01 Aug 28, 2025
bde3438
Updated top 100 third parties by number of websites query
jazlan01 Aug 28, 2025
a0c17f7
Updated third party domains per page by rank
jazlan01 Aug 28, 2025
3c8519d
Updated percent of third parties by content type
jazlan01 Aug 28, 2025
8976952
Added prevalence of consent signals in third party requests
jazlan01 Aug 28, 2025
9a9f908
Updated queries for 2025
jazlan01 Aug 28, 2025
beff2c6
Updated third parties table date
jazlan01 Sep 15, 2025
87d7c8f
Updated third-parties.md with the content
jazlan01 Dec 27, 2025
1e302cc
Merge branch 'main' into thirdparties-sql-2025
jazlan01 Dec 31, 2025
374bad3
Merge branch 'main' into thirdparties-sql-2025
jazlan01 Jan 1, 2026
8139992
Enable chapter
tunetheweb Jan 3, 2026
59fef6d
Technical edit
tunetheweb Jan 3, 2026
d30ed0d
Lint SQL
tunetheweb Jan 3, 2026
c859d0b
Retake image
tunetheweb Jan 3, 2026
a772fd7
tunetheweb contributions
tunetheweb Jan 3, 2026
3f552ed
Linting fixes
tunetheweb Jan 3, 2026
b147e23
Linting
tunetheweb Jan 3, 2026
a7d2d17
Tweaks
tunetheweb Jan 3, 2026
1e346c4
Apply suggestions from code review
tunetheweb Jan 3, 2026
ae9218c
Update src/content/en/2025/third-parties.md
tunetheweb Jan 3, 2026
4f9ecd4
Update contributors
tunetheweb Jan 3, 2026
656d60d
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 3, 2026
1999788
Clean up images
tunetheweb Jan 3, 2026
4e9ca4f
Merge remote-tracking branch 'upstream/main' into thirdparties-sql-2025
tunetheweb Jan 6, 2026
c4ff7f8
Merge remote-tracking branch 'upstream/main' into thirdparties-sql-2025
tunetheweb Jan 6, 2026
f2a0bbb
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 9, 2026
e12b85c
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 9, 2026
6c025be
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 9, 2026
537efc5
Merge branch 'main' into thirdparties-sql-2025
jazlan01 Jan 10, 2026
abdde90
Most recently used queries
jazlan01 Jan 10, 2026
e5d36d4
Merge branch 'thirdparties-sql-2025' of github.com:jazlan01/almanac.h…
jazlan01 Jan 10, 2026
e5592e5
Updated bio for Jazlan
jazlan01 Jan 10, 2026
cfe46a8
Added featured stats, sql file names
jazlan01 Jan 10, 2026
62bb93b
Fixed linter errors
jazlan01 Jan 11, 2026
0cd4ce5
text update
abubakaraziz Jan 11, 2026
217f2ea
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 11, 2026
2bad4b3
Technical edit
tunetheweb Jan 11, 2026
5c202c1
Cleaned up SQL
jazlan01 Jan 11, 2026
750602c
Fixed sql file name for third party providers
jazlan01 Jan 11, 2026
a794c8b
Added newline at the end of the file
jazlan01 Jan 11, 2026
8081630
Update sql/2025/third-parties/number_of_third_parties_by_rank_and_cat…
tunetheweb Jan 11, 2026
910fd55
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 11, 2026
1187992
Update sql/2025/third-parties/number_of_third_parties_by_rank_and_cat…
tunetheweb Jan 11, 2026
2781079
Merge branch 'main' into thirdparties-sql-2025
jazlan01 Jan 11, 2026
1ba5085
Merge branch 'main' into thirdparties-sql-2025
tunetheweb Jan 11, 2026
9d6616b
Incorporate feedback in text
abubakaraziz Jan 11, 2026
0838649
Update src/content/en/2025/third-parties.md with link to 2024 chapter
jazlan01 Jan 11, 2026
46161b7
Merge branch 'main' into thirdparties-sql-2025
jazlan01 Jan 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions sql/2025/third-parties/a11y_overall_tech_usage_by_rank.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#standardSQL
# Overall A11Y technology usage by domain rank
#
# Output, per client and cumulative rank bucket (rank <= 1K, 10K, 100K, 1M, 10M):
#   freq  - distinct page URLs with at least one 'Accessibility' technology row
#   total - number of pages in the bucket
#   pct   - freq as a percentage of total

WITH a11y_pages AS (
  -- One row per 'Accessibility' technology detection
  SELECT
    _TABLE_SUFFIX AS client,
    url
  FROM `httparchive.technologies.2025_06_01_*`
  WHERE category = 'Accessibility'
),

ranked_pages AS (
  -- Each page is repeated once for every rank bucket it falls into
  SELECT
    _TABLE_SUFFIX AS client,
    url,
    rank_grouping
  FROM `httparchive.summary_pages.2025_06_01_*`
  CROSS JOIN UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
  WHERE rank <= rank_grouping
),

bucket_totals AS (
  -- Denominator: number of pages per client and rank bucket
  SELECT
    client,
    rank_grouping,
    COUNT(0) AS total
  FROM ranked_pages
  GROUP BY
    client,
    rank_grouping
)

SELECT
  client,
  rank_grouping AS rank,
  COUNT(DISTINCT url) AS freq,
  total,
  (COUNT(DISTINCT url) / total) * 100 AS pct
FROM a11y_pages
INNER JOIN ranked_pages
USING (client, url)
INNER JOIN bucket_totals
USING (client, rank_grouping)
GROUP BY
  client,
  rank_grouping,
  total
ORDER BY
  client,
  rank
35 changes: 35 additions & 0 deletions sql/2025/third-parties/a11y_technology_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#standardSQL
# A11Y technology usage
#
# Output, per client:
#   freq  - distinct page URLs with at least one 'Accessibility' technology row
#   total - number of rows in the summary_pages table for that client
#   pct   - freq as a percentage of total

WITH a11y_page_counts AS (
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(DISTINCT url) AS freq
  FROM `httparchive.technologies.2025_06_01_*`
  WHERE category = 'Accessibility'
  GROUP BY client
),

page_totals AS (
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total
  FROM `httparchive.summary_pages.2025_06_01_*`
  GROUP BY client
)

SELECT
  client,
  freq,
  total,
  (freq / total) * 100 AS pct
FROM a11y_page_counts
INNER JOIN page_totals
USING (client)
65 changes: 65 additions & 0 deletions sql/2025/third-parties/a11y_technology_usage_by_rank.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#standardSQL
# A11Y technology usage by domain rank
#
# Output, per client, cumulative rank bucket (rank <= 1K, 10K, 100K, 1M, 10M)
# and accessibility app:
#   freq  - distinct page URLs on which the app was detected
#   total - number of pages in the bucket
#   pct   - freq as a percentage of total

WITH a11y_technologies AS (
  -- One row per 'Accessibility' technology detection
  SELECT
    _TABLE_SUFFIX AS client,
    app,
    url
  FROM
    `httparchive.technologies.2025_06_01_*`
  WHERE
    category = 'Accessibility'
),

pages AS (
  -- Each page is repeated once for every rank bucket it falls into
  SELECT
    _TABLE_SUFFIX AS client,
    url,
    rank_grouping
  FROM
    `httparchive.summary_pages.2025_06_01_*`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
  WHERE
    rank <= rank_grouping
),

rank_totals AS (
  -- Denominator: number of pages per client and rank bucket
  SELECT
    client,
    rank_grouping,
    COUNT(0) AS total
  FROM
    pages
  GROUP BY
    client,
    rank_grouping
)

SELECT
  client,
  rank_grouping AS rank,
  app,
  -- Count pages rather than technology rows so that a repeated (url, app) row
  -- cannot inflate the share; this matches the overall-usage-by-rank query.
  COUNT(DISTINCT url) AS freq,
  total,
  (COUNT(DISTINCT url) / total) * 100 AS pct
FROM
  a11y_technologies
-- Inner join: rows without a rank bucket could never match rank_totals anyway
INNER JOIN
  pages
USING (client, url)
INNER JOIN
  rank_totals
USING (client, rank_grouping)
GROUP BY
  rank_grouping,
  total,
  client,
  app
ORDER BY
  client,
  rank,
  pct DESC
81 changes: 81 additions & 0 deletions sql/2025/third-parties/compressed_images_by_3p.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#standardSQL
# Compressed images (excluding SVG) by third parties
#
# For each client and content encoding, lists the 100 third-party hosts that
# serve the most gzip/brotli-encoded non-SVG image requests, with each host's
# request count and response bytes and its share of the per-client totals.
# Both shares use totals taken over every qualifying third-party host, i.e.
# before the top-100 cut-off is applied.
#
# NOTE(review): requests are read from the 2025_06_01 crawl while the
# third_parties lookup is filtered to date '2025-07-01' - confirm the two
# dates are meant to differ.

WITH requests AS (
  -- gzip- or brotli-encoded image responses that are not SVG
  SELECT
    _TABLE_SUFFIX AS client,
    pageid AS page,
    url,
    resp_content_encoding AS content_encoding,
    type,
    respBodySize AS size
  FROM
    `httparchive.summary_requests.2025_06_01_*`
  WHERE
    type = 'image' AND
    resp_content_encoding IN ('gzip', 'br') AND
    NOT (
      resp_content_type LIKE 'image/svg%' OR
      ENDS_WITH(url, '.svg')
    )
),

third_party AS (
  -- Non-hosting third-party hosts seen on at least 50 distinct pages
  SELECT
    NET.HOST(tp.domain) AS domain,
    COUNT(DISTINCT r.page) AS page_usage
  FROM
    `httparchive.almanac.third_parties` AS tp
  INNER JOIN
    requests AS r
  ON NET.HOST(r.url) = NET.HOST(tp.domain)
  WHERE
    tp.date = '2025-07-01' AND
    tp.category != 'hosting'
  GROUP BY
    domain
  HAVING
    page_usage >= 50
),

domain_stats AS (
  -- Per client / encoding / host aggregates. All window totals are taken here,
  -- before the top-100 filter, so pct_requests and pct_size share a denominator scope.
  SELECT
    client,
    content_encoding,
    domain,
    COUNT(0) AS num_requests,
    SUM(size) AS size,
    SUM(COUNT(0)) OVER (PARTITION BY client) AS total_requests,
    COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_requests,
    SUM(SUM(size)) OVER (PARTITION BY client) AS total_size,
    -- SAFE_DIVIDE yields NULL instead of an error if a client's total bytes are 0
    SAFE_DIVIDE(SUM(size), SUM(SUM(size)) OVER (PARTITION BY client)) AS pct_size,
    RANK() OVER (PARTITION BY client, type, content_encoding ORDER BY COUNT(0) DESC) AS domain_rank
  FROM
    requests
  -- Only requests served by a known third party are wanted, so an inner join
  -- replaces the former LEFT JOIN + "domain IS NOT NULL" filter.
  INNER JOIN
    third_party
  ON
    -- third_party.domain is already a bare host
    NET.HOST(requests.url) = third_party.domain
  GROUP BY
    client,
    type,
    content_encoding,
    domain
)

SELECT
  client,
  content_encoding,
  domain,
  size,
  total_size,
  pct_size,
  num_requests,
  total_requests,
  pct_requests
FROM
  domain_stats
WHERE
  domain_rank <= 100
ORDER BY
  client,
  content_encoding,
  size DESC
90 changes: 90 additions & 0 deletions sql/2025/third-parties/consent_signal_basic_analysis.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#standardSQL
# Basic consent signal analysis (simplified version to ensure data returns)
#
# Per client, over third-party requests made by pages with rank <= 50000 whose
# URL carries at least one consent-signal query parameter: counts of requests,
# pages and request domains, a breakdown by signal type (as counts and as a
# fraction of those requests), and how many of the requests have redirects.

WITH top_pages AS (
  -- Pages from the top 50K sites
  SELECT
    client,
    page
  FROM `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    rank <= 50000
),

-- Third-party requests of those pages, flagged per consent signal (no redirect filtering)
third_party_requests AS (
  SELECT
    r.client,
    r.page,
    NET.REG_DOMAIN(r.url) AS url_domain,

    -- Consent signals found in the query string
    REGEXP_CONTAINS(r.url, r'[?&]us_privacy=') AS has_usp_standard,
    REGEXP_CONTAINS(r.url, r'[?&](ccpa|usp_consent|uspString|uspConsent|ccpa_consent|usp|usprivacy|ccpaconsent|usp_string)=') AS has_usp_nonstandard,
    REGEXP_CONTAINS(r.url, r'[?&](gdpr|gdpr_consent|gdpr_pd)=') AS has_tcf_standard,
    REGEXP_CONTAINS(r.url, r'[?&](gpp|gpp_sid)=') AS has_gpp_standard,

    -- TRUE when the summary holds a redirects entry that is not an empty array
    JSON_EXTRACT(r.summary, '$.redirects') IS NOT NULL AND
    TO_JSON_STRING(JSON_EXTRACT(r.summary, '$.redirects')) != '[]' AS has_redirects
  FROM `httparchive.crawl.requests` AS r
  INNER JOIN top_pages AS p
  ON
    r.client = p.client AND
    r.page = p.page
  WHERE
    r.date = '2025-07-01' AND
    NET.REG_DOMAIN(r.page) != NET.REG_DOMAIN(r.url) -- Third-party only
),

-- Keep only requests carrying at least one consent signal
consent_requests AS (
  SELECT
    client,
    page,
    url_domain,
    has_usp_standard,
    has_usp_nonstandard,
    has_tcf_standard,
    has_gpp_standard,
    has_redirects
  FROM third_party_requests
  WHERE
    has_usp_standard OR
    has_usp_nonstandard OR
    has_tcf_standard OR
    has_gpp_standard
)

SELECT
  client,

  -- Overall counts
  COUNT(0) AS total_requests_with_consent_signals,
  COUNT(DISTINCT page) AS total_pages_with_consent_signals,
  COUNT(DISTINCT url_domain) AS total_domains_with_consent_signals,

  -- Signal type breakdown
  COUNTIF(has_usp_standard) AS usp_standard_requests,
  COUNTIF(has_usp_nonstandard) AS usp_nonstandard_requests,
  COUNTIF(has_tcf_standard) AS tcf_standard_requests,
  COUNTIF(has_gpp_standard) AS gpp_standard_requests,

  -- Fraction of consent-signal requests per signal type
  COUNTIF(has_usp_standard) / COUNT(0) AS pct_usp_standard,
  COUNTIF(has_usp_nonstandard) / COUNT(0) AS pct_usp_nonstandard,
  COUNTIF(has_tcf_standard) / COUNT(0) AS pct_tcf_standard,
  COUNTIF(has_gpp_standard) / COUNT(0) AS pct_gpp_standard,

  -- Redirect availability
  COUNTIF(has_redirects) AS requests_with_redirects,
  COUNTIF(has_redirects) / COUNT(0) AS pct_requests_with_redirects

FROM consent_requests
GROUP BY client
ORDER BY client
Loading