Skip to content

Commit a0b2a6d

Browse files
authored
Merge branch 'main' into patch-1
2 parents fb23400 + dc87982 commit a0b2a6d

File tree

71 files changed

+4000
-26
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+4000
-26
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#standardSQL
2+
# Anchor rel attribute usage
3+
# This query reports if a rel attribute value was ever used on a page, and calculates various statistics.
4+
5+
# Lists the rel attribute values recorded for the rendered anchors of a page.
#
# Input:  wpt_bodies - the wpt_bodies custom metric as JSON.
# Output: STRUCT whose `rel` array holds every key of
#         anchors.rendered.rel_attributes with a value greater than zero.
#         Arrays, non-objects and payloads without that path give an empty array.
CREATE TEMPORARY FUNCTION getRelStatsWptBodies(wpt_bodies JSON)
RETURNS STRUCT<
  rel ARRAY<STRING>
> LANGUAGE js AS '''
  var output = {rel: []};

  // Names of the entries in `counts` whose value is greater than zero.
  function positiveKeys(counts) {
    return Object.keys(counts).filter(function (name) {
      return counts[name] > 0;
    });
  }

  try {
    var looksLikeObject = typeof wpt_bodies === 'object' && !Array.isArray(wpt_bodies);
    if (!looksLikeObject) return output;

    // A null payload throws on the next line and falls through to the catch.
    var rendered = wpt_bodies.anchors && wpt_bodies.anchors.rendered;
    if (rendered && rendered.rel_attributes) {
      output.rel = positiveKeys(rendered.rel_attributes);
    }
  } catch (e) {
    // Best effort: a malformed payload keeps the empty default.
  }
  return output;
''';
29+
30+
# Step 1: one row per crawled page with its page-type label and the rel values
# extracted from the wpt_bodies custom metric.
WITH rel_stats_table AS (
  SELECT
    client,
    root_page,
    page,
    # Replace the boolean flag with a readable label; NULL gets its own label.
    CASE is_root_page
      WHEN TRUE THEN 'Homepage'
      WHEN FALSE THEN 'Secondarypage'
      ELSE 'No Assigned Page'
    END AS is_root_page,
    getRelStatsWptBodies(custom_metrics.wpt_bodies) AS wpt_bodies_info
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
)

# Step 2: one row per (client, page type, rel value).
# NOTE(review): `total` is the sum of the per-rel distinct page counts within a
# partition, and pages with an empty rel array are dropped by the UNNEST join,
# so `pct` is a share of that sum rather than of all pages. Confirm that this
# is the intended denominator.
SELECT
  client,
  is_root_page,
  rel,
  COUNT(DISTINCT page) AS sites,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS total,
  COUNT(*) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS pct
FROM
  rel_stats_table
CROSS JOIN
  UNNEST(wpt_bodies_info.rel) AS rel
GROUP BY
  client,
  is_root_page,
  rel
ORDER BY
  sites DESC,
  rel,
  client DESC;
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#standardSQL
2+
# Anchor same site occurrence stats
3+
# This query aims to highlight sites with few same-site links, like SPAs.
4+
5+
# Extracts link counters from the `anchors` section of the wpt_bodies metric.
#
# Input:  anchors - the wpt_bodies.anchors custom metric as JSON.
# Output: STRUCT of four counters read from anchors.rendered:
#           links_same_site       <- same_site
#           links_window_location <- same_page.dynamic.onclick_attributes.window_location
#           links_window_open     <- same_page.dynamic.onclick_attributes.window_open
#           links_href_javascript <- same_page.dynamic.href_javascript
#         Every counter defaults to 0 when its value is missing or falsy.
CREATE TEMPORARY FUNCTION getLinkDesciptionsWptBodies(anchors JSON)
RETURNS STRUCT<
  links_same_site INT64,
  links_window_location INT64,
  links_window_open INT64,
  links_href_javascript INT64
> LANGUAGE js AS '''
  var result = {
    links_same_site: 0,
    links_window_location: 0,
    links_window_open: 0,
    links_href_javascript: 0
  };
  try {
    if (Array.isArray(anchors) || typeof anchors !== 'object') return result;

    var rendered = anchors && anchors.rendered;
    if (!rendered) return result;

    // Resolve each nesting level on its own. Reading through a missing level
    // would throw, and the swallowed exception would leave the remaining
    // counters at 0 even when their values are present in the payload.
    var dynamic = (rendered.same_page && rendered.same_page.dynamic) || {};
    var onclick = dynamic.onclick_attributes || {};

    result.links_same_site = rendered.same_site || 0;
    result.links_window_location = onclick.window_location || 0;
    result.links_window_open = onclick.window_open || 0;
    result.links_href_javascript = dynamic.href_javascript || 0;
  } catch (e) {
    // Best effort: an unexpected payload keeps whatever was collected so far.
  }
  return result;
''';
33+
34+
# Step 1: one row per crawled page with its page-type label and the link
# counters extracted from wpt_bodies.anchors.
WITH same_links_info AS (
  SELECT
    client,
    root_page,
    page,
    # Replace the boolean flag with a readable label; NULL gets its own label.
    CASE is_root_page
      WHEN TRUE THEN 'Homepage'
      WHEN FALSE THEN 'Secondarypage'
      ELSE 'No Assigned Page'
    END AS is_root_page,
    getLinkDesciptionsWptBodies(custom_metrics.wpt_bodies.anchors) AS anchors_info
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
)

# Step 2: one row per (client, page type, same-site link count).
SELECT
  client,
  anchors_info.links_same_site AS links_same_site,
  is_root_page,
  # Distinct pages that have exactly this many same-site links.
  COUNT(DISTINCT page) AS sites,
  # Rows per distinct page within the group.
  SAFE_DIVIDE(COUNT(*), COUNT(DISTINCT page)) AS pct_links_same_site,
  AVG(anchors_info.links_window_location) AS avg_links_window_location,
  AVG(anchors_info.links_window_open) AS avg_links_window_open,
  AVG(anchors_info.links_href_javascript) AS avg_links_href_javascript,
  # Mean and maximum of the three JavaScript-driven link counters combined.
  AVG(
    anchors_info.links_window_location + anchors_info.links_window_open + anchors_info.links_href_javascript
  ) AS avg_links_any,
  MAX(
    anchors_info.links_window_location + anchors_info.links_window_open + anchors_info.links_href_javascript
  ) AS max_links_any,
  # Sum of `sites` over every row sharing the same client and page type.
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS total,
  # Share of this group's rows relative to that per-client, per-page-type total.
  COUNT(*) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS pct
FROM
  same_links_info
GROUP BY
  client,
  is_root_page,
  anchors_info.links_same_site
ORDER BY
  links_same_site ASC;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#standardSQL
2+
# Content Language
3+
4+
# Collects the content-language values declared through <meta http-equiv> nodes.
#
# Input:  almanac - the `almanac` custom metric as JSON.
# Output: ARRAY<STRING> holding one of:
#           - the lower-cased, trimmed `content` of every meta node whose
#             http-equiv is "content-language" (case/whitespace-insensitive);
#           - ["NO TAG"] when the payload is an object without such a node;
#           - ["NO PAYLOAD"] when the payload is null, an array or not an object;
#           - an "ERROR ..." entry when reading the payload throws.
CREATE TEMPORARY FUNCTION getContentLanguagesAlmanac(almanac JSON)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
  var result = [];
  try {
    // typeof null is 'object', so null has to be rejected explicitly;
    // otherwise a missing payload would be reported as "NO TAG".
    if (almanac === null || Array.isArray(almanac) || typeof almanac !== 'object') return ["NO PAYLOAD"];

    var metaNodes = almanac["meta-nodes"];
    if (metaNodes && metaNodes.nodes && metaNodes.nodes.filter) {
      result = metaNodes.nodes
        .filter(n => n["http-equiv"] && n["http-equiv"].toLowerCase().trim() == 'content-language' && n.content)
        .map(am => am.content.toLowerCase().trim());
    }

    if (result.length === 0) {
      result.push("NO TAG");
    }
  } catch (e) {
    // Surface the failure in the output: results show some issues with the
    // validity of the payload.
    result.push("ERROR "+e);
  }
  return result;
''';
22+
# Step 1: one row per crawled page with its page-type label and the
# content-language values extracted from the almanac custom metric.
WITH content_language_usage AS (
  SELECT
    client,
    root_page,
    page,
    # Replace the boolean flag with a readable label; NULL gets its own label.
    CASE is_root_page
      WHEN TRUE THEN 'Homepage'
      WHEN FALSE THEN 'Secondarypage'
      ELSE 'No Assigned Page'
    END AS is_root_page,
    getContentLanguagesAlmanac(custom_metrics.other.almanac) AS content_languages
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
)

# Step 2: one row per (client, page type, content-language value).
SELECT
  client,
  is_root_page,
  content_language,
  # Distinct pages declaring this value.
  COUNT(DISTINCT page) AS sites,
  # Sum of `sites` over every value within the same client and page type.
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS total,
  COUNT(*) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS pct
FROM
  content_language_usage
CROSS JOIN
  UNNEST(content_languages) AS content_language
GROUP BY
  client,
  is_root_page,
  content_language
ORDER BY
  sites DESC,
  client DESC;
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# TRUE when the `good` bucket makes up at least 0.75 of the three buckets.
# Returns NULL when any bucket is NULL, and also when the buckets sum to zero:
# SAFE_DIVIDE yields NULL there instead of raising a division-by-zero error
# that would abort the whole query.
CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75
);
4+
# TRUE when the three buckets add up to more than zero, i.e. some data was
# recorded for the metric. NULL when any bucket is NULL.
CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  0 < (good + needs_improvement + poor)
);
7+
# Share of origins passing each metric, per date and device.
SELECT
  date,
  device,

  # Origins with good LCP divided by origins with any LCP.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
  ) AS pct_good_lcp,

  # Origins with good FID divided by origins with any FID.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))
  ) AS pct_good_fid,

  # Origins with good CLS divided by origins with any CLS.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
  ) AS pct_good_cls,

  # Origins with good LCP, FID and CLS divided by origins with any LCP and CLS.
  # FID only disqualifies an origin when it is explicitly not good
  # (IS NOT FALSE lets a NULL result pass), and it is not part of the
  # denominator.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
      IS_GOOD(fast_fid, avg_fid, slow_fid) IS NOT FALSE AND
      IS_GOOD(small_cls, medium_cls, large_cls),
      origin,
      NULL
    )),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
      IS_NON_ZERO(small_cls, medium_cls, large_cls),
      origin,
      NULL
    ))
  ) AS pct_good_cwv
FROM
  `chrome-ux-report.materialized.device_summary`
WHERE
  date BETWEEN '2019-11-01' AND '2025-07-01' AND
  device IN ('desktop', 'phone')
GROUP BY
  date,
  device
ORDER BY
  date DESC
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#standardSQL
2+
-- Anchor same site occurrence stats
3+
4+
-- Extracts link counters from the `anchors` section of the wpt_bodies metric.
--
-- Input:  anchors - the wpt_bodies.anchors custom metric as JSON.
-- Output: STRUCT of four counters read from anchors.rendered (same_site and
--         the same_page.dynamic values). Each one is coerced with Number()
--         and falls back to 0 when missing, falsy or not numeric.
CREATE TEMPORARY FUNCTION getLinkDesciptions(anchors JSON)
RETURNS STRUCT<
  links_same_site INT64,
  links_window_location INT64,
  links_window_open INT64,
  links_href_javascript INT64
>
LANGUAGE js AS '''
  // Numeric value of `value`, or 0 when it is missing, falsy or NaN.
  function toCount(value) {
    return Number(value) || 0;
  }

  var counts = {
    links_same_site: 0,
    links_window_location: 0,
    links_window_open: 0,
    links_href_javascript: 0
  };

  try {
    if (typeof anchors !== 'object' || Array.isArray(anchors)) return counts;
    if (!anchors || !anchors.rendered) return counts;

    var rendered = anchors.rendered;

    // Walk the nesting one level at a time so a missing level yields zeros.
    var dynamic = {};
    if (rendered.same_page && rendered.same_page.dynamic) {
      dynamic = rendered.same_page.dynamic;
    }
    var onclick = dynamic.onclick_attributes || {};

    counts.links_same_site = toCount(rendered.same_site);
    counts.links_window_location = toCount(onclick.window_location);
    counts.links_window_open = toCount(onclick.window_open);
    counts.links_href_javascript = toCount(dynamic.href_javascript);
  } catch (e) {
    // Best effort: an unexpected payload keeps whatever was collected so far.
  }
  return counts;
''';
35+
36+
-- Step 1: one row per crawled page with its page-type label and the link
-- counters extracted from wpt_bodies.anchors.
WITH same_links_info AS (
  SELECT
    client,
    root_page,
    page,
    -- Anything that is not a root page (including NULL) counts as secondary.
    IF(is_root_page, 'Homepage', 'Secondarypage') AS is_root_page,
    getLinkDesciptions(custom_metrics.wpt_bodies.anchors) AS anchors_info
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
)

-- Step 2: one row per (client, page type, same-site link count).
SELECT
  client,
  anchors_info.links_same_site AS links_same_site,
  is_root_page,
  -- Distinct pages that have exactly this many same-site links.
  COUNT(DISTINCT page) AS sites,
  -- Rows per distinct page within the group.
  SAFE_DIVIDE(COUNT(*), COUNT(DISTINCT page)) AS pct_links_same_site,
  AVG(anchors_info.links_window_location) AS avg_links_window_location,
  AVG(anchors_info.links_window_open) AS avg_links_window_open,
  AVG(anchors_info.links_href_javascript) AS avg_links_href_javascript,
  -- Mean and maximum of the three JavaScript-driven link counters combined.
  AVG(
    anchors_info.links_window_location + anchors_info.links_window_open + anchors_info.links_href_javascript
  ) AS avg_links_any,
  MAX(
    anchors_info.links_window_location + anchors_info.links_window_open + anchors_info.links_href_javascript
  ) AS max_links_any,
  -- Sum of `sites` over every row sharing the same client and page type.
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS total,
  COUNT(*) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS pct
FROM
  same_links_info
GROUP BY
  client,
  is_root_page,
  anchors_info.links_same_site
ORDER BY
  links_same_site ASC;
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#standardSQL
2+
# hreflang header usage
3+
4+
# Returns all the data we need from _wpt_bodies
5+
# Reads the hreflang values announced through HTTP headers.
#
# Input:  hreflang - the wpt_bodies.hreflangs custom metric as JSON.
# Output: STRUCT whose `hreflangs` array is a copy of http_header.values.
#         It is empty when the payload is an array, not an object, lacks that
#         path, or when copying the values throws.
CREATE TEMPORARY FUNCTION getHreflangInfo(hreflang JSON)
RETURNS STRUCT<
  hreflangs ARRAY<STRING>
> LANGUAGE js AS '''
  var info = {hreflangs: []};

  try {
    if (typeof hreflang !== 'object' || Array.isArray(hreflang)) return info;

    var header = hreflang && hreflang.http_header;
    if (header && header.values) {
      // Copy element by element; returning the array as-is seemed to cause a
      // coercion issue.
      info.hreflangs = header.values.map(function (value) {
        return value;
      });
    }
  } catch (e) {
    // Best effort: a malformed payload keeps the empty default.
  }
  return info;
''';
23+
24+
# Step 1: one row per crawled page with its page-type label and the hreflang
# values taken from the HTTP headers.
WITH hreflang_usage AS (
  SELECT
    client,
    root_page,
    page,
    # Replace the boolean flag with a readable label; NULL gets its own label.
    CASE is_root_page
      WHEN TRUE THEN 'Homepage'
      WHEN FALSE THEN 'Secondarypage'
      ELSE 'No Assigned Page'
    END AS is_root_page,
    getHreflangInfo(custom_metrics.wpt_bodies.hreflangs) AS hreflang_info
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
)

# Step 2: one row per (hreflang value, client, page type).
SELECT
  client,
  is_root_page,
  # Normalized and case-folded form of the hreflang value.
  NORMALIZE_AND_CASEFOLD(hreflang) AS hreflang,
  COUNT(DISTINCT page) AS sites,
  # Sum of `sites` over every value within the same client and page type.
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS total,
  COUNT(*) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client, is_root_page) AS pct
FROM
  hreflang_usage
CROSS JOIN
  UNNEST(hreflang_info.hreflangs) AS hreflang
GROUP BY
  hreflang,
  client,
  is_root_page
ORDER BY
  sites DESC,
  client DESC;

0 commit comments

Comments
 (0)