Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions sql/2024/javascript/bytes_by_3p.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ FROM (
IF(NET.HOST(url) IN (
SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2024-06-01' AND category != 'hosting'
), 'third party', 'first party') AS host,
SUM(cast(json_value(payload, '$.response.bodySize') AS INT64)) / 1024 AS kbytes
SUM(INT64(summary.respBodySize)) / 1024 AS kbytes
FROM
`httparchive.all.requests`
`httparchive.crawl.requests`

WHERE
date = '2024-06-01' AND
Expand Down
30 changes: 0 additions & 30 deletions sql/2024/javascript/fid.sql

This file was deleted.

35 changes: 10 additions & 25 deletions sql/2024/javascript/render_blocking_javascript_by_rank.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#standardSQL
# Percent of pages using render-blocking JavaScript.
CREATE TEMPORARY FUNCTION getRenderBlockingScripts(payload STRING)
CREATE TEMPORARY FUNCTION getRenderBlockingScripts(payload JSON)
RETURNS INT64
LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var renderBlockingJS = $._renderBlockingJS;
var renderBlockingJS = JSON.parse(payload)
return renderBlockingJS;
} catch (e) {
return 0;
Expand All @@ -15,28 +14,14 @@ try {
WITH render_blocking_scripts AS (
SELECT
client,
url,
page,
rank,
number_of_render_blocking_scripts
FROM (
SELECT
client,
page AS url,
getRenderBlockingScripts(payload) AS number_of_render_blocking_scripts
FROM
`httparchive.all.pages`
WHERE
date = '2024-06-01'
)
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url,
rank
FROM
`httparchive.summary_pages.2024_06_01_*`
)
USING (client, url)
getRenderBlockingScripts(payload['_renderBlockingJS']) AS number_of_render_blocking_scripts
FROM
`httparchive.crawl.pages`
WHERE
date = '2024-06-01' AND
is_root_page
)

SELECT
Expand All @@ -47,7 +32,7 @@ SELECT
COUNTIF(number_of_render_blocking_scripts > 0) / COUNT(0) AS pct_pages_with_render_blocking_scripts
FROM
render_blocking_scripts,
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
WHERE
rank <= rank_grouping
GROUP BY
Expand Down
38 changes: 9 additions & 29 deletions sql/2024/javascript/usage_of_parcel.sql
Original file line number Diff line number Diff line change
@@ -1,37 +1,17 @@
#standardSQL
# Percent of pages using parcel

WITH totals AS (
SELECT
_TABLE_SUFFIX AS client,
COUNT(DISTINCT url) AS total_pages
FROM
`httparchive.summary_pages.2024_06_01_*`
GROUP BY
client
),

parcel AS (
SELECT
_TABLE_SUFFIX AS client,
COUNT(DISTINCT url) AS parcel_pages
FROM
`httparchive.technologies.2024_06_01_*`
WHERE
app = 'parcel'
GROUP BY
client
)

SELECT
client,
parcel_pages,
total_pages,
parcel_pages / total_pages AS pct_parcel_pages
COUNTIF('parcel' IN UNNEST(technologies.technology)) AS parcel_pages,
COUNT(0) AS total_pages,
COUNTIF('parcel' IN UNNEST(technologies.technology)) / COUNT(0) AS pct_parcel_pages
FROM
totals
JOIN
parcel
USING (client)
`httparchive.crawl.pages`
WHERE
date = '2024-06-01' AND
is_root_page
GROUP BY
client
ORDER BY
client
62 changes: 9 additions & 53 deletions sql/2024/javascript/usage_of_parcel_by_rank.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,61 +2,17 @@
# Percent of pages using parcel grouped by rank
# usage_of_parcel_by_rank.sql

WITH parcel_pages AS (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2024_06_01_*`
WHERE
app = 'parcel'
),

rank_totals AS (
SELECT
_TABLE_SUFFIX AS client,
rank_grouping,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2024_06_01_*`,
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
WHERE
rank <= rank_grouping
GROUP BY
client,
rank_grouping
),

pages AS (
SELECT
_TABLE_SUFFIX AS client,
rank_grouping,
url AS page
FROM
`httparchive.summary_pages.2024_06_01_*`,
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
WHERE
rank <= rank_grouping
)

SELECT
client,
rank_grouping AS rank,
COUNT(DISTINCT parcel_pages.page) AS count_parcel_pages,
total,
COUNT(DISTINCT parcel_pages.page) / total AS pct_parcel_pages
COUNTIF('parcel' IN UNNEST(technologies.technology)) AS parcel_pages,
COUNT(0) AS total_pages,
COUNTIF('parcel' IN UNNEST(technologies.technology)) / COUNT(0) AS pct_parcel_pages
FROM
parcel_pages
LEFT OUTER JOIN
pages
USING (client, page)
JOIN
rank_totals
USING (client, rank_grouping)
`httparchive.crawl.pages`
WHERE
date = '2024-06-01' AND
is_root_page
GROUP BY
client,
total,
rank_grouping
client
ORDER BY
client,
rank_grouping
client
35 changes: 15 additions & 20 deletions sql/2024/javascript/usage_of_typescript_and_babel.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@
# Number of pages using TypeScript or Babel

# Returns a struct of four booleans (hasSourceMaps, isPublic, isBabel, isTypeScript) describing the page's JavaScript source maps
CREATE TEMPORARY FUNCTION getSourceMaps(payload STRING)
CREATE TEMPORARY FUNCTION getSourceMaps(javascript JSON)
RETURNS STRUCT<hasSourceMaps BOOL, isPublic BOOL, isBabel BOOL, isTypeScript BOOL>
LANGUAGE js AS '''
try {
const $ = JSON.parse(payload);
const javascript = JSON.parse($._javascript);

if (javascript && javascript.sourceMaps) {
const { sourceMaps } = javascript;

Expand All @@ -28,22 +25,20 @@ try {

SELECT
client,
COUNTIF(sourcemaps.isBabel = true) AS use_babel,
COUNTIF(sourcemaps.isTypeScript = true) AS use_typescript,
COUNT(0) AS total_pages_with_sourcemaps,
COUNTIF(sourcemaps.isBabel = true) / COUNT(0) AS pct_use_babel,
COUNTIF(sourcemaps.isTypeScript = true) / COUNT(0) AS pct_use_typescript
FROM (
SELECT
client,
page,
getSourceMaps(payload) AS sourcemaps
FROM
`httparchive.all.pages`
WHERE
date = '2024-06-01'
)
COUNTIF(getSourceMaps(custom_metrics.javascript).isBabel) AS use_babel,
COUNTIF(getSourceMaps(custom_metrics.javascript).isTypeScript) AS use_typescript,
COUNTIF(getSourceMaps(custom_metrics.javascript).isPublic) AS uses_sourcemaps,
COUNTIF(getSourceMaps(custom_metrics.javascript).isBabel) / COUNTIF(getSourceMaps(custom_metrics.javascript).isPublic) AS pct_source_maps_use_babel,
COUNTIF(getSourceMaps(custom_metrics.javascript).isTypeScript) / COUNTIF(getSourceMaps(custom_metrics.javascript).isPublic) AS pct_source_maps_use_typescript,
COUNT(0) AS total_pages,
COUNTIF(getSourceMaps(custom_metrics.javascript).isBabel) / COUNT(0) AS pct_use_babel,
COUNTIF(getSourceMaps(custom_metrics.javascript).isTypeScript) / COUNT(0) AS pct_use_typescript
FROM
`httparchive.crawl.pages`
WHERE
sourcemaps.isPublic = true
date = '2024-06-01' AND
is_root_page
GROUP BY
client
ORDER BY
client
54 changes: 19 additions & 35 deletions sql/2024/javascript/usage_of_typescript_and_babel_by_rank.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@
# Number of pages using TypeScript or Babel grouped by rank

# Returns a struct of four booleans (hasSourceMaps, isPublic, isBabel, isTypeScript) describing the page's JavaScript source maps
CREATE TEMPORARY FUNCTION getSourceMaps(payload STRING)
CREATE TEMPORARY FUNCTION getSourceMaps(javascript JSON)
RETURNS STRUCT<hasSourceMaps BOOL, isPublic BOOL, isBabel BOOL, isTypeScript BOOL>
LANGUAGE js AS '''
try {
const $ = JSON.parse(payload);
const javascript = JSON.parse($._javascript);

if (javascript && javascript.sourceMaps) {
const { sourceMaps } = javascript;
Expand All @@ -26,42 +24,28 @@ try {
}
''';

WITH pages AS (
SELECT
_TABLE_SUFFIX AS client,
rank_grouping,
url AS page
FROM
`httparchive.summary_pages.2024_06_01_*`,
UNNEST([1000, 10000, 100000, 1000000, 10000000]) AS rank_grouping
WHERE
rank <= rank_grouping
),

pages_sourcemaps AS (
SELECT
_TABLE_SUFFIX AS client,
url AS page,
getSourceMaps(payload) AS sourcemaps
FROM
`httparchive.pages.2024_06_01_*`
)

SELECT
client,
rank_grouping AS rank,
COUNTIF(sourcemaps.isBabel = true) AS use_babel,
COUNTIF(sourcemaps.isTypeScript = true) AS use_typescript,
COUNT(0) AS total_pages_with_sourcemaps,
COUNTIF(sourcemaps.isBabel = true) / COUNT(0) AS pct_use_babel,
COUNTIF(sourcemaps.isTypeScript = true) / COUNT(0) AS pct_use_typescript
rank_grouping,
CASE
WHEN rank_grouping = 100000000 THEN 'all'
ELSE FORMAT("%'d", rank_grouping)
END AS ranking,
COUNTIF(getSourceMaps(custom_metrics.javascript).isBabel) AS use_babel,
COUNTIF(getSourceMaps(custom_metrics.javascript).isTypeScript) AS use_typescript,
COUNTIF(getSourceMaps(custom_metrics.javascript).isPublic) AS uses_sourcemaps,
COUNTIF(getSourceMaps(custom_metrics.javascript).isBabel) / COUNTIF(getSourceMaps(custom_metrics.javascript).isPublic) AS pct_source_maps_use_babel,
COUNTIF(getSourceMaps(custom_metrics.javascript).isTypeScript) / COUNTIF(getSourceMaps(custom_metrics.javascript).isPublic) AS pct_source_maps_use_typescript,
COUNT(0) AS total_pages,
COUNTIF(getSourceMaps(custom_metrics.javascript).isBabel) / COUNT(0) AS pct_use_babel,
COUNTIF(getSourceMaps(custom_metrics.javascript).isTypeScript) / COUNT(0) AS pct_use_typescript
FROM
pages_sourcemaps
JOIN
pages
USING (client, page)
`httparchive.crawl.pages`,
UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
WHERE
sourcemaps.isPublic = true
date = '2024-06-01' AND
is_root_page AND
rank <= rank_grouping
GROUP BY
client,
rank_grouping
Expand Down
40 changes: 10 additions & 30 deletions sql/2024/javascript/usage_of_webpack.sql
Original file line number Diff line number Diff line change
@@ -1,37 +1,17 @@
#standardSQL
# Percent of pages using webpack

WITH totals AS (
SELECT
_TABLE_SUFFIX AS client,
COUNT(DISTINCT url) AS total_pages
FROM
`httparchive.summary_pages.2024_06_01_*`
GROUP BY
client
),

webpack AS (
SELECT
_TABLE_SUFFIX AS client,
COUNT(DISTINCT url) AS webpack_pages
FROM
`httparchive.technologies.2024_06_01_*`
WHERE
lower(app) = 'webpack'
GROUP BY
client
)
# Percent of pages using webpack, per client

SELECT
client,
webpack_pages,
total_pages,
webpack_pages / total_pages AS pct_webpack_pages
COUNTIF('Webpack' IN UNNEST(technologies.technology)) AS webpack_pages,
COUNT(0) AS total_pages,
COUNTIF('Webpack' IN UNNEST(technologies.technology)) / COUNT(0) AS pct_webpack_pages
FROM
totals
JOIN
webpack
USING (client)
`httparchive.crawl.pages`
WHERE
date = '2024-06-01' AND
is_root_page
GROUP BY
client
ORDER BY
client
Loading
Loading