Skip to content

Commit 5230702

Browse files
authored
Merge pull request #209 from HTTPArchive/upgrade-to-new-datamodel
Upgrade to new datamodel
2 parents 30201f7 + 477d54f commit 5230702

File tree

129 files changed

+1133
-1429
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

129 files changed

+1133
-1429
lines changed

sql/.sqlfluff

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,18 @@ templater = jinja
1111
## Comma separated list of rules to check, or None for all
1212
rules = None
1313
## Comma separated list of rules to exclude, or None
14-
exclude_rules = AL01,AL04,AL07,AL09,AM03,AM05,CP02,CP03,CV02,LT05,LT09,RF01,RF02,RF03,RF04,ST01,ST02,ST05,ST06,ST07
14+
exclude_rules = AL01,AL04,AL07,AL09,AM03,AM05,CP02,CP03,CV02,CV12,LT05,LT09,LT14,RF01,RF02,RF03,RF04,ST01,ST02,ST05,ST06,ST07
1515
# AL04 - Asks for unique table aliases, so it complains when selecting from two 2021_07_01 tables, because the implicit alias is the table name (not fully qualified) and is therefore the same for both.
1616
# AL07 - Avoid aliases in from and join - why?
1717
# AM03 - if using DESC in one ORDER BY column, then insist on ASC/DESC for all.
1818
# AM05 - INNER JOIN must be fully qualified. Probably should use this but not our style.
1919
# CP02 - Unquoted identifiers (e.g. column names) will be mixed case so don't enforce case
2020
# CP03 - Function names will be mixed case so don't enforce case
2121
# CV02 - Use COALESCE instead of IFNULL or NVL. We think IFNULL is clearer.
22+
# CV12 - Doesn't work with UNNEST. https://github.com/sqlfluff/sqlfluff/issues/6558
2223
# LT05 - We allow longer lines as some of our queries are complex. Maybe should limit in future?
2324
# LT09 - Select targets should be on new lines but sub clauses don't always obey this. Maybe revisit in future?
25+
# LT14 - Keywords on newline. We have some simple, single line joins
2426
# RF01 - BigQuery uses STRUCTS which can look like incorrect table references
2527
# RF02 - Asks for qualified columns for ambiguous ones, but we do not qualify our columns, and they are not really ambiguous (or BigQuery would complain)
2628
# RF03 - Insists on references in column names even if not ambiguous. Bit OTT.

sql/.sqlfluffignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
/lens/*/crux_histograms.sql
2+
/lens/*/crux_timeseries.sql
3+
/lens/*/histograms.sql
4+
/lens/*/timeseries.sql

sql/generate_reports.sh

Lines changed: 95 additions & 98 deletions
Large diffs are not rendered by default.

sql/histograms/bootupJs.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
FLOOR(CAST(IFNULL(JSON_EXTRACT(report, '$.audits.bootup-time.numericValue'), JSON_EXTRACT(report, '$.audits.bootup-time.rawValue')) AS FLOAT64) / 100) / 10 AS bin
13+
FLOOR(FLOAT64(IFNULL(lighthouse.audits['bootup-time'].numericValue, lighthouse.audits['bootup-time'].rawValue)) / 100) / 10 AS bin
1414
FROM
15-
`httparchive.lighthouse.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

sql/histograms/bytesCss.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
CAST(FLOOR(bytesCSS / 10240) * 10 AS INT64) AS bin
13+
CAST(FLOOR(FLOAT64(summary.bytesCss) / 10240) * 10 AS INT64) AS bin
1414
FROM
15-
`httparchive.summary_pages.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

sql/histograms/bytesFont.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
CAST(FLOOR(bytesFont / 10240) * 10 AS INT64) AS bin
13+
CAST(FLOOR(FLOAT64(summary.bytesFont) / 10240) * 10 AS INT64) AS bin
1414
FROM
15-
`httparchive.summary_pages.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

sql/histograms/bytesHtml.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
CAST(FLOOR(bytesHtml / 10240) * 10 AS INT64) AS bin
13+
CAST(FLOOR(FLOAT64(summary.bytesHtml) / 10240) * 10 AS INT64) AS bin
1414
FROM
15-
`httparchive.summary_pages.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

sql/histograms/bytesImg.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
CAST(FLOOR(bytesImg / 102400) * 100 AS INT64) AS bin
13+
CAST(FLOOR(FLOAT64(summary.bytesImg) / 102400) * 100 AS INT64) AS bin
1414
FROM
15-
`httparchive.summary_pages.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

sql/histograms/bytesJs.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
CAST(FLOOR(bytesJS / 10240) * 10 AS INT64) AS bin
13+
CAST(FLOOR(FLOAT64(summary.bytesJS) / 10240) * 10 AS INT64) AS bin
1414
FROM
15-
`httparchive.summary_pages.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

sql/histograms/bytesOther.sql

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ FROM (
88
volume / SUM(volume) OVER (PARTITION BY client) AS pdf
99
FROM (
1010
SELECT
11-
_TABLE_SUFFIX AS client,
11+
client,
1212
COUNT(0) AS volume,
13-
CAST(FLOOR(bytesOther / 10240) * 10 AS INT64) AS bin
13+
CAST(FLOOR(FLOAT64(summary.bytesOther) / 10240) * 10 AS INT64) AS bin
1414
FROM
15-
`httparchive.summary_pages.${YYYY_MM_DD}_*`
15+
`httparchive.crawl.pages`
16+
WHERE
17+
date = '${YYYY-MM-DD}' AND
18+
is_root_page
1619
GROUP BY
1720
bin,
1821
client

0 commit comments

Comments
 (0)