-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathbq103.sql
More file actions
36 lines (36 loc) · 1004 Bytes
/
bq103.sql
File metadata and controls
36 lines (36 loc) · 1004 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
WITH filtered_variants AS (
SELECT
`AN`,
alternate_bases
FROM `bigquery-public-data.gnomAD.v3_genomes__chr1`
WHERE reference_name = 'chr1'
AND start_position >= 55039446
AND start_position < 55064852
),
variant_stats AS (
SELECT
COUNT(*) AS num_variants,
SUM(`AN`) AS total_AN
FROM filtered_variants
),
alt_stats AS (
SELECT
SUM(alt.AC) AS total_AC
FROM filtered_variants
CROSS JOIN UNNEST(alternate_bases) AS alt
),
gene_stats AS (
SELECT
COUNT(DISTINCT vep.SYMBOL) AS distinct_genes
FROM filtered_variants
CROSS JOIN UNNEST(alternate_bases) AS alt
CROSS JOIN UNNEST(alt.vep) AS vep
WHERE vep.SYMBOL IS NOT NULL AND vep.SYMBOL != ''
)
SELECT
variant_stats.num_variants AS number_of_variants,
alt_stats.total_AC AS total_allele_count,
variant_stats.total_AN AS total_number_of_alleles,
gene_stats.distinct_genes AS distinct_gene_symbols,
25406.0 / variant_stats.num_variants AS mutation_density
FROM variant_stats, alt_stats, gene_stats