Skip to content

Commit 32813e5

Browse files
Use RNU4ATAC variant patches in API
1 parent 97cfe31 commit 32813e5

File tree

3 files changed

+182
-98
lines changed

3 files changed

+182
-98
lines changed

graphql-api/src/queries/gene-queries.ts

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import elasticsearch from '@elastic/elasticsearch'
22
import { withCache } from '../cache'
33

4-
import { fetchAllSearchResults } from './helpers/elasticsearch-helpers'
4+
import { fetchAllSearchResultsFromMultipleIndices } from './helpers/elasticsearch-helpers'
55

66
import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata'
77
import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch'
@@ -130,21 +130,6 @@ export const fetchGenesByRegion = async (
130130
return mergedHits.map((hit) => hit._source.value)
131131
}
132132

133-
/**
 * Run the same search against several indices and collect every result set.
 *
 * One `fetchAllSearchResults` call is issued per entry in `indices`; the
 * returned promise resolves to an array whose i-th element is the result for
 * `indices[i]`, and rejects if any single search rejects.
 *
 * `searchParams` is spread after `index` and `type`, so either key present in
 * `searchParams` takes precedence over the per-index values.
 */
const fetchAllSearchResultsFromMultipleIndices = async (
  esClient: LimitedElasticClient,
  indices: string[],
  searchParams: elasticsearch.RequestParams.Search<any>
) => {
  const searchOneIndex = (index: string) =>
    fetchAllSearchResults(esClient, { index, type: '_doc', ...searchParams })

  return Promise.all(indices.map(searchOneIndex))
}
147-
148133
const searchMultipleIndices = async (
149134
esClient: LimitedElasticClient,
150135
indices: string[],

graphql-api/src/queries/helpers/elasticsearch-helpers.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import elasticsearch from '@elastic/elasticsearch'
12
import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch'
23

34
/**
@@ -43,6 +44,21 @@ export const fetchAllSearchResults = async (client: LimitedElasticClient, search
4344
return allResults
4445
}
4546

47+
/**
 * Fetch all search results for one query from each of the given indices.
 *
 * Every index gets its own `fetchAllSearchResults` call built from the shared
 * `searchParams`. The resolved value is an array of per-index results in the
 * same order as `indices`; the promise rejects as soon as any search rejects.
 *
 * Because `searchParams` is spread last, an `index` or `type` key inside it
 * overrides the values set here.
 */
export const fetchAllSearchResultsFromMultipleIndices = async (
  esClient: LimitedElasticClient,
  indices: string[],
  searchParams: elasticsearch.RequestParams.Search<any>
) =>
  Promise.all(
    indices.map((indexName) =>
      fetchAllSearchResults(esClient, {
        index: indexName,
        type: '_doc',
        ...searchParams,
      })
    )
  )
61+
4662
// Retrieve index metadata set by data pipeline
4763
export const fetchIndexMetadata = async (esClient: any, index: any) => {
4864
const response = await esClient.indices.getMapping({

graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts

Lines changed: 165 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ import { isRsId } from '@gnomad/identifiers'
55
import { UserVisibleError } from '../../errors'
66

77
import { fetchLocalAncestryPopulationsByVariant } from '../local-ancestry-queries'
8-
import { fetchAllSearchResults } from '../helpers/elasticsearch-helpers'
8+
import {
9+
fetchAllSearchResults,
10+
fetchAllSearchResultsFromMultipleIndices,
11+
} from '../helpers/elasticsearch-helpers'
912
import { mergeOverlappingRegions } from '../helpers/region-helpers'
1013
import {
1114
fetchLofCurationResultsByVariant,
@@ -16,10 +19,67 @@ import {
1619
import { getFlagsForContext } from './shared/flags'
1720
import { getConsequenceForContext } from './shared/transcriptConsequence'
1821
import largeGenes from '../helpers/large-genes'
22+
import { LimitedElasticClient, SearchResponse } from '../../elasticsearch'
1923

2024
// Elasticsearch index that the variant queries in this module search for variant documents.
const GNOMAD_V4_VARIANT_INDEX = 'gnomad_v4_variants'
25+
// Elasticsearch index searched alongside the main variant index; documents found here
// supply replacement transcript_consequences for the variant with the same variant_id.
// NOTE(review): the name embeds a timestamp, so it presumably has to be updated by hand
// whenever the patches index is rebuilt -- confirm, or consider an alias/config value.
const GNOMAD_V4_VARIANT_INDEX_PATCHES = 'gnomad_v4_variants_patches-2025-10-14--20-02'
2126

2227
// Subset selector accepted by the variant queries; used as a key into per-variant
// frequency objects (e.g. `variant.exome.freq[subset]`, `variant.joint.freq[subset]`).
type Subset = 'all' | 'non_ukb'
28+
/** A single transcript consequence entry as read from a variant or patch document. */
type ESTranscriptConsequence = {
  biotype: string
  consequence_terms: string[]
  gene_id: string
  gene_symbol: string
  gene_version: string
  is_canonical: boolean
  major_consequence: string
  transcript_id: string
  transcript_version: string
}

/** A patch document: replacement transcript consequences for one variant. */
type ESPatch = {
  variant_id: string
  transcript_consequences: ESTranscriptConsequence[]
}

/**
 * Overlay patched transcript consequences onto a variant's original ones.
 *
 * Each original consequence is replaced by the first patched consequence with
 * the same `transcript_id`, if there is one; otherwise it is kept unchanged.
 * Patched consequences whose `transcript_id` does not occur in the original
 * list are not added. The order of the original list is preserved.
 *
 * @param transcriptConsequences Original consequences. A missing value
 *   (`null`/`undefined`) is treated as an empty list so that callers can
 *   safely chain array methods on the result.
 * @param patchedTranscriptConsequences Replacement consequences, or
 *   `null`/`undefined` when the variant has no patch.
 * @returns The merged list. When there is no patch this is the original array
 *   itself (or a fresh empty array if the original was missing).
 */
const mergeTranscriptConsequences = (
  transcriptConsequences: ESTranscriptConsequence[] | null | undefined,
  patchedTranscriptConsequences?: ESTranscriptConsequence[] | null
): ESTranscriptConsequence[] => {
  // Variant documents are not guaranteed to carry transcript_consequences.
  const originalConsequences = transcriptConsequences || []

  if (!patchedTranscriptConsequences) {
    return originalConsequences
  }

  // Index patches by transcript once instead of scanning the patch list per consequence.
  const patchByTranscriptId = new Map<string, ESTranscriptConsequence>()
  for (const patchedCsq of patchedTranscriptConsequences) {
    // Keep the first entry per transcript, matching a left-to-right search.
    if (!patchByTranscriptId.has(patchedCsq.transcript_id)) {
      patchByTranscriptId.set(patchedCsq.transcript_id, patchedCsq)
    }
  }

  return originalConsequences.map((csq) => patchByTranscriptId.get(csq.transcript_id) || csq)
}
61+
62+
/**
 * Apply the matching patch, if any, to a single variant.
 *
 * The first patch whose `variant_id` equals the variant's is used. When one
 * is found, a shallow copy of the variant is returned with its
 * `transcript_consequences` replaced by the merged list; when none is found,
 * the variant object itself is returned untouched.
 */
const mergeTranscriptConsequencesInVariant = (
  variant: { variant_id: string; transcript_consequences: ESTranscriptConsequence[] },
  patches: ESPatch[]
) => {
  for (const patch of patches) {
    if (patch.variant_id === variant.variant_id) {
      const mergedConsequences = mergeTranscriptConsequences(
        variant.transcript_consequences,
        patch.transcript_consequences
      )
      return { ...variant, transcript_consequences: mergedConsequences }
    }
  }

  // No patch exists for this variant.
  return variant
}
79+
80+
/**
 * Report whether a variant has a positive `ac_raw` in either data source.
 *
 * The genome side always reads `variant.genome.freq.all` and is skipped when
 * that entry is absent; the exome side reads `variant.exome.freq[subset]` and
 * is only consulted when the genome check does not already succeed. The exome
 * entry for `subset` must exist in that case, otherwise the property access
 * throws.
 */
const hasPositiveAC = (variant: any, subset: string) => {
  const genomeFreq = variant.genome.freq.all
  if (genomeFreq && genomeFreq.ac_raw > 0) {
    return true
  }
  return variant.exome.freq[subset].ac_raw > 0
}
2383

2484
// ================================================================================================
2585
// Count query
@@ -69,30 +129,50 @@ const chooseIdField = (variantId: string) => {
69129
return 'variant_id'
70130
}
71131

72-
const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) => {
132+
const fetchVariantById = async (
133+
esClient: LimitedElasticClient,
134+
variantId: string,
135+
subset: Subset
136+
) => {
73137
const idField = chooseIdField(variantId)
74-
const response = await esClient.search({
138+
const query = {
139+
bool: {
140+
filter: { term: { [idField]: variantId } },
141+
},
142+
}
143+
144+
const variantResponsePromise = esClient.search({
75145
index: GNOMAD_V4_VARIANT_INDEX,
76146
body: {
77-
query: {
78-
bool: {
79-
filter: { term: { [idField]: variantId } },
80-
},
81-
},
147+
query,
82148
},
83149
size: 1,
84-
})
150+
}) as Promise<SearchResponse>
151+
const patchResponsePromise = esClient.search({
152+
index: GNOMAD_V4_VARIANT_INDEX_PATCHES,
153+
body: { query },
154+
size: 1,
155+
}) as Promise<SearchResponse>
85156

86-
if (response.body.hits.total.value === 0) {
157+
const variantResponse = await variantResponsePromise
158+
159+
if (variantResponse.body.hits.total.value === 0) {
87160
throw new UserVisibleError('Variant not found')
88161
}
89162

90163
// An rsID may match multiple variants
91-
if (response.body.hits.total.value > 1) {
164+
if (variantResponse.body.hits.total.value > 1) {
92165
throw new UserVisibleError('Multiple variants found, query using variant ID to select one.')
93166
}
94167

95-
const variant = response.body.hits.hits[0]._source.value
168+
const patchResponse = await patchResponsePromise
169+
const patchedTranscriptConsequences =
170+
patchResponse.body.hits.total.value > 0
171+
? (patchResponse.body.hits.hits[0]._source.value
172+
.transcript_consequences as ESTranscriptConsequence[])
173+
: null
174+
175+
const variant = variantResponse.body.hits.hits[0]._source.value
96176

97177
const subsetGenomeFreq = variant.genome.freq.all || {}
98178
const subsetJointFreq = variant.joint.freq[subset] || {}
@@ -244,9 +324,10 @@ const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) =
244324
flags: variantFlags,
245325
// TODO: Include RefSeq transcripts once the browser supports them.
246326
lof_curations: lofCurationResults,
247-
transcript_consequences: (variant.transcript_consequences || []).filter((csq: any) =>
248-
csq.gene_id.startsWith('ENSG')
249-
),
327+
transcript_consequences: mergeTranscriptConsequences(
328+
variant.transcript_consequences,
329+
patchedTranscriptConsequences
330+
).filter((csq: any) => csq.gene_id.startsWith('ENSG')),
250331
in_silico_predictors: inSilicoPredictorsList,
251332
}
252333

@@ -454,28 +535,30 @@ const fetchVariantsByGene = async (esClient: any, gene: any, subset: Subset) =>
454535
},
455536
}))
456537

457-
const hits = await fetchAllSearchResults(esClient, {
458-
index: GNOMAD_V4_VARIANT_INDEX,
459-
type: '_doc',
460-
size: pageSize,
461-
_source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
462-
body: {
463-
query: {
464-
bool: {
465-
filter: [{ term: { gene_id: gene.gene_id } }, { bool: { should: rangeQueries } }],
538+
const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices(
539+
esClient,
540+
[GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES],
541+
{
542+
type: '_doc',
543+
size: pageSize,
544+
_source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
545+
body: {
546+
query: {
547+
bool: {
548+
filter: [{ term: { gene_id: gene.gene_id } }, { bool: { should: rangeQueries } }],
549+
},
466550
},
551+
sort: [{ 'locus.position': { order: 'asc' } }],
467552
},
468-
sort: [{ 'locus.position': { order: 'asc' } }],
469-
},
470-
})
553+
}
554+
)
555+
556+
const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value)
471557

472558
const shapedHits = hits
473559
.map((hit: any) => hit._source.value)
474-
.filter(
475-
(variant: any) =>
476-
(variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
477-
variant.exome.freq[subset].ac_raw > 0
478-
)
560+
.filter((variant) => hasPositiveAC(variant, subset))
561+
.map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches))
479562
.map(shapeVariantSummary(subset, { type: 'gene', geneId: gene.gene_id }))
480563

481564
const lofCurationResults = await fetchLofCurationResultsByGene(esClient, 'v4', gene)
@@ -507,38 +590,40 @@ const fetchVariantsByRegion = async (esClient: any, region: any, subset: Subset)
507590
const genomeSubset = 'all'
508591
const jointSubset = 'all'
509592

510-
const hits = await fetchAllSearchResults(esClient, {
511-
index: GNOMAD_V4_VARIANT_INDEX,
512-
type: '_doc',
513-
size: 10000,
514-
_source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
515-
body: {
516-
query: {
517-
bool: {
518-
filter: [
519-
{ term: { 'locus.contig': `chr${region.chrom}` } },
520-
{
521-
range: {
522-
'locus.position': {
523-
gte: region.start,
524-
lte: region.stop,
593+
const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices(
594+
esClient,
595+
[GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES],
596+
{
597+
type: '_doc',
598+
size: 10000,
599+
_source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
600+
body: {
601+
query: {
602+
bool: {
603+
filter: [
604+
{ term: { 'locus.contig': `chr${region.chrom}` } },
605+
{
606+
range: {
607+
'locus.position': {
608+
gte: region.start,
609+
lte: region.stop,
610+
},
525611
},
526612
},
527-
},
528-
],
613+
],
614+
},
529615
},
616+
sort: [{ 'locus.position': { order: 'asc' } }],
530617
},
531-
sort: [{ 'locus.position': { order: 'asc' } }],
532-
},
533-
})
618+
}
619+
)
620+
621+
const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value)
534622

535623
const variants = hits
536624
.map((hit: any) => hit._source.value)
537-
.filter(
538-
(variant: any) =>
539-
(variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
540-
variant.exome.freq[subset].ac_raw > 0
541-
)
625+
.filter((variant) => hasPositiveAC(variant, subset))
626+
.map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches))
542627
.map(shapeVariantSummary(subset, { type: 'region' }))
543628

544629
const lofCurationResults = await fetchLofCurationResultsByRegion(esClient, 'v4', region)
@@ -599,31 +684,33 @@ const fetchVariantsByTranscript = async (esClient: any, transcript: any, subset:
599684
},
600685
}))
601686

602-
const hits = await fetchAllSearchResults(esClient, {
603-
index: GNOMAD_V4_VARIANT_INDEX,
604-
type: '_doc',
605-
size: 10000,
606-
_source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
607-
body: {
608-
query: {
609-
bool: {
610-
filter: [
611-
{ term: { transcript_id: transcript.transcript_id } },
612-
{ bool: { should: rangeQueries } },
613-
],
687+
const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices(
688+
esClient,
689+
[GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES],
690+
{
691+
type: '_doc',
692+
size: 10000,
693+
_source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
694+
body: {
695+
query: {
696+
bool: {
697+
filter: [
698+
{ term: { transcript_id: transcript.transcript_id } },
699+
{ bool: { should: rangeQueries } },
700+
],
701+
},
614702
},
703+
sort: [{ 'locus.position': { order: 'asc' } }],
615704
},
616-
sort: [{ 'locus.position': { order: 'asc' } }],
617-
},
618-
})
705+
}
706+
)
707+
708+
const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value)
619709

620710
return hits
621711
.map((hit: any) => hit._source.value)
622-
.filter(
623-
(variant: any) =>
624-
(variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
625-
variant.exome.freq[subset].ac_raw > 0
626-
)
712+
.filter((variant) => hasPositiveAC(variant, subset))
713+
.map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches))
627714
.map(
628715
shapeVariantSummary(subset, { type: 'transcript', transcriptId: transcript.transcript_id })
629716
)
@@ -665,11 +752,7 @@ const fetchMatchingVariants = async (
665752

666753
return hits
667754
.map((hit: any) => hit._source.value)
668-
.filter(
669-
(variant: any) =>
670-
(variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
671-
variant.exome.freq[subset].ac_raw > 0
672-
)
755+
.filter((variant) => hasPositiveAC(variant, subset))
673756
.map((variant: any) => ({
674757
variant_id: variant.variant_id,
675758
}))

0 commit comments

Comments
 (0)