@@ -5,7 +5,10 @@ import { isRsId } from '@gnomad/identifiers'
55import { UserVisibleError } from '../../errors'
66
77import { fetchLocalAncestryPopulationsByVariant } from '../local-ancestry-queries'
8- import { fetchAllSearchResults } from '../helpers/elasticsearch-helpers'
8+ import {
9+ fetchAllSearchResults ,
10+ fetchAllSearchResultsFromMultipleIndices ,
11+ } from '../helpers/elasticsearch-helpers'
912import { mergeOverlappingRegions } from '../helpers/region-helpers'
1013import {
1114 fetchLofCurationResultsByVariant ,
@@ -16,10 +19,67 @@ import {
1619import { getFlagsForContext } from './shared/flags'
1720import { getConsequenceForContext } from './shared/transcriptConsequence'
1821import largeGenes from '../helpers/large-genes'
22+ import { LimitedElasticClient , SearchResponse } from '../../elasticsearch'
1923
2024const GNOMAD_V4_VARIANT_INDEX = 'gnomad_v4_variants'
25+ const GNOMAD_V4_VARIANT_INDEX_PATCHES = 'gnomad_v4_variants_patches-2025-10-14--20-02'
2126
2227type Subset = 'all' | 'non_ukb'
// Shape of a single transcript consequence entry as carried in the `transcript_consequences`
// list of variant documents and of patch documents.
28+ type ESTranscriptConsequence = {
29+ biotype : string
30+ consequence_terms : string [ ]
// Callers in this file keep only entries whose gene_id starts with 'ENSG'.
31+ gene_id : string
32+ gene_symbol : string
33+ gene_version : string
34+ is_canonical : boolean
35+ major_consequence : string
// Key used to pair a patched consequence with the existing consequence it replaces.
36+ transcript_id : string
37+ transcript_version : string
38+ }
// A patch document: replacement transcript consequences for the variant whose `variant_id`
// matches. Read from the patches index alongside the main variant index.
39+ type ESPatch = {
40+ variant_id : string
41+ transcript_consequences : ESTranscriptConsequence [ ]
42+ }
43+
/**
 * Overlays patched transcript consequences onto a variant's existing consequences.
 *
 * Every entry of `transcriptConsequences` keeps its position. When the patch list contains an
 * entry with the same `transcript_id`, that patched entry is used in its place (the first
 * matching patch entry wins if the patch list contains duplicates). Patched entries whose
 * `transcript_id` matches nothing in `transcriptConsequences` are ignored.
 *
 * @param transcriptConsequences - Consequences stored on the variant. A missing value
 *   (`null`/`undefined`) is treated as an empty list so callers can safely chain array
 *   methods on the result.
 * @param patchedTranscriptConsequences - Replacement consequences, or `null`/`undefined`
 *   when no patch exists for the variant.
 * @returns The existing list itself when there is no patch; otherwise a new array. The
 *   inputs are never mutated.
 */
const mergeTranscriptConsequences = (
  transcriptConsequences: ESTranscriptConsequence[] | null | undefined,
  patchedTranscriptConsequences?: ESTranscriptConsequence[] | null
): ESTranscriptConsequence[] => {
  // Variant documents are not guaranteed to carry transcript consequences; fall back to an
  // empty list instead of propagating `undefined` to callers that call `.filter` on the result.
  const existingConsequences = transcriptConsequences || []

  if (!patchedTranscriptConsequences) {
    return existingConsequences
  }

  // Index patches by transcript ID once so each existing consequence is resolved with a
  // single lookup rather than a scan of the whole patch list.
  const patchesByTranscriptId = new Map<string, ESTranscriptConsequence>()
  for (const patchedCsq of patchedTranscriptConsequences) {
    // Keep the first entry per transcript ID, matching Array.prototype.find semantics.
    if (!patchesByTranscriptId.has(patchedCsq.transcript_id)) {
      patchesByTranscriptId.set(patchedCsq.transcript_id, patchedCsq)
    }
  }

  return existingConsequences.map((csq) => patchesByTranscriptId.get(csq.transcript_id) || csq)
}
61+
/**
 * Applies the transcript-consequence patch for a single variant, if one exists.
 *
 * `patches` is scanned in order for the first patch whose `variant_id` equals the variant's.
 * When one is found, a shallow copy of the variant is returned whose
 * `transcript_consequences` is the result of `mergeTranscriptConsequences`; the input
 * variant is not modified. When none is found, the variant is returned unchanged (same
 * object).
 *
 * Note: this is a linear scan of `patches` on every call.
 *
 * @param variant - Variant carrying `variant_id` and its current transcript consequences.
 * @param patches - Candidate patches to search.
 * @returns The patched copy, or `variant` itself when no patch matches.
 */
const mergeTranscriptConsequencesInVariant = (
  variant: { variant_id: string; transcript_consequences: ESTranscriptConsequence[] },
  patches: ESPatch[]
) => {
  for (const candidatePatch of patches) {
    if (candidatePatch.variant_id === variant.variant_id) {
      const mergedConsequences = mergeTranscriptConsequences(
        variant.transcript_consequences,
        candidatePatch.transcript_consequences
      )
      return { ...variant, transcript_consequences: mergedConsequences }
    }
  }

  // No patch targets this variant.
  return variant
}
79+
/**
 * Tells whether a variant has a positive `ac_raw` in either data set.
 *
 * The genome check always uses the `all` frequency entry (and is skipped when that entry is
 * absent); the exome check uses the entry for `subset`. The exome entry is only read when the
 * genome check does not already succeed.
 *
 * @param variant - Variant document with `genome.freq` and `exome.freq`.
 * @param subset - Key into `variant.exome.freq`.
 * @returns `true` when either `ac_raw` is greater than zero.
 */
const hasPositiveAC = (variant: any, subset: string) => {
  const genomeFreq = variant.genome.freq.all
  if (genomeFreq && genomeFreq.ac_raw > 0) {
    return true
  }

  return variant.exome.freq[subset].ac_raw > 0
}
2383
2484// ================================================================================================
2585// Count query
@@ -69,30 +129,50 @@ const chooseIdField = (variantId: string) => {
69129 return 'variant_id'
70130}
71131
72- const fetchVariantById = async ( esClient : any , variantId : any , subset : Subset ) => {
132+ const fetchVariantById = async (
133+ esClient : LimitedElasticClient ,
134+ variantId : string ,
135+ subset : Subset
136+ ) => {
73137 const idField = chooseIdField ( variantId )
74- const response = await esClient . search ( {
138+ const query = {
139+ bool : {
140+ filter : { term : { [ idField ] : variantId } } ,
141+ } ,
142+ }
143+
144+ const variantResponsePromise = esClient . search ( {
75145 index : GNOMAD_V4_VARIANT_INDEX ,
76146 body : {
77- query : {
78- bool : {
79- filter : { term : { [ idField ] : variantId } } ,
80- } ,
81- } ,
147+ query,
82148 } ,
83149 size : 1 ,
84- } )
150+ } ) as Promise < SearchResponse >
151+ const patchResponsePromise = esClient . search ( {
152+ index : GNOMAD_V4_VARIANT_INDEX_PATCHES ,
153+ body : { query } ,
154+ size : 1 ,
155+ } ) as Promise < SearchResponse >
85156
86- if ( response . body . hits . total . value === 0 ) {
157+ const variantResponse = await variantResponsePromise
158+
159+ if ( variantResponse . body . hits . total . value === 0 ) {
87160 throw new UserVisibleError ( 'Variant not found' )
88161 }
89162
90163 // An rsID may match multiple variants
91- if ( response . body . hits . total . value > 1 ) {
164+ if ( variantResponse . body . hits . total . value > 1 ) {
92165 throw new UserVisibleError ( 'Multiple variants found, query using variant ID to select one.' )
93166 }
94167
95- const variant = response . body . hits . hits [ 0 ] . _source . value
168+ const patchResponse = await patchResponsePromise
169+ const patchedTranscriptConsequences =
170+ patchResponse . body . hits . total . value > 0
171+ ? ( patchResponse . body . hits . hits [ 0 ] . _source . value
172+ . transcript_consequences as ESTranscriptConsequence [ ] )
173+ : null
174+
175+ const variant = variantResponse . body . hits . hits [ 0 ] . _source . value
96176
97177 const subsetGenomeFreq = variant . genome . freq . all || { }
98178 const subsetJointFreq = variant . joint . freq [ subset ] || { }
@@ -244,9 +324,10 @@ const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) =
244324 flags : variantFlags ,
245325 // TODO: Include RefSeq transcripts once the browser supports them.
246326 lof_curations : lofCurationResults ,
247- transcript_consequences : ( variant . transcript_consequences || [ ] ) . filter ( ( csq : any ) =>
248- csq . gene_id . startsWith ( 'ENSG' )
249- ) ,
327+ transcript_consequences : mergeTranscriptConsequences (
328+ variant . transcript_consequences ,
329+ patchedTranscriptConsequences
330+ ) . filter ( ( csq : any ) => csq . gene_id . startsWith ( 'ENSG' ) ) ,
250331 in_silico_predictors : inSilicoPredictorsList ,
251332 }
252333
@@ -454,28 +535,30 @@ const fetchVariantsByGene = async (esClient: any, gene: any, subset: Subset) =>
454535 } ,
455536 } ) )
456537
457- const hits = await fetchAllSearchResults ( esClient , {
458- index : GNOMAD_V4_VARIANT_INDEX ,
459- type : '_doc' ,
460- size : pageSize ,
461- _source : getMultiVariantSourceFields ( exomeSubset , genomeSubset , jointSubset ) ,
462- body : {
463- query : {
464- bool : {
465- filter : [ { term : { gene_id : gene . gene_id } } , { bool : { should : rangeQueries } } ] ,
538+ const [ hits , consequencePatchHits ] = await fetchAllSearchResultsFromMultipleIndices (
539+ esClient ,
540+ [ GNOMAD_V4_VARIANT_INDEX , GNOMAD_V4_VARIANT_INDEX_PATCHES ] ,
541+ {
542+ type : '_doc' ,
543+ size : pageSize ,
544+ _source : getMultiVariantSourceFields ( exomeSubset , genomeSubset , jointSubset ) ,
545+ body : {
546+ query : {
547+ bool : {
548+ filter : [ { term : { gene_id : gene . gene_id } } , { bool : { should : rangeQueries } } ] ,
549+ } ,
466550 } ,
551+ sort : [ { 'locus.position' : { order : 'asc' } } ] ,
467552 } ,
468- sort : [ { 'locus.position' : { order : 'asc' } } ] ,
469- } ,
470- } )
553+ }
554+ )
555+
556+ const consequencePatches : ESPatch [ ] = consequencePatchHits . map ( ( hit ) => hit . _source . value )
471557
472558 const shapedHits = hits
473559 . map ( ( hit : any ) => hit . _source . value )
474- . filter (
475- ( variant : any ) =>
476- ( variant . genome . freq . all && variant . genome . freq . all . ac_raw > 0 ) ||
477- variant . exome . freq [ subset ] . ac_raw > 0
478- )
560+ . filter ( ( variant ) => hasPositiveAC ( variant , subset ) )
561+ . map ( ( variant ) => mergeTranscriptConsequencesInVariant ( variant , consequencePatches ) )
479562 . map ( shapeVariantSummary ( subset , { type : 'gene' , geneId : gene . gene_id } ) )
480563
481564 const lofCurationResults = await fetchLofCurationResultsByGene ( esClient , 'v4' , gene )
@@ -507,38 +590,40 @@ const fetchVariantsByRegion = async (esClient: any, region: any, subset: Subset)
507590 const genomeSubset = 'all'
508591 const jointSubset = 'all'
509592
510- const hits = await fetchAllSearchResults ( esClient , {
511- index : GNOMAD_V4_VARIANT_INDEX ,
512- type : '_doc' ,
513- size : 10000 ,
514- _source : getMultiVariantSourceFields ( exomeSubset , genomeSubset , jointSubset ) ,
515- body : {
516- query : {
517- bool : {
518- filter : [
519- { term : { 'locus.contig' : `chr${ region . chrom } ` } } ,
520- {
521- range : {
522- 'locus.position' : {
523- gte : region . start ,
524- lte : region . stop ,
593+ const [ hits , consequencePatchHits ] = await fetchAllSearchResultsFromMultipleIndices (
594+ esClient ,
595+ [ GNOMAD_V4_VARIANT_INDEX , GNOMAD_V4_VARIANT_INDEX_PATCHES ] ,
596+ {
597+ type : '_doc' ,
598+ size : 10000 ,
599+ _source : getMultiVariantSourceFields ( exomeSubset , genomeSubset , jointSubset ) ,
600+ body : {
601+ query : {
602+ bool : {
603+ filter : [
604+ { term : { 'locus.contig' : `chr${ region . chrom } ` } } ,
605+ {
606+ range : {
607+ 'locus.position' : {
608+ gte : region . start ,
609+ lte : region . stop ,
610+ } ,
525611 } ,
526612 } ,
527- } ,
528- ] ,
613+ ] ,
614+ } ,
529615 } ,
616+ sort : [ { 'locus.position' : { order : 'asc' } } ] ,
530617 } ,
531- sort : [ { 'locus.position' : { order : 'asc' } } ] ,
532- } ,
533- } )
618+ }
619+ )
620+
621+ const consequencePatches : ESPatch [ ] = consequencePatchHits . map ( ( hit ) => hit . _source . value )
534622
535623 const variants = hits
536624 . map ( ( hit : any ) => hit . _source . value )
537- . filter (
538- ( variant : any ) =>
539- ( variant . genome . freq . all && variant . genome . freq . all . ac_raw > 0 ) ||
540- variant . exome . freq [ subset ] . ac_raw > 0
541- )
625+ . filter ( ( variant ) => hasPositiveAC ( variant , subset ) )
626+ . map ( ( variant ) => mergeTranscriptConsequencesInVariant ( variant , consequencePatches ) )
542627 . map ( shapeVariantSummary ( subset , { type : 'region' } ) )
543628
544629 const lofCurationResults = await fetchLofCurationResultsByRegion ( esClient , 'v4' , region )
@@ -599,31 +684,33 @@ const fetchVariantsByTranscript = async (esClient: any, transcript: any, subset:
599684 } ,
600685 } ) )
601686
602- const hits = await fetchAllSearchResults ( esClient , {
603- index : GNOMAD_V4_VARIANT_INDEX ,
604- type : '_doc' ,
605- size : 10000 ,
606- _source : getMultiVariantSourceFields ( exomeSubset , genomeSubset , jointSubset ) ,
607- body : {
608- query : {
609- bool : {
610- filter : [
611- { term : { transcript_id : transcript . transcript_id } } ,
612- { bool : { should : rangeQueries } } ,
613- ] ,
687+ const [ hits , consequencePatchHits ] = await fetchAllSearchResultsFromMultipleIndices (
688+ esClient ,
689+ [ GNOMAD_V4_VARIANT_INDEX , GNOMAD_V4_VARIANT_INDEX_PATCHES ] ,
690+ {
691+ type : '_doc' ,
692+ size : 10000 ,
693+ _source : getMultiVariantSourceFields ( exomeSubset , genomeSubset , jointSubset ) ,
694+ body : {
695+ query : {
696+ bool : {
697+ filter : [
698+ { term : { transcript_id : transcript . transcript_id } } ,
699+ { bool : { should : rangeQueries } } ,
700+ ] ,
701+ } ,
614702 } ,
703+ sort : [ { 'locus.position' : { order : 'asc' } } ] ,
615704 } ,
616- sort : [ { 'locus.position' : { order : 'asc' } } ] ,
617- } ,
618- } )
705+ }
706+ )
707+
708+ const consequencePatches : ESPatch [ ] = consequencePatchHits . map ( ( hit ) => hit . _source . value )
619709
620710 return hits
621711 . map ( ( hit : any ) => hit . _source . value )
622- . filter (
623- ( variant : any ) =>
624- ( variant . genome . freq . all && variant . genome . freq . all . ac_raw > 0 ) ||
625- variant . exome . freq [ subset ] . ac_raw > 0
626- )
712+ . filter ( ( variant ) => hasPositiveAC ( variant , subset ) )
713+ . map ( ( variant ) => mergeTranscriptConsequencesInVariant ( variant , consequencePatches ) )
627714 . map (
628715 shapeVariantSummary ( subset , { type : 'transcript' , transcriptId : transcript . transcript_id } )
629716 )
@@ -665,11 +752,7 @@ const fetchMatchingVariants = async (
665752
666753 return hits
667754 . map ( ( hit : any ) => hit . _source . value )
668- . filter (
669- ( variant : any ) =>
670- ( variant . genome . freq . all && variant . genome . freq . all . ac_raw > 0 ) ||
671- variant . exome . freq [ subset ] . ac_raw > 0
672- )
755+ . filter ( ( variant ) => hasPositiveAC ( variant , subset ) )
673756 . map ( ( variant : any ) => ( {
674757 variant_id : variant . variant_id ,
675758 } ) )
0 commit comments