Skip to content

Commit 046be97

Browse files
committed
Merge branch 'TASK-8076' of github.com:opencb/opencga into TASK-8076
2 parents dd64fbb + 8a3d780 commit 046be97

9 files changed

Lines changed: 305 additions & 31 deletions

File tree

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/CpicAnnotator.java

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
import com.fasterxml.jackson.databind.ObjectMapper;
66
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicAlleleAnnotation;
77
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicAlleleInfo;
8+
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicAlleleLocationValue;
89
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicDiplotypeAnnotation;
910
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicDiplotypeInfo;
1011
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicDrug;
1112
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicDrugRecommendation;
13+
import org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic.CpicSequenceLocation;
1214
import org.slf4j.Logger;
1315
import org.slf4j.LoggerFactory;
1416

@@ -26,11 +28,12 @@
2628
*
2729
* <p>For each gene diplotype the following CPIC endpoints are called:
2830
* <ul>
29-
* <li>/diplotype – phenotype classification and lookupkey</li>
30-
* <li>/allele – per-allele functional status and activity value</li>
31-
* <li>/pair – drug-gene pairs (CPIC level, PGx testing)</li>
32-
* <li>/recommendation – drug dosing recommendations for the lookupkey</li>
33-
* <li>/drug – resolves drugid to human-readable drug name</li>
31+
* <li>/diplotype – phenotype classification and lookupkey</li>
32+
* <li>/allele – per-allele functional status and activity value</li>
33+
* <li>/allele_definition – per-allele variant locations (filled into CpicAlleleInfo.location)</li>
34+
* <li>/pair – drug-gene pairs (CPIC level, PGx testing)</li>
35+
* <li>/recommendation – drug dosing recommendations for the lookupkey</li>
36+
* <li>/drug – resolves drugid to human-readable drug name</li>
3437
* </ul>
3538
*/
3639
public class CpicAnnotator {
@@ -45,11 +48,12 @@ public class CpicAnnotator {
4548
private final Logger logger = LoggerFactory.getLogger(CpicAnnotator.class);
4649

4750
// Caches to avoid repeated API calls across samples
48-
private final Map<String, String> drugNameCache = new HashMap<>(); // drugid -> name
49-
private final Map<String, CpicAlleleInfo> alleleInfoCache = new HashMap<>(); // "gene:allele" -> info
50-
private final Map<String, CpicDiplotypeInfo> diplotypeInfoCache = new HashMap<>(); // "gene:diplotype" -> info
51+
private final Map<String, String> drugNameCache = new HashMap<>(); // drugid -> name
52+
private final Map<String, CpicAlleleInfo> alleleInfoCache = new HashMap<>(); // "gene:allele" -> info
53+
private final Map<String, List<CpicAlleleLocationValue>> alleleDefinitionCache = new HashMap<>(); // "gene:allele" -> location
54+
private final Map<String, CpicDiplotypeInfo> diplotypeInfoCache = new HashMap<>(); // "gene:diplotype" -> info
5155
private final Map<String, List<CpicDrugRecommendation>> recommendationCache = new HashMap<>(); // lookupkey JSON -> recs
52-
private final Map<String, List<RawPair>> pairCache = new HashMap<>(); // gene -> pairs
56+
private final Map<String, List<RawPair>> pairCache = new HashMap<>(); // gene -> pairs
5357

5458
public CpicAnnotator() {
5559
this.objectMapper = new ObjectMapper()
@@ -158,10 +162,50 @@ private CpicAlleleInfo fetchAlleleInfo(String gene, String allele) throws IOExce
158162
List<CpicAlleleInfo> list = objectMapper.readValue(json, new TypeReference<List<CpicAlleleInfo>>() { });
159163
result = list.isEmpty() ? null : list.get(0);
160164
}
165+
if (result != null) {
166+
// Enrich with variant-level location data from /allele_definition
167+
result.setLocation(fetchAlleleLocationValues(gene, allele, encodedAllele));
168+
}
161169
alleleInfoCache.put(cacheKey, result);
162170
return result;
163171
}
164172

173+
/**
174+
* Fetch variant-level location values for a single allele from the CPIC /allele_definition endpoint.
175+
* Example: GET /allele_definition?genesymbol=eq.CYP2C9&name=eq.*6&select=*,allele_location_value(*,sequence_location(*))
176+
*/
177+
private List<CpicAlleleLocationValue> fetchAlleleLocationValues(String gene, String allele,
178+
String encodedAllele) throws IOException {
179+
String cacheKey = gene + ":" + allele;
180+
if (alleleDefinitionCache.containsKey(cacheKey)) {
181+
return alleleDefinitionCache.get(cacheKey);
182+
}
183+
String url = CPIC_BASE_URL + "/allele_definition?genesymbol=eq." + gene + "&name=eq." + encodedAllele
184+
+ "&select=*,allele_location_value(*,sequence_location(*))";
185+
String json = get(url);
186+
List<CpicAlleleLocationValue> result = new ArrayList<>();
187+
if (json != null) {
188+
List<RawAlleleDef> defs = objectMapper.readValue(json, new TypeReference<List<RawAlleleDef>>() { });
189+
if (!defs.isEmpty() && defs.get(0).allele_location_value != null) {
190+
for (RawLocationValue raw : defs.get(0).allele_location_value) {
191+
CpicSequenceLocation seqLoc = null;
192+
if (raw.sequence_location != null) {
193+
seqLoc = new CpicSequenceLocation(
194+
raw.sequence_location.name,
195+
raw.sequence_location.dbsnpid,
196+
raw.sequence_location.position,
197+
raw.sequence_location.genelocation,
198+
raw.sequence_location.proteinlocation,
199+
raw.sequence_location.chromosomelocation);
200+
}
201+
result.add(new CpicAlleleLocationValue(raw.variantallele, seqLoc));
202+
}
203+
}
204+
}
205+
alleleDefinitionCache.put(cacheKey, result);
206+
return result;
207+
}
208+
165209
/**
166210
* Build CpicDrug list by fetching /pair for the gene and /recommendation for the lookupkey,
167211
* then matching recommendations to drugs by drugid + guidelineid.
@@ -331,6 +375,33 @@ private String get(String url) throws IOException {
331375
return sb.toString();
332376
}
333377

378+
/**
379+
* Internal DTO for one element of the JSON array returned by the /allele_definition endpoint.
380+
*/
381+
private static class RawAlleleDef {
382+
public List<RawLocationValue> allele_location_value;
383+
}
384+
385+
/**
386+
* Internal DTO for one element of the allele_location_value array.
387+
*/
388+
private static class RawLocationValue {
389+
public String variantallele;
390+
public RawSequenceLocation sequence_location;
391+
}
392+
393+
/**
394+
* Internal DTO for the nested sequence_location object.
395+
*/
396+
private static class RawSequenceLocation {
397+
public String name;
398+
public String dbsnpid;
399+
public Long position;
400+
public String genelocation;
401+
public String proteinlocation;
402+
public String chromosomelocation;
403+
}
404+
334405
/**
335406
* Internal DTO to deserialize the /pair response.
336407
*/

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/PharmacogenomicsAlleleTyperAnalysisTool.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ private void alleleTyper() throws IOException, CatalogException {
7878
Files.createDirectories(resultsPath);
7979
pharmacogenomicsManager.storeResultsInPath(results, resultsPath);
8080

81-
// In addition, the it is store in the sample object in catalog
82-
pharmacogenomicsManager.storeResultsInCatalog(study, results, token);
81+
// Store the tool output directory path in the sample attributes in catalog
82+
pharmacogenomicsManager.storeResultsPathInCatalog(study, results, getOutDir(), token);
8383
}
8484
}

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/PharmacogenomicsAnnotationAnalysisTool.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,8 @@ private void annotateResults() throws IOException, CatalogException {
8686
Path resultsPath = getOutDir().resolve(PharmacogenomicsAlleleTyperAnalysisTool.RESULTS_DIR);
8787
Files.createDirectories(resultsPath);
8888
pharmacogenomicsManager.storeResultsInPath(alleleTyperResults, resultsPath);
89-
// In addition, the it is store in the sample object in catalog
90-
91-
pharmacogenomicsManager.storeResultsInCatalog(study, alleleTyperResults, token);
89+
// Store the tool output directory path in the sample attributes in catalog
90+
pharmacogenomicsManager.storeResultsPathInCatalog(study, alleleTyperResults, getOutDir(), token);
9291
}
9392

9493
}

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/PharmacogenomicsManager.java

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,14 +213,59 @@ public void storeResultsInPath(List<AlleleTyperResult> results, Path annotatedDi
213213
}
214214

215215
/**
216-
* Store pharmacogenomics results as attributes in sample objects.
216+
* Store the tool output directory path in catalog sample attributes under the key OPENCGA_PHARMACOGENOMICS_PATH.
217+
* The same outDir is stored for every non-NTC sample produced by the tool run.
218+
* Samples for which the catalog lookup returns no results are skipped and a warning is logged.
217219
*
218220
* @param studyId Study identifier
219221
* @param results List of pharmacogenomics results
220-
* @param token Authentication token
222+
* @param outDir Root output directory of the tool execution
223+
* @param token Authentication token
221224
* @throws CatalogException if catalog operations fail
222-
* @throws IOException if JSON serialization fails
223225
*/
226+
public void storeResultsPathInCatalog(String studyId, List<AlleleTyperResult> results, Path outDir, String token)
227+
throws CatalogException {
228+
logger.info("Storing pharmacogenomics output path in catalog for {} samples", results.size());
229+
String outDirPath = outDir.toAbsolutePath().toString();
230+
231+
for (AlleleTyperResult result : results) {
232+
String sampleId = result.getSampleId();
233+
234+
if ("NTC".equalsIgnoreCase(sampleId)) {
235+
logger.debug("Skipping control sample: {}", sampleId);
236+
continue;
237+
}
238+
239+
try {
240+
OpenCGAResult<org.opencb.opencga.core.models.sample.Sample> sampleResult =
241+
catalogManager.getSampleManager().get(studyId, sampleId, QueryOptions.empty(), token);
242+
243+
if (sampleResult.getNumResults() == 0) {
244+
logger.warn("Sample {} not found in study {}. Skipping path storage.", sampleId, studyId);
245+
continue;
246+
}
247+
248+
Map<String, Object> attributes = new HashMap<>();
249+
attributes.put("OPENCGA_PHARMACOGENOMICS_PATH", outDirPath);
250+
251+
SampleUpdateParams updateParams = new SampleUpdateParams();
252+
updateParams.setAttributes(attributes);
253+
254+
catalogManager.getSampleManager().update(studyId, sampleId, updateParams, QueryOptions.empty(), token);
255+
logger.debug("Updated sample {} with pharmacogenomics path: {}", sampleId, outDirPath);
256+
} catch (CatalogException e) {
257+
logger.error("Failed to update sample {} with pharmacogenomics path: {}", sampleId, e.getMessage());
258+
throw e;
259+
}
260+
}
261+
262+
logger.info("Successfully stored pharmacogenomics path for {} samples in catalog", results.size());
263+
}
264+
265+
/**
266+
* @deprecated Use {@link #storeResultsPathInCatalog(String, List, Path, String)} instead.
267+
*/
268+
@Deprecated
224269
public void storeResultsInCatalog(String studyId, List<AlleleTyperResult> results, String token)
225270
throws CatalogException, IOException {
226271
logger.info("Storing pharmacogenomics results in catalog for {} samples", results.size());

opencga-analysis/src/test/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/PharmacogenomicsAlleleTyperAnalysisToolTest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public void setUp() throws Exception {
110110
translationContent = readGzipFile(TRANSLATION_RESOURCE);
111111

112112
// Parse genotyping to discover sample IDs and create them in catalog so
113-
// storeResultsInCatalog can persist the OPENCGA_PHARMACOGENOMICS_DATA attribute
113+
// storeResultsPathInCatalog can persist the OPENCGA_PHARMACOGENOMICS_PATH attribute
114114
AlleleTyper typer = new AlleleTyper();
115115
typer.parseTranslationFromString(translationContent);
116116
List<AlleleTyperResult> parsedResults = typer.buildAlleleTyperResultsFromString(genotypingContent);
@@ -185,7 +185,7 @@ public void testAlleleTyperAnalysisUsingContent() throws Exception {
185185
assertTrue("At least some diplotypes should have been annotated", annotatedDiplotypeCount > 0);
186186
System.out.println("Total annotated diplotypes: " + annotatedDiplotypeCount);
187187

188-
// Verify OPENCGA_PHARMACOGENOMICS_DATA attribute was persisted in catalog for each sample
188+
// Verify OPENCGA_PHARMACOGENOMICS_PATH attribute was persisted in catalog for each sample
189189
assertSamplesHavePharmacogenomicsAttribute();
190190
}
191191

@@ -237,12 +237,12 @@ public void testAlleleTyperAnalysisUsingFile() throws Exception {
237237
assertTrue("At least some diplotypes should have been annotated", annotatedDiplotypeCount > 0);
238238
System.out.println("Total annotated diplotypes: " + annotatedDiplotypeCount);
239239

240-
// Verify OPENCGA_PHARMACOGENOMICS_DATA attribute was persisted in catalog for each sample
240+
// Verify OPENCGA_PHARMACOGENOMICS_PATH attribute was persisted in catalog for each sample
241241
assertSamplesHavePharmacogenomicsAttribute();
242242
}
243243

244244
/**
245-
* Verifies that every non-NTC sample has the OPENCGA_PHARMACOGENOMICS_DATA attribute set
245+
* Verifies that at least one sample has the OPENCGA_PHARMACOGENOMICS_PATH attribute set
246246
* in the catalog after the allele typer tool has run.
247247
*/
248248
private void assertSamplesHavePharmacogenomicsAttribute() throws Exception {
@@ -251,13 +251,13 @@ private void assertSamplesHavePharmacogenomicsAttribute() throws Exception {
251251
Sample sample = catalogManager.getSampleManager()
252252
.get(studyFqn, sampleId, QueryOptions.empty(), token).first();
253253
Map<String, Object> attributes = sample.getAttributes();
254-
if (attributes != null && attributes.containsKey("OPENCGA_PHARMACOGENOMICS_DATA")) {
254+
if (attributes != null && attributes.containsKey("OPENCGA_PHARMACOGENOMICS_PATH")) {
255255
samplesWithAttribute++;
256256
}
257257
}
258-
assertTrue("At least one sample should have OPENCGA_PHARMACOGENOMICS_DATA attribute set"
258+
assertTrue("At least one sample should have OPENCGA_PHARMACOGENOMICS_PATH attribute set"
259259
+ " (checked " + sampleIds.size() + " samples)", samplesWithAttribute > 0);
260-
System.out.println("Samples with OPENCGA_PHARMACOGENOMICS_DATA attribute: "
260+
System.out.println("Samples with OPENCGA_PHARMACOGENOMICS_PATH attribute: "
261261
+ samplesWithAttribute + "/" + sampleIds.size());
262262
}
263263

opencga-analysis/src/test/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/PharmacogenomicsAnnotationAnalysisToolTest.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ public void testAnnotationAnalysisUsingContent() throws Exception {
162162
assertTrue("At least some diplotypes should have been annotated", annotatedDiplotypeCount > 0);
163163
System.out.println("Total annotated diplotypes: " + annotatedDiplotypeCount);
164164

165-
// Verify OPENCGA_PHARMACOGENOMICS_DATA attribute was persisted in catalog for each sample
165+
// Verify OPENCGA_PHARMACOGENOMICS_PATH attribute was persisted in catalog for each sample
166166
assertSamplesHavePharmacogenomicsAttribute(objectMapper);
167167
}
168168

@@ -214,12 +214,12 @@ public void testAnnotationAnalysisUsingFile() throws Exception {
214214
assertTrue("At least some diplotypes should have been annotated", annotatedDiplotypeCount > 0);
215215
System.out.println("Total annotated diplotypes: " + annotatedDiplotypeCount);
216216

217-
// Verify OPENCGA_PHARMACOGENOMICS_DATA attribute was persisted in catalog for each sample
217+
// Verify OPENCGA_PHARMACOGENOMICS_PATH attribute was persisted in catalog for each sample
218218
assertSamplesHavePharmacogenomicsAttribute(objectMapper);
219219
}
220220

221221
/**
222-
* Verifies that every non-NTC sample in alleleTyperContent has the OPENCGA_PHARMACOGENOMICS_DATA
222+
* Verifies that at least one non-NTC sample in alleleTyperContent has the OPENCGA_PHARMACOGENOMICS_PATH
223223
* attribute set in the catalog after the annotation tool has run.
224224
*/
225225
private void assertSamplesHavePharmacogenomicsAttribute(ObjectMapper objectMapper) throws Exception {
@@ -235,13 +235,13 @@ private void assertSamplesHavePharmacogenomicsAttribute(ObjectMapper objectMappe
235235
Sample sample = catalogManager.getSampleManager()
236236
.get(studyFqn, r.getSampleId(), QueryOptions.empty(), token).first();
237237
if (sample.getAttributes() != null
238-
&& sample.getAttributes().containsKey("OPENCGA_PHARMACOGENOMICS_DATA")) {
238+
&& sample.getAttributes().containsKey("OPENCGA_PHARMACOGENOMICS_PATH")) {
239239
samplesWithAttribute++;
240240
}
241241
}
242-
assertTrue("At least one non-NTC sample should have OPENCGA_PHARMACOGENOMICS_DATA attribute set"
242+
assertTrue("At least one non-NTC sample should have OPENCGA_PHARMACOGENOMICS_PATH attribute set"
243243
+ " (checked " + samplesChecked + " samples)", samplesWithAttribute > 0);
244-
System.out.println("Samples with OPENCGA_PHARMACOGENOMICS_DATA attribute: "
244+
System.out.println("Samples with OPENCGA_PHARMACOGENOMICS_PATH attribute: "
245245
+ samplesWithAttribute + "/" + samplesChecked);
246246
}
247247
}

opencga-core/src/main/java/org/opencb/opencga/core/models/clinical/pharmacogenomics/cpic/CpicAlleleInfo.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.opencb.opencga.core.models.clinical.pharmacogenomics.cpic;
22

3+
import java.util.List;
34
import java.util.Map;
45

56
/**
@@ -16,12 +17,14 @@ public class CpicAlleleInfo {
1617
private String strength; // evidence strength: "Strong", "Moderate", etc.
1718
private String findings; // functional evidence summary
1819
private Map<String, Double> frequency; // population -> frequency, e.g. {"European": 0.185}
20+
private List<CpicAlleleLocationValue> location; // variant-level locations from /allele_definition endpoint
1921

2022
public CpicAlleleInfo() {
2123
}
2224

2325
public CpicAlleleInfo(String genesymbol, String name, String functionalstatus, String clinicalfunctionalstatus,
24-
String activityvalue, String strength, String findings, Map<String, Double> frequency) {
26+
String activityvalue, String strength, String findings, Map<String, Double> frequency,
27+
List<CpicAlleleLocationValue> location) {
2528
this.genesymbol = genesymbol;
2629
this.name = name;
2730
this.functionalstatus = functionalstatus;
@@ -30,6 +33,7 @@ public CpicAlleleInfo(String genesymbol, String name, String functionalstatus, S
3033
this.strength = strength;
3134
this.findings = findings;
3235
this.frequency = frequency;
36+
this.location = location;
3337
}
3438

3539
@Override
@@ -43,6 +47,7 @@ public String toString() {
4347
sb.append(", strength='").append(strength).append('\'');
4448
sb.append(", findings='").append(findings).append('\'');
4549
sb.append(", frequency=").append(frequency);
50+
sb.append(", location=").append(location);
4651
sb.append('}');
4752
return sb.toString();
4853
}
@@ -118,4 +123,13 @@ public CpicAlleleInfo setFrequency(Map<String, Double> frequency) {
118123
this.frequency = frequency;
119124
return this;
120125
}
126+
127+
public List<CpicAlleleLocationValue> getLocation() {
128+
return location;
129+
}
130+
131+
public CpicAlleleInfo setLocation(List<CpicAlleleLocationValue> location) {
132+
this.location = location;
133+
return this;
134+
}
121135
}

0 commit comments

Comments
 (0)