Skip to content

Commit 6fb1a39

Browse files
committed
analysis: update Individual catalog with PGx summary after executor completes #TASK-8076
Add second step to OpenArrayPharmacogenomicsAnalysis that reads summary JSON files from outdir and updates the Individual entity in catalog with pharmacogenomics analysis results. Handles duplicate prevention by replacing existing entries for the same sampleId+source.
1 parent b7f0390 commit 6fb1a39

1 file changed

Lines changed: 127 additions & 1 deletion

File tree

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/pharmacogenomics/OpenArrayPharmacogenomicsAnalysis.java

Lines changed: 127 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,34 @@
11
package org.opencb.opencga.analysis.clinical.pharmacogenomics;
22

3+
import com.fasterxml.jackson.databind.ObjectMapper;
34
import org.apache.commons.lang3.StringUtils;
5+
import org.opencb.commons.datastore.core.QueryOptions;
46
import org.opencb.opencga.analysis.AnalysisUtils;
57
import org.opencb.opencga.analysis.tools.OpenCgaTool;
8+
import org.opencb.opencga.catalog.exceptions.CatalogException;
69
import org.opencb.opencga.catalog.managers.FileManager;
10+
import org.opencb.opencga.core.common.JacksonUtils;
711
import org.opencb.opencga.core.exceptions.ToolException;
812
import org.opencb.opencga.core.models.clinical.pharmacogenomics.OpenArrayPharmacogenomicsAnalysisParams;
13+
import org.opencb.opencga.core.models.clinical.pharmacogenomics.PharmacogenomicsAnalysis;
14+
import org.opencb.opencga.core.models.clinical.pharmacogenomics.PharmacogenomicsAnalysisSummary;
915
import org.opencb.opencga.core.models.common.Enums;
16+
import org.opencb.opencga.core.models.individual.Individual;
17+
import org.opencb.opencga.core.models.individual.IndividualAnalysis;
18+
import org.opencb.opencga.core.models.individual.IndividualUpdateParams;
19+
import org.opencb.opencga.core.models.sample.Sample;
20+
import org.opencb.opencga.core.response.OpenCGAResult;
1021
import org.opencb.opencga.core.tools.annotations.Tool;
1122
import org.opencb.opencga.core.tools.annotations.ToolParams;
1223

24+
import java.io.IOException;
25+
import java.nio.file.DirectoryStream;
26+
import java.nio.file.Files;
27+
import java.nio.file.Path;
28+
import java.util.ArrayList;
29+
import java.util.Arrays;
30+
import java.util.List;
31+
1332
/**
1433
* OpenArray pharmacogenomics analysis tool.
1534
* Executes the Python pharmacogenomics CLI via Docker to infer star alleles
@@ -23,6 +42,9 @@ public class OpenArrayPharmacogenomicsAnalysis extends OpenCgaTool {
2342
public static final String DESCRIPTION = "OpenArray pharmacogenomics analysis: infer star alleles from "
2443
+ "ThermoFisher OpenArray genotyping data and optionally annotate with CPIC";
2544

45+
/** Name of the first tool step: listed by {@code getSteps()} and passed to {@code step(...)} around the executor call in {@code run()}. */
private static final String STEP_EXECUTE = "execute";
46+
/** Name of the second tool step: listed by {@code getSteps()} and passed to {@code step(...)} together with {@code updateCatalog} in {@code run()}. */
private static final String STEP_UPDATE_CATALOG = "update-catalog";
47+
2648
@ToolParams
2749
protected final OpenArrayPharmacogenomicsAnalysisParams analysisParams = new OpenArrayPharmacogenomicsAnalysisParams();
2850

@@ -75,9 +97,15 @@ protected void check() throws Exception {
7597
setUpStorageEngineExecutor(study);
7698
}
7799

100+
@Override
101+
protected List<String> getSteps() {
102+
return Arrays.asList(STEP_EXECUTE, STEP_UPDATE_CATALOG);
103+
}
104+
78105
@Override
79106
protected void run() throws ToolException {
80-
step(ID, () -> {
107+
// Step 1: Execute the Python pharmacogenomics tool via Docker
108+
step(STEP_EXECUTE, () -> {
81109
OpenArrayPharmacogenomicsAnalysisExecutor executor =
82110
getToolExecutor(OpenArrayPharmacogenomicsAnalysisExecutor.class);
83111

@@ -89,5 +117,103 @@ protected void run() throws ToolException {
89117
.setAnnotate(Boolean.TRUE.equals(analysisParams.getAnnotate()))
90118
.execute();
91119
});
120+
121+
// Step 2: Read summary JSON files and update Individual entities in catalog
122+
step(STEP_UPDATE_CATALOG, this::updateCatalog);
123+
}
124+
125+
/**
126+
* Read *_summary.json files from the output directory and update the corresponding
127+
* Individual entities in the catalog with the pharmacogenomics analysis results.
128+
*/
129+
private void updateCatalog() throws IOException, CatalogException {
130+
ObjectMapper objectMapper = JacksonUtils.getDefaultObjectMapper();
131+
Path outDir = getOutDir();
132+
String outDirPath = outDir.toAbsolutePath().toString();
133+
134+
// Find all *_summary.json files in the output directory
135+
try (DirectoryStream<Path> stream = Files.newDirectoryStream(outDir, "*_summary.json")) {
136+
for (Path summaryFile : stream) {
137+
String filename = summaryFile.getFileName().toString();
138+
// Extract sampleId: <sampleId>_summary.json
139+
String sampleId = filename.replace("_summary.json", "");
140+
141+
// Skip control samples
142+
if ("NTC".equalsIgnoreCase(sampleId) || sampleId.startsWith("CALT")
143+
|| sampleId.startsWith("CHET") || sampleId.startsWith("CREF")) {
144+
logger.debug("Skipping control sample: {}", sampleId);
145+
continue;
146+
}
147+
148+
// Read summary JSON
149+
PharmacogenomicsAnalysisSummary summary = objectMapper.readValue(
150+
summaryFile.toFile(), PharmacogenomicsAnalysisSummary.class);
151+
152+
// Build the full results file path (catalog-relative)
153+
String resultsFilePath = outDirPath + "/" + sampleId + ".json";
154+
155+
// Build PharmacogenomicsAnalysis object
156+
PharmacogenomicsAnalysis pgxAnalysis = new PharmacogenomicsAnalysis(
157+
sampleId, "openarray", resultsFilePath, summary);
158+
159+
// Find the Individual that has this sample
160+
updateIndividualBySample(sampleId, pgxAnalysis);
161+
}
162+
}
163+
}
164+
165+
/**
166+
* Find the Individual linked to the given sampleId and update its analysis.pharmacogenomics field.
167+
*/
168+
private void updateIndividualBySample(String sampleId, PharmacogenomicsAnalysis pgxAnalysis) throws CatalogException {
169+
// Search for the sample to find the linked individual
170+
OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager()
171+
.get(study, sampleId, QueryOptions.empty(), token);
172+
173+
if (sampleResult.getNumResults() == 0) {
174+
logger.warn("Sample '{}' not found in study '{}'. Skipping catalog update.", sampleId, study);
175+
return;
176+
}
177+
178+
String individualId = sampleResult.first().getIndividualId();
179+
if (StringUtils.isEmpty(individualId)) {
180+
logger.warn("Sample '{}' is not linked to an individual. Skipping catalog update.", sampleId);
181+
return;
182+
}
183+
184+
// Get current individual to preserve existing pharmacogenomics entries
185+
OpenCGAResult<Individual> individualResult = catalogManager.getIndividualManager()
186+
.get(study, individualId, QueryOptions.empty(), token);
187+
188+
if (individualResult.getNumResults() == 0) {
189+
logger.warn("Individual '{}' not found in study '{}'. Skipping catalog update.", individualId, study);
190+
return;
191+
}
192+
193+
Individual individual = individualResult.first();
194+
IndividualAnalysis analysis = individual.getAnalysis();
195+
if (analysis == null) {
196+
analysis = new IndividualAnalysis();
197+
}
198+
199+
List<PharmacogenomicsAnalysis> pgxList = analysis.getPharmacogenomics();
200+
if (pgxList == null) {
201+
pgxList = new ArrayList<>();
202+
}
203+
204+
// Remove any existing entry for the same sampleId+source to avoid duplicates
205+
pgxList.removeIf(existing -> sampleId.equals(existing.getSampleId())
206+
&& "openarray".equals(existing.getSource()));
207+
208+
// Add the new entry
209+
pgxList.add(pgxAnalysis);
210+
analysis.setPharmacogenomics(pgxList);
211+
212+
// Update the individual
213+
IndividualUpdateParams updateParams = new IndividualUpdateParams();
214+
updateParams.setAnalysis(analysis);
215+
216+
catalogManager.getIndividualManager().update(study, individualId, updateParams, QueryOptions.empty(), token);
217+
logger.info("Updated individual '{}' with pharmacogenomics results for sample '{}'", individualId, sampleId);
92218
}
93219
}

0 commit comments

Comments
 (0)