Skip to content

Commit 318a7af

Browse files
authored
Merge pull request #2624 from opencb/TASK-7645
TASK-7645 - Integrated Clinical Preprocessing, Variant Assessment, and Reporting
2 parents 2dc7e58 + 2221538 commit 318a7af

284 files changed

Lines changed: 12786 additions & 1766 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ make_env
2020
*.so.*
2121
*.out
2222
*.log
23+
*.error
24+
*.errors
2325
*.zip
2426
# Maven
2527
target/

opencga-analysis/pom.xml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,21 @@
287287
</exclusion>
288288
</exclusions>
289289
</dependency>
290+
<dependency>
291+
<groupId>de.flapdoodle.embed</groupId>
292+
<artifactId>de.flapdoodle.embed.mongo</artifactId>
293+
<scope>test</scope>
294+
</dependency>
295+
<dependency>
296+
<groupId>de.flapdoodle.embed</groupId>
297+
<artifactId>de.flapdoodle.embed.process</artifactId>
298+
<scope>test</scope>
299+
</dependency>
300+
<dependency>
301+
<groupId>de.flapdoodle.reverse</groupId>
302+
<artifactId>de.flapdoodle.reverse</artifactId>
303+
<scope>test</scope>
304+
</dependency>
290305
<dependency>
291306
<groupId>org.yaml</groupId>
292307
<artifactId>snakeyaml</artifactId>

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ public OpenCGAResult<ClinicalVariant> get(Query query, QueryOptions queryOptions
314314
// Only the following fields are updated
315315
// Important to note that the results include the "new" clinical evidences
316316
clinicalVariant.setComments(primaryFinding.getComments())
317-
.setFilters(primaryFinding.getFilters())
317+
.setFilter(primaryFinding.getFilter())
318318
.setDiscussion(primaryFinding.getDiscussion())
319319
.setStatus(primaryFinding.getStatus())
320320
.setAttributes(primaryFinding.getAttributes());

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/InterpretationAnalysis.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.commons.collections4.MapUtils;
2121
import org.opencb.biodata.models.clinical.ClinicalAnalyst;
2222
import org.opencb.biodata.models.clinical.interpretation.ClinicalVariant;
23+
import org.opencb.biodata.models.clinical.interpretation.ClinicalVariantFilter;
2324
import org.opencb.biodata.models.clinical.interpretation.InterpretationMethod;
2425
import org.opencb.biodata.models.clinical.interpretation.Software;
2526
import org.opencb.commons.datastore.core.ObjectMap;
@@ -134,10 +135,10 @@ protected void saveInterpretation(String name, String description, ParamUtils.Sa
134135
+ SECONDARY_FINDINGS_FILENAME));
135136

136137
for (ClinicalVariant primaryFinding : primaryFindings) {
137-
primaryFinding.setFilters(query);
138+
primaryFinding.setFilter(new ClinicalVariantFilter(query, GitRepositoryState.getInstance().getBuildVersion(), ""));
138139
}
139140
for (ClinicalVariant secondaryFinding : secondaryFindings) {
140-
secondaryFinding.setFilters(query);
141+
secondaryFinding.setFilter(new ClinicalVariantFilter(query, GitRepositoryState.getInstance().getBuildVersion(), ""));
141142
}
142143

143144
org.opencb.biodata.models.clinical.interpretation.Interpretation interpretation = new Interpretation()

opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserClinicalVariantCreator.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
import org.opencb.biodata.models.clinical.ClinicalAcmg;
66
import org.opencb.biodata.models.clinical.ClinicalDiscussion;
77
import org.opencb.biodata.models.clinical.ClinicalProperty;
8-
import org.opencb.biodata.models.clinical.interpretation.ClinicalVariant;
9-
import org.opencb.biodata.models.clinical.interpretation.ClinicalVariantEvidence;
10-
import org.opencb.biodata.models.clinical.interpretation.GenomicFeature;
11-
import org.opencb.biodata.models.clinical.interpretation.VariantClassification;
8+
import org.opencb.biodata.models.clinical.interpretation.*;
129
import org.opencb.biodata.models.variant.Variant;
1310
import org.opencb.biodata.models.variant.avro.ConsequenceType;
1411
import org.opencb.biodata.models.variant.avro.GeneCancerAssociation;
@@ -36,8 +33,10 @@ public ClinicalVariant create(Variant variant) {
3633
return null;
3734
}
3835

39-
return new ClinicalVariant(variant.getImpl(), new ArrayList<>(), new ArrayList<>(), new HashMap<>(), new ClinicalDiscussion(),
40-
null, ClinicalVariant.Status.NOT_REVIEWED, new ArrayList<>(), new HashMap<>());
36+
return new ClinicalVariant(variant.getImpl(), new ArrayList<>(), new ArrayList<>(),
37+
new ClinicalVariantFilter(new HashMap<>(), "", ""), new ArrayList<>(), "", new ArrayList<>(), new ClinicalDiscussion(),
38+
new ClinicalVariantConfidence(), new ArrayList<>(), ClinicalVariant.Status.NOT_REVIEWED, new ArrayList<>(),
39+
new ArrayList<>(), new HashMap<>());
4140
}
4241

4342
public void addClinicalVariantEvidences(ClinicalVariant clinicalVariant, List<ExomiserTranscriptAnnotation> exomiserTranscripts,

opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/ToolFactory.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
public class ToolFactory {
4242
private static final Logger logger = LoggerFactory.getLogger(ToolFactory.class);
4343
private static Map<String, Class<? extends OpenCgaTool>> toolsCache;
44+
private static Set<String> toolsCachePackages;
4445
private static Map<String, Set<Class<? extends OpenCgaTool>>> duplicatedTools;
4546
private static List<Class<? extends OpenCgaTool>> toolsList;
4647

@@ -60,7 +61,7 @@ private static void loadTools(Analysis analysisConf) {
6061
}
6162

6263
private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools(List<String> packages) {
63-
if (toolsCache == null) {
64+
if (isCacheOutdated(packages)) {
6465
Reflections reflections = new Reflections(new ConfigurationBuilder()
6566
.setScanners(
6667
new SubTypesScanner(),
@@ -104,9 +105,31 @@ private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools(
104105
ToolFactory.toolsList = Collections.unmodifiableList(toolsList);
105106
ToolFactory.duplicatedTools = Collections.unmodifiableMap(duplicatedTools);
106107
ToolFactory.toolsCache = cache;
108+
// And add packages to the cache
109+
if (ToolFactory.toolsCachePackages == null) {
110+
ToolFactory.toolsCachePackages = new HashSet<>();
111+
}
112+
ToolFactory.toolsCachePackages.addAll(packages);
107113
}
108114
return toolsCache;
109115
}
116+
117+
/**
 * Checks whether the static tools cache has to be (re)built for the given packages.
 *
 * @param packages packages whose tools are being requested
 * @return {@code true} if either {@code toolsCache} or {@code toolsCachePackages} is null, or if
 *         {@code toolsCachePackages} does not contain every requested package; {@code false} otherwise
 */
private static boolean isCacheOutdated(List<String> packages) {
118+
if (toolsCache == null || toolsCachePackages == null) {
119+
// Cache is empty
120+
return true;
121+
}
122+
123+
Set<String> packageSet = new HashSet<>(packages);
124+
if (!toolsCachePackages.containsAll(packageSet)) {
125+
// There is at least one package missing in the cache
126+
return true;
127+
}
128+
129+
// Cache is up to date
130+
return false;
131+
}
132+
110133
static Collection<URL> getUrlsFromPackages(List<String> packages) {
111134
Collection<URL> urls = new LinkedList<>();
112135
for (String pack :packages){

opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ public static SegregationMode parse(String name) {
199199

200200
private final StudyFilterValidator studyFilterValidator;
201201
private final FileFilterValidator fileFilterValidator;
202+
private final FileDataValidator fileDataValidator;
202203
private final SampleFilterValidator sampleFilterValidator;
203204
private final CohortFilterValidator cohortFilterValidator;
204205
// public static final QueryParam SAMPLE_FILTER_GENOTYPE = QueryParam.create("sampleFilterGenotype", "", QueryParam.Type.TEXT_ARRAY);
@@ -208,6 +209,7 @@ public VariantCatalogQueryUtils(CatalogManager catalogManager) {
208209
super(catalogManager);
209210
studyFilterValidator = new StudyFilterValidator();
210211
fileFilterValidator = new FileFilterValidator();
212+
fileDataValidator = new FileDataValidator();
211213
sampleFilterValidator = new SampleFilterValidator();
212214
cohortFilterValidator = new CohortFilterValidator();
213215
}
@@ -279,7 +281,7 @@ public Query parseQuery(Query query, QueryOptions queryOptions, CellBaseUtils ce
279281
sampleFilterValidator.processFilter(query, VariantQueryParam.GENOTYPE, release, token, defaultStudyStr);
280282
fileFilterValidator.processFilter(query, VariantQueryParam.FILE, release, token, defaultStudyStr);
281283
fileFilterValidator.processFilter(query, VariantQueryParam.INCLUDE_FILE, release, token, defaultStudyStr);
282-
fileFilterValidator.processFilter(query, VariantQueryParam.FILE_DATA, release, token, defaultStudyStr);
284+
fileDataValidator.processFilter(query, VariantQueryParam.FILE_DATA, release, token, defaultStudyStr);
283285
cohortFilterValidator.processFilter(query, VariantQueryParam.COHORT, release, token, defaultStudyStr);
284286
cohortFilterValidator.processFilter(query, VariantQueryParam.STATS_ALT, release, token, defaultStudyStr);
285287
cohortFilterValidator.processFilter(query, VariantQueryParam.STATS_REF, release, token, defaultStudyStr);
@@ -950,7 +952,7 @@ private String toIndividualId(String study, String individuaOrSample, String tok
950952
}
951953

952954
private Region processSampleFilter(Query query, String defaultStudyStr, CellBaseUtils cellBaseUtils,
953-
ParsedVariantQuery.VariantQueryXref xrefs, String token) throws CatalogException {
955+
ParsedVariantQuery.VariantQueryXref xrefs, String token) throws CatalogException {
954956
Region segregationChromosome = null;
955957
String sampleFilterValue = query.getString(SAMPLE.key());
956958
if (sampleFilterValue.contains(IS)) {
@@ -1392,10 +1394,18 @@ public static String toStorageFileName(File file) {
13921394
}
13931395

13941396
public static String toStorageFilePath(File file) {
1395-
return file.getUri().getPath();
1397+
switch (file.getType()) {
1398+
case FILE:
1399+
return file.getUri().getPath();
1400+
case VIRTUAL:
1401+
return file.getPath();
1402+
case DIRECTORY:
1403+
default:
1404+
throw new IllegalArgumentException("Unexpected file type: " + file.getType());
1405+
}
13961406
}
13971407

1398-
public abstract class FilterValidator {
1408+
public abstract static class FilterValidator {
13991409
protected final QueryOptions RELEASE_OPTIONS = new QueryOptions(INCLUDE, Arrays.asList(
14001410
FileDBAdaptor.QueryParams.ID.key(),
14011411
FileDBAdaptor.QueryParams.NAME.key(),
@@ -1557,6 +1567,35 @@ protected List<String> validate(String defaultStudyStr, List<String> values, Int
15571567

15581568
}
15591569

1570+
/**
 * Filter validator dedicated to the {@code FILE_DATA} query parameter.
 * Only the keys of the parsed file data are validated; each key is then replaced by its validated
 * value and the rebuilt filter is written back into the query.
 */
public class FileDataValidator extends FileFilterValidator {
1571+
/**
 * Validates the keys of the FILE_DATA filter and rewrites the filter in the query.
 * Does nothing when the param is not a valid param of the query, or when its value does not contain {@code IS}.
 *
 * @throws IllegalStateException if {@code param} is not {@code VariantQueryParam.FILE_DATA}
 * @throws CatalogException declared by the signature; presumably raised by {@code validate} -- confirm
 */
@Override
1572+
protected void processFilter(Query query, VariantQueryParam param, Integer release, String sessionId, String defaultStudy)
1573+
throws CatalogException {
1574+
if (param != VariantQueryParam.FILE_DATA) {
1575+
throw new IllegalStateException("Unexpected param '" + param + "' in FileDataValidator");
1576+
}
1577+
if (VariantQueryUtils.isValidParam(query, param)) {
1578+
String value = query.getString(FILE_DATA.key());
1579+
if (!value.contains(IS)) {
1580+
// File data does not contain key-value pairs.
1581+
// Validation only applies to keys (i.e. file identifiers)
1582+
return;
1583+
}
1584+
ParsedQuery<KeyValues<String, KeyOpValue<String, String>>> fileData = parseFileData(query);
1585+
1586+
// Extract the key of each entry and validate them all in a single call
List<String> valuesToValidate = fileData.mapValues(KeyValues::getKey);
1587+
List<String> validatedValues = validate(defaultStudy, valuesToValidate, release, param, sessionId);
1588+
1589+
// Replace each key by the validated value found at the same index
for (int i = 0; i < fileData.getValues().size(); i++) {
1590+
fileData.getValues().get(i).setKey(validatedValues.get(i));
1591+
}
1592+
1593+
// Store the rebuilt filter back into the query
String newValue = fileData.toQuery();
1594+
query.put(param.key(), newValue);
1595+
}
1596+
}
1597+
}
1598+
15601599
public class SampleFilterValidator extends FilterValidator {
15611600

15621601
@Override

opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/metadata/CatalogStorageMetadataSynchronizer.java

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.opencb.opencga.catalog.utils.ParamUtils;
3838
import org.opencb.opencga.core.api.ParamConstants;
3939
import org.opencb.opencga.core.common.BatchUtils;
40+
import org.opencb.opencga.core.common.UriUtils;
4041
import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
4142
import org.opencb.opencga.core.config.storage.SampleIndexConfiguration;
4243
import org.opencb.opencga.core.models.cohort.Cohort;
@@ -735,8 +736,8 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
735736
for (Integer virtualFile : virtualFiles) {
736737
File file = catalogManager.getFileManager()
737738
.get(study.getName(), filePathMap.get(virtualFile), INDEXED_FILES_QUERY_OPTIONS, token).first();
738-
boolean annotationIndexReady = annotationReadyFilesFromStorage.contains(file.getUri());
739-
boolean secondaryIndexReady = secondaryIndexReadyFilesFromStorage.contains(file.getUri());
739+
boolean annotationIndexReady = annotationReadyFilesFromStorage.contains(getFileUri(file));
740+
boolean secondaryIndexReady = secondaryIndexReadyFilesFromStorage.contains(getFileUri(file));
740741
if (synchronizeIndexedFile(study, file, fileSamplesMap, annotationIndexReady, secondaryIndexReady, token, true)) {
741742
modified = true;
742743
}
@@ -760,16 +761,16 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
760761
.iterator(study.getName(), query, INDEXED_FILES_QUERY_OPTIONS, token)) {
761762
while (iterator.hasNext()) {
762763
File file = iterator.next();
763-
boolean annotationIndexReady = annotationReadyFilesFromStorage.contains(file.getUri());
764-
boolean secondaryIndexReady = secondaryIndexReadyFilesFromStorage.contains(file.getUri());
764+
boolean annotationIndexReady = annotationReadyFilesFromStorage.contains(getFileUri(file));
765+
boolean secondaryIndexReady = secondaryIndexReadyFilesFromStorage.contains(getFileUri(file));
765766
if (synchronizeIndexedFile(study, file, fileSamplesMap, annotationIndexReady, secondaryIndexReady, token, true)) {
766767
modifiedFiles++;
767768
modified = true;
768769
}
769770

770771
// Remove processed file from list of uris.
771772
// Avoid double processing in case of exception
772-
batch.remove(file.getUri().toString());
773+
batch.remove(getFileUri(file).toString());
773774
numFiles++;
774775
processedFilesInBatch++;
775776
progressLogger.increment(1, modifiedFiles + " updated files");
@@ -818,7 +819,7 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
818819
.iterator(study.getName(), indexedFilesQuery, INDEXED_FILES_QUERY_OPTIONS, token)) {
819820
while (iterator.hasNext()) {
820821
File file = iterator.next();
821-
Integer fileId = fileURIMap.get(file.getUri());
822+
Integer fileId = fileURIMap.get(getFileUri(file));
822823
if (fileId == null || !indexedFilesFromStorage.contains(fileId)) {
823824
// Check for annotation index and secondary annotation index
824825
boolean annotationIndexReady;
@@ -827,8 +828,8 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
827828
annotationIndexReady = false;
828829
secondaryIndexReady = false;
829830
} else {
830-
annotationIndexReady = annotationReadyFilesFromStorage.contains(file.getUri());
831-
secondaryIndexReady = secondaryIndexReadyFilesFromStorage.contains(file.getUri());
831+
annotationIndexReady = annotationReadyFilesFromStorage.contains(getFileUri(file));
832+
secondaryIndexReady = secondaryIndexReadyFilesFromStorage.contains(getFileUri(file));
832833
}
833834
synchronizeIndexedFile(study, file, fileSamplesMap, annotationIndexReady, secondaryIndexReady, token, false);
834835
modified = true;
@@ -854,7 +855,7 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
854855
.iterator(study.getName(), runningIndexFilesQuery, INDEXED_FILES_QUERY_OPTIONS, token)) {
855856
while (iterator.hasNext()) {
856857
File file = iterator.next();
857-
Integer fileId = fileURIMap.get(file.getUri());
858+
Integer fileId = fileURIMap.get(getFileUri(file));
858859
FileMetadata fileMetadata;
859860
if (fileId == null) {
860861
fileMetadata = null;
@@ -893,10 +894,10 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
893894
modified = true;
894895
} else {
895896
// Running job. Might be transforming, or have just started. Do not modify the status!
896-
loadingFilesRegardingCatalog.add(file.getUri());
897+
loadingFilesRegardingCatalog.add(getFileUri(file));
897898
}
898899
} else {
899-
loadingFilesRegardingCatalog.add(file.getUri());
900+
loadingFilesRegardingCatalog.add(getFileUri(file));
900901
}
901902
}
902903
}
@@ -941,6 +942,14 @@ protected boolean synchronizeFiles(StudyMetadata study, List<File> files, String
941942
return modified;
942943
}
943944

945+
/**
 * Returns the URI to use for a catalog file.
 * For files of type VIRTUAL the URI is built from the file path with {@code UriUtils.toUri};
 * for any other type the file's own URI is returned.
 *
 * @param file catalog file
 * @return URI derived from the path for VIRTUAL files, {@code file.getUri()} otherwise
 */
private static URI getFileUri(File file) {
946+
if (file.getType() == File.Type.VIRTUAL) {
947+
return UriUtils.toUri(file.getPath());
948+
} else {
949+
return file.getUri();
950+
}
951+
}
952+
944953
private boolean synchronizeIndexedFile(StudyMetadata study, File file, Map<URI, Set<String>> fileSamplesMap,
945954
boolean annotationIndexReady, boolean secondaryIndexReady, String token, boolean mainIndexReady)
946955
throws CatalogException {
@@ -996,7 +1005,7 @@ private boolean synchronizeIndexedFile(StudyMetadata study, File file, Map<URI,
9961005
modified = true;
9971006
}
9981007

999-
Set<String> storageSamples = fileSamplesMap.get(file.getUri());
1008+
Set<String> storageSamples = fileSamplesMap.get(getFileUri(file));
10001009
Set<String> catalogSamples = new HashSet<>(file.getSampleIds());
10011010
if (storageSamples == null) {
10021011
storageSamples = new HashSet<>();

0 commit comments

Comments
 (0)