Skip to content

Commit e891447

Browse files
author
Sam Hokin
committed
Require that the six main files all be present.
1 parent cdc6883 commit e891447

1 file changed

Lines changed: 20 additions & 12 deletions

File tree

lis-annotation/src/main/java/org/intermine/bio/dataconversion/AnnotationFileConverter.java

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,12 @@ public class AnnotationFileConverter extends DatastoreFileConverter {
8686
// for distinguishing chromosomes from supercontigs
8787
DatastoreUtils dsu;
8888

89-
// count non-README files processed
90-
int processedCount = 0;
89+
// we require six files before storing anything: the README plus the five data files flagged below (the pathway file is optional)
90+
boolean cdsFileExists = false;
91+
boolean mrnaFileExists = false;
92+
boolean proteinFileExists = false;
93+
boolean gfaFileExists = false;
94+
boolean gff3FileExists = false;
9195

9296
// map GFF types to InterMine classes; be sure to include extras in the additions file!
9397
Map<String,String> featureClasses = Map.ofEntries(entry("gene", "Gene"),
@@ -133,27 +137,26 @@ public void process(Reader reader) throws IOException {
133137
} else if (getCurrentFile().getName().endsWith(".gene_models_main.gff3.gz")) {
134138
System.out.println("## Processing "+getCurrentFile().getName());
135139
processGFF3File();
136-
processedCount++;
140+
gff3FileExists = true;
137141
} else if (getCurrentFile().getName().endsWith(".gfa.tsv.gz")) {
138142
System.out.println("## Processing "+getCurrentFile().getName());
139143
processGFAFile();
140-
processedCount++;
144+
gfaFileExists = true;
141145
} else if (getCurrentFile().getName().endsWith(".pathway.tsv.gz")) {
142146
System.out.println("## Processing "+getCurrentFile().getName());
143147
processPathwayFile();
144-
processedCount++;
145148
} else if (getCurrentFile().getName().endsWith(".protein.faa.gz") || getCurrentFile().getName().endsWith(".protein_primary.faa.gz")) {
146149
System.out.println("## Processing "+getCurrentFile().getName());
147150
processProteinFasta();
148-
processedCount++;
151+
proteinFileExists = true;
149152
} else if (getCurrentFile().getName().endsWith(".cds.fna.gz") || getCurrentFile().getName().endsWith(".cds_primary.fna.gz")) {
150153
System.out.println("## Processing "+getCurrentFile().getName());
151154
processCDSFasta();
152-
processedCount++;
155+
cdsFileExists = true;
153156
} else if (getCurrentFile().getName().endsWith(".mrna.fna.gz") || getCurrentFile().getName().endsWith(".mrna_primary.fna.gz")) {
154157
System.out.println("## Processing "+getCurrentFile().getName());
155158
processMRNAFasta();
156-
processedCount++;
159+
mrnaFileExists = true;
157160
} else {
158161
System.out.println("## Skipping file "+getCurrentFile().getName());
159162
}
@@ -165,10 +168,15 @@ public void process(Reader reader) throws IOException {
165168
@Override
166169
public void close() throws ObjectStoreException, RuntimeException {
167170
if (readme==null) {
168-
throw new RuntimeException("README file not read. Aborting.");
169-
}
170-
if (processedCount==0) {
171-
throw new RuntimeException("No non-README files were found. Aborting.");
171+
throw new RuntimeException("README file not found. Aborting.");
172+
}
173+
if (!cdsFileExists) System.err.println("ERROR: cds FASTA file is missing.");
174+
if (!mrnaFileExists) System.err.println("ERROR: mrna FASTA file is missing.");
175+
if (!proteinFileExists) System.err.println("ERROR: protein FASTA file is missing.");
176+
if (!gfaFileExists) System.err.println("ERROR: gfa file is missing.");
177+
if (!gff3FileExists) System.err.println("ERROR: GFF3 file is missing.");
178+
if (!cdsFileExists || !mrnaFileExists || !proteinFileExists || !gfaFileExists || !gff3FileExists) {
179+
throw new RuntimeException("Missing required annotation file(s). Aborting.");
172180
}
173181
// set references and collections for objects loaded from FASTAs based on matching identifiers
174182
for (String primaryIdentifier : cdses.keySet()) {

0 commit comments

Comments
 (0)