66import java .util .Vector ;
77import java .util .concurrent .Callable ;
88
9- import org . apache . commons . lang . StringEscapeUtils ;
9+ import genepi . imputationserver . util . OutputWriter ;
1010
1111import genepi .imputationserver .steps .vcf .VcfFile ;
1212import genepi .imputationserver .steps .vcf .VcfFileUtil ;
1313import genepi .imputationserver .util .RefPanel ;
1414import genepi .imputationserver .util .RefPanelPopulation ;
15- import genepi .imputationserver .util .importer .ImporterFactory ;
16- import genepi .imputationserver .util .report .CloudgeneReport ;
17- import genepi .io .FileUtil ;
1815import picocli .CommandLine .Command ;
1916import picocli .CommandLine .Option ;
2017import picocli .CommandLine .Parameters ;
@@ -59,12 +56,12 @@ public class InputValidationCommand implements Callable<Integer> {
5956 private String contactEmail = "n/a" ;
6057
6158 @ Option (names = "--report" , description = "Cloudgene Report Output" , required = false )
62- private String report = "cloudgene.report.json" ;
63-
64- private CloudgeneReport context = new CloudgeneReport ();
59+ private String report = null ;
6560
6661 private RefPanel panel = null ;
6762
63+ private OutputWriter output = null ;
64+
/**
 * Creates a new validation command.
 *
 * As its only action, the constructor registers the literal name
 * {@code "tabix"} with {@code VcfFileUtil} as the tabix binary to use.
 */
public InputValidationCommand() {
	VcfFileUtil.setTabixBinary("tabix");
}
@@ -128,17 +125,17 @@ protected void setupTabix(String path) {
128125 @ Override
129126 public Integer call () throws Exception {
130127
131- context .setFilename (report );
128+ if (report != null ) {
129+ output = new OutputWriter (report );
130+ } else {
131+ output = new OutputWriter ();
132+ }
132133
133134 if (panel == null ) {
134135 try {
135136 panel = RefPanel .loadFromJson (reference );
136- if (panel == null ) {
137- context .error ("Reference not found." );
138- return -1 ;
139- }
140137 } catch (Exception e ) {
141- context .error ("Unable to parse reference panel: " + StringEscapeUtils . escapeHtml ( e . getMessage ()) );
138+ output .error ("Unable to parse reference panel:" , e );
142139 return -1 ;
143140 }
144141 }
@@ -147,10 +144,6 @@ public Integer call() throws Exception {
147144 return -1 ;
148145 }
149146
150- if (!importVcfFiles ()) {
151- return -1 ;
152- }
153-
154147 if (!checkVcfFiles ()) {
155148 return -1 ;
156149 } else {
@@ -162,8 +155,6 @@ public Integer call() throws Exception {
162155 private boolean checkVcfFiles () throws Exception {
163156
164157 List <VcfFile > validVcfFiles = new Vector <VcfFile >();
165-
166- context .beginTask ("Analyze files " );
167158 List <String > chromosomes = new Vector <String >();
168159
169160 int chunks = 0 ;
@@ -173,21 +164,9 @@ private boolean checkVcfFiles() throws Exception {
173164 boolean phased = true ;
174165
175166 Collections .sort (files );
176- String infos = null ;
177167
178168 for (String filename : files ) {
179169
180- if (infos == null ) {
181- // first files, no infos available
182- context .updateTask (
183- "Analyze file " + StringEscapeUtils .escapeHtml (FileUtil .getFilename (filename )) + "..." ,
184- CloudgeneReport .RUNNING );
185-
186- } else {
187- context .updateTask ("Analyze file " + StringEscapeUtils .escapeHtml (FileUtil .getFilename (filename ))
188- + "...\n \n " + infos , CloudgeneReport .RUNNING );
189- }
190-
191170 try {
192171
193172 VcfFile vcfFile = VcfFileUtil .load (filename , chunksize , true );
@@ -196,157 +175,130 @@ private boolean checkVcfFiles() throws Exception {
196175 vcfFile .setPhased (true );
197176 }
198177
199- if (VcfFileUtil .isValidChromosome (vcfFile .getChromosome ())) {
200-
201- validVcfFiles .add (vcfFile );
202- chromosomes .add (vcfFile .getChromosome ());
203-
204- String chromosomeString = "" ;
205- for (String chr : chromosomes ) {
206- chromosomeString += " " + chr ;
207- }
208-
209- // check if all files have same amount of samples
210- if (noSamples != 0 && noSamples != vcfFile .getNoSamples ()) {
211- context .endTask (
212- "Please double check, if all uploaded VCF files include the same amount of samples ("
213- + vcfFile .getNoSamples () + " vs " + noSamples + ")" ,
214- CloudgeneReport .ERROR );
215- return false ;
216- }
217-
218- noSamples = vcfFile .getNoSamples ();
219- noSnps += vcfFile .getNoSnps ();
220- chunks += vcfFile .getChunks ().size ();
221-
222- phased = phased && vcfFile .isPhased ();
178+ if (!VcfFileUtil .isValidChromosome (vcfFile .getChromosome ())) {
179+ output .error ("No valid chromosomes found!" );
180+ return false ;
181+ }
223182
224- if (vcfFile .isPhasedAutodetect () && !vcfFile .isPhased ()) {
183+ validVcfFiles .add (vcfFile );
184+ chromosomes .add (vcfFile .getChromosome ());
225185
226- context .endTask (
227- "File should be phased, but also includes unphased and/or missing genotypes! Please double-check!" ,
228- CloudgeneReport .ERROR );
229- return false ;
230- }
186+ // check if all files have same amount of samples
187+ if (noSamples != 0 && noSamples != vcfFile .getNoSamples ()) {
188+ output .error ("Please double check, if all uploaded VCF files include the same amount of samples ("
189+ + vcfFile .getNoSamples () + " vs " + noSamples + ")" );
190+ return false ;
191+ }
231192
232- if (noSamples < minSamples && minSamples != 0 ) {
233- context .endTask ("At least " + minSamples + " samples must be uploaded." , CloudgeneReport .ERROR );
234- return false ;
235- }
193+ noSamples = vcfFile .getNoSamples ();
194+ noSnps += vcfFile .getNoSnps ();
195+ chunks += vcfFile .getChunks ().size ();
236196
237- if ( noSamples > maxSamples && maxSamples != 0 ) {
197+ phased = phased && vcfFile . isPhased ();
238198
239- context .endTask ("The maximum number of samples is " + maxSamples + ". Please contact "
240- + contactName + " (<a href=\" " + contactEmail + "\" >" + contactEmail
241- + "</a>) to discuss this large imputation." , CloudgeneReport .ERROR );
242- return false ;
243- }
199+ if (vcfFile .isPhasedAutodetect () && !vcfFile .isPhased ()) {
200+ output .error ("File should be phased, but also includes unphased and/or missing genotypes! Please double-check!" );
201+ return false ;
202+ }
244203
245- if (build .equals ("hg19" ) && vcfFile .hasChrPrefix ()) {
246- context .endTask ("Your upload data contains chromosome '" + vcfFile .getRawChromosome ()
247- + "'. This is not a valid hg19 encoding. Please ensure that your input data is build hg19 and chromosome is encoded as '"
248- + vcfFile .getChromosome () + "'." , CloudgeneReport .ERROR );
249- return false ;
250- }
204+ if (noSamples < minSamples && minSamples != 0 ) {
205+ output .error ("At least " + minSamples + " samples must be uploaded." );
206+ return false ;
207+ }
251208
252- if (build .equals ("hg38" ) && !vcfFile .hasChrPrefix ()) {
253- context .endTask ("Your upload data contains chromosome '" + vcfFile .getRawChromosome ()
254- + "'. This is not a valid hg38 encoding. Please ensure that your input data is build hg38 and chromosome is encoded as 'chr"
255- + vcfFile .getChromosome () + "'." , CloudgeneReport .ERROR );
256- return false ;
257- }
209+ if (noSamples > maxSamples && maxSamples != 0 ) {
258210
259- infos = "Samples: " + noSamples + " \n " + "Chromosomes:" + chromosomeString + "\n " + "SNPs: "
260- + noSnps + "\n " + "Chunks: " + chunks + "\n " + "Datatype: "
261- + ( phased ? "phased" : "unphased" ) + " \n " + "Build: " + ( build == null ? "hg19" : build )
262- + " \n " + "Reference Panel: " + panel . getId () + " (" + panel . getBuild () + ")" + " \n "
263- + "Population: " + population + " \n " + "Phasing: " + phasing + " \n " + "Mode: " + mode ;
211+ output . error ( "The maximum number of samples is " + maxSamples + ". Please contact "
212+ + contactName + " (<a href= \" " + contactEmail + "\" >" + contactEmail
213+ + "</a>) to discuss this large imputation." );
214+ return false ;
215+ }
264216
265- if (r2Filter != null && !r2Filter .isEmpty () && !r2Filter .equals ("0" )) {
266- infos += "\n Rsq filter: " + r2Filter ;
267- }
217+ if (build .equals ("hg19" ) && vcfFile .hasChrPrefix ()) {
218+ output .error ("Your upload data contains chromosome '" + vcfFile .getRawChromosome ()
219+ + "'. This is not a valid hg19 encoding. Please ensure that your input data is build hg19 and chromosome is encoded as '"
220+ + vcfFile .getChromosome () + "'." );
221+ return false ;
222+ }
268223
269- } else {
270- context .endTask ("No valid chromosomes found!" , CloudgeneReport .ERROR );
224+ if (build .equals ("hg38" ) && !vcfFile .hasChrPrefix ()) {
225+ output .error ("Your upload data contains chromosome '" + vcfFile .getRawChromosome ()
226+ + "'. This is not a valid hg38 encoding. Please ensure that your input data is build hg38 and chromosome is encoded as 'chr"
227+ + vcfFile .getChromosome () + "'." );
271228 return false ;
272229 }
273230
231+
274232 } catch (IOException e ) {
275- context .endTask (StringEscapeUtils .escapeHtml (e .getMessage ())
276- + " (see <a href=\" /start.html#!pages/help\" >Help</a>)." , CloudgeneReport .ERROR );
233+ output .error (e );
277234 return false ;
278-
279235 }
280236
281237 }
282238
283- if (validVcfFiles .size () > 0 ) {
284-
285- context .endTask (validVcfFiles .size () + " valid VCF file(s) found.\n \n " + infos , CloudgeneReport .OK );
286-
287- if (!phased && (phasing == null || phasing .isEmpty () || phasing .equals ("no_phasing" ))) {
288- context .error ("Your input data is unphased. Please select an algorithm for phasing." );
289- return false ;
290- }
291-
292- // init counters
293- context .incCounter ("samples" , noSamples );
294- context .incCounter ("genotypes" , noSamples * noSnps );
295- context .incCounter ("chromosomes" , noSamples * chromosomes .size ());
296- context .incCounter ("runs" , 1 );
297- context .incCounter ("refpanel_" + panel .getId (), 1 );
298- context .incCounter ("phasing_" + "eagle" , 1 );
299- return true ;
300-
301- } else {
239+ if (validVcfFiles .isEmpty ()) {
240+ output .error ("The provided files are not VCF files (see <a href=\" /start.html#!pages/help\" >Help</a>)." );
241+ return false ;
242+ }
302243
303- context . endTask ( "The provided files are not VCF files (see <a href= \" /start.html#!pages/help \" >Help</a>)." ,
304- CloudgeneReport . ERROR );
244+ if (! phased && ( phasing == null || phasing . isEmpty () || phasing . equals ( "no_phasing" ))) {
245+ output . error ( "Your input data is unphased. Please select an algorithm for phasing." );
305246 return false ;
306247 }
248+
249+ List <String > summary = new Vector <String >();
250+ summary .add (validVcfFiles .size () + " valid VCF file(s) found." );
251+ summary .add ("" );
252+ summary .add ("Samples: " + noSamples );
253+ summary .add ("Chromosomes: " + String .join (" " , chromosomes ));
254+ summary .add ("SNPs: " + noSnps );
255+ summary .add ("Chunks: " + chunks );
256+ summary .add ("Datatype: " + (phased ? "phased" : "unphased" ));
257+ summary .add ("Build: " + (build == null ? "hg19" : build ));
258+ summary .add ("Reference Panel: " + panel .getId () + " (" + panel .getBuild () + ")" );
259+ summary .add ("Population: " + population );
260+ summary .add ("Phasing: " + phasing );
261+ summary .add ("Mode: " + mode );
262+ if (r2Filter != null && !r2Filter .isEmpty () && !r2Filter .equals ("0" )) {
263+ summary .add ("Rsq filter: " + r2Filter );
264+ }
265+ output .message (summary );
266+
267+ // init counters
268+ output .print ("" );
269+ output .setCounter ("samples" , noSamples );
270+ output .setCounter ("genotypes" , noSamples * noSnps );
271+ output .setCounter ("chromosomes" , noSamples * chromosomes .size ());
272+ output .setCounter ("runs" , 1 );
273+ output .setCounter ("refpanel_" + panel .getId (), 1 );
274+ output .setCounter ("phasing_" + "eagle" , 1 ); //phasing?
275+ return true ;
276+
307277 }
308278
309279 private boolean checkParameters () throws Exception {
310280
311281 try {
312282
313283 if (!panel .supportsPopulation (population )) {
314- StringBuilder report = new StringBuilder ();
315- report .append ("Population '" + population + "' is not supported by reference panel '" + panel .getId ()
316- + "'.\n " );
284+ List <String > messages = new Vector <String >();
285+ messages .add ("Population '" + population + "' is not supported by reference panel '" + panel .getId () + "'." );
317286 if (panel .getPopulations () != null ) {
318- report . append ("Available populations:" );
287+ messages . add ("Available populations:" );
319288 for (RefPanelPopulation pop : panel .getPopulations ()) {
320- report . append ( " \n - " + pop .getId ());
289+ messages . add ( " - " + pop .getId ());
321290 }
322291 }
323- context .error (report . toString () );
292+ output .error (messages );
324293 return false ;
325294 }
326295
327296 } catch (Exception e ) {
328- context .error ("Unable to parse reference panel: " + StringEscapeUtils . escapeHtml ( e . getMessage ()) );
297+ output .error ("Unable to parse reference panel. " , e );
329298 return false ;
330299 }
331300
332301 return true ;
333302 }
334303
335- private boolean importVcfFiles () throws Exception {
336-
337- for (String input : files ) {
338-
339- if (ImporterFactory .needsImport (input )) {
340-
341- context .log ("URL-based uploads are no longer supported. Please use direct file uploads instead." );
342- context .error ("URL-based uploads are no longer supported. Please use direct file uploads instead." );
343- return false ;
344- }
345-
346- }
347-
348- return true ;
349-
350- }
351-
352304}
0 commit comments