@@ -125,6 +125,7 @@ def parse_arguments():
125125
126126 # Add extra arguments.
127127 setattr (args , 'date' , date )
128+ setattr (args , 'min_mem' , 150 )
128129 shared .expand_arguments (args )
129130
130131 return (args )
@@ -181,7 +182,7 @@ def download_prot_accession2taxid_file(
181182 message = 'Download complete!'
182183 shared .give_user_feedback (message , log_file , quiet )
183184
184- return prot_accession2taxid_file
185+ return
185186
186187
187188def download_nr (nr_file , log_file , quiet ):
@@ -210,7 +211,8 @@ def make_diamond_database(
210211 diamond_database_prefix ,
211212 nproc ,
212213 log_file ,
213- quiet ):
214+ quiet ,
215+ verbose ):
214216 message = (
215217 'Constructing DIAMOND database {0}.dmnd from {1} using {2} cores. '
216218 'Please be patient...' .format (
@@ -221,8 +223,11 @@ def make_diamond_database(
221223 path_to_diamond , 'makedb' ,
222224 '--in' , nr_file ,
223225 '-d' , diamond_database_prefix ,
224- '-p' , str (nproc ),
225- '--quiet' ]
226+ '-p' , str (nproc )]
227+
228+ if not verbose :
229+ command += ['--quiet' ]
230+
226231 try :
227232 subprocess .check_call (command )
228233 except :
@@ -255,15 +260,14 @@ def import_prot_accession2taxid(prot_accession2taxid_file, log_file, quiet):
255260
256261
257262def make_fastaid2LCAtaxid_file (
258- taxonomy_folder ,
263+ nodes_dmp ,
259264 fastaid2LCAtaxid_file ,
260265 nr_file ,
261266 prot_accession2taxid_file ,
262267 log_file ,
263268 quiet ):
264269 prot_accession2taxid = import_prot_accession2taxid (
265270 prot_accession2taxid_file , log_file , quiet )
266- nodes_dmp = '{0}/nodes.dmp' .format (taxonomy_folder )
267271 (taxid2parent , taxid2rank ) = tax .import_nodes (nodes_dmp , log_file , quiet )
268272
269273 message = ('Finding LCA of all protein accession numbers in fasta headers '
@@ -316,10 +320,8 @@ def make_fastaid2LCAtaxid_file(
316320 # numbers, it is counted as a correction as well.
317321 corrected += 1
318322
319- message = (
320- 'Done! File {0} is created. '
321- '{1} of {2} headers ({3:.1f}%) corrected. Please wait patiently '
322- 'for Python to collect garbage.' .format (
323+ message = ('Done! File {0} is created. '
324+ '{1} of {2} headers ({3:.1f}%) corrected.' .format (
323325 fastaid2LCAtaxid_file ,
324326 corrected ,
325327 total ,
@@ -329,8 +331,7 @@ def make_fastaid2LCAtaxid_file(
329331 return
330332
331333
332- def find_offspring (taxonomy_folder , fastaid2LCAtaxid_file , log_file , quiet ):
333- nodes_dmp = '{0}/nodes.dmp' .format (taxonomy_folder )
334+ def find_offspring (nodes_dmp , fastaid2LCAtaxid_file , log_file , quiet ):
334335 (taxid2parent , taxid2rank ) = tax .import_nodes (nodes_dmp , log_file , quiet )
335336
336337 message = 'Searching nr database for taxids with multiple offspring.'
@@ -376,41 +377,79 @@ def write_taxids_with_multiple_offspring_file(
376377def prepare (step_list , args ):
377378 shared .print_variables (args , step_list )
378379
380+ if not os .path .isdir (args .taxonomy_folder ):
381+ os .mkdir (args .taxonomy_folder )
382+ message = 'Taxonomy folder {0} is created.' .format (
383+ args .taxonomy_folder )
384+ shared .give_user_feedback (message , args .log_file , args .quiet )
385+
386+ if not os .path .isdir (args .database_folder ):
387+ os .mkdir (args .database_folder )
388+ message = 'Database folder {0} is created.' .format (
389+ args .database_folder )
390+ shared .give_user_feedback (message , args .log_file , args .quiet )
391+
379392 if 'download_taxonomy_files' in step_list :
380393 download_taxonomy_files (
381394 args .taxonomy_folder , args .date , args .log_file , args .quiet )
382395
396+ setattr (args , 'nodes_dmp' , '{0}nodes.dmp' .format (args .taxonomy_folder ))
397+
383398 if 'download_prot_accession2taxid_file' in step_list :
399+ setattr (args ,
400+ 'prot_accession2taxid_file' ,
401+ '{0}{1}.prot.accession2taxid.gz' .format (
402+ args .taxonomy_folder , args .date ))
403+
384404 download_prot_accession2taxid_file (
385405 args .prot_accession2taxid_file ,
386406 args .date ,
387407 args .log_file ,
388408 args .quiet )
389-
409+
390410 if 'download_nr' in step_list :
411+ setattr (args ,
412+ 'nr_file' ,
413+ '{0}{1}.nr.gz' .format (args .database_folder , args .date ))
414+
391415 download_nr (args .nr_file , args .log_file , args .quiet )
392416
393417 if 'make_diamond_database' in step_list :
418+ setattr (args ,
419+ 'diamond_database_prefix' ,
420+ '{0}{1}.nr' .format (args .database_folder , args .date ))
421+
394422 make_diamond_database (
395423 args .path_to_diamond ,
396424 args .nr_file ,
397425 args .diamond_database_prefix ,
398426 args .nproc ,
399427 args .log_file ,
400- args .quiet )
428+ args .quiet ,
429+ args .verbose )
401430
402431 if 'make_fastaid2LCAtaxid_file' in step_list :
432+ setattr (args ,
433+ 'fastaid2LCAtaxid_file' ,
434+ '{0}{1}.nr.fastaid2LCAtaxid' .format (
435+ args .database_folder , args .date ))
436+
403437 make_fastaid2LCAtaxid_file (
404- args .taxonomy_folder ,
438+ args .nodes_dmp ,
405439 args .fastaid2LCAtaxid_file ,
406440 args .nr_file ,
407441 args .prot_accession2taxid_file ,
408442 args .log_file ,
409443 args .quiet )
410444
411445 if 'make_taxids_with_multiple_offspring_file' in step_list :
446+ setattr (args ,
447+ 'taxids_with_multiple_offspring_file' ,
448+ '{0}{1}.nr.taxids_with_multiple_offspring' .format (
449+ args .database_folder , args .date ))
450+
412451 taxid2offspring = find_offspring (
413- args .taxonomy_folder ,
452+ args .nodes_dmp ,
414453 args .fastaid2LCAtaxid_file ,
415454 args .log_file ,
416455 args .quiet )
@@ -506,52 +545,20 @@ def run_fresh(args):
506545 shared .give_user_feedback (message , args .log_file , args .quiet )
507546
508547 # Check memory.
509- min_mem = 150
510- (total_memory , error ) = check .check_memory (min_mem )
548+ (total_memory , error ) = check .check_memory (args .min_mem )
511549 if error :
512550 message = (
513551 'at least {0}GB of memory is needed for a fresh database '
514- 'construction. {1}GB is found on your system. You can either '
515- 'try to find a machine with more memory, or download '
516- 'preconstructed database files from '
552+ 'construction. {1}GB is found on your system. You can try to '
553+ 'find a machine with more memory, or download preconstructed '
554+ 'database files from '
517555 'tbb.bio.uu.nl/bastiaan/CAT_prepare/.' .format (
518- min_mem , total_memory ))
556+ args . min_mem , total_memory ))
519557 shared .give_user_feedback (message , args .log_file , args .quiet ,
520558 error = True )
521559
522560 sys .exit (1 )
523561
524- if not os .path .isdir (args .taxonomy_folder ):
525- os .mkdir (args .taxonomy_folder )
526-
527- message = '{0} is created.' .format (args .taxonomy_folder )
528- shared .give_user_feedback (message , args .log_file , args .quiet )
529-
530- if not os .path .isdir (args .database_folder ):
531- os .mkdir (args .database_folder )
532-
533- message = '{0} is created.' .format (args .database_folder )
534- shared .give_user_feedback (message , args .log_file , args .quiet )
535-
536- setattr (args ,
537- 'prot_accession2taxid_file' ,
538- '{0}{1}.prot.accession2taxid.gz' .format (
539- args .taxonomy_folder , args .date ))
540- setattr (args ,
541- 'nr_file' ,
542- '{0}{1}.nr.gz' .format (args .database_folder , args .date ))
543- setattr (args ,
544- 'diamond_database_prefix' ,
545- '{0}{1}.nr' .format (args .database_folder , args .date ))
546- setattr (args ,
547- 'fastaid2LCAtaxid_file' ,
548- '{0}{1}.nr.fastaid2LCAtaxid' .format (
549- args .database_folder , args .date ))
550- setattr (args ,
551- 'taxids_with_multiple_offspring_file' ,
552- '{0}{1}.nr.taxids_with_multiple_offspring' .format (
553- args .database_folder , args .date ))
554-
555562 step_list = ['download_taxonomy_files' ,
556563 'download_prot_accession2taxid_file' ,
557564 'download_nr' ,
@@ -671,7 +678,8 @@ def run_existing(args):
671678 'not all of the downstream files that depend on it are '
672679 'present. In order to prevent strange bugs from arising, '
673680 'remove all files from the database folder and try again.' )
674- shared .give_user_feedback (message , args .log_file , args .quiet , error = True )
681+ shared .give_user_feedback (message , args .log_file , args .quiet ,
682+ error = True )
675683
676684 sys .exit (1 )
677685
@@ -700,9 +708,6 @@ def run_existing(args):
700708 message = 'Nr file will be downloaded to database folder.'
701709 shared .give_user_feedback (message , args .log_file , args .quiet )
702710
703- setattr (args ,
704- 'nr_file' ,
705- '{0}{1}.nr.gz' .format (args .database_folder , args .date ))
706711 step_list .append ('download_nr' )
707712 else :
708713 pass
@@ -711,41 +716,25 @@ def run_existing(args):
711716 shared .give_user_feedback (message , args .log_file , args .quiet )
712717
713718 if not args .diamond_database :
714- message = ('DIAMOND database will be constructed from the nr file.'
715- '' .format (args .nr_file ))
719+ message = 'DIAMOND database will be constructed from the nr file.'
716720 shared .give_user_feedback (message , args .log_file , args .quiet )
717721
718- setattr (args ,
719- 'diamond_database_prefix' ,
720- '{0}{1}.nr' .format (args .database_folder , args .date ))
721722 step_list .append ('make_diamond_database' )
722723 else :
723724 message = 'DIAMOND database found: {0}.' .format (args .diamond_database )
724725 shared .give_user_feedback (message , args .log_file , args .quiet )
725726
726- setattr (args ,
727- 'diamond_database_prefix' ,
728- args .diamond_database .rsplit ('.dmnd' , 1 )[0 ])
729-
730727 if not args .fastaid2LCAtaxid_file :
731728 if not args .prot_accession2taxid_file :
732729 message = ('Prot.accession2taxid file will be downloaded to '
733730 'taxonomy folder.' )
734731 shared .give_user_feedback (message , args .log_file , args .quiet )
735732
736- setattr (args ,
737- 'prot_accession2taxid_file' ,
738- '{0}{1}.prot.accession2taxid.gz' .format (
739- args .taxonomy_folder , args .date ))
740733 step_list .append ('download_prot_accession2taxid_file' )
741734
742735 message = 'File fastaid2LCAtaxid will be created.'
743736 shared .give_user_feedback (message , args .log_file , args .quiet )
744737
745- setattr (args ,
746- 'fastaid2LCAtaxid_file' ,
747- '{0}{1}.nr.fastaid2LCAtaxid' .format (
748- args .database_folder , args .date ))
749738 step_list .append ('make_fastaid2LCAtaxid_file' )
750739 else :
751740 message = ('Fastaid2LCAtaxid found: {0}.' .format (
@@ -760,10 +749,6 @@ def run_existing(args):
760749 message = 'File taxids_with_multiple_offspring will be created.'
761750 shared .give_user_feedback (message , args .log_file , args .quiet )
762751
763- setattr (args ,
764- 'taxids_with_multiple_offspring_file' ,
765- '{0}{1}.nr.taxids_with_multiple_offspring' .format (
766- args .database_folder , args .date ))
767752 step_list .append ('make_taxids_with_multiple_offspring_file' )
768753 else :
769754 message = 'Taxids_with_multiple_offspring found: {0}' .format (
@@ -787,20 +772,18 @@ def run_existing(args):
787772 'to existing folders?' )
788773 shared .give_user_feedback (message , args .log_file , args .quiet ,
789774 show_time = False )
790-
791- if ('make_fastaid2LCAtaxid_file' in step_list or
792- 'make_taxids_with_multiple_offspring_file' in step_list ):
775+
776+ if 'make_fastaid2LCAtaxid_file' in step_list :
793777 # Check memory.
794- min_mem = 100
795- (total_memory , error ) = check .check_memory (min_mem )
778+ (total_memory , error ) = check .check_memory (args .min_mem )
796779 if error :
797780 message = (
798781 'at least {0}GB of memory is needed for the database '
799- 'construction. {1}GB is found on your system. You can '
800- 'either try to find a machine with more memory, or '
801- 'download preconstructed database files '
782+ 'construction. {1}GB is found on your system. You can try '
783+ 'to find a machine with more memory, or download '
784+ 'preconstructed database files '
802785 'from tbb.bio.uu.nl/bastiaan/CAT_prepare/.' .format (
803- min_mem , total_memory ))
786+ args . min_mem , total_memory ))
804787 shared .give_user_feedback (message , args .log_file , args .quiet ,
805788 error = True )
806789
@@ -818,18 +801,6 @@ def run_existing(args):
818801 shared .give_user_feedback (message , args .log_file , args .quiet ,
819802 show_time = False )
820803
821- if not os .path .isdir (args .taxonomy_folder ):
822- os .mkdir (args .taxonomy_folder )
823- message = 'Taxonomy folder {0} is created.' .format (
824- args .taxonomy_folder )
825- shared .give_user_feedback (message , args .log_file , args .quiet )
826-
827- if not os .path .isdir (args .database_folder ):
828- os .mkdir (args .database_folder )
829- message = 'Database folder {0} is created.' .format (
830- args .database_folder )
831- shared .give_user_feedback (message , args .log_file , args .quiet )
832-
833804 prepare (step_list , args )
834805
835806 return
0 commit comments