Skip to content

Commit cd921e1

Browse files
Code streamlining.
1 parent b5d5b87 commit cd921e1

File tree

3 files changed

+75
-101
lines changed

3 files changed

+75
-101
lines changed

CAT_pack/about.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#!/usr/bin/env python3
22

33
__author__ = 'F. A. Bastiaan von Meijenfeldt'
4-
__version__ = '5.1.1'
5-
__date__ = '23 June, 2020'
4+
__version__ = '5.1.2'
5+
__date__ = '15 July, 2020'

CAT_pack/prepare.py

Lines changed: 70 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,7 @@ def parse_arguments():
125125

126126
# Add extra arguments.
127127
setattr(args, 'date', date)
128+
setattr(args, 'min_mem', 150)
128129
shared.expand_arguments(args)
129130

130131
return (args)
@@ -181,7 +182,7 @@ def download_prot_accession2taxid_file(
181182
message = 'Download complete!'
182183
shared.give_user_feedback(message, log_file, quiet)
183184

184-
return prot_accession2taxid_file
185+
return
185186

186187

187188
def download_nr(nr_file, log_file, quiet):
@@ -210,7 +211,8 @@ def make_diamond_database(
210211
diamond_database_prefix,
211212
nproc,
212213
log_file,
213-
quiet):
214+
quiet,
215+
verbose):
214216
message = (
215217
'Constructing DIAMOND database {0}.dmnd from {1} using {2} cores. '
216218
'Please be patient...'.format(
@@ -221,8 +223,11 @@ def make_diamond_database(
221223
path_to_diamond, 'makedb',
222224
'--in', nr_file,
223225
'-d', diamond_database_prefix,
224-
'-p', str(nproc),
225-
'--quiet']
226+
'-p', str(nproc)]
227+
228+
if not verbose:
229+
command += ['--quiet']
230+
226231
try:
227232
subprocess.check_call(command)
228233
except:
@@ -255,15 +260,14 @@ def import_prot_accession2taxid(prot_accession2taxid_file, log_file, quiet):
255260

256261

257262
def make_fastaid2LCAtaxid_file(
258-
taxonomy_folder,
263+
nodes_dmp,
259264
fastaid2LCAtaxid_file,
260265
nr_file,
261266
prot_accession2taxid_file,
262267
log_file,
263268
quiet):
264269
prot_accession2taxid = import_prot_accession2taxid(
265270
prot_accession2taxid_file, log_file, quiet)
266-
nodes_dmp = '{0}/nodes.dmp'.format(taxonomy_folder)
267271
(taxid2parent, taxid2rank) = tax.import_nodes(nodes_dmp, log_file, quiet)
268272

269273
message = ('Finding LCA of all protein accession numbers in fasta headers '
@@ -316,10 +320,8 @@ def make_fastaid2LCAtaxid_file(
316320
# numbers, it is counted as a correction as well.
317321
corrected += 1
318322

319-
message = (
320-
'Done! File {0} is created. '
321-
'{1} of {2} headers ({3:.1f}%) corrected. Please wait patiently '
322-
'for Python to collect garbage.'.format(
323+
message = ('Done! File {0} is created. '
324+
'{1} of {2} headers ({3:.1f}%) corrected.'.format(
323325
fastaid2LCAtaxid_file,
324326
corrected,
325327
total,
@@ -329,8 +331,7 @@ def make_fastaid2LCAtaxid_file(
329331
return
330332

331333

332-
def find_offspring(taxonomy_folder, fastaid2LCAtaxid_file, log_file, quiet):
333-
nodes_dmp = '{0}/nodes.dmp'.format(taxonomy_folder)
334+
def find_offspring(nodes_dmp, fastaid2LCAtaxid_file, log_file, quiet):
334335
(taxid2parent, taxid2rank) = tax.import_nodes(nodes_dmp, log_file, quiet)
335336

336337
message = 'Searching nr database for taxids with multiple offspring.'
@@ -376,41 +377,79 @@ def write_taxids_with_multiple_offspring_file(
376377
def prepare(step_list, args):
377378
shared.print_variables(args, step_list)
378379

380+
if not os.path.isdir(args.taxonomy_folder):
381+
os.mkdir(args.taxonomy_folder)
382+
message = 'Taxonomy folder {0} is created.'.format(
383+
args.taxonomy_folder)
384+
shared.give_user_feedback(message, args.log_file, args.quiet)
385+
386+
if not os.path.isdir(args.database_folder):
387+
os.mkdir(args.database_folder)
388+
message = 'Database folder {0} is created.'.format(
389+
args.database_folder)
390+
shared.give_user_feedback(message, args.log_file, args.quiet)
391+
379392
if 'download_taxonomy_files' in step_list:
380393
download_taxonomy_files(
381394
args.taxonomy_folder, args.date, args.log_file, args.quiet)
382395

396+
setattr(args, 'nodes_dmp', '{0}nodes.dmp'.format(args.taxonomy_folder))
397+
383398
if 'download_prot_accession2taxid_file' in step_list:
399+
setattr(args,
400+
'prot_accession2taxid_file',
401+
'{0}{1}.prot.accession2taxid.gz'.format(
402+
args.taxonomy_folder, args.date))
403+
384404
download_prot_accession2taxid_file(
385405
args.prot_accession2taxid_file,
386406
args.date,
387407
args.log_file,
388408
args.quiet)
389-
409+
390410
if 'download_nr' in step_list:
411+
setattr(args,
412+
'nr_file',
413+
'{0}{1}.nr.gz'.format(args.database_folder, args.date))
414+
391415
download_nr(args.nr_file, args.log_file, args.quiet)
392416

393417
if 'make_diamond_database' in step_list:
418+
setattr(args,
419+
'diamond_database_prefix',
420+
'{0}{1}.nr'.format(args.database_folder, args.date))
421+
394422
make_diamond_database(
395423
args.path_to_diamond,
396424
args.nr_file,
397425
args.diamond_database_prefix,
398426
args.nproc,
399427
args.log_file,
400-
args.quiet)
428+
args.quiet,
429+
args.verbose)
401430

402431
if 'make_fastaid2LCAtaxid_file' in step_list:
432+
setattr(args,
433+
'fastaid2LCAtaxid_file',
434+
'{0}{1}.nr.fastaid2LCAtaxid'.format(
435+
args.database_folder, args.date))
436+
403437
make_fastaid2LCAtaxid_file(
404-
args.taxonomy_folder,
438+
args.nodes_dmp,
405439
args.fastaid2LCAtaxid_file,
406440
args.nr_file,
407441
args.prot_accession2taxid_file,
408442
args.log_file,
409443
args.quiet)
410444

411445
if 'make_taxids_with_multiple_offspring_file' in step_list:
446+
setattr(args,
447+
'taxids_with_multiple_offspring_file',
448+
'{0}{1}.nr.taxids_with_multiple_offspring'.format(
449+
args.database_folder, args.date))
450+
412451
taxid2offspring = find_offspring(
413-
args.taxonomy_folder,
452+
args.nodes_dmp,
414453
args.fastaid2LCAtaxid_file,
415454
args.log_file,
416455
args.quiet)
@@ -506,52 +545,20 @@ def run_fresh(args):
506545
shared.give_user_feedback(message, args.log_file, args.quiet)
507546

508547
# Check memory.
509-
min_mem = 150
510-
(total_memory, error) = check.check_memory(min_mem)
548+
(total_memory, error) = check.check_memory(args.min_mem)
511549
if error:
512550
message = (
513551
'at least {0}GB of memory is needed for a fresh database '
514-
'construction. {1}GB is found on your system. You can either '
515-
'try to find a machine with more memory, or download '
516-
'preconstructed database files from '
552+
'construction. {1}GB is found on your system. You can try to '
553+
'find a machine with more memory, or download preconstructed '
554+
'database files from '
517555
'tbb.bio.uu.nl/bastiaan/CAT_prepare/.'.format(
518-
min_mem, total_memory))
556+
args.min_mem, total_memory))
519557
shared.give_user_feedback(message, args.log_file, args.quiet,
520558
error=True)
521559

522560
sys.exit(1)
523561

524-
if not os.path.isdir(args.taxonomy_folder):
525-
os.mkdir(args.taxonomy_folder)
526-
527-
message = '{0} is created.'.format(args.taxonomy_folder)
528-
shared.give_user_feedback(message, args.log_file, args.quiet)
529-
530-
if not os.path.isdir(args.database_folder):
531-
os.mkdir(args.database_folder)
532-
533-
message = '{0} is created.'.format(args.database_folder)
534-
shared.give_user_feedback(message, args.log_file, args.quiet)
535-
536-
setattr(args,
537-
'prot_accession2taxid_file',
538-
'{0}{1}.prot.accession2taxid.gz'.format(
539-
args.taxonomy_folder, args.date))
540-
setattr(args,
541-
'nr_file',
542-
'{0}{1}.nr.gz'.format(args.database_folder, args.date))
543-
setattr(args,
544-
'diamond_database_prefix',
545-
'{0}{1}.nr'.format(args.database_folder, args.date))
546-
setattr(args,
547-
'fastaid2LCAtaxid_file',
548-
'{0}{1}.nr.fastaid2LCAtaxid'.format(
549-
args.database_folder, args.date))
550-
setattr(args,
551-
'taxids_with_multiple_offspring_file',
552-
'{0}{1}.nr.taxids_with_multiple_offspring'.format(
553-
args.database_folder, args.date))
554-
555562
step_list = ['download_taxonomy_files',
556563
'download_prot_accession2taxid_file',
557564
'download_nr',
@@ -671,7 +678,8 @@ def run_existing(args):
671678
'not all of the downstream files that depend on it are '
672679
'present. In order to prevent strange bugs from arising, '
673680
'remove all files from the database folder and try again.')
674-
shared.give_user_feedback(message, args.log_file, args.quiet, error=True)
681+
shared.give_user_feedback(message, args.log_file, args.quiet,
682+
error=True)
675683

676684
sys.exit(1)
677685

@@ -700,9 +708,6 @@ def run_existing(args):
700708
message = 'Nr file will be downloaded to database folder.'
701709
shared.give_user_feedback(message, args.log_file, args.quiet)
702710

703-
setattr(args,
704-
'nr_file',
705-
'{0}{1}.nr.gz'.format(args.database_folder, args.date))
706711
step_list.append('download_nr')
707712
else:
708713
pass
@@ -711,41 +716,25 @@ def run_existing(args):
711716
shared.give_user_feedback(message, args.log_file, args.quiet)
712717

713718
if not args.diamond_database:
714-
message = ('DIAMOND database will be constructed from the nr file.'
715-
''.format(args.nr_file))
719+
message = 'DIAMOND database will be constructed from the nr file.'
716720
shared.give_user_feedback(message, args.log_file, args.quiet)
717721

718-
setattr(args,
719-
'diamond_database_prefix',
720-
'{0}{1}.nr'.format(args.database_folder, args.date))
721722
step_list.append('make_diamond_database')
722723
else:
723724
message = 'DIAMOND database found: {0}.'.format(args.diamond_database)
724725
shared.give_user_feedback(message, args.log_file, args.quiet)
725726

726-
setattr(args,
727-
'diamond_database_prefix',
728-
args.diamond_database.rsplit('.dmnd', 1)[0])
729-
730727
if not args.fastaid2LCAtaxid_file:
731728
if not args.prot_accession2taxid_file:
732729
message = ('Prot.accession2taxid file will be downloaded to '
733730
'taxonomy folder.')
734731
shared.give_user_feedback(message, args.log_file, args.quiet)
735732

736-
setattr(args,
737-
'prot_accession2taxid_file',
738-
'{0}{1}.prot.accession2taxid.gz'.format(
739-
args.taxonomy_folder, args.date))
740733
step_list.append('download_prot_accession2taxid_file')
741734

742735
message = 'File fastaid2LCAtaxid will be created.'
743736
shared.give_user_feedback(message, args.log_file, args.quiet)
744737

745-
setattr(args,
746-
'fastaid2LCAtaxid_file',
747-
'{0}{1}.nr.fastaid2LCAtaxid'.format(
748-
args.database_folder, args.date))
749738
step_list.append('make_fastaid2LCAtaxid_file')
750739
else:
751740
message = ('Fastaid2LCAtaxid found: {0}.'.format(
@@ -760,10 +749,6 @@ def run_existing(args):
760749
message = 'File taxids_with_multiple_offspring will be created.'
761750
shared.give_user_feedback(message, args.log_file, args.quiet)
762751

763-
setattr(args,
764-
'taxids_with_multiple_offspring_file',
765-
'{0}{1}.nr.taxids_with_multiple_offspring'.format(
766-
args.database_folder, args.date))
767752
step_list.append('make_taxids_with_multiple_offspring_file')
768753
else:
769754
message = 'Taxids_with_multiple_offspring found: {0}'.format(
@@ -787,20 +772,18 @@ def run_existing(args):
787772
'to existing folders?')
788773
shared.give_user_feedback(message, args.log_file, args.quiet,
789774
show_time=False)
790-
791-
if ('make_fastaid2LCAtaxid_file' in step_list or
792-
'make_taxids_with_multiple_offspring_file' in step_list):
775+
776+
if 'make_fastaid2LCAtaxid_file' in step_list:
793777
# Check memory.
794-
min_mem = 100
795-
(total_memory, error) = check.check_memory(min_mem)
778+
(total_memory, error) = check.check_memory(args.min_mem)
796779
if error:
797780
message = (
798781
'at least {0}GB of memory is needed for the database '
799-
'construction. {1}GB is found on your system. You can '
800-
'either try to find a machine with more memory, or '
801-
'download preconstructed database files '
782+
'construction. {1}GB is found on your system. You can try '
783+
'to find a machine with more memory, or download '
784+
'preconstructed database files '
802785
'from tbb.bio.uu.nl/bastiaan/CAT_prepare/.'.format(
803-
min_mem, total_memory))
786+
args.min_mem, total_memory))
804787
shared.give_user_feedback(message, args.log_file, args.quiet,
805788
error=True)
806789

@@ -818,18 +801,6 @@ def run_existing(args):
818801
shared.give_user_feedback(message, args.log_file, args.quiet,
819802
show_time=False)
820803

821-
if not os.path.isdir(args.taxonomy_folder):
822-
os.mkdir(args.taxonomy_folder)
823-
message = 'Taxonomy folder {0} is created.'.format(
824-
args.taxonomy_folder)
825-
shared.give_user_feedback(message, args.log_file, args.quiet)
826-
827-
if not os.path.isdir(args.database_folder):
828-
os.mkdir(args.database_folder)
829-
message = 'Database folder {0} is created.'.format(
830-
args.database_folder)
831-
shared.give_user_feedback(message, args.log_file, args.quiet)
832-
833804
prepare(step_list, args)
834805

835806
return

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 5.1.2
4+
Code streamlining.
5+
36
## 5.1.1
47
CAT and BAT can now compress the DIAMOND alignment file, and import gzip compressed alignment files.
58

0 commit comments

Comments
 (0)