Skip to content

Commit 4661093

Browse files
Author and committer: Elizabeth McDaniel
Commit message: changes to curated markers and corresponding workflows with phylogenies
1 parent 38699bb commit 4661093

File tree

5 files changed: 12 additions (+12) and 17 deletions (-17).

bin/create-genome-phylogeny

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,14 @@ from Bio.SeqRecord import SeqRecord
2323
from collections import defaultdict, Counter
2424

2525
# Arguments
26-
parser = argparse.ArgumentParser(description = "Create ribosomal phylogenies using specific ribosomal markers for archaea and/or bacteria")
26+
parser = argparse.ArgumentParser(description = "Create ribosomal phylogenies using specific ribosomal markers for archaea or bacteria")
2727
parser._action_groups.pop()
2828
required = parser.add_argument_group("required arguments")
2929
optional = parser.add_argument_group("optional arguments")
3030
metadata = parser.add_argument_group("metadata output files for ITOL")
3131
required.add_argument('--input', metavar='INPUT', help='Directory where genomes to be screened are held')
3232
required.add_argument('--output', metavar='OUTPUT', help='Directory to store results and intermediate files')
33-
required.add_argument('--domain', metavar='DOMAIN', help='archaea, bacteria, all')
33+
required.add_argument('--domain', metavar='DOMAIN', help='archaea, bacteria')
3434
required.add_argument('--phylogeny', metavar='PHY', help='fastree, raxml')
3535
optional.add_argument('--threads',metavar='THREADS',help='Optional: number of threads for calculating a tree using RAxML. This is not taken into account using Fastree')
3636
optional.add_argument('--loci', metavar='LOCI', default='12', help='Output genomes with less than x number of loci. By default prints genomes that have less than 12 ribosomal loci markers.')
@@ -105,7 +105,7 @@ if os.path.isdir(OUTPUT) == True:
105105
sys.exit()
106106

107107
# check for ribosomal markers directory
108-
if os.path.isdir("ribosomal_markers/") == False:
108+
if os.path.isdir("curated_markers/ribosomal_markers/") == False:
109109
print(" The ribosomal markers directory could not be found."+"\n"+" Please either download the markers from https://github.com/elizabethmcd/metabolisHMM/releases/download/v2.0/metabolisHMM_v2.0_markers.tgz and decompress the tarball, or move the directory to where you are running the workflow from.")
110110
sys.exit()
111111

@@ -134,16 +134,14 @@ elif METADATA == 'OFF':
134134

135135

136136
# different ribosomal markers for archaea/bacteria/all
137-
bacteria_list = ['rpL14','rpL15','rpL16','rpL18','rpL22','rpL24','rpL2','rpL3','rpL4','rpL5','rpL6','rpS10','rpS17','rpS19','rpS3','rpS8']
138-
archaea_list = ['rpL14','rpL15','rpL18','rpL22','rpL24','rpL2','rpL3','rpL4','rpL5','rpL6','rpS17','rpS19','rpS3','rpS8']
139-
all_list = ['rpL14','rpL15','rpL18','rpL22','rpL24','rpL2', 'rpL3','rpL4','rpL5','rpL6','rpS17','rpS19','rpS3','rpS8']
137+
bacteria_list = ['rpL14_bact','rpL15_bact','rpL16_bact','rpL18_bact','rpL22_bact','rpL24_bact','rpL2_bact','rpL3_bact','rpL4_bact','rpL5_bact','rpL6_bact','rpS10_bact','rpS17_bact','rpS19_bact','rpS3_bact','rpS8_bact']
138+
archaea_list = ['rpL14_arch','rpL15_arch','rpL16_arch','rpL18_arch','rpL22_arch','rpL24_arch','rpL2_arch','rpL3_arch','rpL4_arch','rpL5_arch','rpL6_arch', 'rpS10_arch','rpS17_arch','rpS19_arch','rpS3_arch','rpS8_arch']
139+
140140

141141
if DOMAIN == 'archaea':
142142
prot_list=archaea_list
143143
elif DOMAIN == 'bacteria':
144144
prot_list=bacteria_list
145-
elif DOMAIN == 'all':
146-
prot_list=all_list
147145

148146

149147
# if .fna predict CDS and reformat header names because prodigal makes them stupid
@@ -187,7 +185,7 @@ for genome in reformatted_genomes:
187185
dir=name
188186
os.makedirs(OUTPUT+"/out/"+dir)
189187
for prot in prot_list:
190-
marker ="ribosomal_markers/"+prot+"_bact.hmm"
188+
marker ="curated_markers/ribosomal_markers/"+prot+".hmm"
191189
outname= OUTPUT + "/out/"+dir+"/"+name + "-" + prot + ".out"
192190
cmd = ["hmmsearch", "--tblout="+outname, marker, genome]
193191
subprocess.call(cmd, stdout=FNULL)

bin/single-marker-phylogeny

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ optional.add_argument("--threads",metavar='THREADS',help="number of threads for
3838
optional.add_argument("--kofam", metavar='KOFAM', help="Use KEGG HMMs from the KofamKOALA set. Options = ON or OFF")
3939
optional.add_argument("--ko_list", metavar='KOLIST', help="Point to location of the KofamKoala ko_list file if using the KofamKOALA KEGG HMMs")
4040
ribosomal.add_argument("--ribo_tree", metavar='RIBO', default='OFF', help="Make corresponding ribosomal phylogeny of genomes containing hits of the provided single marker. " )
41-
ribosomal.add_argument("--domain", metavar='DOMAIN', help="If constructing corresponding ribosomal tree, select the domain for which your hits belong to. Options: bacteria, archaea, all")
41+
ribosomal.add_argument("--domain", metavar='DOMAIN', help="If constructing corresponding ribosomal tree, select the domain for which your hits belong to. Options: bacteria, archaea")
4242
ribosomal.add_argument('--loci', metavar='LOCI', default='12', help='Output genomes with less than x number of loci. By default prints genomes that have less than 12 ribosomal loci markers.')
4343
metadata.add_argument('--metadata', metavar='METADATA',help='Option for outputting ITOL formatted metadata files. ON or OFF')
4444
metadata.add_argument('--names', metavar='NAMES', help="Provided .csv formatted metadata file of filenames and corresponding taxonomical or group names")
@@ -284,15 +284,12 @@ elif PHYTOOL == "raxml":
284284

285285
# Corresponding ribosomal tree of hits and options
286286
# lists
287-
bacteria_list = ['rpL14','rpL15','rpL16','rpL18','rpL22','rpL24','rpL2','rpL3','rpL4','rpL5','rpL6','rpS10','rpS17','rpS19','rpS3','rpS8']
288-
archaea_list = ['rpL14','rpL15','rpL18','rpL22','rpL24','rpL2','rpL3','rpL4','rpL5','rpL6','rpS17','rpS19','rpS3','rpS8']
289-
all_list = ['rpL14','rpL15','rpL18','rpL22','rpL24','rpL2', 'rpL3','rpL4','rpL5','rpL6','rpS17','rpS19','rpS3','rpS8']
287+
bacteria_list = ['rpL14_bact','rpL15_bact','rpL16_bact','rpL18_bact','rpL22_bact','rpL24_bact','rpL2_bact','rpL3_bact','rpL4_bact','rpL5_bact','rpL6_bact','rpS10_bact','rpS17_bact','rpS19_bact','rpS3_bact','rpS8_bact']
288+
archaea_list = ['rpL14_arch','rpL15_arch','rpL16_arch','rpL18_arch','rpL22_arch','rpL24_arch','rpL2_arch','rpL3_arch','rpL4_arch','rpL5_arch','rpL6_arch', 'rpS10_arch','rpS17_arch','rpS19_arch','rpS3_arch','rpS8_arch']
290289
if DOMAIN == 'archaea':
291290
prot_list=archaea_list
292291
elif DOMAIN == 'bacteria':
293292
prot_list=bacteria_list
294-
elif DOMAIN == 'all':
295-
prot_list=all_list
296293
# runs phylogeny on specific hits of single marker
297294
if RIBO == 'ON':
298295
print("Making corresponding ribosomal phylogeny of single marker hits...")
@@ -306,7 +303,7 @@ if RIBO == 'ON':
306303
genome = OUTPUT + "/genomes/"+hit+".reformatted.faa"
307304
os.makedirs(OUTPUT+"/ribo_out/"+hit)
308305
for prot in prot_list:
309-
marker ="ribosomal_markers/"+prot+"_bact.hmm"
306+
marker ="curated_markers/ribosomal_markers/"+prot+".hmm"
310307
outname= OUTPUT + "/ribo_out/"+hit+"/"+hit + "-" + prot + ".out"
311308
cmd = ["hmmsearch", "--tblout="+outname, marker, genome]
312309
subprocess.call(cmd, stdout=FNULL)

bin/summarize-metabolism

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ out_intm = OUTPUT + "/out"
8282
out_results = OUTPUT + "/results"
8383
out_genomes = OUTPUT + "/genomes"
8484
OUTFILE = args.summary
85-
markers=glob.glob("metabolic_markers/*.hmm")
85+
markers=glob.glob("curated_markers/metabolic_markers/*.hmm")
8686
genomes=glob.glob(GENOMEFILES)
8787
PLOTTING = args.plotting
8888

markerdb_v1.8.tgz

-2.63 MB
Binary file not shown.

metabolisHMM_markers_v1.9.tgz

2.87 MB
Binary file not shown.

0 commit comments

Comments
 (0)