Skip to content

Commit 2df6e85

Browse files
authored
Include taxids in top abundance outputs (#71)
1 parent bce7883 commit 2df6e85

File tree

3 files changed

+23
-10
lines changed

3 files changed

+23
-10
lines changed

bin/bracken_top_n_linelist.py

+17 -5
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,10 @@ def parse_bracken_report(bracken_report_path):
1212
with open(bracken_report_path, 'r') as f:
1313
reader = csv.DictReader(f, dialect='excel-tab')
1414
for row in reader:
15+
try:
16+
row['fraction_total_reads'] = float(row['fraction_total_reads'])
17+
except ValueError as e:
18+
row['fraction_total_reads'] = None
1519
bracken_report_lines.append(row)
1620

1721
return bracken_report_lines
@@ -25,7 +29,7 @@ def main(args):
2529
output_fields = ['sample_id', 'taxonomy_level']
2630
output_line = {
2731
'sample_id': args.sample_id,
28-
'taxonomy_level': bracken_report_sorted[0]['taxonomy_lvl']
32+
'taxonomy_level': args.taxonomy_level,
2933
}
3034

3135
for n in range(args.top_n):
@@ -34,25 +38,33 @@ def main(args):
3438
try:
3539
output_line[name_field] = bracken_report_sorted[n]['name']
3640
except IndexError as e:
37-
output_line[name_field] = "None"
41+
output_line[name_field] = None
3842
output_fields.append(name_field)
43+
44+
taxid_field = 'abundance_' + num + '_taxonomy_id'
45+
try:
46+
output_line[taxid_field] = bracken_report_sorted[n]['taxonomy_id']
47+
except IndexError as e:
48+
output_line[taxid_field] = None
49+
output_fields.append(taxid_field)
50+
3951
fraction_total_reads_field = 'abundance_' + num + '_fraction_total_reads'
4052
try:
41-
output_line[fraction_total_reads_field] = bracken_report_sorted[n]['fraction_total_reads']
53+
output_line[fraction_total_reads_field] = round(bracken_report_sorted[n]['fraction_total_reads'], 6)
4254
except IndexError as e:
4355
output_line[fraction_total_reads_field] = 0.0
4456
output_fields.append(fraction_total_reads_field)
4557

4658

47-
csv.register_dialect('unix-csv-quote-minimal', delimiter=',', doublequote=False, lineterminator='\n', quoting=csv.QUOTE_MINIMAL)
48-
writer = csv.DictWriter(sys.stdout, fieldnames=output_fields, dialect='unix-csv-quote-minimal')
59+
writer = csv.DictWriter(sys.stdout, fieldnames=output_fields, dialect='unix', quoting=csv.QUOTE_MINIMAL)
4960
writer.writeheader()
5061
writer.writerow(output_line)
5162

5263
if __name__ == '__main__':
5364
parser = argparse.ArgumentParser()
5465
parser.add_argument('bracken_report')
5566
parser.add_argument('-s', '--sample-id')
67+
parser.add_argument('-l', '--taxonomy-level')
5668
parser.add_argument('-n', '--top-n', type=int)
5769
args = parser.parse_args()
5870
main(args)

modules/bracken.nf

+3 -2
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,9 @@ process abundance_top_n {
5656
tuple val(sample_id), path("${sample_id}_${taxonomic_level}_top_*.tsv"), val(taxonomic_level)
5757

5858
script:
59-
def top_n = taxonomic_level == 'Genus' ? '3' : '5'
59+
top_n = taxonomic_level == 'Genus' ? '3' : '5'
60+
taxonomic_level_char = taxonomic_level.substring(0,1)
6061
"""
61-
bracken_top_n_linelist.py ${bracken_abundances} -n ${top_n} -s ${sample_id} > ${sample_id}_${taxonomic_level}_top_${top_n}.tsv
62+
bracken_top_n_linelist.py ${bracken_abundances} -n ${top_n} -s ${sample_id} -l ${taxonomic_level_char} > ${sample_id}_${taxonomic_level}_top_${top_n}.tsv
6263
"""
6364
}

nextflow.config

+3 -3
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,16 @@ manifest {
33
description = 'Routine Sequence QC'
44
mainScript = 'main.nf'
55
nextflowVersion = '>=20.01.0'
6-
version = '0.3.4'
6+
version = '0.4.0'
77
}
88

99
params {
1010
run_dir = "NO_FILE"
1111
instrument_type = "miseq"
1212
illumina_suffixes = ['*_R{1,2}_001', '*_R{1,2}', '*_{1,2}' ]
1313
fastq_exts = ['.fastq.gz', '.fq.gz', '.fastq', '.fq']
14-
kraken2_db = "/data/ref_databases/kraken2/2021-05-17_standard"
15-
bracken_db = "/data/ref_databases/kraken2/2021-05-17_standard"
14+
kraken2_db = "/data/ref_databases/kraken2/latest_standard"
15+
bracken_db = "/data/ref_databases/kraken2/latest_standard"
1616
seqtk_fqchk_threshold = 30
1717
mash_sketch_kmer_size = 21
1818
mash_sketch_minimum_copies = 5

0 commit comments

Comments
 (0)