forked from bgruening/galaxytools
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsylph_profile.xml
More file actions
210 lines (196 loc) · 10.9 KB
/
sylph_profile.xml
File metadata and controls
210 lines (196 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
<tool id="sylph_profile" name="sylph profile" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
<!-- The description and requirements are macro expansions; the macros are pulled in via the import below. -->
<expand macro="description"/>
<macros>
    <import>macros.xml</import>
</macros>
<expand macro="requirements"/>
<command detect_errors="exit_code"><![CDATA[
    #import re

    ## Make the selected sylph database available in the working directory.
    ## The token is defined in macros.xml; the profile step below reads database.syldb.
    @DB_SELECTOR@

    ## Stage the reads. Each token is defined in macros.xml and is expected to set
    ## the input / read1 / read2 variables consumed by the sketch step.
    #if $sketch.type == 'single':
        @SINGLE_INPUT@
    #else if $sketch.type == 'single_group':
        @SINGLE_GROUP@
    #else if $sketch.type == 'paired':
        @PAIRED@
    #else if $sketch.type == 'paired_group':
        @PAIRED_GROUP@
    #end if

    ## Step 1: sketch the reads into sylph_sketches/.
    ## The staged read variables are left unquoted on purpose: they are built by the
    ## macros above and, for grouped input, may expand to several file names.
    sylph sketch
    #if $sketch.type == 'single':
        $input
    #else if $sketch.type == 'single_group':
        -r $input
    #else if $sketch.type == 'paired' or $sketch.type == 'paired_group':
        ## Both paired modes pass the mates the same way.
        -1 $read1
        -2 $read2
    #end if
        -t \${GALAXY_SLOTS:-4}
        -d sylph_sketches &&

    ## Step 2: profile every sketch against the database.
    sylph profile
        database.syldb
        sylph_sketches/*.sylsp
    #if $min_num_kmers:
        --min-number-kmers ${min_num_kmers}
    #end if
        -t \${GALAXY_SLOTS:-4}
        -o '$output'

    ## Step 3 (optional): convert the profile to the additional output formats.
    #if $outputs:
        ## The converter needs the taxonomy metadata matching the chosen database.
        #if $database_select.select == 'cached':
            && ln -s '${database_select.sylph_database.fields.path}/database.tsv.gz' 'database.tsv.gz'
        #else:
            && ln -s '$database_select.metadata' 'database.tsv.gz'
        #end if
        && python '$__tool_directory__/sylph_to_taxprof.py' -s '$output' -m database.tsv.gz -o metaphlan_
        ## The Krona table is derived from the MetaPhlAn-style files, so it must be
        ## produced before those files are moved into their collection directory.
        #if 'krona' in $outputs:
            && python '$__tool_directory__/sylphformatoutput.py' format_for_krona --metaphlan_output *.sylphmpa --krona_output krona.tsv
            && mkdir krona_out && mv *krona.tsv krona_out
        #end if
        && mkdir metaphlan_out && mv *.sylphmpa metaphlan_out
    #end if
]]></command>
<inputs>
    <!-- Read layout selector: decides which read parameters are shown and how the command stages them. -->
    <conditional name="sketch">
        <param name="type" type="select" label="Select the type of reads used">
            <option value="single">Individual single-end reads</option>
            <option value="single_group">Group of single-end reads</option>
            <option value="paired">One set of paired-end reads</option>
            <option value="paired_group">Group of paired-end reads</option>
        </param>
        <!-- The fastqsanger variants are listed explicitly: per the original author's note, declaring only
             "fastq" limited the accepted inputs to the fastq and fastq.gz extensions. -->
        <when value="single">
            <param name="input" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Single-end input reads"/>
        </when>
        <when value="single_group">
            <param name="input" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Single-end input reads" multiple="true"/>
        </when>
        <when value="paired">
            <param name="input_1" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Paired-end input reads 1"/>
            <param name="input_2" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Paired-end input reads 2"/>
        </when>
        <when value="paired_group">
            <param name="input" type="data_collection" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Paired-end input reads" collection_type="paired"/>
        </when>
    </conditional>
    <!-- Forwarded to the min-number-kmers option of sylph profile. -->
    <param name="min_num_kmers"
           type="integer"
           min="1"
           value="50"
           label="Minimum number of k-mers for sylph to output a result"
           help="States the minimum number of k-mers needed for sylph to output a result. This is (approximately) the contig length divided by -c. With default settings, --min-number-kmers 10 can work with contigs ~2500 bp. For smaller contigs, consider -c 100."/>
    <!-- Shared parameter groups expanded from macros: extra output formats and database selection. -->
    <expand macro="output_format"/>
    <expand macro="input_database"/>
</inputs>
<outputs>
    <!-- Primary sylph profile table; always produced. -->
    <data format="tabular" name="output" label="${tool.name} on ${on_string}"/>
    <!-- MetaPhlAn-style conversions, one element per *.sylphmpa file moved into metaphlan_out/.
         The files are matched explicitly and typed as tabular: a generic name-and-extension pattern would
         take the datatype from the file suffix, and "sylphmpa" is presumably not a registered Galaxy
         datatype (confirm against the datatype registry). The element identifier is still the file name
         without the .sylphmpa suffix. -->
    <collection name="metaphlan_out" type="list" label="${tool.name} on ${on_string}: MetaPhlAn-style output">
        <filter>outputs and 'metaphlan' in outputs</filter>
        <discover_datasets pattern="(?P&lt;designation&gt;.+)\.sylphmpa" format="tabular" directory="metaphlan_out/" />
    </collection>
    <!-- Krona-ready tables; the command only creates krona_out/ when the krona option is selected. -->
    <collection name="krona_out" type="list" label="${tool.name} on ${on_string}: Krona-useable output">
        <filter>outputs and 'krona' in outputs</filter>
        <discover_datasets pattern="__name_and_ext__" directory="krona_out/" />
    </collection>
</outputs>
<tests>
    <!-- NOTE(review): the gzipped fixtures below are uploaded with ftype="fastq"; confirm whether
         fastq.gz was intended. Left unchanged here because it may affect the expected outputs. -->

    <!-- Test 1: one single-end read file, default settings -->
    <test expect_num_outputs="1">
        <param name="sylph_database" value="sylph_db"/>
        <conditional name="sketch">
            <param name="type" value="single"/>
            <param name="input" value="single_1.fastq.gz" ftype="fastq"/>
        </conditional>
        <output name="output" value="output_1.tabular"/>
    </test>

    <!-- Test 2: several single-end read files with a non-default k-mer threshold -->
    <test expect_num_outputs="1">
        <param name="sylph_database" value="sylph_db"/>
        <conditional name="sketch">
            <param name="type" value="single_group"/>
            <param name="input" value="single_1.fastq.gz,single_2.fastq.gz" ftype="fastq"/>
        </conditional>
        <param name="min_num_kmers" value="49"/>
        <output name="output" value="output_2.tabular" compare="sim_size"/>
    </test>

    <!-- Test 3: one pair of read files supplied as two separate datasets -->
    <test expect_num_outputs="1">
        <param name="sylph_database" value="sylph_db"/>
        <conditional name="sketch">
            <param name="type" value="paired"/>
            <param name="input_1" value="test R1.fq" ftype="fastq"/>
            <param name="input_2" value="test R2.fq" ftype="fastqsanger"/>
        </conditional>
        <output name="output" value="output_3.tabular"/>
    </test>

    <!-- Test 4: the same pair supplied as a paired collection -->
    <test expect_num_outputs="1">
        <param name="sylph_database" value="sylph_db"/>
        <conditional name="sketch">
            <param name="type" value="paired_group"/>
            <param name="input">
                <collection type="paired" name="test">
                    <element name="forward" ftype="fastq" value="test R1.fq"/>
                    <element name="reverse" ftype="fastq" value="test R2.fq"/>
                </collection>
            </param>
        </conditional>
        <output name="output" value="output_4.tabular"/>
    </test>

    <!-- Test 5: conversion to the additional output formats -->
    <test expect_num_outputs="3">
        <param name="sylph_database" value="sylph_db"/>
        <conditional name="sketch">
            <param name="type" value="single"/>
            <param name="input" value="single_1.fastq.gz" ftype="fastq"/>
        </conditional>
        <param name="outputs" value="metaphlan,krona"/>
        <!-- The krona table is empty for this test data, so only the MetaPhlAn-style collection is
             compared; the krona collection still counts towards expect_num_outputs. -->
        <output_collection name="metaphlan_out" type="list">
            <element name="metaphlan_single_1.fastq.gz" value="test.sylphmpa"/>
        </output_collection>
    </test>
</tests>
<!-- reStructuredText help text: blank lines separate paragraphs and lists, and nested bullets are indented under their parent item. -->
<help><![CDATA[
**What is sylph?**

Sylph is an extremely fast and memory efficient program for profiling and searching metagenomic samples against databases. It is 10-100x faster than other popular software such as MetaPhlAn or Kraken and more memory efficient too.

**What can sylph do?**

- Profile metagenomes: sylph can calculate the abundances of genomes in a sample using a reference database. This is the same type of output as Kraken or MetaPhlAn.
- Search genomes against metagenomes: sylph can check if a genome is contained in your sample (e.g. is this E. coli genome in my sample?).
- ANI querying: sylph can estimate the containment average nucleotide identity (ANI) of a reference genome to the genomes in your sample.
- Use custom reference databases: Eukaryotes, viruses, and any collections of fasta files are ok.
- Long-reads are usable: sylph is primarily optimized for short-reads, but it can utilize nanopore or PacBio reads with high precision.
- Calculate coverage: sylph can estimate the coverage (not just the abundance) of genomes in your database.

`See here for more information on what sylph can and cannot do <https://github.com/bluenote-1577/sylph/wiki/Introduction:-what-is-sylph-and-how-does-it-work%3F>`_.

----

**Output**

Sylph profile outputs a TSV (tab-separated values) file. Each row is one genome detected in the metagenome sample.

- Sample_file: the filename of the reads/sample.
- Genome_file: the filename of the detected genome.
- Taxonomic_abundance: normalized taxonomic abundance as a percentage. Coverage-normalized - same as MetaPhlAn abundance
- Sequence_abundance: normalized sequence abundance as a percentage. The "number of reads" assigned to each genome - same as Kraken abundance
- Adjusted_ANI: adjusted containment ANI estimate.

  - If coverage adjustment is possible (cov is < 3x cov): returns coverage-adjusted ANI
  - If coverage is too low/high: returns Naive_ANI (see below)

- Eff_cov/True_cov: an estimate of the effective, or if -u specified, the true coverage. Always a decimal number.
- ANI_5-95_percentile: [5%,95%] confidence intervals. Not always a decimal number.

  - If coverage adjustment is possible: float-float e.g. 98.52-99.55
  - If coverage is too low/high: NA-NA is given.

- Eff_lambda: estimate of the effective coverage parameter. Not always a decimal number.

  - If coverage adjustment is possible: lambda estimate is given
  - If coverage is too low/high: LOW or HIGH is output

- Lambda_5-95_percentile: [5%, 95%] confidence intervals for lambda. Same format rules as ANI_5-95_percentile.
- Median_cov: median k-mer multiplicity for k-mers with >= 1 multiplicity.
- Mean_cov_geq1: mean k-mer multiplicity for k-mers with >= 1 multiplicity.
- Containment_ind: int/int showing the containment index (number of k-mers found in sample divided by total k-mers), e.g. 959/1053.
- Naive_ANI: containment ANI without coverage adjustment.
- kmers_reassigned: the number of k-mers reassigned away from the genome.
- Contig_name: name of the first contig in the genome

Additional files are able to be output. The MetaPhlAn-style output is formatted similarly to that output by the `MetaPhlAn tool <https://toolshed.g2.bx.psu.edu/repos/iuc/metaphlan/metaphlan/4.1.1+galaxy3>`_.

This output is *NOT* compatible with Krona directly. For that, please select the Krona-style output option.
]]></help>
<expand macro="citation"/>
</tool>