<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Galaxy wrapper around "kat hist": builds a k-mer occurrence histogram and an optional PNG/PDF plot. -->
    <description>creates a histogram of k-mer occurrences</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <stdio>
        <!-- The heap-corruption message is downgraded to a warning because the outputs are still written. -->
        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
        <!--
            Every non-zero exit code is fatal except 134 (128 + SIGABRT), which is left out
            on purpose; presumably that is how the heap-corruption abort above surfaces.
            The first range starts at 1 so that ordinary failures are not reported as success.
        -->
        <exit_code range="1:133" level="fatal" description="Error occurred"/>
        <exit_code range="135:" level="fatal" description="Error occurred"/>
        <exit_code range=":-1" level="fatal" description="Error occurred"/>
    </stdio>
    <version_command>echo @TOOL_VERSION@</version_command>
    <command><![CDATA[
## Link every input into the working directory under a name that carries the
## extension computed by the SET_EXT token (defined in macros.xml), and remember
## the names so they can be passed to KAT as positional arguments below.
#set $staged_inputs = []
#for $i, $f in enumerate($input_files)
    @SET_EXT@
    #set $staged = 'input_%d%s' % ($i, $ext)
    ln -s '$f' '$staged' &&
    #silent $staged_inputs.append($staged)
#end for
kat hist
    -o kat_hist_out
    -t "\${GALAXY_SLOTS:-1}"
    --low $histogram_options.low
    --high $histogram_options.high
    --inc $histogram_options.inc
    --mer_len $kmer_options.mer_len
    --hash_size $kmer_options.hash_size
    $kmer_options.non_canonical
    @CMD_5PTRIM@
    --output_type '$advanced_options.output_type'
#for $path in $staged_inputs
    '$path'
#end for
    ]]></command>
    <inputs>
        <!-- Only the formats listed here can be selected, so the help text must not promise anything else. -->
        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FASTA/FASTQ files (gzip-compressed files are accepted)"/>
        <section name="histogram_options" title="Histogram Options" expanded="true">
            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
        </section>
        <expand macro="kmer_params"/>
        <section name="advanced_options" title="Advanced Options" expanded="false">
            <!-- An explicit name is required because the argument starts with a digit. -->
            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
                <!-- Accepts an empty value, a single number, or a comma-separated list of numbers. -->
                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
            </param>
            <expand macro="output_type_param"/>
        </section>
    </inputs>
    <outputs>
        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
        <!-- Exactly one of the two plot outputs is created, depending on the selected output type. -->
        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
            <filter>advanced_options['output_type'] == 'png'</filter>
        </data>
        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
            <filter>advanced_options['output_type'] == 'pdf'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 01: Default options with PNG Output -->
        <test expect_num_outputs="2">
            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
            <section name="histogram_options">
                <param name="low" value="1"/>
                <param name="high" value="10000"/>
                <param name="inc" value="1"/>
            </section>
            <output name="output_hist" file="kat_hist.out">
                <assert_contents>
                    <has_text text="# Kmer value:27"/>
                    <has_n_lines n="10007"/>
                </assert_contents>
            </output>
            <!-- Images are compared by size rather than byte-for-byte so that small rendering differences do not fail the test. -->
            <output name="output_plot" file="kat_hist.out.png" ftype="png" compare="sim_size" delta_frac="0.1">
                <assert_contents>
                    <has_image_channels channels="4"/>
                    <has_image_height height="1800"/>
                    <has_image_width width="2400"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 02: Custom k-mer length with PDF Output -->
        <test expect_num_outputs="2">
            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
            <section name="kmer_options">
                <param name="mer_len" value="21"/>
            </section>
            <section name="advanced_options">
                <param name="output_type" value="pdf"/>
            </section>
            <output name="output_hist">
                <assert_contents>
                    <has_text text="# Kmer value:21"/>
                    <has_n_lines n="10007"/>
                </assert_contents>
            </output>
            <output name="output_pdf" ftype="pdf">
                <assert_contents>
                    <!-- A tolerance is allowed because the exact PDF size may vary slightly between builds. -->
                    <has_size size="13956" delta="1000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 03: 5' trim with PNG Output -->
        <test expect_num_outputs="2">
            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
            <section name="advanced_options">
                <param name="trim_5p" value="5"/>
            </section>
            <output name="output_hist">
                <assert_contents>
                    <has_text text="# Kmer value:27"/>
                    <has_n_lines n="10007"/>
                </assert_contents>
            </output>
            <output name="output_plot" ftype="png">
                <assert_contents>
                    <has_image_channels channels="4"/>
                    <has_image_height height="1800"/>
                    <has_image_width width="2400"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help format="markdown"><![CDATA[
**KAT Histogram (kat hist)**

Creates a histogram of k-mer occurrences from sequencing data.

-----

**What it does**

The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:

```
low + i*inc <= c < low + (i+1)*inc
```

The final bucket catches all k-mers with count >= the last bucket's lower bound.

This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.

-----

**Input**

- One or more **FASTA** or **FASTQ** files (gzip-compressed files are accepted)

-----

**Outputs**

1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
2. **Plot** (PNG or PDF, depending on the selected output type) - visual plot of the k-mer frequency histogram.

-----

**Tips**

- The default k-mer length of **27** works well for most short-read sequencing data.
- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
    ]]></help>
    <expand macro="citations"/>
    <expand macro="creator"/>
</tool>