From 0a6939f7a06d2fd2d278033d86ebcfe5c7949b9e Mon Sep 17 00:00:00 2001
From: Saim Momin <mominsaim12@gmail.com>
Date: Wed, 10 Jun 2026 14:38:20 +0200
Subject: [PATCH 1/4] Add KAT hist, comp, gcp

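---
 Note: the three files created below are the same blob (52254c71e0) and all
 declare tool id "kat_hist". kat_hist.xml is deleted again in PATCH 3/4, and
 kat_comp.xml / kat_gcp.xml are rewritten in PATCH 4/4 ("Fix overwritten
 tools"). Do not install from an intermediate commit of this series.
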
 tools/kat/kat_comp.xml | 171 +++++++++++++++++++++++++++++++++++++++++
 tools/kat/kat_gcp.xml  | 171 +++++++++++++++++++++++++++++++++++++++++
 tools/kat/kat_hist.xml | 171 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 513 insertions(+)
 create mode 100644 tools/kat/kat_comp.xml
 create mode 100644 tools/kat/kat_gcp.xml
 create mode 100644 tools/kat/kat_hist.xml
diff --git a/tools/kat/kat_comp.xml b/tools/kat/kat_comp.xml
new file mode 100644
index 0000000000..52254c71e0
--- /dev/null
+++ b/tools/kat/kat_comp.xml
@@ -0,0 +1,171 @@
+<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>creates a histogram of k-mer occurrences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <stdio>
+        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
+        <exit_code range="2:133" level="fatal" description="Error occurred"/>
+        <exit_code range="135:" level="fatal" description="Error occurred"/>
+        <exit_code range=":-1" level="fatal" description="Error occurred"/>
+    </stdio>
+    <version_command>echo @TOOL_VERSION@</version_command>
+    <command><![CDATA[
+            #for $i in range(len($input_files)):
+                #set $f = $input_files[$i]
+                @SET_EXT@
+                cp '$f' 'input_${i}${ext}' &&
+            #end for
+            kat hist
+                -o kat_hist_out
+                -t "\${GALAXY_SLOTS:-1}"
+                --low $histogram_options.low
+                --high $histogram_options.high
+                --inc $histogram_options.inc
+                --mer_len $kmer_options.mer_len
+                --hash_size $kmer_options.hash_size
+                $kmer_options.non_canonical
+                @CMD_5PTRIM@
+                --output_type '$advanced_options.output_type'
+                #for $i in range(len($input_files)):
+                    #set $f = $input_files[$i]
+                    @SET_EXT@
+                    'input_${i}${ext}'
+                #end for
+        ]]></command>
+    <inputs>
+        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FastA/FastQ files (gzipped accepted), or a single Jellyfish hash file"/>
+        <section name="histogram_options" title="Histogram Options" expanded="true">
+            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
+            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
+            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
+        </section>
+        <expand macro="kmer_params"/>
+        <section name="advanced_options" title="Advanced Options" expanded="false">
+            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
+                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+            </param>
+            <expand macro="output_type_param"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
+        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
+            <filter>advanced_options['output_type'] == 'png'</filter>
+        </data>
+        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
+            <filter>advanced_options['output_type'] == 'pdf'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 01: Default options with PNG Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="histogram_options">
+                <param name="low" value="1"/>
+                <param name="high" value="10000"/>
+                <param name="inc" value="1"/>
+            </section>
+            <output name="output_hist" file="kat_hist.out">
+                <assert_contents>
+                    <has_text text="# Kmer value:27"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot" file="kat_hist.out.png" ftype="png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 02: Custom k-mer length with PDF Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="kmer_options">
+                <param name="mer_len" value="21"/>
+            </section>
+            <section name="advanced_options">
+                <param name="output_type" value="pdf"/>
+            </section>
+            <output name="output_hist">
+                <assert_contents>
+                    <has_text text="# Kmer value:21"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_pdf" ftype="pdf">
+                <assert_contents>
+                    <has_size size="13956"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 03: 5' trim with PNG Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="advanced_options">
+                <param name="trim_5p" value="5"/>
+            </section>
+            <output name="output_hist">
+                <assert_contents>
+                    <has_text text="# Kmer value:27"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot" ftype="png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help format="markdown"><![CDATA[
+**KAT Histogram (kat hist)**
+
+Creates a histogram of k-mer occurrences from sequencing data.
+
+-----
+
+**What it does**
+
+The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:
+
+```
+low + i*inc <= c < low + (i+1)*inc
+```
+
+The final bucket catches all k-mers with count >= the last bucket's lower bound.
+
+This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.
+
+-----
+
+**Input**
+
+- One or more **FASTA** or **FASTQ** files (gzipped accepted)
+- Or a single pre-computed **Jellyfish hash** file
+
+-----
+
+**Outputs**
+
+1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
+2. **Plot** (PNG/PDF) - visual plot of the k-mer frequency histogram.
+
+-----
+
+**Tips**
+
+- The default k-mer length of **27** works well for most short-read sequencing data.
+- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
+- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
+- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
+    ]]></help>
+    <expand macro="citations"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file
diff --git a/tools/kat/kat_gcp.xml b/tools/kat/kat_gcp.xml
new file mode 100644
index 0000000000..52254c71e0
--- /dev/null
+++ b/tools/kat/kat_gcp.xml
@@ -0,0 +1,171 @@
+<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>creates a histogram of k-mer occurrences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <stdio>
+        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
+        <exit_code range="2:133" level="fatal" description="Error occurred"/>
+        <exit_code range="135:" level="fatal" description="Error occurred"/>
+        <exit_code range=":-1" level="fatal" description="Error occurred"/>
+    </stdio>
+    <version_command>echo @TOOL_VERSION@</version_command>
+    <command><![CDATA[
+            #for $i in range(len($input_files)):
+                #set $f = $input_files[$i]
+                @SET_EXT@
+                cp '$f' 'input_${i}${ext}' &&
+            #end for
+            kat hist
+                -o kat_hist_out
+                -t "\${GALAXY_SLOTS:-1}"
+                --low $histogram_options.low
+                --high $histogram_options.high
+                --inc $histogram_options.inc
+                --mer_len $kmer_options.mer_len
+                --hash_size $kmer_options.hash_size
+                $kmer_options.non_canonical
+                @CMD_5PTRIM@
+                --output_type '$advanced_options.output_type'
+                #for $i in range(len($input_files)):
+                    #set $f = $input_files[$i]
+                    @SET_EXT@
+                    'input_${i}${ext}'
+                #end for
+        ]]></command>
+    <inputs>
+        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FastA/FastQ files (gzipped accepted), or a single Jellyfish hash file"/>
+        <section name="histogram_options" title="Histogram Options" expanded="true">
+            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
+            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
+            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
+        </section>
+        <expand macro="kmer_params"/>
+        <section name="advanced_options" title="Advanced Options" expanded="false">
+            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
+                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+            </param>
+            <expand macro="output_type_param"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
+        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
+            <filter>advanced_options['output_type'] == 'png'</filter>
+        </data>
+        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
+            <filter>advanced_options['output_type'] == 'pdf'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 01: Default options with PNG Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="histogram_options">
+                <param name="low" value="1"/>
+                <param name="high" value="10000"/>
+                <param name="inc" value="1"/>
+            </section>
+            <output name="output_hist" file="kat_hist.out">
+                <assert_contents>
+                    <has_text text="# Kmer value:27"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot" file="kat_hist.out.png" ftype="png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 02: Custom k-mer length with PDF Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="kmer_options">
+                <param name="mer_len" value="21"/>
+            </section>
+            <section name="advanced_options">
+                <param name="output_type" value="pdf"/>
+            </section>
+            <output name="output_hist">
+                <assert_contents>
+                    <has_text text="# Kmer value:21"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_pdf" ftype="pdf">
+                <assert_contents>
+                    <has_size size="13956"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 03: 5' trim with PNG Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="advanced_options">
+                <param name="trim_5p" value="5"/>
+            </section>
+            <output name="output_hist">
+                <assert_contents>
+                    <has_text text="# Kmer value:27"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot" ftype="png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help format="markdown"><![CDATA[
+**KAT Histogram (kat hist)**
+
+Creates a histogram of k-mer occurrences from sequencing data.
+
+-----
+
+**What it does**
+
+The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:
+
+```
+low + i*inc <= c < low + (i+1)*inc
+```
+
+The final bucket catches all k-mers with count >= the last bucket's lower bound.
+
+This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.
+
+-----
+
+**Input**
+
+- One or more **FASTA** or **FASTQ** files (gzipped accepted)
+- Or a single pre-computed **Jellyfish hash** file
+
+-----
+
+**Outputs**
+
+1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
+2. **Plot** (PNG/PDF) - visual plot of the k-mer frequency histogram.
+
+-----
+
+**Tips**
+
+- The default k-mer length of **27** works well for most short-read sequencing data.
+- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
+- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
+- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
+    ]]></help>
+    <expand macro="citations"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file
diff --git a/tools/kat/kat_hist.xml b/tools/kat/kat_hist.xml
new file mode 100644
index 0000000000..52254c71e0
--- /dev/null
+++ b/tools/kat/kat_hist.xml
@@ -0,0 +1,171 @@
+<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>creates a histogram of k-mer occurrences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <stdio>
+        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
+        <exit_code range="2:133" level="fatal" description="Error occurred"/>
+        <exit_code range="135:" level="fatal" description="Error occurred"/>
+        <exit_code range=":-1" level="fatal" description="Error occurred"/>
+    </stdio>
+    <version_command>echo @TOOL_VERSION@</version_command>
+    <command><![CDATA[
+            #for $i in range(len($input_files)):
+                #set $f = $input_files[$i]
+                @SET_EXT@
+                cp '$f' 'input_${i}${ext}' &&
+            #end for
+            kat hist
+                -o kat_hist_out
+                -t "\${GALAXY_SLOTS:-1}"
+                --low $histogram_options.low
+                --high $histogram_options.high
+                --inc $histogram_options.inc
+                --mer_len $kmer_options.mer_len
+                --hash_size $kmer_options.hash_size
+                $kmer_options.non_canonical
+                @CMD_5PTRIM@
+                --output_type '$advanced_options.output_type'
+                #for $i in range(len($input_files)):
+                    #set $f = $input_files[$i]
+                    @SET_EXT@
+                    'input_${i}${ext}'
+                #end for
+        ]]></command>
+    <inputs>
+        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FastA/FastQ files (gzipped accepted), or a single Jellyfish hash file"/>
+        <section name="histogram_options" title="Histogram Options" expanded="true">
+            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
+            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
+            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
+        </section>
+        <expand macro="kmer_params"/>
+        <section name="advanced_options" title="Advanced Options" expanded="false">
+            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
+                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+            </param>
+            <expand macro="output_type_param"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
+        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
+            <filter>advanced_options['output_type'] == 'png'</filter>
+        </data>
+        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
+            <filter>advanced_options['output_type'] == 'pdf'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 01: Default options with PNG Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="histogram_options">
+                <param name="low" value="1"/>
+                <param name="high" value="10000"/>
+                <param name="inc" value="1"/>
+            </section>
+            <output name="output_hist" file="kat_hist.out">
+                <assert_contents>
+                    <has_text text="# Kmer value:27"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot" file="kat_hist.out.png" ftype="png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 02: Custom k-mer length with PDF Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="kmer_options">
+                <param name="mer_len" value="21"/>
+            </section>
+            <section name="advanced_options">
+                <param name="output_type" value="pdf"/>
+            </section>
+            <output name="output_hist">
+                <assert_contents>
+                    <has_text text="# Kmer value:21"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_pdf" ftype="pdf">
+                <assert_contents>
+                    <has_size size="13956"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 03: 5' trim with PNG Output -->
+        <test expect_num_outputs="2">
+            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            <section name="advanced_options">
+                <param name="trim_5p" value="5"/>
+            </section>
+            <output name="output_hist">
+                <assert_contents>
+                    <has_text text="# Kmer value:27"/>
+                    <has_n_lines n="10007"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot" ftype="png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help format="markdown"><![CDATA[
+**KAT Histogram (kat hist)**
+
+Creates a histogram of k-mer occurrences from sequencing data.
+
+-----
+
+**What it does**
+
+The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:
+
+```
+low + i*inc <= c < low + (i+1)*inc
+```
+
+The final bucket catches all k-mers with count >= the last bucket's lower bound.
+
+This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.
+
+-----
+
+**Input**
+
+- One or more **FASTA** or **FASTQ** files (gzipped accepted)
+- Or a single pre-computed **Jellyfish hash** file
+
+-----
+
+**Outputs**
+
+1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
+2. **Plot** (PNG/PDF) - visual plot of the k-mer frequency histogram.
+
+-----
+
+**Tips**
+
+- The default k-mer length of **27** works well for most short-read sequencing data.
+- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
+- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
+- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
+    ]]></help>
+    <expand macro="citations"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file

From a3cdee9b66a4e85f48e5d2069ddd14da27411c24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= <bjoern@gruenings.eu>
Date: Wed, 10 Jun 2026 22:01:43 +0200
Subject: [PATCH 2/4] Update .shed.yml

---
 tools/kat/.shed.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kat/.shed.yml b/tools/kat/.shed.yml
index 05a46ede9d..6a23a97d46 100644
--- a/tools/kat/.shed.yml
+++ b/tools/kat/.shed.yml
@@ -6,7 +6,7 @@ long_description: |
   using k-mer frequency analysis. It includes tools for generating k-mer histicity plots, comparing
   k-mer spectra between datasets, filtering sequences by k-mer content, and analysing GC/coverage
   distributions.
-homepage_url: https://github.com/TGAC/KAT
+homepage_url: https://github.com/EarlhamInst/KAT
 remote_repository_url: https://github.com/bgruening/galaxytools/tree/master/tools/kat
 type: unrestricted
 categories:

From 8dfbe81013d542b0acdf5b984c0254279e77de36 Mon Sep 17 00:00:00 2001
From: Saim Momin <mominsaim12@gmail.com>
Date: Thu, 11 Jun 2026 11:18:08 +0200
Subject: [PATCH 3/4] remove kat_hist

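---
 Note: this removes the kat_hist.xml blob 52254c71e0 unchanged. After this
 patch kat_comp.xml and kat_gcp.xml still contain the kat_hist wrapper; they
 are only given their own content in PATCH 4/4.
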
 tools/kat/kat_hist.xml | 171 -----------------------------------------
 1 file changed, 171 deletions(-)
 delete mode 100644 tools/kat/kat_hist.xml

diff --git a/tools/kat/kat_hist.xml b/tools/kat/kat_hist.xml
deleted file mode 100644
index 52254c71e0..0000000000
--- a/tools/kat/kat_hist.xml
+++ /dev/null
@@ -1,171 +0,0 @@
-<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-    <description>creates a histogram of k-mer occurrences</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="xrefs"/>
-    <expand macro="requirements"/>
-    <stdio>
-        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
-        <exit_code range="2:133" level="fatal" description="Error occurred"/>
-        <exit_code range="135:" level="fatal" description="Error occurred"/>
-        <exit_code range=":-1" level="fatal" description="Error occurred"/>
-    </stdio>
-    <version_command>echo @TOOL_VERSION@</version_command>
-    <command><![CDATA[
-            #for $i in range(len($input_files)):
-                #set $f = $input_files[$i]
-                @SET_EXT@
-                cp '$f' 'input_${i}${ext}' &&
-            #end for
-            kat hist
-                -o kat_hist_out
-                -t "\${GALAXY_SLOTS:-1}"
-                --low $histogram_options.low
-                --high $histogram_options.high
-                --inc $histogram_options.inc
-                --mer_len $kmer_options.mer_len
-                --hash_size $kmer_options.hash_size
-                $kmer_options.non_canonical
-                @CMD_5PTRIM@
-                --output_type '$advanced_options.output_type'
-                #for $i in range(len($input_files)):
-                    #set $f = $input_files[$i]
-                    @SET_EXT@
-                    'input_${i}${ext}'
-                #end for
-        ]]></command>
-    <inputs>
-        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FastA/FastQ files (gzipped accepted), or a single Jellyfish hash file"/>
-        <section name="histogram_options" title="Histogram Options" expanded="true">
-            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
-            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
-            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
-        </section>
-        <expand macro="kmer_params"/>
-        <section name="advanced_options" title="Advanced Options" expanded="false">
-            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
-                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
-            </param>
-            <expand macro="output_type_param"/>
-        </section>
-    </inputs>
-    <outputs>
-        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
-        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
-            <filter>advanced_options['output_type'] == 'png'</filter>
-        </data>
-        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
-            <filter>advanced_options['output_type'] == 'pdf'</filter>
-        </data>
-    </outputs>
-    <tests>
-        <!-- Test 01: Default options with PNG Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="histogram_options">
-                <param name="low" value="1"/>
-                <param name="high" value="10000"/>
-                <param name="inc" value="1"/>
-            </section>
-            <output name="output_hist" file="kat_hist.out">
-                <assert_contents>
-                    <has_text text="# Kmer value:27"/>
-                    <has_n_lines n="10007"/>
-                </assert_contents>
-            </output>
-            <output name="output_plot" file="kat_hist.out.png" ftype="png">
-                <assert_contents>
-                    <has_image_channels channels="4"/> 
-                    <has_image_height height="1800"/> 
-                    <has_image_width width="2400"/> 
-                </assert_contents>
-            </output>
-        </test>
-        <!-- Test 02: Custom k-mer length with PDF Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="kmer_options">
-                <param name="mer_len" value="21"/>
-            </section>
-            <section name="advanced_options">
-                <param name="output_type" value="pdf"/>
-            </section>
-            <output name="output_hist">
-                <assert_contents>
-                    <has_text text="# Kmer value:21"/>
-                    <has_n_lines n="10007"/>
-                </assert_contents>
-            </output>
-            <output name="output_pdf" ftype="pdf">
-                <assert_contents>
-                    <has_size size="13956"/>
-                </assert_contents>
-            </output>
-        </test>
-        <!-- Test 03: 5' trim with PNG Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="advanced_options">
-                <param name="trim_5p" value="5"/>
-            </section>
-            <output name="output_hist">
-                <assert_contents>
-                    <has_text text="# Kmer value:27"/>
-                    <has_n_lines n="10007"/>
-                </assert_contents>
-            </output>
-            <output name="output_plot" ftype="png">
-                <assert_contents>
-                    <has_image_channels channels="4"/> 
-                    <has_image_height height="1800"/> 
-                    <has_image_width width="2400"/> 
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help format="markdown"><![CDATA[
-**KAT Histogram (kat hist)**
-
-Creates a histogram of k-mer occurrences from sequencing data.
-
------
-
-**What it does**
-
-The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:
-
-```
-low + i*inc <= c < low + (i+1)*inc
-```
-
-The final bucket catches all k-mers with count >= the last bucket's lower bound.
-
-This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.
-
------
-
-**Input**
-
-- One or more **FASTA** or **FASTQ** files (gzipped accepted)
-- Or a single pre-computed **Jellyfish hash** file
-
------
-
-**Outputs**
-
-1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
-2. **Plot** (PNG/PDF) - visual plot of the k-mer frequency histogram.
-
------
-
-**Tips**
-
-- The default k-mer length of **27** works well for most short-read sequencing data.
-- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
-- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
-- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
-    ]]></help>
-    <expand macro="citations"/>
-    <expand macro="creator"/>
-</tool>
\ No newline at end of file

From 785224de809446259fe6be679e509e8f55edbae9 Mon Sep 17 00:00:00 2001
From: Saim Momin <mominsaim12@gmail.com>
Date: Thu, 11 Jun 2026 21:02:34 +0200
Subject: [PATCH 4/4] Fix overwritten tools

---
 tools/kat/kat_comp.xml | 501 ++++++++++++++++++++++++++++++++++-------
 tools/kat/kat_gcp.xml  | 268 ++++++++++++++--------
 2 files changed, 597 insertions(+), 172 deletions(-)

diff --git a/tools/kat/kat_comp.xml b/tools/kat/kat_comp.xml
index 52254c71e0..bfd210b4fa 100644
--- a/tools/kat/kat_comp.xml
+++ b/tools/kat/kat_comp.xml
@@ -1,170 +1,513 @@
-<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-    <description>creates a histogram of k-mer occurrences</description>
+<tool id="kat_comp" name="KAT Comp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>compares k-mer count hashes</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="xrefs"/>
     <expand macro="requirements"/>
     <stdio>
-        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
+        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption — output files were still produced correctly"/>
         <exit_code range="2:133" level="fatal" description="Error occurred"/>
         <exit_code range="135:" level="fatal" description="Error occurred"/>
         <exit_code range=":-1" level="fatal" description="Error occurred"/>
     </stdio>
     <version_command>echo @TOOL_VERSION@</version_command>
     <command><![CDATA[
-            #for $i in range(len($input_files)):
-                #set $f = $input_files[$i]
+        ## Copy dataset 1 files into the working directory under names that carry the appropriate extension
+        #for $i, $f in enumerate($input1.files):
+            @SET_EXT@
+            cp '$f' 'input1_${i}${ext}' &&
+        #end for
+
+        ## Copy dataset 2 files into the working directory under names that carry the appropriate extension
+        #for $i, $f in enumerate($input2.files):
+            @SET_EXT@
+            cp '$f' 'input2_${i}${ext}' &&
+        #end for
+
+        ## Copy optional dataset 3 files into the working directory under names that carry the appropriate extension
+        #if $input3.use_input3 == 'yes':
+            #for $i, $f in enumerate($input3.files):
+                @SET_EXT@
+                cp '$f' 'input3_${i}${ext}' &&
+            #end for
+        #end if
+
+        kat comp
+            -o kat-comp
+            -t "\${GALAXY_SLOTS:-1}"
+            -x '$dataset_options.d1_scale'
+            -y '$dataset_options.d2_scale'
+            -i '$dataset_options.d1_bins'
+            -j '$dataset_options.d2_bins'
+            #if str($dataset_options.d1_5ptrim).strip():
+                --d1_5ptrim '$dataset_options.d1_5ptrim'
+            #end if
+            #if str($dataset_options.d2_5ptrim).strip():
+                --d2_5ptrim '$dataset_options.d2_5ptrim'
+            #end if
+            $input1.non_canonical_1
+            $input2.non_canonical_2
+            $kmer_options.disable_hash_grow
+            -m '$kmer_options.mer_len'
+            -H '$kmer_options.hash_size_1'
+            -I '$kmer_options.hash_size_2'
+            $output_options.density_plot
+            -p '$output_options.output_type'
+            $output_options.output_hists
+            #if $input3.use_input3 == 'yes':
+                $input3.non_canonical_3
+                -J '$input3.hash_size_3'
+                #if str($input3.d3_5ptrim).strip():
+                    --d3_5ptrim '$input3.d3_5ptrim'
+                #end if
+            #end if
+            ## kat comp takes ONE positional argument per dataset, so every file of a
+            ## dataset is joined into a single quoted, space-separated word (selection order kept).
+            #set $dataset1_paths = []
+            #for $i, $f in enumerate($input1.files):
+                @SET_EXT@
+                #silent $dataset1_paths.append('input1_%s%s' % ($i, $ext))
+            #end for
+            '${" ".join($dataset1_paths)}'
+            #set $dataset2_paths = []
+            #for $i, $f in enumerate($input2.files):
                 @SET_EXT@
-                cp '$f' 'input_${i}${ext}' &&
+                #silent $dataset2_paths.append('input2_%s%s' % ($i, $ext))
             #end for
-            kat hist
-                -o kat_hist_out
-                -t "\${GALAXY_SLOTS:-1}"
-                --low $histogram_options.low
-                --high $histogram_options.high
-                --inc $histogram_options.inc
-                --mer_len $kmer_options.mer_len
-                --hash_size $kmer_options.hash_size
-                $kmer_options.non_canonical
-                @CMD_5PTRIM@
-                --output_type '$advanced_options.output_type'
-                #for $i in range(len($input_files)):
-                    #set $f = $input_files[$i]
+            '${" ".join($dataset2_paths)}'
+            #if $input3.use_input3 == 'yes':
+                #set $dataset3_paths = []
+                #for $i, $f in enumerate($input3.files):
                     @SET_EXT@
-                    'input_${i}${ext}'
+                    #silent $dataset3_paths.append('input3_%s%s' % ($i, $ext))
                 #end for
-        ]]></command>
+                '${" ".join($dataset3_paths)}'
+            #end if
+    ]]></command>
     <inputs>
-        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FastA/FastQ files (gzipped accepted), or a single Jellyfish hash file"/>
-        <section name="histogram_options" title="Histogram Options" expanded="true">
-            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
-            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
-            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
+        <!-- Dataset 1 -->
+        <section name="input1" title="Dataset 1" expanded="true">
+            <param name="files" type="data" format="fasta,fastqsanger,fasta.gz,fastqsanger.gz,jellyfish" multiple="true" label="Dataset 1 files" help="First set of FASTA/FASTQ files (gzip accepted) or a pre-counted Jellyfish hash. For the reads-vs-assembly use case, provide reads here."/>
+            <param argument="--non_canonical_1" type="boolean" truevalue="--non_canonical_1" falsevalue="" checked="false" label="Non-canonical k-mers (dataset 1)" help="Store explicit k-mer as found. By default, canonical k-mers are stored, counting both strands."/>
+        </section>
+
+        <!-- Dataset 2 -->
+        <section name="input2" title="Dataset 2" expanded="true">
+            <param name="files" type="data" format="fasta,fastqsanger,fasta.gz,fastqsanger.gz,jellyfish" multiple="true" label="Dataset 2 files" help="Second set of FASTA/FASTQ files (gzip accepted) or a pre-counted Jellyfish hash. For the reads-vs-assembly use case, provide the assembly here."/>
+            <param argument="--non_canonical_2" type="boolean" truevalue="--non_canonical_2" falsevalue="" checked="false" label="Non-canonical k-mers (dataset 2)" help="Store explicit k-mer as found. By default, canonical k-mers are stored, counting both strands."/>
         </section>
-        <expand macro="kmer_params"/>
-        <section name="advanced_options" title="Advanced Options" expanded="false">
-            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
-                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+
+        <!-- Optional Dataset 3 -->
+        <conditional name="input3">
+            <param name="use_input3" type="select" label="Use a third dataset as a k-mer filter">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="files" type="data" format="fasta,fastqsanger,fasta.gz,fastqsanger.gz,jellyfish" multiple="true" label="Dataset 3 files" help="Optional third dataset used as a k-mer filter, restricting analysis to k-mers present in this set."/>
+                <param argument="--non_canonical_3" type="boolean" truevalue="--non_canonical_3" falsevalue="" checked="false" label="Non-canonical k-mers (dataset 3)" help="Store explicit k-mer as found. By default, canonical k-mers are stored, counting both strands."/>
+                <param argument="--hash_size_3" type="integer" value="100000000" min="1" label="Hash size (dataset 3)" help="Hash size for k-mer counting of dataset 3 if counting is required. (default: 100000000)"/>
+                <param name="d3_5ptrim" type="text" value="" label="5' trim length (dataset 3)" help="Ignore the first X bases from reads in dataset 3. For multiple files, provide comma-separated values (e.g. 5,10,0). Leave blank to disable.">
+                    <validator type="regex" message="Must be empty, a number, or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+                </param>
+            </when>
+        </conditional>
+
+        <!-- Dataset scaling and binning -->
+        <section name="dataset_options" title="Dataset Options" expanded="false">
+            <param argument="--d1_scale" type="float" value="1.0" min="0.0" label="Dataset 1 scaling factor" help="Float multiplier applied to k-mer counts from dataset 1 before comparison. (default: 1.0)"/>
+            <param argument="--d2_scale" type="float" value="1.0" min="0.0" label="Dataset 2 scaling factor" help="Float multiplier applied to k-mer counts from dataset 2 before comparison. (default: 1.0)"/>
+            <param argument="--d1_bins" type="integer" value="1001" min="1" label="Dataset 1 bins" help="Number of bins (rows in the matrix) for dataset 1. (default: 1001)"/>
+            <param argument="--d2_bins" type="integer" value="1001" min="1" label="Dataset 2 bins" help="Number of bins (columns in the matrix) for dataset 2. (default: 1001)"/>
+            <param argument="--d1_5ptrim" type="text" value="" label="5' trim length (dataset 1)" help="Ignore the first X bases from reads in dataset 1. For multiple files, provide comma-separated values (e.g. 5,10,0). Leave blank to disable.">
+                <validator type="regex" message="Must be empty, a number, or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
             </param>
+            <param argument="--d2_5ptrim" type="text" value="" label="5' trim length (dataset 2)" help="Ignore the first X bases from reads in dataset 2. For multiple files, provide comma-separated values (e.g. 5,10,0). Leave blank to disable.">
+                <validator type="regex" message="Must be empty, a number, or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+            </param>
+        </section>
+
+        <!-- K-mer options -->
+        <section name="kmer_options" title="K-mer Options" expanded="false">
+            <param argument="--mer_len" type="integer" value="27" min="1" label="K-mer length" help="Larger values provide more discriminating power between k-mers but require more memory and result in lower coverage. (default: 27)"/>
+            <param argument="--hash_size_1" type="integer" value="100000000" min="1" label="Hash size (dataset 1)" help="Hash size for k-mer counting of dataset 1 if counting is required. Doubled automatically if too small. (default: 100000000)"/>
+            <param argument="--hash_size_2" type="integer" value="100000000" min="1" label="Hash size (dataset 2)" help="Hash size for k-mer counting of dataset 2 if counting is required. Doubled automatically if too small. (default: 100000000)"/>
+            <param argument="--disable_hash_grow" type="boolean" truevalue="--disable_hash_grow" falsevalue="" checked="false" label="Disable automatic hash growing" help="By default, jellyfish doubles the hash size if filled and recounts. Enabling this disables that behaviour and throws an error instead. Useful for large genomes or strict memory limits."/>
+        </section>
+
+        <!-- Output options -->
+        <section name="output_options" title="Output Options" expanded="false">
             <expand macro="output_type_param"/>
+            <param argument="--density_plot" type="boolean" truevalue="--density_plot" falsevalue="" checked="false" label="Create density plot" help="Makes a density plot. By default a spectra-cn plot is created."/>
+            <param argument="--output_hists" type="boolean" truevalue="--output_hists" falsevalue="" checked="false" label="Output histogram data and plots" help="Whether to output histogram data and plots for dataset 1 and dataset 2."/>
         </section>
     </inputs>
+
     <outputs>
-        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
-        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
-            <filter>advanced_options['output_type'] == 'png'</filter>
+        <!-- Comparison matrix -->
+        <data name="output_matrix" format="tabular" from_work_dir="kat-comp-main.mx" label="${tool.name} on ${on_string}: Comparison matrix"/>
+
+        <!-- Main plots -->
+        <data name="output_plot_png" format="png" from_work_dir="kat-comp-main.mx.spectra-cn.png" label="${tool.name} on ${on_string}: Spectra-CN Plot (PNG)">
+            <filter>output_options['output_type'] == 'png' and not output_options['density_plot']</filter>
+        </data>
+
+        <data name="output_plot_density_png" format="png" from_work_dir="kat-comp-main.mx.density.png" label="${tool.name} on ${on_string}: Density-CN Plot (PNG)">
+            <filter>output_options['output_type'] == 'png' and output_options['density_plot']</filter>
+        </data>
+
+        <data name="output_plot_pdf" format="pdf" from_work_dir="kat-comp-main.mx.spectra-cn.pdf" label="${tool.name} on ${on_string}: Spectra-CN Plot (PDF)">
+            <filter>output_options['output_type'] == 'pdf' and not output_options['density_plot']</filter>
+        </data>
+
+        <data name="output_plot_density_pdf" format="pdf" from_work_dir="kat-comp-main.mx.density.pdf" label="${tool.name} on ${on_string}: Density-CN Plot (PDF)">
+            <filter>output_options['output_type'] == 'pdf' and output_options['density_plot']</filter>
+        </data>
+
+        <!-- Stats -->
+        <data name="output_stats" format="tabular" from_work_dir="kat-comp.stats" label="${tool.name} on ${on_string}: Statistics"/>
+
+        <!-- Dataset 1 histogram (optional) -->
+        <data name="output_hist1" format="tabular" from_work_dir="kat-comp.1.hist" label="${tool.name} on ${on_string}: Dataset 1 histogram data">
+            <filter>output_options['output_hists']</filter>
+        </data>
+        <data name="output_hist1_plot_png" format="png" from_work_dir="kat-comp.1.hist.png" label="${tool.name} on ${on_string}: Dataset 1 histogram plot (PNG)">
+            <filter>output_options['output_hists'] and output_options['output_type'] == 'png'</filter>
+        </data>
+        <data name="output_hist1_plot_pdf" format="pdf" from_work_dir="kat-comp.1.hist.pdf" label="${tool.name} on ${on_string}: Dataset 1 histogram plot (PDF)">
+            <filter>output_options['output_hists'] and output_options['output_type'] == 'pdf'</filter>
+        </data>
+
+        <!-- Dataset 2 histogram (optional) -->
+        <data name="output_hist2" format="tabular" from_work_dir="kat-comp.2.hist" label="${tool.name} on ${on_string}: Dataset 2 histogram data">
+            <filter>output_options['output_hists']</filter>
         </data>
-        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
-            <filter>advanced_options['output_type'] == 'pdf'</filter>
+        <data name="output_hist2_plot_png" format="png" from_work_dir="kat-comp.2.hist.png" label="${tool.name} on ${on_string}: Dataset 2 histogram plot (PNG)">
+            <filter>output_options['output_hists'] and output_options['output_type'] == 'png'</filter>
+        </data>
+        <data name="output_hist2_plot_pdf" format="pdf" from_work_dir="kat-comp.2.hist.pdf" label="${tool.name} on ${on_string}: Dataset 2 histogram plot (PDF)">
+            <filter>output_options['output_hists'] and output_options['output_type'] == 'pdf'</filter>
+        </data>
+
+        <!-- Matrices generated if 3rd dataset is provided -->
+        <data name="kat_ends_matrix" format="tabular" from_work_dir="kat-comp-ends.mx" label="${tool.name} on ${on_string}: Ends Comparison matrix">
+            <filter>input3['use_input3'] == 'yes'</filter>
+        </data>
+
+        <data name="kat_middle_matrix" format="tabular" from_work_dir="kat-comp-middle.mx" label="${tool.name} on ${on_string}: Middle Comparison matrix">
+            <filter>input3['use_input3'] == 'yes'</filter>
+        </data>
+
+        <data name="kat_mixed_matrix" format="tabular" from_work_dir="kat-comp-mixed.mx" label="${tool.name} on ${on_string}: Mixed Comparison matrix">
+            <filter>input3['use_input3'] == 'yes'</filter>
         </data>
     </outputs>
+
     <tests>
-        <!-- Test 01: Default options with PNG Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="histogram_options">
-                <param name="low" value="1"/>
-                <param name="high" value="10000"/>
-                <param name="inc" value="1"/>
+        <!-- Test 01: Two FASTQ read sets (read-vs-read, density plot), PNG -->
+        <test expect_num_outputs="3">
+            <section name="input1">
+                <param name="files" value="ecoli_r1.1K.fastq" ftype="fastqsanger"/>
             </section>
-            <output name="output_hist" file="kat_hist.out">
+            <section name="input2">
+                <param name="files" value="ecoli_r2.1K.fastq" ftype="fastqsanger"/>
+            </section>
+            <conditional name="input3">
+                <param name="use_input3" value="no"/>
+            </conditional>
+            <section name="kmer_options">
+                <param name="mer_len" value="27"/>
+            </section>
+            <section name="output_options">
+                <param name="output_type" value="png"/>
+                <param name="density_plot" value="true"/>
+            </section>
+            <output name="output_matrix" location="https://zenodo.org/records/20347489/files/kat_comp_matrix.mx">
                 <assert_contents>
-                    <has_text text="# Kmer value:27"/>
-                    <has_n_lines n="10007"/>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="1013"/>
+                    <has_text text="# MaxVal:55758"/>
                 </assert_contents>
             </output>
-            <output name="output_plot" file="kat_hist.out.png" ftype="png">
+            <output name="output_plot_density_png" file="kat_comp_density.png">
                 <assert_contents>
                     <has_image_channels channels="4"/> 
                     <has_image_height height="1800"/> 
                     <has_image_width width="2400"/> 
                 </assert_contents>
             </output>
+            <output name="output_stats" file="kat_comp_stats.tsv">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="39"/>
+                    <has_text text=" - Total shared found in hash 1: 14741"/>
+                    <has_text text=" - Total shared found in hash 2: 14213"/>
+                </assert_contents>
+            </output>
         </test>
-        <!-- Test 02: Custom k-mer length with PDF Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+
+        <!-- Test 02: Two FASTQ read sets (read-vs-read, density plot), PDF -->
+        <test expect_num_outputs="3">
+            <section name="input1">
+                <param name="files" value="ecoli_r1.1K.fastq" ftype="fastqsanger"/>
+            </section>
+            <section name="input2">
+                <param name="files" value="ecoli_r2.1K.fastq" ftype="fastqsanger"/>
+            </section>
+            <conditional name="input3">
+                <param name="use_input3" value="no"/>
+            </conditional>
             <section name="kmer_options">
-                <param name="mer_len" value="21"/>
+                <param name="mer_len" value="27"/>
             </section>
-            <section name="advanced_options">
+            <section name="output_options">
                 <param name="output_type" value="pdf"/>
+                <param name="density_plot" value="true"/>
             </section>
-            <output name="output_hist">
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="1013"/>
+                    <has_text text="# MaxVal:55758"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot_density_pdf">
                 <assert_contents>
-                    <has_text text="# Kmer value:21"/>
-                    <has_n_lines n="10007"/>
+                    <has_size value="21334"/>
                 </assert_contents>
             </output>
-            <output name="output_pdf" ftype="pdf">
+            <output name="output_stats">
                 <assert_contents>
-                    <has_size size="13956"/>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="39"/>
+                    <has_text text=" - Total shared found in hash 1: 14741"/>
+                    <has_text text=" - Total shared found in hash 2: 14213"/>
                 </assert_contents>
             </output>
         </test>
-        <!-- Test 03: 5' trim with PNG Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="advanced_options">
-                <param name="trim_5p" value="5"/>
+
+
+        <!-- Test 03: Reads vs assembly (spectra-cn), PNG -->
+        <test expect_num_outputs="3">
+            <section name="input1">
+                <param name="files" value="ecoli_r1.1K.fastq" ftype="fastqsanger"/>
             </section>
-            <output name="output_hist">
+            <section name="input2">
+                <param name="files" location="https://zenodo.org/records/20347489/files/EcoliK12.fasta" ftype="fasta"/>
+            </section>
+            <conditional name="input3">
+                <param name="use_input3" value="no"/>
+            </conditional>
+            <section name="kmer_options">
+                <param name="mer_len" value="27"/>
+            </section>
+            <section name="output_options">
+                <param name="output_type" value="png"/>
+                <param name="density_plot" value="false"/>
+            </section>
+            <output name="output_matrix">
                 <assert_contents>
-                    <has_text text="# Kmer value:27"/>
-                    <has_n_lines n="10007"/>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="1013"/>
+                    <has_text text="# MaxVal:4485405"/>
                 </assert_contents>
             </output>
-            <output name="output_plot" ftype="png">
+            <output name="output_plot_png">
                 <assert_contents>
                     <has_image_channels channels="4"/> 
                     <has_image_height height="1800"/> 
                     <has_image_width width="2400"/> 
                 </assert_contents>
             </output>
+            <output name="output_stats">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="39"/>
+                    <has_text text=" - Total shared found in hash 1: 37899"/>
+                    <has_text text=" - Total shared found in hash 2: 45921"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Test 04: Reads vs assembly with histogram outputs, PDF -->
+        <test expect_num_outputs="7">
+            <section name="input1">
+                <param name="files" value="ecoli_r1.1K.fastq" ftype="fastqsanger"/>
+            </section>
+            <section name="input2">
+                <param name="files" location="https://zenodo.org/records/20347489/files/EcoliK12.fasta" ftype="fasta"/>
+            </section>
+            <conditional name="input3">
+                <param name="use_input3" value="no"/>
+            </conditional>
+            <section name="kmer_options">
+                <param name="mer_len" value="27"/>
+            </section>
+            <section name="output_options">
+                <param name="output_type" value="pdf"/>
+                <param name="output_hists" value="true"/>
+            </section>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="1013"/>
+                    <has_text text="# MaxVal:4485405"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot_pdf">
+                <assert_contents>
+                    <has_size value="14721"/>
+                </assert_contents>
+            </output>
+            <output name="output_hist1">
+                <assert_contents>
+                    <has_size size="6010"/>
+                    <has_text text="# XLabel:27-mer frequency"/>
+                </assert_contents>
+            </output>
+            <output name="output_hist1_plot_pdf">
+                <assert_contents>
+                    <has_size value="11727"/>
+                </assert_contents>
+            </output>
+            <output name="output_hist2">
+                <assert_contents>
+                    <has_text text="# XLabel:27-mer frequency"/>
+                    <has_size size="6031"/>
+                </assert_contents>
+            </output>
+            <output name="output_hist2_plot_pdf">
+                <assert_contents>
+                    <has_size size="11727" delta="2000"/> <!-- NOTE(review): exact size of the dataset 2 plot is not recorded here; tighten once known -->
+                </assert_contents>
+            </output>
+            <output name="output_stats">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="39"/>
+                    <has_text text=" - Total shared found in hash 1: 37899"/>
+                    <has_text text=" - Total shared found in hash 2: 45921"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Test 05: Three datasets, custom bins and scaling, gzipped inputs -->
+        <test expect_num_outputs="6">
+            <section name="input1">
+                <param name="files" value="ecoli_r1.1K.fastq.gz" ftype="fastqsanger.gz"/>
+            </section>
+            <section name="input2">
+                <param name="files" value="ecoli_r2.1K.fastq.gz" ftype="fastqsanger.gz"/>
+            </section>
+            <conditional name="input3">
+                <param name="use_input3" value="yes"/>
+                <param name="files" location="https://zenodo.org/records/20347489/files/EcoliK12.fasta" ftype="fasta"/>
+            </conditional>
+            <section name="dataset_options">
+                <param name="d1_scale" value="0.5"/>
+                <param name="d2_scale" value="0.5"/>
+                <param name="d1_bins" value="500"/>
+                <param name="d2_bins" value="500"/>
+            </section>
+            <section name="kmer_options">
+                <param name="mer_len" value="21"/>
+            </section>
+            <section name="output_options">
+                <param name="output_type" value="png"/>
+            </section>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="512"/>
+                    <has_text text="# MaxVal:60421"/>
+                    <has_size value="500312"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot_png">
+                <assert_contents>
+                    <has_image_channels channels="4"/> 
+                    <has_image_height height="1800"/> 
+                    <has_image_width width="2400"/> 
+                </assert_contents>
+            </output>
+            <output name="kat_ends_matrix">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="502"/>
+                    <has_size value="500146"/>
+                </assert_contents>
+            </output>
+            <output name="kat_middle_matrix">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="502"/>
+                    <has_size value="500148"/>
+                </assert_contents>
+            </output>
+            <output name="kat_mixed_matrix">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="502"/>
+                    <has_size value="500177"/>
+                </assert_contents>
+            </output>
+            <output name="output_stats">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_n_lines n="42"/>
+                    <has_text text=" - Total shared found in hash 1: 17153"/>
+                    <has_text text=" - Total shared found in hash 2: 16531"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
+
     <help format="markdown"><![CDATA[
-**KAT Histogram (kat hist)**
+**KAT Comp - K-mer Hash Comparison**
 
-Creates a histogram of k-mer occurrences from sequencing data.
+KAT (K-mer Analysis Toolkit) Comp compares k-mer spectra from two (or optionally three) datasets, producing an intersection matrix and a visualisation plot.
 
 -----
 
 **What it does**
 
-The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:
+There are three main use cases:
 
-```
-low + i*inc <= c < low + (i+1)*inc
-```
+1. **Reads vs reads** – Compares k-mer spectra from two read datasets. The intersection matrix is visualised as a density plot showing how related the two spectra are.
 
-The final bucket catches all k-mers with count >= the last bucket's lower bound.
+2. **Reads vs assembly** – Compares k-mers from a read set (dataset 1) against those from an assembly (dataset 2). The matrix is visualised as a stacked spectra-cn histogram, revealing how well the assembly represents the read data and highlighting missing, duplicated, or collapsed content.
 
-This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.
+3. **Filtered comparison** – A third dataset can be provided as a k-mer filter, restricting the comparison to only k-mers present in that set.
 
 -----
 
-**Input**
+**Inputs**
 
-- One or more **FASTA** or **FASTQ** files (gzipped accepted)
-- Or a single pre-computed **Jellyfish hash** file
+- **Dataset 1**: One or more FASTA/FASTQ files (gzip accepted) or a pre-counted Jellyfish hash. For the reads-vs-assembly use case, provide reads here.
+- **Dataset 2**: One or more FASTA/FASTQ files (gzip accepted) or a pre-counted Jellyfish hash. For the reads-vs-assembly use case, provide the assembly here.
+- **Dataset 3** *(optional)*: A third dataset used as a k-mer filter. Only k-mers present in this set are considered during comparison.
 
 -----
 
 **Outputs**
 
-1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
-2. **Plot** (PNG/PDF) - visual plot of the k-mer frequency histogram.
+- **Comparison matrix** (`.mx`): Tab-separated matrix of intersected k-mer counts, with rows corresponding to dataset 1 bins and columns to dataset 2 bins.
+- **Plot**: Visualisation of the matrix in the selected format (PNG, PDF, or PostScript). A spectra-cn plot by default, or a density plot if selected.
+- **Histogram data and plots** *(optional)*: Per-dataset k-mer frequency histograms and corresponding plots, enabled via *Output histogram data and plots*.
 
 -----
 
-**Tips**
+**Notes**
+
+- For the reads-vs-assembly use case, dataset 1 must be the **reads** and dataset 2 the **assembly**.
+- **Scaling factors** can be used to normalise coverage differences between datasets before comparison.
+- **5' trim** values can be specified per file using comma-separated integers (e.g. `5,5` for two files).
+- Use **Disable automatic hash growing** when working with large genomes or under strict memory limits.
+- If inputs are already Jellyfish hashes, k-mer length and hash size settings are ignored for those inputs.
+
 
-- The default k-mer length of **27** works well for most short-read sequencing data.
-- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
-- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
-- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
     ]]></help>
     <expand macro="citations"/>
     <expand macro="creator"/>
diff --git a/tools/kat/kat_gcp.xml b/tools/kat/kat_gcp.xml
index 52254c71e0..93756c439a 100644
--- a/tools/kat/kat_gcp.xml
+++ b/tools/kat/kat_gcp.xml
@@ -1,170 +1,260 @@
-<tool id="kat_hist" name="KAT Histogram" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-    <description>creates a histogram of k-mer occurrences</description>
+<tool id="kat_gcp" name="KAT GCP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>GC vs Coverage analysis of k-mers</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="xrefs"/>
     <expand macro="requirements"/>
     <stdio>
-        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption - output files were still produced correctly"/>
+        <regex match="corrupted double-linked list" source="stderr" level="warning" description="KAT C++ heap corruption — output files were still produced correctly"/>
         <exit_code range="2:133" level="fatal" description="Error occurred"/>
         <exit_code range="135:" level="fatal" description="Error occurred"/>
         <exit_code range=":-1" level="fatal" description="Error occurred"/>
     </stdio>
     <version_command>echo @TOOL_VERSION@</version_command>
     <command><![CDATA[
-            #for $i in range(len($input_files)):
-                #set $f = $input_files[$i]
+        #if $input_type.type == 'fastx':
+            #for $i in range(len($input_type.input_files)):
+                #set $f = $input_type.input_files[$i]
                 @SET_EXT@
                 cp '$f' 'input_${i}${ext}' &&
             #end for
-            kat hist
-                -o kat_hist_out
-                -t "\${GALAXY_SLOTS:-1}"
-                --low $histogram_options.low
-                --high $histogram_options.high
-                --inc $histogram_options.inc
-                --mer_len $kmer_options.mer_len
-                --hash_size $kmer_options.hash_size
-                $kmer_options.non_canonical
-                @CMD_5PTRIM@
-                --output_type '$advanced_options.output_type'
-                #for $i in range(len($input_files)):
-                    #set $f = $input_files[$i]
+        #end if
+        kat gcp
+            -o kat-gcp
+            -t "\${GALAXY_SLOTS:-1}"
+            -x '$matrix_options.cvg_scale'
+            -y '$matrix_options.cvg_bins'
+            #if str($matrix_options.five_prime_trim).strip():
+                --5ptrim '$matrix_options.five_prime_trim'
+            #end if
+            -m '$kmer_options.mer_len'
+            -H '$kmer_options.hash_size'
+            $kmer_options.non_canonical
+            -v
+            -p '$output_options.output_type'
+            #if $input_type.type == 'fastx':
+                #for $i in range(len($input_type.input_files)):
+                    #set $f = $input_type.input_files[$i]
                     @SET_EXT@
                     'input_${i}${ext}'
                 #end for
-        ]]></command>
+            #else:
+                '$input_type.jellyfish_hash'
+            #end if
+    ]]></command>
     <inputs>
-        <param name="input_files" type="data" format="fastqsanger,fastqsanger.gz,fasta,fasta.gz" multiple="true" label="Input files" help="One or more FastA/FastQ files (gzipped accepted), or a single Jellyfish hash file"/>
-        <section name="histogram_options" title="Histogram Options" expanded="true">
-            <param argument="--low" type="integer" value="1" min="1" label="Low count" help="Low count value of histogram"/>
-            <param argument="--high" type="integer" value="10000" min="1" label="High count" help="High count value of histogram"/>
-            <param argument="--inc" type="integer" value="1" min="1" label="Increment" help="Increment for each bin"/>
-        </section>
-        <expand macro="kmer_params"/>
-        <section name="advanced_options" title="Advanced Options" expanded="false">
-            <param name="trim_5p" argument="--5ptrim" type="text" value="" label="5' trim" help="Ignore the first X bases from reads. For multiple input files, provide comma-separated values (e.g. 5,10,0).">
-                <validator type="regex" message="Must be a number or comma-separated numbers">^[0-9]+(,[0-9]+)*$|^$</validator>
+        <conditional name="input_type">
+            <param name="type" type="select" label="Input type">
+                <option value="fastx" selected="true">FASTA/FASTQ files</option>
+                <option value="jellyfish">Jellyfish hash</option>
             </param>
+            <when value="fastx">
+                <param name="input_files" type="data" format="fasta,fastqsanger,fasta.gz,fastqsanger.gz" multiple="true" label="Input FASTA/FASTQ files" help="One or more sequence files to analyse"/>
+            </when>
+            <when value="jellyfish">
+                <param name="jellyfish_hash" type="data" format="jellyfish" label="Jellyfish hash" help="Pre-computed jellyfish k-mer hash file"/>
+            </when>
+        </conditional>
+        <section name="kmer_options" title="K-mer Options" expanded="false">
+            <param argument="-m" name="mer_len" type="integer" value="27" min="1" label="K-mer length" help="Larger values provide more discriminating power between k-mers, at the cost of additional memory and lower k-mer coverage"/>
+            <param argument="-H" name="hash_size" type="integer" value="100000000" min="1" label="Hash size" help="If k-mer counting is required, use this value as the hash size. The hash will be doubled and recounted if it is too small, increasing runtime and memory usage"/>
+            <param argument="-N" name="non_canonical" type="boolean" truevalue="--non_canonical" falsevalue="" checked="false" label="Non-canonical k-mers" help="Store explicit k-mer as found. By default, canonical k-mers are stored, meaning both strands are counted"/>
+        </section>
+        <section name="matrix_options" title="Matrix Options" expanded="false">
+            <param argument="-x" name="cvg_scale" type="float" value="1" min="0" label="Coverage scale" help="Scaling factor applied to each k-mer's coverage before it is assigned to a coverage bin; use values below 1 to fit higher coverages into the matrix"/>
+            <param argument="-y" name="cvg_bins" type="integer" value="1000" min="1" label="Coverage bins" help="Number of bins for the coverage data when creating the contamination matrix"/>
+            <expand macro="five_prime_trim_param"/>
+        </section>
+        <section name="output_options" title="Output Options" expanded="false">
             <expand macro="output_type_param"/>
         </section>
     </inputs>
     <outputs>
-        <data name="output_hist" format="tabular" from_work_dir="kat_hist_out" label="${tool.name} on ${on_string}: Histogram"/>
-        <data name="output_plot" format="png" from_work_dir="kat_hist_out.png" label="${tool.name} on ${on_string}: PNG Plot">
-            <filter>advanced_options['output_type'] == 'png'</filter>
+        <data name="output_matrix" format="tabular" from_work_dir="kat-gcp.mx" label="${tool.name} on ${on_string}: GCP matrix"/>
+        <data name="output_plot" format="png" from_work_dir="kat-gcp.mx.png" label="${tool.name} on ${on_string}: GCP PNG Plot">
+            <filter>output_options['output_type'] == 'png'</filter>
         </data>
-        <data name="output_pdf" format="pdf" from_work_dir="kat_hist_out.pdf" label="${tool.name} on ${on_string}: PDF Plot">
-            <filter>advanced_options['output_type'] == 'pdf'</filter>
+        <data name="output_pdf" format="pdf" from_work_dir="kat-gcp.mx.pdf" label="${tool.name} on ${on_string}: GCP PDF">
+            <filter>output_options['output_type'] == 'pdf'</filter>
         </data>
+        <data name="output_stats" format="json" from_work_dir="kat-gcp.dist_analysis.json" label="${tool.name} on ${on_string}: JSON Stats"/>
     </outputs>
     <tests>
-        <!-- Test 01: Default options with PNG Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="histogram_options">
-                <param name="low" value="1"/>
-                <param name="high" value="10000"/>
-                <param name="inc" value="1"/>
+        <!-- Test 01: Testing GCP matrix generation -->
+        <test expect_num_outputs="3">
+            <conditional name="input_type">
+                <param name="type" value="fastx"/>
+                <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <section name="kmer_options">
+                <param name="mer_len" value="27"/>
             </section>
-            <output name="output_hist" file="kat_hist.out">
+            <output name="output_matrix" file="kat_gcp.mx">
                 <assert_contents>
-                    <has_text text="# Kmer value:27"/>
-                    <has_n_lines n="10007"/>
+                    <has_text text="###"/>
+                    <has_text text="# Rows:27"/>
+                    <has_text text="27-mer frequency"/>
                 </assert_contents>
             </output>
-            <output name="output_plot" file="kat_hist.out.png" ftype="png">
+            <output name="output_plot" file="kat_gcp.mx.png">
                 <assert_contents>
-                    <has_image_channels channels="4"/> 
-                    <has_image_height height="1800"/> 
-                    <has_image_width width="2400"/> 
+                    <has_size value="238000" delta="10000"/>
+                </assert_contents>
+            </output>
+            <output name="output_stats" file="kat_gcp.dist_analysis.json">
+                <assert_contents>
+                    <has_size value="940"/>
+                    <has_text text="coverage"/>
+                    <has_text text="&quot;k&quot;: 27"/>
+                    <has_text text="nb_peaks"/>
+                    <has_text text="mean_gc"/>
+                    <has_text text="est_genome_size"/>
                 </assert_contents>
             </output>
         </test>
-        <!-- Test 02: Custom k-mer length with PDF Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
+        <!-- Test 02: Uncompressed FASTQ input with k-mer length 21 -->
+        <test expect_num_outputs="3">
+            <conditional name="input_type">
+                <param name="type" value="fastx"/>
+                <param name="input_files" value="read_1.fastq" ftype="fastqsanger"/>
+            </conditional>
             <section name="kmer_options">
                 <param name="mer_len" value="21"/>
             </section>
-            <section name="advanced_options">
-                <param name="output_type" value="pdf"/>
-            </section>
-            <output name="output_hist">
+            <output name="output_matrix">
                 <assert_contents>
+                    <has_text text="###"/>
+                    <has_text text="# Rows:21"/>
+                    <has_text text="21-mer frequency"/>
                     <has_text text="# Kmer value:21"/>
-                    <has_n_lines n="10007"/>
                 </assert_contents>
             </output>
-            <output name="output_pdf" ftype="pdf">
+            <output name="output_plot">
+                <assert_contents>
+                    <has_size value="190500" delta="10000"/>
+                </assert_contents>
+            </output>
+            <output name="output_stats">
+                <assert_contents>
+                    <has_size value="742"/>
+                    <has_text text="coverage"/>
+                    <has_text text="&quot;k&quot;: 21"/>
+                    <has_text text="nb_peaks"/>
+                    <has_text text="mean_gc"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 03: Multiple paired FASTQ inputs with non-canonical k-mers -->
+        <test expect_num_outputs="3">
+            <conditional name="input_type">
+                <param name="type" value="fastx"/>
+                <param name="input_files" value="read1.fastq.gz,read2.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <section name="kmer_options">
+                <param name="mer_len" value="27"/>
+                <param name="non_canonical" value="true"/>
+            </section>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_text text="###"/>
+                    <has_text text="# Rows:27"/>
+                    <has_text text="27-mer frequency"/>
+                    <has_text text="input_0.fastq.gz"/>
+                    <has_text text="input_1.fastq.gz"/>
+                </assert_contents>
+            </output>
+            <output name="output_plot">
                 <assert_contents>
-                    <has_size size="13956"/>
+                    <has_size value="241727" delta="10000"/>
+                </assert_contents>
+            </output>
+            <output name="output_stats">
+                <assert_contents>
+                    <has_size value="745"/>
+                    <has_text text="coverage"/>
+                    <has_text text="&quot;k&quot;: 27"/>
+                    <has_text text="nb_peaks"/>
+                    <has_text text="mean_gc"/>
                 </assert_contents>
             </output>
         </test>
-        <!-- Test 03: 5' trim with PNG Output -->
-        <test expect_num_outputs="2">
-            <param name="input_files" location="https://zenodo.org/records/20347489/files/kat-input-subsampled.fastq.gz" ftype="fastqsanger.gz"/>
-            <section name="advanced_options">
-                <param name="trim_5p" value="5"/>
+        <!-- Test 04: Custom coverage bins and PDF output -->
+        <test expect_num_outputs="3">
+            <conditional name="input_type">
+                <param name="type" value="fastx"/>
+                <param name="input_files" value="read1.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <section name="kmer_options">
+                <param name="mer_len" value="27"/>
+            </section>
+            <section name="matrix_options">
+                <param name="cvg_bins" value="500"/>
             </section>
-            <output name="output_hist">
+            <section name="output_options">
+                <param name="output_type" value="pdf"/>
+            </section>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_text text="###"/>
+                    <has_text text="# Rows:27"/>
+                    <has_text text="# Columns:501"/>
+                </assert_contents>
+            </output>
+            <output name="output_pdf">
                 <assert_contents>
-                    <has_text text="# Kmer value:27"/>
-                    <has_n_lines n="10007"/>
+                    <has_size value="23700" delta="2000"/>
                 </assert_contents>
             </output>
-            <output name="output_plot" ftype="png">
+            <output name="output_stats">
                 <assert_contents>
-                    <has_image_channels channels="4"/> 
-                    <has_image_height height="1800"/> 
-                    <has_image_width width="2400"/> 
+                    <has_size value="742"/>
+                    <has_text text="coverage"/>
+                    <has_text text="&quot;k&quot;: 27"/>
+                    <has_text text="nb_peaks"/>
+                    <has_text text="mean_gc"/>
                 </assert_contents>
             </output>
         </test>
     </tests>
     <help format="markdown"><![CDATA[
-**KAT Histogram (kat hist)**
+**KAT GCP - GC vs Coverage Analysis**
 
-Creates a histogram of k-mer occurrences from sequencing data.
+KAT (K-mer Analysis Toolkit) GCP compares GC content and k-mer coverage from sequence data.
 
 -----
 
 **What it does**
 
-The histogram shows how many k-mers appear at each frequency in the input data. Bucket *i* tallies k-mers with count *c* satisfying:
-
-```
-low + i*inc <= c < low + (i+1)*inc
-```
+This tool counts GC nucleotides for each distinct k-mer in the input and builds a matrix
+of GC count vs k-mer coverage. This matrix is useful for:
 
-The final bucket catches all k-mers with count >= the last bucket's lower bound.
-
-This tool is similar to the `histo` command in Jellyfish, but the output includes metadata that makes the histogram easier to plot and interpret.
+- Distinguishing legitimate genomic content from contamination
+- Identifying unexpected sequence content
+- Analysing biological composition of a sequencing library
 
 -----
 
-**Input**
+**Inputs**
 
-- One or more **FASTA** or **FASTQ** files (gzipped accepted)
-- Or a single pre-computed **Jellyfish hash** file
+- **FASTA/FASTQ files**: One or more sequence files (optionally gzip-compressed). KAT will perform k-mer counting internally.
+- **Jellyfish hash**: A pre-computed k-mer hash produced by Jellyfish.
 
 -----
 
 **Outputs**
 
-1. **Histogram** (tabular) - tab-separated count/frequency table with metadata header lines prefixed with `#`.
-2. **Plot** (PNG/PDF) - visual plot of the k-mer frequency histogram.
+- **GCP matrix** (`.mx`): Tab-separated matrix of GC count vs k-mer coverage bins. Each cell contains the count of distinct k-mers with that GC/coverage combination.
+- **GCP plot**: A 2D density plot visualising the matrix, useful for detecting contamination or ploidy signals.
+- **Stats**: A JSON file with summary statistics for the run.
 
 -----
 
 **Tips**
 
-- The default k-mer length of **27** works well for most short-read sequencing data.
-- For diploid or polyploid genomes, distinct peaks correspond to heterozygous and homozygous k-mers.
-- If the hash size is too small, KAT will automatically double it (increasing runtime and memory). Increase it if you expect many unique k-mers.
-- For multiple input files, provide different 5' trim values as comma-separated numbers (e.g. `5,10`).
+- For contamination screening, look for distinct clusters in the GCP plot that sit away from the main genomic peak.
+- Increase the **K-mer length** (`--mer_len`) for more specific k-mer discrimination (at the cost of memory and lower k-mer coverage).
+
     ]]></help>
     <expand macro="citations"/>
     <expand macro="creator"/>