Skip to content

Add fastplong to tools #6940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
11 changes: 11 additions & 0 deletions tools/fastplong/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tool Shed metadata for the fastplong wrapper.
name: fastplong
owner: iuc
description: "Fastplong is used for ultrafast preprocessing and quality control for long reads (Nanopore, PacBio, Cyclone, etc.)."
homepage_url: https://github.com/OpenGene/fastplong
long_description: |
  Fastplong is used for ultrafast preprocessing and quality control for long reads (Nanopore, PacBio, Cyclone, etc.).
# Link to the wrapper on the repository's "main" branch (the base branch this change targets).
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastplong
type: unrestricted
categories:
  - Sequence Analysis
  - Assembly
178 changes: 178 additions & 0 deletions tools/fastplong/fastplong.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
<tool id="fastplong" name="Fastplong" version="@TOOL_VERSION@+galaxy0" profile="23.2" license="MIT">
<description>Filter and trim long reads</description>
<creator>
    <organization name="Masaryk University" url="https://www.muni.cz/" />
    <!-- NOTE(review): the email value below looks like a redaction placeholder; confirm the real address. -->
    <person givenName="Hana" familyName="Resovska" email="[email protected]" />
</creator>
<!--
    The tool version token is defined inline. It is the only macro this wrapper
    uses, so a separate macros file is not needed to share it.
-->
<macros>
    <token name="@TOOL_VERSION@">0.2.2</token>
</macros>
<!-- The packaged fastplong release is pinned to the same token as the wrapper version. -->
<requirements>
    <requirement type="package" version="@TOOL_VERSION@">fastplong</requirement>
</requirements>
<command detect_errors="exit_code">
    <![CDATA[
        ## Command template for fastplong.
        ##
        ## Conventions used below:
        ##  - Boolean parameters render as their flag (or as nothing), so they are
        ##    emitted bare, without any trailing punctuation.
        ##  - Optional numeric parameters are compared against the empty string
        ##    instead of being tested for truthiness, so that an explicit 0 is
        ##    passed to the tool rather than silently dropped.
        ##  - Optional text and dataset parameters keep the plain truthiness test.

        ## Title the report with the dataset name (restricted to shell-safe
        ## characters); $in itself renders as the input file path.
        #import re
        #set $report_name = re.sub('[^\w\-. ]', '_', str($in.element_identifier))

        fastplong
        --thread \${GALAXY_SLOTS:-1}
        --report_title 'fastplong report for $report_name'
        --in '$in'
        --out '$output'
        #if str($reads_to_process) != '':
            --reads_to_process '$reads_to_process'
        #end if

        ## Global switches and adapter trimming
        $disable_quality_filtering
        $disable_adapter_trimming
        #if $start_adapter:
            --start_adapter '$start_adapter'
        #end if
        #if $end_adapter:
            --end_adapter '$end_adapter'
        #end if
        #if $adapter_fasta:
            --adapter_fasta '$adapter_fasta'
        #end if
        #if str($distance_threshold) != '':
            --distance_threshold '$distance_threshold'
        #end if
        #if str($trimming_extension) != '':
            --trimming_extension '$trimming_extension'
        #end if

        ## Fixed-length and polyX trimming
        #if str($trim_front) != '':
            --trim_front '$trim_front'
        #end if
        #if str($trim_tail) != '':
            --trim_tail '$trim_tail'
        #end if
        $trim_poly_x
        #if str($poly_x_min_len) != '':
            --poly_x_min_len '$poly_x_min_len'
        #end if

        ## Sliding-window quality trimming
        $cut_front
        $cut_tail
        #if str($cut_window_size) != '':
            --cut_window_size '$cut_window_size'
        #end if
        #if str($cut_mean_quality) != '':
            --cut_mean_quality '$cut_mean_quality'
        #end if
        #if str($cut_front_window_size) != '':
            --cut_front_window_size '$cut_front_window_size'
        #end if
        #if str($cut_front_mean_quality) != '':
            --cut_front_mean_quality '$cut_front_mean_quality'
        #end if
        #if str($cut_tail_window_size) != '':
            --cut_tail_window_size '$cut_tail_window_size'
        #end if
        #if str($cut_tail_mean_quality) != '':
            --cut_tail_mean_quality '$cut_tail_mean_quality'
        #end if

        ## Quality filtering
        #if str($qualified_quality_phred) != '':
            --qualified_quality_phred '$qualified_quality_phred'
        #end if
        #if str($unqualified_percent_limit) != '':
            --unqualified_percent_limit '$unqualified_percent_limit'
        #end if
        #if str($n_base_limit) != '':
            --n_base_limit '$n_base_limit'
        #end if
        #if str($mean_qual) != '':
            --mean_qual '$mean_qual'
        #end if

        ## Length filtering
        $disable_length_filtering
        #if str($length_required) != '':
            --length_required '$length_required'
        #end if
        #if str($length_limit) != '':
            --length_limit '$length_limit'
        #end if

        ## Low-complexity filtering
        $low_complexity_filter
        #if str($complexity_threshold) != '':
            --complexity_threshold '$complexity_threshold'
        #end if

        ## The file name is only emitted together with its flag; the
        ## failed_out_reads output collects failed_reads.fastq from the
        ## working directory.
        #if $failed_out:
            --failed_out failed_reads.fastq
        #end if
        $verbose
    ]]>
</command>

<inputs>
    <!-- Input reads and optional subsampling -->
    <param argument="--in" type="data" format="fastq,fastq.gz" label="Input FASTQ file"/>
    <param argument="--reads_to_process" type="integer" optional="true" label="Number of reads to process" help="Limit the number of reads to process. Useful for quick QC or creating subsets."/>

    <!-- Global switches and adapter trimming -->
    <param argument="--disable_quality_filtering" type="boolean" checked="false" truevalue="--disable_quality_filtering" falsevalue="" label="Disable quality filtering?" help="By default, quality filtering is enabled. Enable this option to turn it off."/>
    <param argument="--disable_adapter_trimming" type="boolean" checked="false" truevalue="--disable_adapter_trimming" falsevalue="" label="Disable adapter trimming?" help="By default, adapter trimming is enabled. Enable this option to turn it off."/>
    <param argument="--start_adapter" type="text" optional="true" label="Start adapter sequence (5')" help="Specify the adapter sequence at the 5' end of reads. Leave blank to auto-detect."/>
    <param argument="--end_adapter" type="text" optional="true" label="End adapter sequence (3')" help="Specify the adapter sequence at the 3' end of reads. Leave blank to auto-detect."/>
    <param argument="--adapter_fasta" type="data" format="fasta" optional="true" label="Adapter sequences (FASTA file)" help="Specify a FASTA file with adapter sequences to trim."/>
    <param argument="--distance_threshold" type="float" optional="true" min="0" max="1" label="Adapter distance threshold" help="Threshold of adapter-length. Range: 0.0–1.0"/>
    <param argument="--trimming_extension" type="integer" optional="true" label="Trimming extension length" help="Extend trimming beyond the adapter match to clean up the sequence more thoroughly. Default is 10"/>

    <!-- Fixed-length and polyX trimming -->
    <param argument="--trim_front" type="integer" optional="true" label="Trim front (5') bases" help="Trim this many bases from the start (5') of each read. Default: 0"/>
    <param argument="--trim_tail" type="integer" optional="true" label="Trim tail (3') bases" help="Trim this many bases from the end (3') of each read. Default: 0"/>
    <param argument="--trim_poly_x" type="boolean" checked="false" truevalue="--trim_poly_x" falsevalue="" label="Trim polyX tails?" help="Enable to trim polyX stretches at the 3' end."/>
    <param argument="--poly_x_min_len" type="integer" optional="true" label="Minimum polyX length" help="Minimum length to detect polyX in the tail. Default: 10"/>

    <!-- Sliding-window quality trimming -->
    <param argument="--cut_front" type="boolean" checked="false" truevalue="--cut_front" falsevalue="" label="Enable sliding-window trimming from 5' end?" help="Trim from 5' end until window mean quality is above threshold."/>
    <param argument="--cut_tail" type="boolean" checked="false" truevalue="--cut_tail" falsevalue="" label="Enable sliding-window trimming from 3' end?" help="Trim from 3' end until window mean quality is above threshold."/>
    <param argument="--cut_window_size" type="integer" optional="true" label="Sliding window size" help="Window size shared by cut_front, cut_tail, cut_sliding. Default is 4"/>
    <param argument="--cut_mean_quality" type="integer" optional="true" label="Mean quality threshold" help="Mean quality required for cutting. Default: 20"/>
    <param argument="--cut_front_window_size" type="integer" optional="true" label="Front window size" help="Override window size for cut_front only. Default: cut_window_size"/>
    <param argument="--cut_front_mean_quality" type="integer" optional="true" label="Front mean quality" help="Override quality threshold for cut_front only. Default: cut_mean_quality"/>
    <param argument="--cut_tail_window_size" type="integer" optional="true" label="Tail window size" help="Override window size for cut_tail only. Default: cut_window_size"/>
    <param argument="--cut_tail_mean_quality" type="integer" optional="true" label="Tail mean quality" help="Override quality threshold for cut_tail only. Default: cut_mean_quality"/>

    <!-- Quality filtering -->
    <param argument="--qualified_quality_phred" type="integer" optional="true" min="0" label="Qualified base Phred score" help="Phred quality score that counts as qualified. Default: 15"/>
    <param argument="--unqualified_percent_limit" type="integer" optional="true" min="0" max="100" label="Unqualified base percentage limit" help="Percent of bases allowed to be below quality threshold. Default: 40"/>
    <param argument="--n_base_limit" type="integer" optional="true" label="N base limit" help="Reads with more than this number of N bases are discarded. Default: 5"/>
    <param argument="--mean_qual" type="integer" optional="true" label="Minimum mean quality" help="Reads with mean quality below this are discarded. Default: 0"/>

    <!-- Length filtering -->
    <param argument="--disable_length_filtering" type="boolean" checked="false" truevalue="--disable_length_filtering" falsevalue="" label="Disable length filtering?" help="Length filtering is enabled by default. Enable this to disable it."/>
    <param argument="--length_required" type="integer" optional="true" label="Minimum read length" help="Reads shorter than this will be discarded. Default: 15"/>
    <param argument="--length_limit" type="integer" optional="true" label="Maximum read length" help="Reads longer than this will be discarded. Default: 0 (no limit)"/>

    <!-- Low-complexity filtering -->
    <param argument="--low_complexity_filter" type="boolean" checked="false" truevalue="--low_complexity_filter" falsevalue="" label="Enable low complexity filter?" help="Enable low complexity filtering."/>
    <param argument="--complexity_threshold" type="integer" optional="true" min="0" max="100" label="Complexity threshold (%)" help="Minimum required sequence complexity (0–100). Default: 30"/>

    <!-- Extra output and logging -->
    <param argument="--failed_out" type="boolean" checked="false" truevalue="--failed_out" falsevalue="" label="Save failed reads?" help="Enable this to write failed reads to a separate FASTQ file."/>
    <param argument="--verbose" type="boolean" checked="false" truevalue="--verbose" falsevalue="" label="Enable verbose logging?" help="Print log updates after every 1M reads processed. Useful for monitoring progress in long runs."/>
</inputs>

<outputs>
    <!-- Primary result: the FASTQ file the command writes to the "output" path. -->
    <data name="output" label="Filtered Output FASTQ" format="fastq"/>
    <!-- Present only when the failed_out input is enabled; picked up from the working directory. -->
    <data name="failed_out_reads" label="Failed Reads FASTQ" format="fastq" from_work_dir="failed_reads.fastq">
        <filter>failed_out</filter>
    </data>
    <!-- Reports collected from the working directory under their fixed file names. -->
    <data name="json_output_default" label="JSON Report (default)" format="json" from_work_dir="fastplong.json"/>
    <data name="html_output_default" label="HTML Report (default)" format="html" from_work_dir="fastplong.html"/>
</outputs>

<tests>
    <!-- Default settings: filtered FASTQ plus the JSON and HTML reports. -->
    <test expect_num_outputs="3">
        <param name="in" value="input.fastq"/>
        <output name="output" file="output.fastq"/>
    </test>
    <!-- Only the first three reads are processed. -->
    <test expect_num_outputs="3">
        <param name="in" value="input.fastq"/>
        <param name="reads_to_process" value="3"/>
        <output name="output" file="output_reads_to_process.fastq"/>
    </test>
    <!--
        Failed reads are saved as a fourth output. The expected file is given
        with "file", as in the other tests, and the filtered output is checked
        as well so that enabling the option cannot change it unnoticed.
    -->
    <test expect_num_outputs="4">
        <param name="in" value="input.fastq"/>
        <param name="failed_out" value="true"/>
        <output name="output" file="output.fastq"/>
        <output name="failed_out_reads" file="output_failed_out.fastq"/>
    </test>
</tests>

<!--
    Help text in reStructuredText. A blank line separates each heading from its
    bullet list; without it the list is parsed as part of the preceding
    paragraph. CDATA keeps the text literal.
-->
<help><![CDATA[
**Fastplong: Filter and trim long reads**

This tool wraps the `fastplong` command-line tool to allow filtering and trimming of long reads in FASTQ format.

**Inputs**

- A FASTQ file containing reads and optional parameters to control trimming, quality filtering, adapter detection, and more.

**Outputs**

- A filtered FASTQ file with trimmed reads, HTML and JSON reports, and an optional FASTQ file of failed reads.

For more information, see https://github.com/OpenGene/fastplong
]]></help>

<!-- Publications to cite when using this tool, referenced by DOI. -->
<citations>
    <citation type="doi">10.1002/imt2.107</citation>
    <citation type="doi">10.1093/bioinformatics/bty560</citation>
</citations>

</tool>
3 changes: 3 additions & 0 deletions tools/fastplong/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<macros>
<token name="@TOOL_VERSION@">0.2.2</token>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This macro can be moved into the main file. A separate macro file only makes sense, IMHO, if you have a lot of macros that are shared between multiple tools.

</macros>
24 changes: 24 additions & 0 deletions tools/fastplong/test-data/input.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
@name
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE

@name2
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE

@name3
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE

@name4
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE

@name5
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
16 changes: 16 additions & 0 deletions tools/fastplong/test-data/output.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
@name
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE
@name2
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE
@name3
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE
@name4
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE
4 changes: 4 additions & 0 deletions tools/fastplong/test-data/output_failed_out.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@name5 failed_quality_filter
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
12 changes: 12 additions & 0 deletions tools/fastplong/test-data/output_reads_to_process.fastq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@name
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE
@name2
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE
@name3
AGGTGCTGCGCATACTTTTCCACGGGGATACTACTGGGTGTTACCGTGGGAATGAATCCTTTTAACCTTAGCAATACGTAAAGGTGCT
+
///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEE///EEEEEEEEEEEEEEEEEEEEE