galaxyproject
diff --git a/‎tools/ngsderive/.shed.yml‎
Lines changed: 28 additions & 0 deletions b/‎tools/ngsderive/.shed.yml‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎tools/ngsderive/ngsderive_strandedness.xml‎
Lines changed: 140 additions & 0 deletions b/‎tools/ngsderive/ngsderive_strandedness.xml‎
Lines changed: 140 additions & 0 deletions
diff --git a/‎tools/ngsderive/test-data/forward_stranded.bam‎
13.3 KB b/‎tools/ngsderive/test-data/forward_stranded.bam‎
13.3 KB
diff --git a/‎tools/ngsderive/test-data/reverse_stranded.bam‎
13.2 KB b/‎tools/ngsderive/test-data/reverse_stranded.bam‎
13.2 KB
diff --git a/‎tools/ngsderive/test-data/strandedness_test.gtf‎
Lines changed: 18 additions & 0 deletions b/‎tools/ngsderive/test-data/strandedness_test.gtf‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎tools/ngsderive/test-data/strandedness_test.gtf.gz‎
282 Bytes b/‎tools/ngsderive/test-data/strandedness_test.gtf.gz‎
282 Bytes
diff --git a/‎tools/ngsderive/test-data/unstranded.bam‎
13.6 KB b/‎tools/ngsderive/test-data/unstranded.bam‎
13.6 KB
@@ -0,0 +1,28 @@
+---
+name: ngsderive  # repository identity on the Tool Shed
+owner: iuc
+type: unrestricted
+description: Forensic analysis tool for inferring properties from NGS data
+long_description: |
+  ngsderive is a forensic analysis tool useful for backwards computing
+  information from next-generation sequencing data. It includes subcommands
+  for inferring strandedness, read length, encoding, and other properties
+  from BAM files.
+homepage_url: https://github.com/stjudecloud/ngsderive
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/ngsderive
+categories:
+  - Sequence Analysis
+  - RNA
+  - Transcriptomics
+auto_tool_repositories:  # names and descriptions are templated per tool
+  name_template: '{{ tool_id }}'
+  description_template: '{{ tool_name }} from the ngsderive suite'
+suite:  # metadata for the suite_ngsderive repository
+  name: suite_ngsderive
+  description: >
+    ngsderive is a forensic analysis tool for inferring properties from NGS data
+  long_description: >
+    ngsderive is a forensic analysis tool useful for backwards computing
+    information from next-generation sequencing data. It includes subcommands
+    for inferring strandedness, read length, encoding, and other properties
+    from BAM files.
@@ -0,0 +1,140 @@
+<!-- Galaxy wrapper around the "ngsderive strandedness" subcommand; produces a single tabular output. -->
+<tool id="ngsderive_strandedness" name="ngsderive strandedness" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
+    <description>infers strandedness from RNA-seq BAM files</description>
+    <!-- Version tokens: both feed the tool's version attribute; @TOOL_VERSION@ also pins the package requirement. -->
+    <macros>
+        <token name="@TOOL_VERSION@">4.0.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">ngsderive</requirement>
+    </requirements>
+    <!--
+        The inputs are symlinked into the working directory under fixed names:
+        the BAM index is placed next to the BAM as input.bam.bai, and the
+        annotation link keeps the dataset's extension (gtf or gtf.gz).
+        The three numeric options are always passed explicitly, and the
+        standard output of ngsderive is redirected into the output dataset.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+ln -s '${alignment_input}' input.bam &&
+ln -s '${alignment_input.metadata.bam_index}' input.bam.bai &&
+ln -s '${gtf_input}' annotation.${gtf_input.ext} &&
+
+ngsderive strandedness
+    input.bam
+    -g annotation.${gtf_input.ext}
+    -n $n_genes
+    -m $min_reads_per_gene
+    -q $mapq
+    $split_by_rg
+    > '${output}'
+    ]]></command>
+    <!-- Two datasets (BAM, GTF), three integer thresholds with defaults, and one boolean that expands to its flag or to an empty string. -->
+    <inputs>
+        <param name="alignment_input" type="data" format="bam" label="Input alignment file" help="Aligned paired-end RNA-seq reads in BAM format."/>
+        <param name="gtf_input" type="data" format="gtf,gtf.gz" label="Gene annotation file (GTF)" help="Gene model in GTF format. The file will be automatically sorted and indexed if necessary."/>
+        <param argument="-n" name="n_genes" type="integer" value="1000" min="1" label="Number of genes to sample" help="Number of random genes to sample for strandedness inference."/>
+        <param argument="-m" name="min_reads_per_gene" type="integer" value="10" min="1" label="Minimum reads per gene" help="Minimum number of reads per gene required for inclusion in the analysis."/>
+        <param argument="-q" name="mapq" type="integer" value="30" min="0" label="Minimum mapping quality (MAPQ)" help="Minimum MAPQ score for a read to be considered."/>
+        <!-- No explicit name attribute here; the command template refers to this parameter as $split_by_rg. -->
+        <param argument="--split-by-rg" type="boolean" truevalue="--split-by-rg" falsevalue="" checked="false" label="Split results by read group" help="Output one entry per read group in addition to an overall entry."/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular" label="${tool.name} on ${on_string}">
+            <!-- Sets the column_names metadata to the six columns that the help section describes. -->
+            <actions>
+                <action name="column_names" type="metadata" default="File,ReadGroup,TotalReads,ForwardPct,ReversePct,Predicted"/>
+            </actions>
+        </data>
+    </outputs>
+    <!--
+        Every test leaves the numeric thresholds and split_by_rg at their
+        defaults and asserts a six-column output containing the expected call.
+        NOTE(review): no test exercises split_by_rg or non-default -n/-m/-q values.
+    -->
+    <tests>
+        <!-- Forward-stranded BAM with the plain-text GTF: expects "Stranded-Forward" -->
+        <test expect_num_outputs="1">
+            <param name="alignment_input" value="forward_stranded.bam"/>
+            <param name="gtf_input" value="strandedness_test.gtf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_columns n="6"/>
+                    <has_text text="Stranded-Forward"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Reverse-stranded BAM with the plain-text GTF: expects "Stranded-Reverse" -->
+        <test expect_num_outputs="1">
+            <param name="alignment_input" value="reverse_stranded.bam"/>
+            <param name="gtf_input" value="strandedness_test.gtf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_columns n="6"/>
+                    <has_text text="Stranded-Reverse"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Unstranded BAM with the plain-text GTF: expects "Unstranded" -->
+        <test expect_num_outputs="1">
+            <param name="alignment_input" value="unstranded.bam"/>
+            <param name="gtf_input" value="strandedness_test.gtf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_columns n="6"/>
+                    <has_text text="Unstranded"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Same BAM as the reverse-stranded test, but with the gzipped GTF (ftype set explicitly to gtf.gz) -->
+        <test expect_num_outputs="1">
+            <param name="alignment_input" value="reverse_stranded.bam"/>
+            <param name="gtf_input" value="strandedness_test.gtf.gz" ftype="gtf.gz"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_columns n="6"/>
+                    <has_text text="Stranded-Reverse"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+ngsderive strandedness infers the strandedness protocol used to generate RNA-seq data by
+analyzing read alignments against a gene model. It can determine whether your data was
+generated using a Stranded-Forward, Stranded-Reverse, or Unstranded protocol.
+
+This tool is useful when you have RNA-seq data but are unsure about the library preparation
+protocol used. Knowing the correct strandedness is essential for accurate gene expression
+quantification.
+
+**How it works**
+
+The tool randomly samples genes from the provided gene model and examines how reads align
+to those genes. Based on the proportion of reads mapping in the forward vs reverse orientation,
+it classifies the library as:
+
+- **Unstranded**: ~40-60% forward reads
+- **Stranded-Forward**: ≥80% forward reads
+- **Stranded-Reverse**: ≥80% reverse reads
+- **Inconclusive**: Results don't clearly indicate a strandedness type
+
+**Inputs**
+
+- **Alignment file**: Paired-end RNA-seq alignments in BAM format
+- **Gene annotation**: GTF file with gene models (gzipped GTF supported)
+
+**Output**
+
+A tabular file with the following columns:
+
+- **File**: Name of the input BAM file
+- **ReadGroup**: Read group identifier (or "overall" for combined results)
+- **TotalReads**: Number of reads used in the analysis
+- **ForwardPct**: Percentage of reads supporting forward strandedness
+- **ReversePct**: Percentage of reads supporting reverse strandedness
+- **Predicted**: The inferred strandedness (Stranded-Forward, Stranded-Reverse, Unstranded, or Inconclusive)
+
+**Notes**
+
+- Only paired-end reads are currently supported
+- For best results, ensure your BAM file has sufficient read depth
+
+For more information, see the `ngsderive documentation <https://stjudecloud.github.io/ngsderive/subcommands/strandedness/>`_.
+    ]]></help>
+    <!-- Software citation in BibTeX form; its url field points at the project's GitHub repository. -->
+    <citations>
+        <citation type="bibtex">
+@software{ngsderive,
+    author = {{St. Jude Cloud Team}},
+    title = {ngsderive: Forensic analysis tool for NGS data},
+    url = {https://github.com/stjudecloud/ngsderive},
+    year = {2020}
+}
+        </citation>
+    </citations>
+</tool>
@@ -0,0 +1,18 @@
+chr1	test	gene	1000	2000	.	+	.	gene_id "GENE_PLUS_1"; gene_name "GENE_PLUS_1";
+chr1	test	transcript	1000	2000	.	+	.	gene_id "GENE_PLUS_1"; transcript_id "GENE_PLUS_1.1";
+chr1	test	exon	1000	2000	.	+	.	gene_id "GENE_PLUS_1"; transcript_id "GENE_PLUS_1.1"; exon_number "1";
+chr1	test	gene	3000	4000	.	-	.	gene_id "GENE_MINUS_1"; gene_name "GENE_MINUS_1";
+chr1	test	transcript	3000	4000	.	-	.	gene_id "GENE_MINUS_1"; transcript_id "GENE_MINUS_1.1";
+chr1	test	exon	3000	4000	.	-	.	gene_id "GENE_MINUS_1"; transcript_id "GENE_MINUS_1.1"; exon_number "1";
+chr1	test	gene	5000	6000	.	+	.	gene_id "GENE_PLUS_2"; gene_name "GENE_PLUS_2";
+chr1	test	transcript	5000	6000	.	+	.	gene_id "GENE_PLUS_2"; transcript_id "GENE_PLUS_2.1";
+chr1	test	exon	5000	6000	.	+	.	gene_id "GENE_PLUS_2"; transcript_id "GENE_PLUS_2.1"; exon_number "1";
+chr1	test	gene	7000	8000	.	-	.	gene_id "GENE_MINUS_2"; gene_name "GENE_MINUS_2";
+chr1	test	transcript	7000	8000	.	-	.	gene_id "GENE_MINUS_2"; transcript_id "GENE_MINUS_2.1";
+chr1	test	exon	7000	8000	.	-	.	gene_id "GENE_MINUS_2"; transcript_id "GENE_MINUS_2.1"; exon_number "1";
+chr1	test	gene	9000	10000	.	+	.	gene_id "GENE_PLUS_3"; gene_name "GENE_PLUS_3";
+chr1	test	transcript	9000	10000	.	+	.	gene_id "GENE_PLUS_3"; transcript_id "GENE_PLUS_3.1";
+chr1	test	exon	9000	10000	.	+	.	gene_id "GENE_PLUS_3"; transcript_id "GENE_PLUS_3.1"; exon_number "1";
+chr1	test	gene	11000	12000	.	-	.	gene_id "GENE_MINUS_3"; gene_name "GENE_MINUS_3";
+chr1	test	transcript	11000	12000	.	-	.	gene_id "GENE_MINUS_3"; transcript_id "GENE_MINUS_3.1";
+chr1	test	exon	11000	12000	.	-	.	gene_id "GENE_MINUS_3"; transcript_id "GENE_MINUS_3.1"; exon_number "1";