diff --git a/bio/pytrf/extract/environment.linux-64.pin.txt b/bio/pytrf/extract/environment.linux-64.pin.txt new file mode 100644 index 0000000000..43a48ac990 --- /dev/null +++ b/bio/pytrf/extract/environment.linux-64.pin.txt @@ -0,0 +1,37 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +# created-by: conda 25.11.0 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda#f0991f0f84902f6b6009b4d2350a83aa +https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_14.conda#91349c276f84f590487e4c7f6e90e077 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-8_cp312.conda#c3efd25ac4d74b1584d2f7a57195ddf1 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_14.conda#550dceb769d23bcf0e2f97fd4062d720 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda#8b09ae86839581147ef2e5c5e229d164 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda#35f29eec58405aaf55e01cb470d8c26a +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_14.conda#6c13aaae36d7514f28bd5544da1a7bb8 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda#1a580f7796c7bf6393fddb8bbbde58dc +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hb9d3cd8_1.conda#d864d34357c3b65a4b731f78c0801dc4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_14.conda#8e96fe9b17d5871b5cf9d312cab832f6 
+https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-he9a06e4_0.conda#80c07c68d2f6870250959dcc95b209d1 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda#9ee58d5c534af06558933af3c845a780 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_14.conda#9531f671a13eec0597941fa19e489b96 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda#86bc20552bf46075e3d92b67f089172d +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_104.conda#a6abd2796fc332536735f68ba23f7901 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.0-hee844dc_0.conda#729a572a3ebb8c43933b30edcc628ceb +https://conda.anaconda.org/conda-forge/linux-64/python-3.12.12-hd63d673_1_cpython.conda#5c00c8cea14ee8d02941cab9121dce41 +https://conda.anaconda.org/bioconda/linux-64/pyfastx-2.2.0-py312h4711d71_1.tar.bz2#0c029565f5abbf1c3349a4abc0b4c63c +https://conda.anaconda.org/bioconda/linux-64/pytrf-1.4.2-py312h0fa9677_0.tar.bz2#11c47fcb88ad7fe0ab94dcf11b8bebb9 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda#4de79c071274a53dcaf2a8c749d1499e +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 
+https://conda.anaconda.org/conda-forge/noarch/pip-25.3-pyh8b19718_0.conda#c55515ca43c6444d2572e0f0d93cb6b9 diff --git a/bio/pytrf/extract/environment.yaml b/bio/pytrf/extract/environment.yaml new file mode 100644 index 0000000000..9e6d45b5ab --- /dev/null +++ b/bio/pytrf/extract/environment.yaml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + - nodefaults + +dependencies: + - pytrf =1.4 + - pyfastx =2.2 diff --git a/bio/pytrf/extract/meta.yaml b/bio/pytrf/extract/meta.yaml new file mode 100644 index 0000000000..243b879904 --- /dev/null +++ b/bio/pytrf/extract/meta.yaml @@ -0,0 +1,28 @@ +name: pytrf extract (NOT WORKING, see notes below) +description: > + Extract tandem repeat sequences with flanking regions from DNA sequences. + Requires output from pytrf findstr, findgtr, or findatr as input. +url: https://pytrf.readthedocs.io/en/latest/usage.html#commandline-interface +authors: + - Muhammad Rohan Ali Asmat +input: + - FASTA or FASTQ file (supports gzip compression) +output: + - Output file (default -> stdout, will be redirected to the log file). +params: + repeat_file: > + **Required.** Path to TSV or CSV file from pytrf findstr/findgtr/findatr. + out_format: > + Output format. Options: 'tsv' (default), 'csv', or 'fasta'. + Note: Only extract command supports FASTA output. + flank_length: > + Length of flanking sequence (default: 100). +notes: > + **Bioconda package:** https://bioconda.github.io/recipes/pytrf/README.html |nl| + **GitHub repository:** https://github.com/lmdu/pytrf |nl| + **License:** MIT License |nl| + **Disclaimer:** This is a minimal implementation supporting basic functionality. + pytrf is not a Python binding to TRF - it's an independent tool. |nl| + **Known issue:** PyTRF 1.4.2 has a bug in the `extract` command (delimiter error). |nl| + See: https://github.com/lmdu/pytrf/issues/6 |nl| + This wrapper skips extract tests until upstream patch is released. 
diff --git a/bio/pytrf/extract/test/Snakefile b/bio/pytrf/extract/test/Snakefile new file mode 100644 index 0000000000..c34b730d11 --- /dev/null +++ b/bio/pytrf/extract/test/Snakefile @@ -0,0 +1,17 @@ +# SAMPLE RULE: Extract tandem repeat sequences with flanking regions +# The pytrf extract wrapper requires output from findstr, findgtr, or findatr. +# +# Output: +# - If output file is specified, results are written to that file +# - If output is omitted, pytrf writes to stdout (redirected to log file) +rule pytrf_extract: + input: + "demo_data/{sample}.fasta", + output: + "results/{sample}_extract.tsv", + params: + repeat_file="demo_data/{sample}.tsv", + log: + "logs/{sample}.log", + wrapper: + "master/bio/pytrf/extract" diff --git a/bio/pytrf/extract/test/demo_data/small_test.fasta b/bio/pytrf/extract/test/demo_data/small_test.fasta new file mode 100644 index 0000000000..d5c7053d78 --- /dev/null +++ b/bio/pytrf/extract/test/demo_data/small_test.fasta @@ -0,0 +1,6 @@ +>seq1 +TCATCGGTCATCGGTCATCGGTCATCGGTCATCGG +>seq2 +ACCCCTCAGGGTACCCCTCAGGGTACCCCTCAGGGTACCCCTCAGGGTACCCCTCAGGGTACCCCTCAGGGTACCCCTCAGGGT +>seq3 +TGACTATATCCGCAAATGAAGGCTGTTCTCTGACATGACTATATCCGCAAATGAAGGCTGTTCTCTGACATGACTATATCCGCAAATGAAGGCTGTTCTCTGACATGACTATATCCGCAAATGAAGGCTGTTCTCTGACA diff --git a/bio/pytrf/extract/test/demo_data/small_test.tsv b/bio/pytrf/extract/test/demo_data/small_test.tsv new file mode 100644 index 0000000000..c0730984fc --- /dev/null +++ b/bio/pytrf/extract/test/demo_data/small_test.tsv @@ -0,0 +1,85 @@ +seq1 1 3 TCA 3 1 3 +seq1 4 6 TCG 3 1 3 +seq1 7 9 GTC 3 1 3 +seq1 10 12 ATC 3 1 3 +seq1 13 15 GGT 3 1 3 +seq1 16 18 CAT 3 1 3 +seq1 19 21 CGG 3 1 3 +seq1 22 24 TCA 3 1 3 +seq1 25 27 TCG 3 1 3 +seq1 28 30 GTC 3 1 3 +seq1 31 33 ATC 3 1 3 +seq1 34 36 GG 3 1 3 +seq2 1 3 ACC 3 1 3 +seq2 4 6 CCT 3 1 3 +seq2 7 9 CAG 3 1 3 +seq2 10 12 GGT 3 1 3 +seq2 13 15 ACC 3 1 3 +seq2 16 18 CCT 3 1 3 +seq2 19 21 CAG 3 1 3 +seq2 22 24 GGT 3 1 3 +seq2 25 27 ACC 3 1 3 +seq2 28 30 CCT 3 1 3 +seq2 31 33 
CAG 3 1 3 +seq2 34 36 GGT 3 1 3 +seq2 37 39 ACC 3 1 3 +seq2 40 42 CCT 3 1 3 +seq2 43 45 CAG 3 1 3 +seq2 46 48 GGT 3 1 3 +seq2 49 51 ACC 3 1 3 +seq2 52 54 CCT 3 1 3 +seq2 55 57 CAG 3 1 3 +seq2 58 60 GGT 3 1 3 +seq2 61 63 ACC 3 1 3 +seq2 64 66 CCT 3 1 3 +seq2 67 69 CAG 3 1 3 +seq2 70 72 GGT 3 1 3 +seq2 73 75 ACC 3 1 3 +seq2 76 78 CCT 3 1 3 +seq2 79 81 CAG 3 1 3 +seq2 82 84 GGT 3 1 3 +seq3 1 3 TGA 3 1 3 +seq3 4 6 CTA 3 1 3 +seq3 7 9 TAT 3 1 3 +seq3 10 12 CCG 3 1 3 +seq3 13 15 CAA 3 1 3 +seq3 16 18 ATG 3 1 3 +seq3 19 21 AAG 3 1 3 +seq3 22 24 GCT 3 1 3 +seq3 25 27 GTT 3 1 3 +seq3 28 31 CT 2 2 4 +seq3 32 34 GAC 3 1 3 +seq3 35 37 ATG 3 1 3 +seq3 38 40 ACT 3 1 3 +seq3 41 44 AT 2 2 4 +seq3 45 47 CCG 3 1 3 +seq3 48 50 CAA 3 1 3 +seq3 51 53 ATG 3 1 3 +seq3 54 56 AAG 3 1 3 +seq3 57 59 GCT 3 1 3 +seq3 60 62 GTT 3 1 3 +seq3 63 66 CT 2 2 4 +seq3 67 69 GAC 3 1 3 +seq3 70 72 ATG 3 1 3 +seq3 73 75 ACT 3 1 3 +seq3 76 79 AT 2 2 4 +seq3 80 82 CCG 3 1 3 +seq3 83 85 CAA 3 1 3 +seq3 86 88 ATG 3 1 3 +seq3 89 91 AAG 3 1 3 +seq3 92 94 GCT 3 1 3 +seq3 95 97 GTT 3 1 3 +seq3 98 101 CT 2 2 4 +seq3 102 104 GAC 3 1 3 +seq3 105 107 ATG 3 1 3 +seq3 108 110 ACT 3 1 3 +seq3 111 114 AT 2 2 4 +seq3 115 117 CCG 3 1 3 +seq3 118 120 CAA 3 1 3 +seq3 121 123 ATG 3 1 3 +seq3 124 126 AAG 3 1 3 +seq3 127 129 GCT 3 1 3 +seq3 130 132 GTT 3 1 3 +seq3 133 136 CT 2 2 4 +seq3 137 139 GAC 3 1 3 +seq3 140 142 A 3 1 3 diff --git a/bio/pytrf/extract/test/expected/extract_basic.tsv b/bio/pytrf/extract/test/expected/extract_basic.tsv new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bio/pytrf/extract/wrapper.py b/bio/pytrf/extract/wrapper.py new file mode 100644 index 0000000000..d7cab51731 --- /dev/null +++ b/bio/pytrf/extract/wrapper.py @@ -0,0 +1,55 @@ +""" +Snakemake Wrapper for PyTRF extract +------------------------------------------------------ +Extract tandem repeat sequences with flanking regions. 
+""" + +from pathlib import Path +from snakemake.shell import shell + +# Logging +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + +# Get input file +try: + input_file = Path(snakemake.input[0]).resolve() +except (IndexError, TypeError) as e: + raise ValueError(f"Input specification error: {e}") from e + +# Get output file if specified +OUTPUT_FILE = None +if snakemake.output: + OUTPUT_FILE = Path(snakemake.output[0]).resolve() + +# Get repeat_file (required) +try: + if not hasattr(snakemake.params, "repeat_file"): + raise ValueError("Parameter 'repeat_file' is required for extract") + repeat_file = Path(snakemake.params.repeat_file).resolve() +except (AttributeError, ValueError) as e: + raise RuntimeError(f"Parameter validation failed: {e}") from e + +# Build parameters +params = [f"-r {repeat_file}"] + +try: + if hasattr(snakemake.params, "out_format"): + params.append(f"-f {snakemake.params.out_format}") + + if hasattr(snakemake.params, "flank_length"): + params.append(f"-l {snakemake.params.flank_length}") +except (AttributeError, ValueError) as e: + raise RuntimeError(f"Parameter processing failed: {e}") from e + +# Build command +CMD = f"pytrf extract {input_file}" +if params: + CMD += " " + " ".join(params) +if OUTPUT_FILE: + CMD += f" -o {OUTPUT_FILE}" + +# Execute +try: + shell(f"{CMD} {log}") +except Exception as e: + raise RuntimeError(f"PyTRF extract execution failed: {e}") from e diff --git a/bio/pytrf/findatr/environment.linux-64.pin.txt b/bio/pytrf/findatr/environment.linux-64.pin.txt new file mode 100644 index 0000000000..43a48ac990 --- /dev/null +++ b/bio/pytrf/findatr/environment.linux-64.pin.txt @@ -0,0 +1,37 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +# created-by: conda 25.11.0 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 
+https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda#f0991f0f84902f6b6009b4d2350a83aa +https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_14.conda#91349c276f84f590487e4c7f6e90e077 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-8_cp312.conda#c3efd25ac4d74b1584d2f7a57195ddf1 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_14.conda#550dceb769d23bcf0e2f97fd4062d720 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda#8b09ae86839581147ef2e5c5e229d164 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda#35f29eec58405aaf55e01cb470d8c26a +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_14.conda#6c13aaae36d7514f28bd5544da1a7bb8 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda#1a580f7796c7bf6393fddb8bbbde58dc +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hb9d3cd8_1.conda#d864d34357c3b65a4b731f78c0801dc4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_14.conda#8e96fe9b17d5871b5cf9d312cab832f6 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-he9a06e4_0.conda#80c07c68d2f6870250959dcc95b209d1 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda#9ee58d5c534af06558933af3c845a780 
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_14.conda#9531f671a13eec0597941fa19e489b96 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda#86bc20552bf46075e3d92b67f089172d +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_104.conda#a6abd2796fc332536735f68ba23f7901 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.0-hee844dc_0.conda#729a572a3ebb8c43933b30edcc628ceb +https://conda.anaconda.org/conda-forge/linux-64/python-3.12.12-hd63d673_1_cpython.conda#5c00c8cea14ee8d02941cab9121dce41 +https://conda.anaconda.org/bioconda/linux-64/pyfastx-2.2.0-py312h4711d71_1.tar.bz2#0c029565f5abbf1c3349a4abc0b4c63c +https://conda.anaconda.org/bioconda/linux-64/pytrf-1.4.2-py312h0fa9677_0.tar.bz2#11c47fcb88ad7fe0ab94dcf11b8bebb9 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda#4de79c071274a53dcaf2a8c749d1499e +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 +https://conda.anaconda.org/conda-forge/noarch/pip-25.3-pyh8b19718_0.conda#c55515ca43c6444d2572e0f0d93cb6b9 diff --git a/bio/pytrf/findatr/environment.yaml b/bio/pytrf/findatr/environment.yaml new file mode 100644 index 0000000000..9e6d45b5ab --- /dev/null +++ b/bio/pytrf/findatr/environment.yaml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + - nodefaults + +dependencies: + - pytrf =1.4 + - pyfastx =2.2 diff --git a/bio/pytrf/findatr/meta.yaml 
b/bio/pytrf/findatr/meta.yaml new file mode 100644 index 0000000000..64ddbc7132 --- /dev/null +++ b/bio/pytrf/findatr/meta.yaml @@ -0,0 +1,49 @@ +name: pytrf findatr +description: > + Find approximate/imperfect tandem repeats from DNA sequences. +url: https://pytrf.readthedocs.io/en/latest/usage.html#commandline-interface +authors: + - Muhammad Rohan Ali Asmat +input: + - FASTA or FASTQ file (supports gzip compression) +output: + - Output file (default -> stdout, will be redirected to the log file). +params: + out_format: > + Output format. Options: 'tsv' (default), 'csv', 'bed', or 'gff'. + min_motif: > + Minimum motif size in bp (default: 1). + max_motif: > + Maximum motif size in bp (default: 6). + min_seedrep: > + Minimum repeat number for seed (default: 3). + min_seedlen: > + Minimum length for seed (default: 10). + max_errors: > + Maximum number of continuous alignment errors (default: 3). + min_identity: > + Minimum identity for extending, 0 to 100 (default: 70). + max_extend: > + Maximum length allowed to extend (default: 2000). +notes: > + **Output columns (TSV/CSV/BED/GFF):** sequence or chromosome name, start position, + end position, motif sequence, motif length, repeat number, repeat length, seed start + position, seed end position, seed repeat number, seed length, number of matches, + number of substitutions, number of insertions, number of deletions, extend alignment + identity between imperfect repeat and its perfect counterpart. + |nl| |nl| + **Example:** |nl| + Example row in record: 0 1 32 T 1 32.0 32 1 1 1 1 10 22 0 0 31.25 |nl| + This indicates that in sequence '0', from position 1 to 32, there is a tandem repeat + with motif 'T' (length 1) repeated 32 times, resulting in a repeat length of 32 bp. + The seed repeat started at position 1 and ended at position 1, with a seed repeat number of 1 + and seed length of 1 bp. 
The alignment of the imperfect repeat to its perfect counterpart + has 10 matches, 22 substitutions, 0 insertions, and 0 deletions, yielding an identity of 31.25%. + |nl| + |nl| + **Bioconda package:** https://bioconda.github.io/recipes/pytrf/README.html |nl| + **GitHub repository:** https://github.com/lmdu/pytrf |nl| + **License:** MIT License |nl| + **Disclaimer:** This is a minimal implementation supporting basic functionality. + pytrf is not a Python binding to TRF - it's an independent tool. + \ No newline at end of file diff --git a/bio/pytrf/findatr/test/Snakefile b/bio/pytrf/findatr/test/Snakefile new file mode 100644 index 0000000000..4ffcffd045 --- /dev/null +++ b/bio/pytrf/findatr/test/Snakefile @@ -0,0 +1,20 @@ +# SAMPLE RULE: Find approximate/imperfect tandem repeats +# +# Output: +# - If output file is specified, results are written to that file +# - If output is omitted, PyTRF writes to stdout (redirected to log file) +# +# This example searches for approximate repeats with motif sizes between 3-10 bp, +# allowing detection of imperfect short/medium tandem repeats with mismatches. 
+rule pytrf_findatr: + input: + "demo_data/{sample}.fasta", + output: + "results/{sample}.tsv", + log: + "logs/{sample}.log", + params: + min_motif=3, + max_motif=10, + wrapper: + "master/bio/pytrf/findatr" diff --git a/bio/pytrf/findatr/test/demo_data/small_test.fasta b/bio/pytrf/findatr/test/demo_data/small_test.fasta new file mode 100644 index 0000000000..9e7b2e044a --- /dev/null +++ b/bio/pytrf/findatr/test/demo_data/small_test.fasta @@ -0,0 +1,2 @@ +>seq1 +TCATCGGTCATCGGTCATCGGTCATCGGTCATCGG diff --git a/bio/pytrf/findatr/test/expected/findatr_basic.tsv b/bio/pytrf/findatr/test/expected/findatr_basic.tsv new file mode 100644 index 0000000000..ad80a825da --- /dev/null +++ b/bio/pytrf/findatr/test/expected/findatr_basic.tsv @@ -0,0 +1 @@ +seq1 1 35 TCATCGG 7 5.0 35 1 35 5 35 35 0 0 0 100.0 diff --git a/bio/pytrf/findatr/wrapper.py b/bio/pytrf/findatr/wrapper.py new file mode 100644 index 0000000000..f0ee538dad --- /dev/null +++ b/bio/pytrf/findatr/wrapper.py @@ -0,0 +1,65 @@ +""" +Snakemake Wrapper for PyTRF findatr +------------------------------------------------------ +Find approximate/imperfect tandem repeats. 
+""" + +from pathlib import Path +from snakemake.shell import shell + +# Logging +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + +# Get input file +try: + input_file = Path(snakemake.input[0]).resolve() +except (IndexError, TypeError) as e: + raise ValueError(f"Input specification error: {e}") from e + +# Get output file if specified +OUTPUT_FILE = None +if snakemake.output: + OUTPUT_FILE = Path(snakemake.output[0]).resolve() + +# Build parameters +params = [] + +try: + if hasattr(snakemake.params, "out_format"): + params.append(f"-f {snakemake.params.out_format}") + + if hasattr(snakemake.params, "min_motif"): + params.append(f"-m {snakemake.params.min_motif}") + + if hasattr(snakemake.params, "max_motif"): + params.append(f"-M {snakemake.params.max_motif}") + + if hasattr(snakemake.params, "min_seedrep"): + params.append(f"-r {snakemake.params.min_seedrep}") + + if hasattr(snakemake.params, "min_seedlen"): + params.append(f"-l {snakemake.params.min_seedlen}") + + if hasattr(snakemake.params, "max_errors"): + params.append(f"-e {snakemake.params.max_errors}") + + if hasattr(snakemake.params, "min_identity"): + params.append(f"-p {snakemake.params.min_identity}") + + if hasattr(snakemake.params, "max_extend"): + params.append(f"-x {snakemake.params.max_extend}") +except (AttributeError, ValueError) as e: + raise RuntimeError(f"Parameter processing failed: {e}") from e + +# Build command +CMD = f"pytrf findatr {input_file}" +if params: + CMD += " " + " ".join(params) +if OUTPUT_FILE: + CMD += f" -o {OUTPUT_FILE}" + +# Execute +try: + shell(f"{CMD} {log}") +except Exception as e: + raise RuntimeError(f"PyTRF findatr execution failed: {e}") from e diff --git a/bio/pytrf/findgtr/environment.linux-64.pin.txt b/bio/pytrf/findgtr/environment.linux-64.pin.txt new file mode 100644 index 0000000000..43a48ac990 --- /dev/null +++ b/bio/pytrf/findgtr/environment.linux-64.pin.txt @@ -0,0 +1,37 @@ +# This file may be used to create an environment using: +# $ conda 
create --name --file +# platform: linux-64 +# created-by: conda 25.11.0 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda#f0991f0f84902f6b6009b4d2350a83aa +https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_14.conda#91349c276f84f590487e4c7f6e90e077 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-8_cp312.conda#c3efd25ac4d74b1584d2f7a57195ddf1 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_14.conda#550dceb769d23bcf0e2f97fd4062d720 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda#8b09ae86839581147ef2e5c5e229d164 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda#35f29eec58405aaf55e01cb470d8c26a +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_14.conda#6c13aaae36d7514f28bd5544da1a7bb8 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda#1a580f7796c7bf6393fddb8bbbde58dc +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hb9d3cd8_1.conda#d864d34357c3b65a4b731f78c0801dc4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_14.conda#8e96fe9b17d5871b5cf9d312cab832f6 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-he9a06e4_0.conda#80c07c68d2f6870250959dcc95b209d1 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda#9ee58d5c534af06558933af3c845a780 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_14.conda#9531f671a13eec0597941fa19e489b96 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda#86bc20552bf46075e3d92b67f089172d +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_104.conda#a6abd2796fc332536735f68ba23f7901 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.0-hee844dc_0.conda#729a572a3ebb8c43933b30edcc628ceb +https://conda.anaconda.org/conda-forge/linux-64/python-3.12.12-hd63d673_1_cpython.conda#5c00c8cea14ee8d02941cab9121dce41 +https://conda.anaconda.org/bioconda/linux-64/pyfastx-2.2.0-py312h4711d71_1.tar.bz2#0c029565f5abbf1c3349a4abc0b4c63c +https://conda.anaconda.org/bioconda/linux-64/pytrf-1.4.2-py312h0fa9677_0.tar.bz2#11c47fcb88ad7fe0ab94dcf11b8bebb9 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda#4de79c071274a53dcaf2a8c749d1499e +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 +https://conda.anaconda.org/conda-forge/noarch/pip-25.3-pyh8b19718_0.conda#c55515ca43c6444d2572e0f0d93cb6b9 diff --git a/bio/pytrf/findgtr/environment.yaml b/bio/pytrf/findgtr/environment.yaml new file mode 100644 index 0000000000..9e6d45b5ab --- /dev/null +++ 
b/bio/pytrf/findgtr/environment.yaml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + - nodefaults + +dependencies: + - pytrf =1.4 + - pyfastx =2.2 diff --git a/bio/pytrf/findgtr/meta.yaml b/bio/pytrf/findgtr/meta.yaml new file mode 100644 index 0000000000..40a1e41a46 --- /dev/null +++ b/bio/pytrf/findgtr/meta.yaml @@ -0,0 +1,37 @@ +name: pytrf findgtr +description: > + Find exact generic tandem repeats from DNA sequences. + Identifies perfect repeats with any motif size. +url: https://pytrf.readthedocs.io/en/latest/usage.html#commandline-interface +authors: + - Muhammad Rohan Ali Asmat +input: + - FASTA or FASTQ file (supports gzip compression) +output: + - Output file (default -> stdout, will be redirected to the log file). +params: + out_format: > + Output format. Options: 'tsv' (default), 'csv', 'bed', or 'gff'. + min_motif: > + Minimum motif length in bp (default: 10). + max_motif: > + Maximum motif length in bp (default: 100). + min_repeat: > + Minimum number of times motif must repeat (default: 3). + Example: min_repeat=3 requires at least 3 copies of the motif. + min_length: > + Minimum total length of tandem repeat in bp (default: 10). + Example: min_length=50 requires entire repeat region ≥50 bp. +notes: > + **Output columns:** chrom, start (1-based), end (1-based inclusive), + motif, motif_length, repeat_number, repeat_length. |nl| + |nl| + **Example:** A 50 bp motif repeated 3 times (150 bp total) reported as + motif_length=50, repeat_number=3, repeat_length=150. + |nl| + |nl| + **Bioconda package:** https://bioconda.github.io/recipes/pytrf/README.html |nl| + **GitHub repository:** https://github.com/lmdu/pytrf |nl| + **License:** MIT License |nl| + **Disclaimer:** This is a minimal implementation supporting basic functionality. + pytrf is not a Python binding to TRF - it's an independent tool. 
diff --git a/bio/pytrf/findgtr/test/Snakefile b/bio/pytrf/findgtr/test/Snakefile new file mode 100644 index 0000000000..16bdbbbf80 --- /dev/null +++ b/bio/pytrf/findgtr/test/Snakefile @@ -0,0 +1,21 @@ +# SAMPLE RULE: Find generic tandem repeats with custom thresholds +# The PyTRF findgtr wrapper finds exact tandem repeats of any motif size. +# +# Output: +# - If output file is specified, results are written to that file +# - If output is omitted, PyTRF writes to stdout (redirected to log file) +# +# This example uses sensitive thresholds (min_motif=3, min_repeat=1) to detect +# shorter generic tandem repeats. +rule pytrf_findgtr: + input: + "demo_data/{sample}.fasta", + output: + "results/{sample}.tsv", + log: + "logs/{sample}.log", + params: + min_motif=3, + min_repeat=1, + wrapper: + "master/bio/pytrf/findgtr" diff --git a/bio/pytrf/findgtr/test/demo_data/small_test.fasta b/bio/pytrf/findgtr/test/demo_data/small_test.fasta new file mode 100644 index 0000000000..9e7b2e044a --- /dev/null +++ b/bio/pytrf/findgtr/test/demo_data/small_test.fasta @@ -0,0 +1,2 @@ +>seq1 +TCATCGGTCATCGGTCATCGGTCATCGGTCATCGG diff --git a/bio/pytrf/findgtr/test/expected/findgtr_basic.tsv b/bio/pytrf/findgtr/test/expected/findgtr_basic.tsv new file mode 100644 index 0000000000..0c25485350 --- /dev/null +++ b/bio/pytrf/findgtr/test/expected/findgtr_basic.tsv @@ -0,0 +1 @@ +seq1 1 35 TCATCGG 7 5 35 diff --git a/bio/pytrf/findgtr/wrapper.py b/bio/pytrf/findgtr/wrapper.py new file mode 100644 index 0000000000..671c87975b --- /dev/null +++ b/bio/pytrf/findgtr/wrapper.py @@ -0,0 +1,56 @@ +""" +Snakemake Wrapper for PyTRF findgtr +------------------------------------------------------ +Find exact generic tandem repeats (any motif size). 
+""" + +from pathlib import Path +from snakemake.shell import shell + +# Logging +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + +# Get input file +try: + input_file = Path(snakemake.input[0]).resolve() +except (IndexError, TypeError) as e: + raise ValueError(f"Input specification error: {e}") from e + +# Get output file if specified +OUTPUT_FILE = None +if snakemake.output: + OUTPUT_FILE = Path(snakemake.output[0]).resolve() + +# Build parameters +params = [] + +try: + if hasattr(snakemake.params, "out_format"): + params.append(f"-f {snakemake.params.out_format}") + + if hasattr(snakemake.params, "min_motif"): + params.append(f"-m {snakemake.params.min_motif}") + + if hasattr(snakemake.params, "max_motif"): + params.append(f"-M {snakemake.params.max_motif}") + + if hasattr(snakemake.params, "min_repeat"): + params.append(f"-r {snakemake.params.min_repeat}") + + if hasattr(snakemake.params, "min_length"): + params.append(f"-l {snakemake.params.min_length}") +except (AttributeError, ValueError) as e: + raise RuntimeError(f"Parameter processing failed: {e}") from e + +# Build command +CMD = f"pytrf findgtr {input_file}" +if params: + CMD += " " + " ".join(params) +if OUTPUT_FILE: + CMD += f" -o {OUTPUT_FILE}" + +# Execute +try: + shell(f"{CMD} {log}") +except Exception as e: + raise RuntimeError(f"pytrf findgtr execution failed: {e}") from e diff --git a/bio/pytrf/findstr/environment.linux-64.pin.txt b/bio/pytrf/findstr/environment.linux-64.pin.txt new file mode 100644 index 0000000000..43a48ac990 --- /dev/null +++ b/bio/pytrf/findstr/environment.linux-64.pin.txt @@ -0,0 +1,37 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +# created-by: conda 25.11.0 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 
+https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda#f0991f0f84902f6b6009b4d2350a83aa +https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_14.conda#91349c276f84f590487e4c7f6e90e077 +https://conda.anaconda.org/conda-forge/noarch/python_abi-3.12-8_cp312.conda#c3efd25ac4d74b1584d2f7a57195ddf1 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda#4222072737ccff51314b5ece9c7d6f5a +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d +https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_14.conda#550dceb769d23bcf0e2f97fd4062d720 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda#8b09ae86839581147ef2e5c5e229d164 +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda#35f29eec58405aaf55e01cb470d8c26a +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_14.conda#6c13aaae36d7514f28bd5544da1a7bb8 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda#1a580f7796c7bf6393fddb8bbbde58dc +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hb9d3cd8_1.conda#d864d34357c3b65a4b731f78c0801dc4 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_14.conda#8e96fe9b17d5871b5cf9d312cab832f6 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-he9a06e4_0.conda#80c07c68d2f6870250959dcc95b209d1 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda#9ee58d5c534af06558933af3c845a780 
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_14.conda#9531f671a13eec0597941fa19e489b96 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda#283b96675859b20a825f8fa30f311446 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda#86bc20552bf46075e3d92b67f089172d +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda#6432cb5d4ac0046c3ac0a8a0f95842f9 +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_104.conda#a6abd2796fc332536735f68ba23f7901 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.0-hee844dc_0.conda#729a572a3ebb8c43933b30edcc628ceb +https://conda.anaconda.org/conda-forge/linux-64/python-3.12.12-hd63d673_1_cpython.conda#5c00c8cea14ee8d02941cab9121dce41 +https://conda.anaconda.org/bioconda/linux-64/pyfastx-2.2.0-py312h4711d71_1.tar.bz2#0c029565f5abbf1c3349a4abc0b4c63c +https://conda.anaconda.org/bioconda/linux-64/pytrf-1.4.2-py312h0fa9677_0.tar.bz2#11c47fcb88ad7fe0ab94dcf11b8bebb9 +https://conda.anaconda.org/conda-forge/noarch/setuptools-80.9.0-pyhff2d567_0.conda#4de79c071274a53dcaf2a8c749d1499e +https://conda.anaconda.org/conda-forge/noarch/wheel-0.45.1-pyhd8ed1ab_1.conda#75cb7132eb58d97896e173ef12ac9986 +https://conda.anaconda.org/conda-forge/noarch/pip-25.3-pyh8b19718_0.conda#c55515ca43c6444d2572e0f0d93cb6b9 diff --git a/bio/pytrf/findstr/environment.yaml b/bio/pytrf/findstr/environment.yaml new file mode 100644 index 0000000000..9e6d45b5ab --- /dev/null +++ b/bio/pytrf/findstr/environment.yaml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + - nodefaults + +dependencies: + - pytrf =1.4 + - pyfastx =2.2 diff --git a/bio/pytrf/findstr/meta.yaml 
b/bio/pytrf/findstr/meta.yaml new file mode 100644 index 0000000000..cdd48dc003 --- /dev/null +++ b/bio/pytrf/findstr/meta.yaml @@ -0,0 +1,33 @@ +name: pytrf findstr +description: > + Find exact short tandem repeats (SSRs/microsatellites) from DNA sequences. + Identifies perfect repeats with motif lengths of 1-6 bp (mono-, di-, tri-, + tetra-, penta-, and hexa-nucleotide repeats). +url: https://pytrf.readthedocs.io/en/latest/usage.html#commandline-interface +authors: + - Muhammad Rohan Ali Asmat +input: + - FASTA or FASTQ file (supports gzip compression) +output: + - Output file (default: stdout, will be redirected to the log file). +params: + out_format: > + Output format. Options: 'tsv' (default), 'csv', 'bed', or 'gff'. + repeats: > + List of 6 integers specifying minimum repeats for each SSR type: + [mono, di, tri, tetra, penta, hexa]. Default: [12, 7, 5, 4, 4, 4]. + Example: repeats=[10, 6, 4, 3, 3, 3] requires at least 10 mononucleotide + repeats, 6 dinucleotide repeats, etc. +notes: > + **Output columns:** chrom, start (0-based), end (0-based exclusive), + motif, motif_length (1-6 bp), repeat_number, repeat_length. |nl| + |nl| + **Example:** Sequence "ATATATAT" reported as motif="AT", motif_length=2, + repeat_number=4, repeat_length=8. + |nl| + |nl| + **Bioconda package:** https://bioconda.github.io/recipes/pytrf/README.html |nl| + **GitHub repository:** https://github.com/lmdu/pytrf |nl| + **License:** MIT License |nl| + **Disclaimer:** This is a minimal implementation supporting basic functionality. + pytrf is not a Python binding to TRF - it's an independent tool. diff --git a/bio/pytrf/findstr/test/Snakefile b/bio/pytrf/findstr/test/Snakefile new file mode 100644 index 0000000000..b2378e008f --- /dev/null +++ b/bio/pytrf/findstr/test/Snakefile @@ -0,0 +1,23 @@ +include: "test.smk" #[hide] + + +# SAMPLE RULE: Basic usage with custom parameters +# The pytrf findstr wrapper finds exact short tandem repeats (SSRs/microsatellites). 
+# +# Output: +# - If output file is specified, results are written to that file +# - If output is omitted, pytrf writes to stdout (which gets redirected to log file) +# +# This example uses custom repeat thresholds and CSV format output. +rule pytrf_findstr: + input: + "demo_data/{sample}.fasta", + output: + "results/{sample}.csv", + params: + repeats=[5, 1, 3, 3, 3, 3], + out_format="csv", + log: + "logs/{sample}.log", + wrapper: + "master/bio/pytrf/findstr" diff --git a/bio/pytrf/findstr/test/demo_data/small_test.fasta b/bio/pytrf/findstr/test/demo_data/small_test.fasta new file mode 100644 index 0000000000..9e7b2e044a --- /dev/null +++ b/bio/pytrf/findstr/test/demo_data/small_test.fasta @@ -0,0 +1,2 @@ +>seq1 +TCATCGGTCATCGGTCATCGGTCATCGGTCATCGG diff --git a/bio/pytrf/findstr/test/expected/findstr_basic.csv b/bio/pytrf/findstr/test/expected/findstr_basic.csv new file mode 100644 index 0000000000..d3c24353a9 --- /dev/null +++ b/bio/pytrf/findstr/test/expected/findstr_basic.csv @@ -0,0 +1,18 @@ +seq1,1,2,TC,2,1,2 +seq1,3,4,AT,2,1,2 +seq1,5,6,CG,2,1,2 +seq1,7,8,GT,2,1,2 +seq1,9,10,CA,2,1,2 +seq1,11,12,TC,2,1,2 +seq1,13,14,GG,2,1,2 +seq1,15,16,TC,2,1,2 +seq1,17,18,AT,2,1,2 +seq1,19,20,CG,2,1,2 +seq1,21,22,GT,2,1,2 +seq1,23,24,CA,2,1,2 +seq1,25,26,TC,2,1,2 +seq1,27,28,GG,2,1,2 +seq1,29,30,TC,2,1,2 +seq1,31,32,AT,2,1,2 +seq1,33,34,CG,2,1,2 +seq1,35,36,G,2,1,2 diff --git a/bio/pytrf/findstr/test/test.smk b/bio/pytrf/findstr/test/test.smk new file mode 100644 index 0000000000..e4c806d5ef --- /dev/null +++ b/bio/pytrf/findstr/test/test.smk @@ -0,0 +1,16 @@ +# SAMPLE RULE 1: Minimal usage with all defaults +# When params are omitted, pytrf findstr uses default values: +# - repeats: [12, 7, 5, 4, 4, 4] (standard thresholds for SSR detection) +# - out_format: 'tsv' (tab-separated values) +# +# When output is omitted, results go to stdout, which is redirected +# to the log file. This is useful for quick inspection or piping to other tools. 
+rule pytrf_findstr_defaults:
+    input:
+        "demo_data/small_test.fasta",
+    # No output specified - results written to stdout (redirected to log)
+    # No params specified - uses pytrf default values
+    log:
+        "logs/small_test_defaults.log",
+    wrapper:
+        "master/bio/pytrf/findstr"
diff --git a/bio/pytrf/findstr/wrapper.py b/bio/pytrf/findstr/wrapper.py
new file mode 100644
index 0000000000..dd456db9ba
--- /dev/null
+++ b/bio/pytrf/findstr/wrapper.py
@@ -0,0 +1,72 @@
+"""
+Snakemake Wrapper for PyTRF findstr
+------------------------------------------------------
+Find exact short tandem repeats (SSRs/microsatellites).
+
+Reads from the injected ``snakemake`` object:
+
+* ``input[0]``: sequence file handed to ``pytrf findstr``.
+* ``output[0]`` (optional): written via ``-o``; without it pytrf prints to
+  stdout, which the log redirection below sends to the log file.
+* ``params.out_format`` (optional): passed as ``-f``.
+* ``params.repeats`` (optional): six integers passed as ``-r``.
+"""
+
+import shlex
+from pathlib import Path
+from subprocess import CalledProcessError
+
+from snakemake.shell import shell
+
+# ``-r`` takes one minimum repeat count per motif length (mono- to hexa-).
+N_REPEAT_CLASSES = 6
+
+
+def repeat_args(repeats):
+    """Return ``repeats`` as the six string arguments for ``-r``.
+
+    Raises:
+        ValueError: if ``repeats`` is not a list/tuple of six integers.
+    """
+    if not isinstance(repeats, (list, tuple)) or len(repeats) != N_REPEAT_CLASSES:
+        raise ValueError(
+            f"params.repeats must be a list of {N_REPEAT_CLASSES} integers, "
+            f"got {repeats!r}"
+        )
+    try:
+        return [str(int(value)) for value in repeats]
+    except (TypeError, ValueError) as e:
+        raise ValueError(
+            f"params.repeats must contain only integers, got {repeats!r}"
+        ) from e
+
+
+# Send both stdout and stderr of the command to the rule's log file.
+log = snakemake.log_fmt_shell(stdout=True, stderr=True)
+
+try:
+    input_file = Path(snakemake.input[0]).resolve()
+except (IndexError, TypeError) as e:
+    raise ValueError(f"Input specification error: {e}") from e
+
+# Build the command as an argument list so every value can be shell-quoted.
+argv = ["pytrf", "findstr", str(input_file)]
+
+out_format = snakemake.params.get("out_format")
+if out_format is not None:
+    argv += ["-f", str(out_format)]
+
+repeats = snakemake.params.get("repeats")
+if repeats is not None:
+    argv += ["-r", *repeat_args(repeats)]
+
+if snakemake.output:
+    argv += ["-o", str(Path(snakemake.output[0]).resolve())]
+
+command = shlex.join(argv)
+
+# shell() format-expands its argument; passing ``command`` by name keeps any
+# braces inside paths or parameter values literal.
+try:
+    shell("{command} {log}")
+except CalledProcessError as e:
+    raise RuntimeError(f"pytrf findstr execution failed: {e}") from e
diff --git a/test_wrappers.py b/test_wrappers.py
index 6cfc1eec8c..8edc6f1295 100644
--- a/test_wrappers.py
+++ b/test_wrappers.py
@@ -107,7 +107,6 @@ def _run(wrapper, cmd,
check_log=None, compare_results_with_expected=None): f"file://{tmp_test_subdir}/", ] - if CONTAINERIZED: # run snakemake in container cmd = [ @@ -129,9 +128,7 @@ def _run(wrapper, cmd, check_log=None, compare_results_with_expected=None): with open(generated) as genf, open(expected) as expf: gen_lines = genf.readlines() exp_lines = expf.readlines() - diff = "".join( - difflib.Differ().compare(gen_lines, exp_lines) - ) + diff = "".join(difflib.Differ().compare(gen_lines, exp_lines)) raise ValueError( f"Unexpected results: {generated} != {expected}." f"Diff:\n{diff}" @@ -271,9 +268,19 @@ def test_agat(run): def test_alignoth(run): run( "bio/alignoth", - ["snakemake", "--cores", "1", "--use-conda", "-F", "out/json_plot.vl.json", "out/plot.html", "output-dir/"], + [ + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", + "out/json_plot.vl.json", + "out/plot.html", + "output-dir/", + ], ) + def test_alignoth_report_meta(run): run( "meta/bio/alignoth_report", @@ -7152,9 +7159,104 @@ def test_orthanq(run): "--use-conda", "out/candidates", "out/candidates.vcf", -# "out/preprocess_hla.bcf", + # "out/preprocess_hla.bcf", "out/preprocess_virus.bcf", "out/calls_hla", "out/calls_virus", ], ) + + +def test_pytrf_findstr(run): + run( + "bio/pytrf/findstr", + [ + "snakemake", + "--cores", + "1", + "results/small_test.csv", + "--use-conda", + "-F", + "--allowed-rules", + "pytrf_findstr", + ], + compare_results_with_expected={ + "results/small_test.csv": "expected/findstr_basic.csv", + }, + ) + + +def test_pytrf_findstr_defaults(run): + run( + "bio/pytrf/findstr", + [ + "snakemake", + "--cores", + "1", + "logs/small_test_defaults.log", + "--use-conda", + "-F", + "--allowed-rules", + "pytrf_findstr_defaults", + ], + ) + + +def test_pytrf_findgtr(run): + run( + "bio/pytrf/findgtr", + [ + "snakemake", + "--cores", + "1", + "results/small_test.tsv", + "--use-conda", + "-F", + "--allowed-rules", + "pytrf_findgtr", + ], + compare_results_with_expected={ + "results/small_test.tsv": 
"expected/findgtr_basic.tsv", + }, + ) + + +def test_pytrf_findatr(run): + run( + "bio/pytrf/findatr", + [ + "snakemake", + "--cores", + "1", + "results/small_test.tsv", + "--use-conda", + "-F", + "--allowed-rules", + "pytrf_findatr", + ], + compare_results_with_expected={ + "results/small_test.tsv": "expected/findatr_basic.tsv", + }, + ) + + +@pytest.mark.skip( + reason="PyTRF extract command has a delimiter bug (see https://github.com/lmdu/pytrf/issues/6)" +) +def test_pytrf_extract(run): + run( + "bio/pytrf/extract", + [ + "snakemake", + "--cores", + "1", + "results/small_test_extract.tsv", + "--use-conda", + "-F", + "--allowed-rules", + "pytrf_extract", + ], + compare_results_with_expected={ + "results/small_test_extract.tsv": "expected/extract_basic.tsv", + }, + )