|
20 | 20 | --max-block-distance '$max_block_distance' |
21 | 21 | --min-block-length '$min_block_length' |
22 | 22 | --min-block-density '$min_block_density' |
| 23 | + --edit-distance '$edit_distance' |
23 | 24 | |
24 | 25 | $out_win_repeats |
25 | 26 | $out_gc |
|
57 | 58 | <param argument="--max-block-distance" type="integer" min="0" value="200" label="Maximum block distance for merging"/> |
58 | 59 | <param argument="--min-block-length" type="integer" min="0" value="500" label="Minimum block length"/> |
59 | 60 | <param argument="--min-block-density" type="float" min="0" max="1" value="0.5" label="Minimum block density (0–1)"/> |
| 61 | + <param argument="--edit-distance" type="integer" min="0" max="2" value="0" label="Edit distance for pattern matching (0–2)"/> |
60 | 62 |
|
61 | 63 | <param argument="--out-win-repeats" type="boolean" truevalue="--out-win-repeats" falsevalue="" checked="false" label="Window repeat counts"/> |
62 | 64 | <param argument="--out-gc" type="boolean" truevalue="--out-gc" falsevalue="" checked="false" label="Window GC"/> |
|
161 | 163 | </assert_contents> |
162 | 164 | </output> |
163 | 165 | </test> |
| 166 | + |
| 167 | + <!-- 3) Edit distance 1: more variants detected --> |
| 168 | + <test expect_num_outputs="2"> |
| 169 | + <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/> |
| 170 | + <param name="canonical" value="TTAGGG"/> |
| 171 | + <param name="patterns" value="TTAGGG,CCCTAA"/> |
| 172 | + <param name="edit_distance" value="1"/> |
| 173 | + <output name="terminal_telomeres"> |
| 174 | + <assert_contents> |
| 175 | + <!-- p-arm telomere with extended start due to edit distance --> |
| 176 | + <has_text text="chr33_mat	442	14354	13912	p	"/> |
| 177 | + <!-- q-arm telomere --> |
| 178 | + <has_text text="chr33_mat	4219967	4246337	26370	q	"/> |
| 179 | + </assert_contents> |
| 180 | + </output> |
| 181 | + <output name="telo_report"> |
| 182 | + <assert_contents> |
| 183 | + <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/> |
| 184 | + </assert_contents> |
| 185 | + </output> |
| 186 | + </test> |
| 187 | + |
| 188 | + <!-- 4) Edit distance 2: maximum variants detected --> |
| 189 | + <test expect_num_outputs="2"> |
| 190 | + <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/> |
| 191 | + <param name="canonical" value="TTAGGG"/> |
| 192 | + <param name="patterns" value="TTAGGG,CCCTAA"/> |
| 193 | + <param name="edit_distance" value="2"/> |
| 194 | + <output name="terminal_telomeres"> |
| 195 | + <assert_contents> |
| 196 | + <!-- p-arm telomere extends to position 1 with edit distance 2 --> |
| 197 | + <has_text text="chr33_mat	1	14354	14353	p	"/> |
| 198 | + <!-- q-arm telomere --> |
| 199 | + <has_text text="chr33_mat	4219967	4246337	26370	q	"/> |
| 200 | + </assert_contents> |
| 201 | + </output> |
| 202 | + <output name="telo_report"> |
| 203 | + <assert_contents> |
| 204 | + <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/> |
| 205 | + </assert_contents> |
| 206 | + </output> |
| 207 | + </test> |
164 | 208 | </tests> |
165 | 209 |
|
166 | 210 | <help><![CDATA[ |
|
187 | 231 | Key parameters: |
188 | 232 | - -c / --canonical: Canonical repeat (default TTAGGG). This is the vertebrate telomeric motif found at chromosome ends that binds to the shelterin complex to form a telomere. |
189 | 233 | - -p / --patterns: Variant patterns (comma-separated). These are additional telomeric repeat motifs to search for besides the canonical repeat, including other variants that can be part of telomeres. |
190 | | - - -w / -s: window size / step (defaults 1000/500) |
| 234 | + - -w / -s: window size / step (defaults 1000/500). |
191 | 235 | - -u / --ultra-fast: terminal scan only (default true); disabled automatically when -g/-e/-r/-m/-i are used. |
| 236 | + - -x / --edit-distance: Edit (Hamming) distance for pattern matching (0–2; default 0). Useful for identifying degenerate telomeric repeats. |
192 | 237 | ]]></help> |
193 | 238 |
|
194 | 239 | <expand macro="citations"/> |
|
0 commit comments