Skip to content

Commit 16082a6

Browse files
[MAFFT] add --anysymbol parameter (#1585)
* add --anysymbol parameter
* use boolean value
* use boolean value
* use boolean value
* fix linting error?
* fix linting error
* bump

---------

Co-authored-by: Björn Grüning <bjoern@gruenings.eu>
1 parent 45b6fe9 commit 16082a6

File tree

5 files changed

+177
-10
lines changed

5 files changed

+177
-10
lines changed

tools/mafft/macros.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<macros>
22
<token name="@TOOL_VERSION@">7.526</token>
3-
<token name="@VERSION_SUFFIX@">0</token>
3+
<token name="@VERSION_SUFFIX@">1</token>
44
<token name="@PROFILE@">22.01</token>
55
<!-- currently, the fasta3 executable is named according to its major version
66
=> needs updating together with the package requirement! -->

tools/mafft/mafft-add.xml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@
1717
#elif $sequences.sequenceType == 'group'
1818
--addprofile input_dir/sequence
1919
#end if
20-
$keeplength
21-
$mapout
20+
$keeplength
21+
$mapout
2222
$reorder
2323
input_dir/alignment > '$outputAlignment'
24-
24+
2525
#if $mapout
2626
&& mv input_dir/sequence.map '$outputmap'
2727
#end if
2828
]]>
2929
</command>
3030
<configfiles>
3131
<configfile filename="mk_symlinks.sh"><![CDATA[
32-
mkdir input_dir &&
32+
mkdir input_dir &&
3333
ln -s '$inputSequences' input_dir/sequence &&
3434
ln -s '$inputAlignment' input_dir/alignment
3535
]]></configfile>
@@ -66,10 +66,12 @@ ln -s '$inputAlignment' input_dir/alignment
6666
<test expect_num_outputs="1">
6767
<param name="inputSequences" value="add_seq.fa"/>
6868
<param name="inputAlignment" value="mafft_default.aln"/>
69-
<param name="sequenceType" value="singleseq"/>
70-
<param name="preservegap" value="--add"/>
71-
<param name="keeplength" value="--keeplength"/>
72-
<param name="mapout" value=""/>
69+
<conditional name="sequences">
70+
<param name="sequenceType" value="singleseq"/>
71+
<param name="preservegap" value="--add"/>
72+
</conditional>
73+
<param name="keeplength" value="True"/>
74+
<param name="mapout" value="False"/>
7375
<output name="outputAlignment" ftype="fasta" file="mafft_add_result.aln"/>
7476
</test>
7577
</tests>

tools/mafft/mafft.xml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
108108
## handle scoring matrix
109109
$datatype_selection.datatype
110+
$anysymbol
110111
#if $datatype_selection.datatype != ""
111112
#if $datatype_selection.scoring_matrix.type == "custom"
112113
--aamatrix '$datatype_selection.scoring_matrix.aamatrix'
@@ -143,7 +144,7 @@
143144
#if $input.mapping == "implicit"
144145
#for $batch in $input.batches:
145146
cat $batch.inputs >> input.fa
146-
#end for
147+
#end for
147148
#elif $input.mapping == "merge"
148149
#for $batch in $input.batches:
149150
#for $dataset in $batch.inputs:
@@ -217,6 +218,7 @@ cat $dataset >> input.fa
217218
<expand macro="misc_scoring_scheme" />
218219
</when>
219220
</conditional>
221+
<param argument="--anysymbol" type="boolean" truevalue="--anysymbol" falsevalue="" checked="False" label="Support unusual characters?" help="e.g., U as selenocysteine in protein sequence; i as inosine in nucleotide sequence" />
220222
<conditional name="flavour">
221223
<param name="type" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section. With 'Auto', the tool automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size from few to many respectively. Default setting: FFT-NS-2.">
222224
<option value="mafft --auto">Auto</option>
@@ -343,6 +345,17 @@ cat $dataset >> input.fa
343345
</conditional>
344346
<output name="outputAlignment" ftype="fasta" file="mafft_default.aln"/>
345347
</test>
348+
<!-- test with anysymbol parameter -->
349+
<test expect_num_outputs="1">
350+
<conditional name="input">
351+
<param name="mapping" value="implicit"/>
352+
<repeat name="batches">
353+
<param name="inputs" value="amino_withU.fa"/>
354+
</repeat>
355+
</conditional>
356+
<param name="anysymbol" value="True"/>
357+
<output name="outputAlignment" ftype="fasta" file="mafft_amino_withU.aln"/>
358+
</test>
346359
<!-- test autodetection of suitable algorithm from input; expected to choose L-INS-i -->
347360
<test expect_num_outputs="1">
348361
<conditional name="input">

tools/mafft/test-data/amino_withU.fa

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
>CALJE_DJ01Gene08198
2+
MDRVKNQNEDLLSGVVPLMKLLCLTVIGLVLAHPKIQIVPRATFKLLSKLVFALFLPCLI
3+
FVHLGESVTLNSVLLWWFVPVNVLISTAIGCALGYIVAIICRPPPQFYRFAVIMTGFGNT
4+
GNLPIAIVGSVCHSSENPFGPDCHRTGVAYVSFAQWVAVILVYTLVYHMMEPPMQYYEIV
5+
SEENEIQEETVNNISRPLLHEAEWPGMVDKETVHSKTPFIARVFMSISGSSQNTFPDLDL
6+
TEDGGAGPSSPKSLRCLAEPKVVRRIRIVAEETPIRHILQPPTIASLLALIVGMVPLFRD
7+
FVFGTDAPLSFFTDSLEILAGAVVPSVMLILGGMLAEGPNDSALGTRTMIGITVARLLVL
8+
PLVGIGVVALADKLNFLVEGDQMYRFVLLLQYTTPSAILLGAIASLRVMLSRKLRHSCSG
9+
STSVQUSSNSICNLLILLHSLADEDVGASQSGELIKGTDATRHQLFPGKDAGFAREQRHG
10+
FHAAEVFIAPCSQKEEKMLFEARRLVRWGFHEPHGPRHAAFQPWPDLSRARKHCLAPEAG
11+
VDSIAKSAHRSHFSAFSPTP
12+
>CALJE_DJ01Gene07324
13+
MHQKRDIDPADLWLPYSFRSSNMISLATVYHVLEATVPLYAAMMLAYLSIKWWKLFTPDQ
14+
CTGINKFVAKFSVPLLSFHVISTNNPYQMNLKLLFSDSMQKILALLFFAVISKACFRGSL
15+
DWLITGFSLSTLPNTLIVGIPLLKDLYGDEAAKLLGQIIVLQSLVWYTLLLFLFEFRVAK
16+
AVAANRNDNTGELEASGGMQPKPEEDEAKTLSMRSIKSLLMKSLLILCMVGKKLMINPNT
17+
YASLAGFVWALISFRWGIELPLMIRSCISILSDGGLGMAMFSLGLFTASQSSIIACGTRM
18+
MVLSMGLRFIVGPALIAIPSYAIGMRATLLKVAIVQAALPQGIVPWVFAKEYGVQPDILS
19+
TGVIIGMIIAVPIALAYYSILDHN
20+
>CALJE_DJ01Gene04286
21+
MITWHDLYTVLTAVVPLYVAMILAYGSVRWWRIFSPDQCSGINRFVAIFAVPLLSFHFIS
22+
TNNPYTMNFRFIAADTLQKLIVLSALAAWARLSSRGSLDWAITVFSLATLPNTLVMGIPL
23+
LIAMYGSFSGSLMVQIVVLQCIIWYTLLLFLFEYRAARLLIADQFPDTAASIVSFRVDSD
24+
VVSLDGGRDLLQADAEVGGDGKIHVTVRRSNASRRSVSMAMTPRPSNLTGAEIYSLSSSR
25+
NPTPRGSNFNHSDFFAMVGGAPPPLRPSNFGPADLYSLHSSRGPTPRPSNFDDGAAVGSP
26+
RFVHYPAPNPEISASAPVTKKPTPTNPAPPSTAAATQQQNRAAHHDAKELHMFVWSSSAS
27+
PVSEVGGLHVFGGADFAAPDLAGRPDHGGAKEIRMLIPTDLPQNGGTKGVAIPEGEDYGG
28+
GGEDFSFGGGKGLEDLVGRDKENGPEGLSKLGSSSTAELHPKGGGGPSDGAKQAHPQMPP
29+
ASVMTRLILIMVWRKLIRNPNTYSSLIGLIWSLVAFRWHVTMPKIVEKSISILSDAGLGM
30+
AMFSLGLFMALQPKIIACGNSVATFAMAVRFLTGPAVMAAASIAVGLRGVLLHVAIVQAA
31+
LPQGIVPFVFAKEYNVHPAILSTAVIFGMLIALPITLVYYIILGL
32+
>CALJE_DJ01Gene08527
33+
MISLVIVYHVLEATVPLYAAMILAYLSVKWWKLFTPDQCTGINNFVAKFSVPLLSFHVIS
34+
PNNPYPINLKLLFSDSVQKILALLVFAVLSRPCFRGSLDWLITGFSTSTLPNTLIIGIPL
35+
LKGLYGDEAVKLLGQIIVLQSLVWNTLLLFLFEVRAAKAVAANPTDNTGELESSGGIQPK
36+
PEEDEVKCLLAGFIWALILFRWEVELPLIISKCISILSDGGLGMAMFSLGLFMASQSSII
37+
ASGTWMMVLSMGLRFIIGPALIAIPSYAIGLRATLLKVAIVQAALPQGIVPFVFAKEYSV
38+
HPDILSTGFKLDQGFLLADERAIEKIDAVELRENPDNEMGRSPESRRKGAVIVPIISDAI
39+
GADAGGGLFQAVSSAFRRPHC
40+
>CALJE_DJ01Gene19453
41+
MITGKDIYDVLAAIVPLYVAMILAYGSVRWWKIFTPDQCSGINRFVAVFAVPLLSFHFIS
42+
TNNPYAMNYHFIGADSLQKVVILFALFLWHNLSKRGNLDWTITLFSLSTLPNTLVMGIPL
43+
LRAMYGDFSGSLMVQIVVLQSVIWYTLMLFLFEYRGAKALISEQFPPDIAGSITSFRVDS
44+
DVVSLNGREPLQADAEIRQDGKLHVVVRRSTSSAARSMSSSYNKSHGLNSITSMTPRASN
45+
LTGVEIYSLQSSREPTPRASSFNQTDFYAMFSSKVTSPRPDPSKPALHQDIPAPKGLHAP
46+
SGNASPIRKAGTNGDLEIEDGCKSQEGKFPASPFVAQKKGTDTGGVAGLVENNHRMPPAS
47+
VMTRLILIMVWRKLIRNPNTYSSLLGLVWSLVSFRWNIEMPTIIKGSISILSDAGLGMAM
48+
FSLGLFMALQPKIIACGKSVAAFSMAVRFLTGPAVIAATSIAIGVRGVLLHVAIVQAALP
49+
QGIVPFVFAKEYNCHPDILSTAVIFGMLIALPITILYYVLLGV
50+
>CALJE_DJ01Gene20884
51+
MIGWEDVCKVVASMVPLYVALGLGYGSVRWWHVFTPDQCDAVNRLVFSFTLPFYAFEFAI
52+
HADPFSMNCRLIAADIIGKLIVILVLSLWAKCREGSHRCSWFITAFSLSTLNNSLVLGLP
53+
LLTSLYGPSVRGLVVQTTVMQAIIWLTLLLFMLEIKKAKGILLEESPMKEERSWPSFWCL
54+
MRMVWLKLALNPNFYASILGITWAFIANRWQLEMPSIMEGSVLVISRAGTGMAMFCIGLF
55+
MALQKKMLACGARLTALALVLRFVVGPATMAISAFAVGLHGDVLRLAIIQAALPQSVTSF
56+
IFAREYGLHADVLSTAVILGLLVCLPVLVAYNVLLGFVT

tools/mafft/test-data/mafft_amino_withU.aln

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
>CALJE_DJ01Gene08198
2+
MDRVKNQNE---------------------DLLSGVVPLMKLLCLTVIGLVLAHPKIQ--
3+
-IVPRATFKLLSKLVFALFLPCLIF--VHLGESVTLNSVLLWWFVPVNVLISTAIG----
4+
-CALGYIVAIICRPPPQFYRFAVIMTGFG-----NTGNLPIAIVGSVCHSSENPFGPDCH
5+
RTGVAYVSFAQWVAVILVYTLVYHMME------------PPMQYYEIVSEENEIQEETVN
6+
NISRPLLHEAEWPG------MVDKETVH--------------------------------
7+
-----------------SKTPFIARV-------FMSISG--------SSQNTFPDL----
8+
-------------------------------------------------DLTED------
9+
----GGAGPSSP-----------------------------KSLRCLAE-----------
10+
------------------------------------------------------------
11+
---------PKVVRRIRIVAEETPIRHILQPPTIASLLALIVGMVPLFRDFVFGTDAPLS
12+
FFTDSLEILAGAVVPSVMLILGGMLAEGPNDSALGTRTMIGITVARLLVLP---------
13+
------LVGIGVVALADKLNFLVEGDQMYRFVLLLQYTTPSAILLGAIASLRVMLSRKLR
14+
HSCSGSTSVQUSSNSICNLLILLHSLADEDVGASQSGELIK------GTDATRHQLFPGK
15+
DAGFAREQRHGFHAAEVFIAPC-SQKEEKMLFEARRLVRWGFHEPHGPRHAAFQPWPDLS
16+
RARKHCLAPEAGVD---SIAKSAHRSHFSAFSPTP
17+
>CALJE_DJ01Gene07324
18+
MHQKRDIDPADLWLPYSFRSSNMISLATVYHVLEATVPLY-------AAMMLAYLSIKWW
19+
KLFTPDQCTGINKFVAKFSVPLLSFHVISTNNPYQMNLKLLFSDSMQKILALLFFAVISK
20+
ACFRGS--------------LDWLITGFSLSTLPNTLIVGIPLLKDL-------YGDEAA
21+
KLLGQIIV----LQSLVWYTLLLFLFEFRVAKAVAA------------------------
22+
------------------------------------------------------------
23+
-----------------NRND---------------NTG-----ELEASGGMQPKP----
24+
---------------------------------------------------EED--EAKT
25+
LSM--------------------------------------RSIKSLLMKSL--------
26+
------------------------------------------------------------
27+
-----------------LILCMVGKKLMINPNTYASLAGFVWALIS----FRWGIELP-L
28+
MIRSCISILSDGGLGMAMFSLGLFTASQSSIIACGTRMMVLSMGLRFIVGPALIAIPSYA
29+
IGMRATLLKVAIVQAA-----LPQG--IVPWVFAKEYGVQPDIL----------------
30+
-----STGVII-------------------------GMIIA-------------------
31+
-------------------VPI--------------------------------------
32+
-----------------ALAYYSILDHN-------
33+
>CALJE_DJ01Gene04286
34+
----------------------MITWHDLYTVLTAVVPLY-------VAMILAYGSVRWW
35+
RIFSPDQCSGINRFVAIFAVPLLSFHFISTNNPYTMNFRFIAADTLQKLIVLSALAAWAR
36+
LSSRGS--------------LDWAITVFSLATLPNTLVMGIPLLIAM-------YGSFSG
37+
SLMVQIVV----LQCIIWYTLLLFLFEYRAARLLIADQF-PDTAASIVSFRVDSDVVSLD
38+
GGRDLLQADAEVGGDGKIHVTVRRSNAS-RRSVS------------MAMTPRPSNLTGAE
39+
IYSLSSSRNPTPRGSNFNHSDFFAMVGGAPPPLRPSNFGPADLYSLHSSRGPTPRPSNFD
40+
DGAAVGSPRFVHYPAPNPEISASAPVTKKPTPTNPAPPSTAAATQQQNRAAHHD---AKE
41+
LHMFVWSSSASPVSEVGGLHVFGGADFAAPDLAGRPDHGGAKEIRMLIPTDLPQNGGTKG
42+
VAIPEGEDYGGGGEDFSFGGGKGLEDLVGRDKENGPEGLSKLGSSSTAELHPKGGGGPSD
43+
GAKQAHPQMPPASVMTRLILIMVWRKLIRNPNTYSSLIGLIWSLVA----FRWHVTMP-K
44+
IVEKSISILSDAGLGMAMFSLGLFMALQPKIIACGNSVATFAMAVRFLTGPAVMAAASIA
45+
VGLRGVLLHVAIVQAA-----LPQG--IVPFVFAKEYNVHPAIL----------------
46+
-----STAVIF-------------------------GMLIA-------------------
47+
-------------------LPI--------------------------------------
48+
-----------------TLVYYIILGL--------
49+
>CALJE_DJ01Gene08527
50+
----------------------MISLVIVYHVLEATVPLY-------AAMILAYLSVKWW
51+
KLFTPDQCTGINNFVAKFSVPLLSFHVISPNNPYPINLKLLFSDSVQKILALLVFAVLSR
52+
PCFRGS--------------LDWLITGFSTSTLPNTLIIGIPLLKGL-------YGDEAV
53+
KLLGQIIV----LQSLVWNTLLLFLFEVRAAKAVAA------------------------
54+
------------------------------------------------------------
55+
-----------------NPTD---------------NTG-----ELESSGGIQPKP----
56+
---------------------------------------------------EED--EVKC
57+
L-----------------------------------------------------------
58+
------------------------------------------------------------
59+
------------------------------------LAGFIWALIL----FRWEVELP-L
60+
IISKCISILSDGGLGMAMFSLGLFMASQSSIIASGTWMMVLSMGLRFIIGPALIAIPSYA
61+
IGLRATLLKVAIVQAA-----LPQG--IVPFVFAKEYSVHPDIL----------------
62+
-----STGFKLD-----------------------QGFLLADERAIEKIDAVELRENPDN
63+
EMGRSPESRR----KGAVIVPIISD-----------------------------------
64+
-----AIGADAGGGLFQAVSSAFRRPHC-------
65+
>CALJE_DJ01Gene19453
66+
----------------------MITGKDIYDVLAAIVPLY-------VAMILAYGSVRWW
67+
KIFTPDQCSGINRFVAVFAVPLLSFHFISTNNPYAMNYHFIGADSLQKVVILFALFLWHN
68+
LSKRGN--------------LDWTITLFSLSTLPNTLVMGIPLLRAM-------YGDFSG
69+
SLMVQIVV----LQSVIWYTLMLFLFEYRGAKALISEQFPPDIAGSITSFRVDSDVVSLN
70+
-GREPLQADAEIRQDGKLHVVVRRSTSSAARSMSSSYNKSHGLNSITSMTPRASNLTGVE
71+
IYSLQSSREPTPRASSFNQTDFYAMF---------------------SSKVTSPRP----
72+
---------------------------------DPSKP-----------ALHQDIPAPKG
73+
LH--APSGNASPIRKAG-----TNGDLEIED--------GCKSQEGKFP-------ASPF
74+
VAQKKGTDTGGVA-----------------------------------------------
75+
GLVENNHRMPPASVMTRLILIMVWRKLIRNPNTYSSLLGLVWSLVS----FRWNIEMP-T
76+
IIKGSISILSDAGLGMAMFSLGLFMALQPKIIACGKSVAAFSMAVRFLTGPAVIAATSIA
77+
IGVRGVLLHVAIVQAA-----LPQG--IVPFVFAKEYNCHPDIL----------------
78+
-----STAVIF-------------------------GMLIA-------------------
79+
-------------------LPI--------------------------------------
80+
-----------------TILYYVLLGV--------
81+
>CALJE_DJ01Gene20884
82+
----------------------MIGWEDVCKVVASMVPLY-------VALGLGYGSVRWW
83+
HVFTPDQCDAVNRLVFSFTLPFYAFEFAIHADPFSMNCRLIAADIIGKLIVILVLSLWAK
84+
-CREGSH------------RCSWFITAFSLSTLNNSLVLGLPLLTSL-------YGPSVR
85+
GLVVQTTV----MQAIIWLTLLLFMLEIKKAKGILLEE----------------------
86+
------------------------------------------------------------
87+
----------------------------------------------------SPMK----
88+
---------------------------------------------------EER------
89+
---------SWP------------------------------------------------
90+
----------------SFW-----------------------------------------
91+
-----------------CLMRMVWLKLALNPNFYASILGITWAFIA----NRWQLEMP-S
92+
IMEGSVLVISRAGTGMAMFCIGLFMALQKKMLACGARLTALALVLRFVVGPATMAISAFA
93+
VGLHGDVLRLAIIQAA-----LPQS--VTSFIFAREYGLHADVL----------------
94+
-----STAVIL-------------------------GLLVC-------------------
95+
-------------------LPV--------------------------------------
96+
-----------------LVAYNVLLGFVT------

0 commit comments

Comments
 (0)