Skip to content
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
027c8be
transfered sylph to galaxytools due to size of test database metadata…
tcollins2011 Oct 8, 2024
d7fc0e7
Update tools/sylph/sylph.xml
tcollins2011 Oct 9, 2024
6146fb0
Update tools/sylph/.shed.yml
tcollins2011 Oct 9, 2024
f1e4551
Update tools/sylph/.shed.yml
tcollins2011 Oct 9, 2024
6b9d9cb
Update tools/sylph/macros.xml
tcollins2011 Oct 9, 2024
55d54f1
Update tools/sylph/macros.xml
tcollins2011 Oct 9, 2024
3519a0b
Update tools/sylph/.shed.yml
tcollins2011 Oct 9, 2024
8944a8d
replaced all double quotes in the command section with single quotes …
tcollins2011 Oct 9, 2024
63ca10f
Update tools/sylph/sylph.xml
tcollins2011 Oct 9, 2024
46e5d68
updated database sylmlink to better reflect the name
tcollins2011 Oct 9, 2024
477e325
changed ouput name
tcollins2011 Oct 9, 2024
25d9bac
changed database path names and sample file
tcollins2011 Oct 10, 2024
1513faf
python linting and tab spacing
tcollins2011 Oct 10, 2024
691a915
Merge branch 'master' into sylph
tcollins2011 Oct 10, 2024
9221bb8
fixing flake8 linting problems
tcollins2011 Oct 10, 2024
d86bcf0
force adding the extra test files and fixing a spacing issue in python
tcollins2011 Oct 10, 2024
3c35f6f
actually remembring to add the correct whitespace file to my commit
tcollins2011 Oct 10, 2024
5566c61
changed profile and query to be different tools and upated the macros…
tcollins2011 Oct 25, 2024
21de493
some of the comments
Dec 14, 2024
3fae5b1
lint fix
Dec 15, 2024
da2821f
Merge branch 'bgruening:master' into sylph
tcollins2011 Dec 16, 2024
2dfa011
fixed database tabs
tcollins2011 Dec 16, 2024
bdc5e02
added history database test
tcollins2011 Dec 16, 2024
35d6a24
Merge branch 'master' into sylph
bgruening Mar 27, 2025
5cc9611
add sylph database and sylph-tax metadatas DM
hugolefeuvre Apr 22, 2025
03244e5
update sylph version, add sketch fasta, sylph-tax and DM informations
hugolefeuvre Apr 22, 2025
4ea1e5a
Merge pull request #1 from hugolefeuvre/sylph
tcollins2011 Apr 30, 2025
cec595b
delete >1Mo fasta file, add a remote file to replace it, add sort int…
hugolefeuvre May 6, 2025
5ccf6cc
same changes in query
hugolefeuvre May 6, 2025
6044e48
change value into location
hugolefeuvre May 6, 2025
b9c910c
Merge pull request #2 from hugolefeuvre/sylph
tcollins2011 May 7, 2025
b307470
add long description into shed sylph tax DM
hugolefeuvre May 7, 2025
1ab2a4c
Merge pull request #3 from hugolefeuvre/sylph
tcollins2011 May 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions data_managers/data_manager_sylph_database/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Tool Shed metadata for the sylph pre-built database data manager.
name: data_manager_sylph_database
owner: bgruening
type: unrestricted
categories:
  - Data Managers
# Short summary, followed by the longer text shown on the repository page.
description: "Download pre-built databases for sylph"
long_description: |
  Download pre-built databases that have associated taxonomies that sylph can utilize for metagenomic profiling and incorporating taxonomy into sylph.
# Upstream project and the location of this wrapper's sources.
homepage_url: "https://github.com/bluenote-1577/sylph"
remote_repository_url: "https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_sylph_database"
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
<tool id="data_manager_sylph_database" name="Download pre-built sylph databases" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="@PROFILE@">
    <description></description>
    <macros>
        <token name="@TOOL_VERSION@">0.8.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">23.0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">sylph</requirement>
        <!-- wget is called by the command below, so it is declared explicitly. -->
        <requirement type="package" version="1.21.4">wget</requirement>
    </requirements>
    <!--
        Downloads the selected .syldb file into the output's extra files directory
        and writes the data manager JSON (built in the "dmjson" configfile) as the
        tool output.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## wget stores the download under the basename of the URL.
        #set $file = str($db_link).split('/')[-1]

        mkdir -p '$out_file.extra_files_path' &&
        wget '$db_link' &&
        mv '$file' '$out_file.extra_files_path' &&
        cp '$dmjson' '$out_file'
    ]]></command>
    <configfiles>
        <!--
            JSON describing the single new row for the "sylph_databases" data table.
            "path" is the bare file name; the data manager configuration is expected
            to translate it into the final location.
        -->
        <configfile name="dmjson"><![CDATA[
#from datetime import date
#set $file = str($db_link).split('/')[-1]

{
  "data_tables":{
    "sylph_databases":[
      {
        "value": "sylph_downloaded_#echo date.today().strftime('%d%m%Y')#_${file}",
        "name": "sylph database ${file}",
        "path": "${file}",
        "clade": "${clades}",
        "sylph_tax_identifier": "${name}",
        "version": "@TOOL_VERSION@"
      }
    ]
  }
}]]></configfile>
    </configfiles>
    <inputs>
        <!--
            Three-level selection: clade, then database family, then the concrete
            download URL. "name" is used instead of "argument" because these
            selects are not command-line flags of an underlying program.
        -->
        <conditional name="db_type">
            <param name="clades" type="select" label="Type of Databases">
                <option value="prokaryote">Prokaryote databases</option>
                <option value="eukaryote">Eukaryote databases</option>
                <option value="virus">Virus databases</option>
            </param>
            <when value="prokaryote">
                <conditional name="db_name">
                    <param name="name" type="select" label="Database">
                        <option value="GTDB_r220">GTDB r220</option>
                        <option value="GTDB_r214">GTDB r214</option>
                        <option value="OceanDNA">Ocean DNA</option>
                        <option value="SoilSMAG">Soil MAGs</option>
                    </param>
                    <when value="GTDB_r220">
                        <param name="db_link" type="select" label="Sylph prokaryote databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/gtdb-r220-c200-dbv1.syldb" selected="true">GTDB-r220 (April 2024) -c 200, more sensitive database</option>
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/gtdb-r220-c1000-dbv1.syldb">GTDB-r220 (April 2024) -c 1000 more efficient, less sensitive database</option>
                        </param>
                    </when>
                    <when value="GTDB_r214">
                        <param name="db_link" type="select" label="Sylph prokaryote databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/v0.3-c200-gtdb-r214.syldb">GTDB-r214 (April 2023) -c 200, more sensitive database</option>
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/v0.3-c1000-gtdb-r214.syldb">GTDB-r214 (April 2023) -c 1000 more efficient, less sensitive database</option>
                        </param>
                    </when>
                    <when value="OceanDNA">
                        <param name="db_link" type="select" label="Sylph prokaryote databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/OceanDNA-c200-v0.3.syldb">OceanDNA - ocean MAGs from Nishimura and Yoshizawa, -c 200, more sensitive database</option>
                        </param>
                    </when>
                    <when value="SoilSMAG">
                        <param name="db_link" type="select" label="Sylph prokaryote databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/SMAG-c200-v0.3.syldb">SoilSMAG (SMAG) from Ma et al. -c 200, more sensitive database</option>
                        </param>
                    </when>
                </conditional>
            </when>
            <when value="eukaryote">
                <conditional name="db_name">
                    <param name="name" type="select" label="Database">
                        <option value="FungiRefSeq-2024-07-25">Refseq fungi 2024-07-25</option>
                        <option value="TaraEukaryoticSMAG">TARA eukaryotic SMAGs</option>
                    </param>
                    <when value="FungiRefSeq-2024-07-25">
                        <param name="db_link" type="select" label="Sylph eukaryote databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/fungi-refseq-2024-07-25-c200-v0.3.syldb" selected="true">FungiRefSeq-2024-07-25 - Refseq fungi representative genomes collected on 2024-07-25</option>
                        </param>
                    </when>
                    <when value="TaraEukaryoticSMAG">
                        <param name="db_link" type="select" label="Sylph eukaryote databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/tara-eukmags-c200-v0.3.syldb">TaraEukaryoticSMAG - TARA eukaryotic SMAGs from Delmont et al. -c 200, more sensitive database</option>
                        </param>
                    </when>
                </conditional>
            </when>
            <when value="virus">
                <conditional name="db_name">
                    <param name="name" type="select" label="Database">
                        <option value="IMGVR_4.1">IMG/VR 4.1</option>
                    </param>
                    <when value="IMGVR_4.1">
                        <param name="db_link" type="select" label="Sylph virus databases">
                            <option value="http://faust.compbio.cs.cmu.edu/sylph-stuff/imgvr_c200_v0.3.0.syldb" selected="true">IMGVR_4.1 high-confidence viral OTU genomes, -c 200, more sensitive database</option>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" label="${tool.name}"/>
    </outputs>
    <tests>
        <!-- Eukaryote branch: fungi RefSeq database. -->
        <test expect_num_outputs="1">
            <conditional name="db_type">
                <param name="clades" value="eukaryote"/>
                <conditional name="db_name">
                    <param name="name" value="FungiRefSeq-2024-07-25"/>
                    <param name="db_link" value="http://faust.compbio.cs.cmu.edu/sylph-stuff/fungi-refseq-2024-07-25-c200-v0.3.syldb"/>
                </conditional>
            </conditional>
            <output name="out_file">
                <assert_contents>
                    <has_text text='"sylph_databases":'/>
                    <has_text_matching expression='"value": "sylph_downloaded_[0-9]{8}_fungi-refseq-2024-07-25-c200-v0.3.syldb"'/>
                    <has_text text='"name": "sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb"'/>
                    <has_text_matching expression='"path": "fungi-refseq-2024-07-25-c200-v0.3.syldb"'/>
                    <has_text text='"clade": "eukaryote"'/>
                    <has_text text='"sylph_tax_identifier": "FungiRefSeq-2024-07-25"'/>
                    <has_text text='"version": "@TOOL_VERSION@"'/>
                </assert_contents>
            </output>
        </test>
        <!-- Prokaryote branch: OceanDNA database. -->
        <test expect_num_outputs="1">
            <conditional name="db_type">
                <param name="clades" value="prokaryote"/>
                <conditional name="db_name">
                    <param name="name" value="OceanDNA"/>
                    <param name="db_link" value="http://faust.compbio.cs.cmu.edu/sylph-stuff/OceanDNA-c200-v0.3.syldb"/>
                </conditional>
            </conditional>
            <output name="out_file">
                <assert_contents>
                    <has_text text='"sylph_databases":'/>
                    <has_text_matching expression='"value": "sylph_downloaded_[0-9]{8}_OceanDNA-c200-v0.3.syldb"'/>
                    <has_text text='"name": "sylph database OceanDNA-c200-v0.3.syldb"'/>
                    <has_text_matching expression='"path": "OceanDNA-c200-v0.3.syldb"'/>
                    <has_text text='"clade": "prokaryote"'/>
                    <has_text text='"sylph_tax_identifier": "OceanDNA"'/>
                    <has_text text='"version": "@TOOL_VERSION@"'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
This tool downloads pre-built sylph databases and registers them in the ``sylph_databases`` data table.

**-c** is the compression parameter. Memory and runtime scale like 1/c; a higher c is faster but less sensitive at low coverage. The default is c = 200.

The -c used for the genomes (the database) must be greater than or equal to the -c used for the reads (strictly greater is allowed).
    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41587-024-02412-y</citation>
    </citations>
</tool>
22 changes: 22 additions & 0 deletions data_managers/data_manager_sylph_database/data_manager_conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0"?>
<data_managers>
    <!-- Binds the sylph database download tool to the data table it fills. -->
    <data_manager tool_file="data_manager/data_manager_sylph_database.xml" id="data_manager_sylph_database">
        <!-- Data table that receives the new row. -->
        <data_table name="sylph_databases">
            <!-- Mapping from the tool's JSON output to the table columns. -->
            <output>
                <!-- Identifier and display name come straight from the tool output. -->
                <column name="value"/>
                <column name="name"/>
                <!--
                    The file named by "path" is moved under the managed data
                    directory (sylph/), and the stored value is rewritten to
                    that location and made absolute.
                -->
                <column name="path" output_ref="out_file">
                    <move type="file">
                        <source>${path}</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">sylph/${path}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/sylph/${path}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
                <!-- Remaining descriptive columns, also supplied by the tool output. -->
                <column name="clade"/>
                <column name="sylph_tax_identifier"/>
                <column name="version"/>
            </output>
        </data_table>
    </data_manager>
</data_managers>
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#This is a tab separated file describing the location of pre-built sylph databases
#used by sylph
#
#file has this format (white space characters are TAB characters)
#
#The columns are:
#value	name	path	clade	sylph_tax_identifier	version
#
#For example
#sylph_downloaded_22042025_OceanDNA-c200-v0.3.syldb	sylph database OceanDNA-c200-v0.3.syldb	/path/to/data/OceanDNA-c200-v0.3.syldb	prokaryote	OceanDNA	0.8.1
sylph_downloaded_08042025_fungi-refseq-2024-07-25-c200-v0.3.syldb sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb /tmp/tmp_coqavq8/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb eukaryote FungiRefSeq-2024-07-25 0.8.1
sylph_downloaded_08042025_OceanDNA-c200-v0.3.syldb sylph database OceanDNA-c200-v0.3.syldb /tmp/tmp_coqavq8/galaxy-dev/tool-data/sylph/OceanDNA-c200-v0.3.syldb prokaryote OceanDNA 0.8.1
sylph_downloaded_15042025_fungi-refseq-2024-07-25-c200-v0.3.syldb sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb /tmp/tmp4uwrqyau/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb eukaryote FungiRefSeq-2024-07-25 0.8.1
sylph_downloaded_15042025_OceanDNA-c200-v0.3.syldb sylph database OceanDNA-c200-v0.3.syldb /tmp/tmp4uwrqyau/galaxy-dev/tool-data/sylph/OceanDNA-c200-v0.3.syldb prokaryote OceanDNA 0.8.1
sylph_downloaded_22042025_fungi-refseq-2024-07-25-c200-v0.3.syldb sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb /tmp/tmpj3dj95of/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb eukaryote FungiRefSeq-2024-07-25 0.8.1
sylph_downloaded_22042025_fungi-refseq-2024-07-25-c200-v0.3.syldb sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb /tmp/tmparv308ud/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb eukaryote FungiRefSeq-2024-07-25 0.8.1
sylph_downloaded_22042025_fungi-refseq-2024-07-25-c200-v0.3.syldb sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb /tmp/tmp0ldqph7a/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb eukaryote FungiRefSeq-2024-07-25 0.8.1
sylph_downloaded_22042025_OceanDNA-c200-v0.3.syldb sylph database OceanDNA-c200-v0.3.syldb /tmp/tmp0ldqph7a/galaxy-dev/tool-data/sylph/OceanDNA-c200-v0.3.syldb prokaryote OceanDNA 0.8.1
sylph_downloaded_22042025_fungi-refseq-2024-07-25-c200-v0.3.syldb sylph database fungi-refseq-2024-07-25-c200-v0.3.syldb /tmp/tmpmiwfhvr6/galaxy-dev/tool-data/sylph/fungi-refseq-2024-07-25-c200-v0.3.syldb eukaryote FungiRefSeq-2024-07-25 0.8.1
sylph_downloaded_22042025_OceanDNA-c200-v0.3.syldb sylph database OceanDNA-c200-v0.3.syldb /tmp/tmpmiwfhvr6/galaxy-dev/tool-data/sylph/OceanDNA-c200-v0.3.syldb prokaryote OceanDNA 0.8.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#This is a tab separated file describing the location of pre-built sylph databases
#used by sylph
#
#file has this format (white space characters are TAB characters)
#
#The columns are:
#value	name	path	clade	sylph_tax_identifier	version
#
#For example
#sylph_downloaded_22042025_OceanDNA-c200-v0.3.syldb	sylph database OceanDNA-c200-v0.3.syldb	/path/to/data/OceanDNA-c200-v0.3.syldb	prokaryote	OceanDNA	0.8.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<tables>
    <!-- Pre-built sylph databases; lines starting with "#" in the .loc file are comments. -->
    <table comment_char="#" name="sylph_databases">
        <columns>value, name, path, clade, sylph_tax_identifier, version</columns>
        <file path="tool-data/sylph_databases.loc"/>
    </table>
</tables>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<tables>
    <!-- Test variant of the sylph database table; reads the .loc file shipped in test-data. -->
    <table comment_char="#" name="sylph_databases">
        <columns>value, name, path, clade, sylph_tax_identifier, version</columns>
        <file path="${__HERE__}/test-data/sylph_databases.loc.test"/>
    </table>
</tables>
8 changes: 8 additions & 0 deletions data_managers/data_manager_sylph_tax_database/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Tool Shed metadata for the sylph-tax taxonomy metadata data manager.
name: data_manager_sylph_tax_database
owner: bgruening
description: "Download sylph-tax taxonomy metadata files"
homepage_url: "https://github.com/bluenote-1577/sylph-tax"
# Longer text for the repository page, matching the sibling sylph database data manager.
long_description: |
  Download the sylph-tax taxonomy metadata files for the pre-built sylph databases, so that taxonomy can be incorporated into sylph results.
remote_repository_url: "https://github.com/bgruening/galaxytools/tree/master/data_managers/data_manager_sylph_tax_database"
type: unrestricted
categories:
  - Data Managers
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<tool id="data_manager_sylph_tax_database" name="Download sylph-tax taxonomy metadata files" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" tool_type="manage_data">
    <description/>
    <macros>
        <token name="@TOOL_VERSION@">1.2.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">23.0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">sylph-tax</requirement>
    </requirements>
    <!--
        Runs "sylph-tax download" into a versioned sub-directory of the output's
        extra files directory, then writes the data manager JSON (built in the
        "dmjson" configfile) as the tool output.
    -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir -p '$out_file.extra_files_path'/sylph_tax_metadata_@TOOL_VERSION@ &&
        sylph-tax download --download-to '$out_file.extra_files_path'/sylph_tax_metadata_@TOOL_VERSION@ &&
        cp '$dmjson' '$out_file'
    ]]></command>
    <configfiles>
        <!-- JSON describing the single new row for the "sylph_tax_database" data table. -->
        <configfile name="dmjson"><![CDATA[
#from datetime import date

{
  "data_tables":{
    "sylph_tax_database":[
      {
        "value": "sylph_tax_downloaded_#echo date.today().strftime('%d%m%Y')#",
        "name": "sylph tax database @TOOL_VERSION@",
        "path": "sylph_tax_metadata_@TOOL_VERSION@",
        "version": "@TOOL_VERSION@"
      }
    ]
  }
}]]></configfile>
    </configfiles>
    <inputs>
        <!-- NOTE(review): "version" is not referenced by the command or the configfile above; confirm whether it is meant to be informational only. -->
        <param name="version" type="text" label="Version" value="@TOOL_VERSION@"/>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" label="${tool.name}"/>
    </outputs>
    <tests>
        <!-- Default run: checks every field of the generated data manager JSON. -->
        <test expect_num_outputs="1">
            <output name="out_file">
                <assert_contents>
                    <has_text text='"sylph_tax_database":'/>
                    <has_text_matching expression='"value": "sylph_tax_downloaded_[0-9]{8}"'/>
                    <has_text text='"name": "sylph tax database @TOOL_VERSION@"'/>
                    <has_text_matching expression='"path": "sylph_tax_metadata_@TOOL_VERSION@"'/>
                    <has_text text='"version": "@TOOL_VERSION@"'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
This tool downloads sylph-tax taxonomy metadata files for pre-built databases.
    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41587-024-02412-y</citation>
    </citations>
</tool>
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0"?>
<data_managers>
    <!-- Binds the sylph-tax metadata download tool to the data table it fills. -->
    <data_manager tool_file="data_manager/data_manager_sylph_tax_database.xml" id="data_manager_sylph_tax_database">
        <!-- Data table that receives the new row. -->
        <data_table name="sylph_tax_database">
            <!-- Mapping from the tool's JSON output to the table columns. -->
            <output>
                <!-- Identifier and display name come straight from the tool output. -->
                <column name="value"/>
                <column name="name"/>
                <!--
                    The directory named by "path" is moved under the managed data
                    directory (sylph_tax/), and the stored value is rewritten to
                    that location and made absolute.
                -->
                <column name="path" output_ref="out_file">
                    <move type="directory">
                        <source>${path}</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">sylph_tax/${path}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/sylph_tax/${path}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
                <column name="version"/>
            </output>
        </data_table>
    </data_manager>
</data_managers>
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#This is a tab separated file describing the location of sylph-tax taxonomy metadata
#used by sylph-tax
#
#file has this format (white space characters are TAB characters)
#
#The columns are:
#value	name	path	version
#
#For example
#sylph_tax_downloaded_22042025	sylph tax database 1.2.0	/path/to/data/sylph_tax_metadata_1.2.0	1.2.0
sylph_tax_downloaded_15042025 sylph tax database 1.2.0 /tmp/tmpsxnxlktw/galaxy-dev/tool-data/sylph_tax/sylph_tax_metadata_1.2.0 1.2.0
sylph_tax_downloaded_22042025 sylph tax database 1.2.0 /tmp/tmpl5hobqgw/galaxy-dev/tool-data/sylph_tax/sylph_tax_metadata_1.2.0 1.2.0
sylph_tax_downloaded_22042025 sylph tax database 1.2.0 /tmp/tmpcys1653j/galaxy-dev/tool-data/sylph_tax/sylph_tax_metadata_1.2.0 1.2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#This is a tab separated file describing the location of sylph-tax taxonomy metadata
#used by sylph-tax
#
#file has this format (white space characters are TAB characters)
#
#The columns are:
#value	name	path	version
#
#For example
#sylph_tax_downloaded_22042025	sylph tax database 1.2.0	/path/to/data/sylph_tax_metadata_1.2.0	1.2.0
Loading
Loading