fmalmeida
diff --git a/‎.zenodo.json‎
Lines changed: 1 addition & 1 deletion b/‎.zenodo.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 22 additions & 57 deletions b/‎README.md‎
Lines changed: 22 additions & 57 deletions
diff --git a/‎bin/gff2sql.R‎
Lines changed: 2 additions & 2 deletions b/‎bin/gff2sql.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎bin/mlst-make_blast_db.sh‎
Lines changed: 23 additions & 0 deletions b/‎bin/mlst-make_blast_db.sh‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎bin/run_jbrowse.sh‎
Lines changed: 70 additions & 50 deletions b/‎bin/run_jbrowse.sh‎
Lines changed: 70 additions & 50 deletions
diff --git a/‎conf/defaults.config‎
Lines changed: 10 additions & 6 deletions b/‎conf/defaults.config‎
Lines changed: 10 additions & 6 deletions
@@ -2,7 +2,7 @@
     "description": "<p>The pipeline</p>\n\n<p>bacannot, is a customisable, easy to use, pipeline that uses state-of-the-art software for comprehensively annotating prokaryotic genomes having only Docker and Nextflow as dependencies. It is able to annotate and detect virulence and resistance genes, plasmids, secondary metabolites, genomic islands, prophages, ICEs, KO, and more, while providing nice and beautiful interactive documents for results exploration.</p>", 
     "license": "other-open", 
     "title": "fmalmeida/bacannot: A generic but comprehensive bacterial annotation pipeline", 
-    "version": "v3.2", 
+    "version": "v3.3", 
     "upload_type": "software",
     "creators": [
         {
 
@@ -1,13 +1,14 @@
 <img src="images/lOGO_3.png" width="300px">
 
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3627669-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3627669)
+[![F1000 Paper](https://img.shields.io/badge/Citation%20F1000-10.12688/f1000research.139488.1-orange)](https://doi.org/10.12688/f1000research.139488.1)
 [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/fmalmeida/bacannot?include_prereleases&label=Latest%20release)](https://github.com/fmalmeida/bacannot/releases)
 [![Documentation](https://img.shields.io/badge/Documentation-readthedocs-brightgreen)](https://bacannot.readthedocs.io/en/latest/?badge=latest)
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![License](https://img.shields.io/badge/License-GPL%203-black)](https://github.com/fmalmeida/bacannot/blob/master/LICENSE)
 [![Follow on Twitter](http://img.shields.io/badge/twitter-%40fmarquesalmeida-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/fmarquesalmeida)
+[![Zenodo Archive](https://img.shields.io/badge/Zenodo-Archive-blue)](https://doi.org/10.5281/zenodo.3627669)
 
 [![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/github.com/fmalmeida/bacannot)
 
@@ -47,8 +48,9 @@ Its main steps are:
 | Annotation of virulence genes | [Victors](http://www.phidias.us/victors/) and [VFDB](http://www.mgc.ac.cn/VFs/main.htm) |
 | Prophage sequences and genes annotation | [PHASTER](http://phast.wishartlab.com/), [Phigaro](https://github.com/bobeobibo/phigaro) and [PhiSpy](https://github.com/linsalrob/PhiSpy) |
 | Annotation of integrative and conjugative elements | [ICEberg](http://db-mml.sjtu.edu.cn/ICEberg/) |
+| Annotation of bacterial integrons | [Integron Finder](https://github.com/gem-pasteur/Integron_Finder) |
 | Focused detection of insertion sequences | [digIS](https://github.com/janka2012/digIS) |
-| _In silico_ detection of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/) and [Platon](https://github.com/oschwengers/platon) |
+| _In silico_ detection and typing of plasmids | [Plasmidfinder](https://cge.cbs.dtu.dk/services/PlasmidFinder/), [Platon](https://github.com/oschwengers/platon) and [MOB-typer](https://github.com/phac-nml/mob-suite)|
 | Prediction and visualization of genomic islands | [IslandPath-DIMOB](https://github.com/brinkmanlab/islandpath) and [gff-toolbox](https://github.com/fmalmeida/gff-toolbox) |
 | Custom annotation from formatted FASTA or NCBI protein IDs | [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs) |
 | Merge of annotation results | [bedtools](https://bedtools.readthedocs.io/en/latest/) |
@@ -86,18 +88,7 @@ These images have been kept separate to not create massive Docker image and to a
 
 ## Installation
 
-1. If you don't have it already install [Docker](https://docs.docker.com/) in your computer.
-    * After installed, you need to download the required Docker images
-
-    ```bash
-    docker pull fmalmeida/bacannot:v3.2_misc    ;
-    docker pull fmalmeida/bacannot:v3.2_perlenv ;
-    docker pull fmalmeida/bacannot:v3.2_pyenv   ;
-    docker pull fmalmeida/bacannot:v3.2_renv    ;
-    docker pull fmalmeida/bacannot:jbrowse      ;
-    ```
-
-🔥 Nextflow can also automatically handle images download on the fly when executed. If docker has exceeded its download limit rates, please try again in a few hours.
+1. If you don't have it already, install either [Docker](https://docs.docker.com/) or [Singularity](https://docs.sylabs.io/guides/3.5/user-guide/index.html) on your computer.
 
 2. Install Nextflow (version 20.10 or higher):
 
@@ -111,48 +102,7 @@ These images have been kept separate to not create massive Docker image and to a
 
 🔥 Users can keep the pipeline always updated with: `nextflow pull fmalmeida/bacannot`
 
-### Downloading and updating databases
-
-Bacannot databases are not inside the docker images anymore to avoid huge images and problems with connections and limit rates with dockerhub.
-
-#### Pre-formatted
-
-Users can directly download pre-formatted databases from Zenodo: https://doi.org/10.5281/zenodo.7615811
-
-Useful for standardization and also overcoming known issues that may arise when formatting databases with `singularity` profile.
-
-#### I want to generate a new formatted database
-
-To download and format a copy of required bacannot databases users can execute the following:
-
-```bash
-# Download pipeline databases
-nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile <docker/singularity>
-```
-
-This will produce a directory like this:
-
-```bash
-bacannot_dbs
-├── amrfinder_db
-├── antismash_db
-├── argminer_db
-├── card_db
-├── iceberg_db
-├── kofamscan_db
-├── mlst_db
-├── phast_db
-├── phigaro_db
-├── pipeline_info
-├── plasmidfinder_db
-├── platon_db
-├── prokka_db
-├── resfinder_db
-├── vfdb_db
-└── victors_db
-```
-
-> To update databases you can either download a new one to a new directory. Remove the database you want to get a new one from the root bacannot dir and use the same command above to save in the same directory (the pipeline will only try to download missing databases). Or, you can use the parameter `--force_update` to download everything again.
+<a href="https://bacannot.readthedocs.io/en/latest/installation"><strong>Please refer to the installation page for a complete guide on required images and databases. »</strong></a>
 
 ## Quickstart
 
@@ -185,6 +135,17 @@ Create a configuration file in your working directory:
 
       nextflow run fmalmeida/bacannot --get_config
 
+##### Overwrite container versions with config
+
+The pipeline uses pre-set docker and singularity configuration files to set all the containers and versions of images that should be used by each module in the pipeline.
+
+Although not recommended, one can use these configuration files to change the version of specific tools if desired.
+
+To download these configs one can:
+
+      nextflow run fmalmeida/bacannot --get_docker_config
+      nextflow run fmalmeida/bacannot --get_singularity_config
+
 ### Interactive graphical configuration and execution
 
 #### Via NF tower launchpad (good for cloud env execution)
@@ -234,7 +195,11 @@ It will result in the following:
 
 ## Citation
 
-To cite this tool please refer to our [Zenodo tag](https://doi.org/10.5281/zenodo.3627669).
+In order to cite this pipeline, please refer to:
+
+> Almeida FMd, Campos TAd and Pappas Jr GJ. Scalable and versatile container-based pipelines for de novo genome assembly and bacterial annotation. [version 1; peer review: awaiting peer review]. F1000Research 2023, 12:1205 (https://doi.org/10.12688/f1000research.139488.1)
+
+Additionally, archived versions of the pipeline are also found in [Zenodo](https://doi.org/10.5281/zenodo.3627669).
 
 This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [GPLv3](https://github.com/fmalmeida/bacannot/blob/master/LICENSE).
 
 
@@ -54,8 +54,8 @@ addTable <- function (con, sql, input) {
 
 # Loading SQL database driver
 drv <- dbDriver("SQLite")
-dbname <- file.path("/work", opt$out)
-con <- dbConnect(drv, dbname=dbname)
+print(opt$out)
+con <- dbConnect(drv, dbname=opt$out)
 
 #####################################
 ### First STEP load GENOME to sql ###
 
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+MLSTDIR="$0"
+BLASTDIR="$DIR/../db/blast"
+BLASTFILE="$BLASTDIR/mlst.fa"
+
+mkdir -p "$BLASTDIR"
+rm -f "$BLASTFILE"
+
+#for N in $(find $MLSTDIR -maxdepth 1 | grep -v '_2$'); do
+for N in $(find $MLSTDIR -mindepth 1 -maxdepth 1 -type d); do
+  SCHEME=$(basename $N)
+  echo "Adding: $SCHEME"
+  cat "$MLSTDIR"/$SCHEME/*.tfa \
+  	| grep -v 'not a locus'  \
+  	| sed -e "s/^>/>$SCHEME./" \
+  	>> "$BLASTFILE"
+done
+
+makeblastdb -hash_index -in "$BLASTFILE" -dbtype nucl -title "PubMLST" -parse_seqids
+
+echo "Created BLAST database for $BLASTFILE"
@@ -14,7 +14,7 @@ Help()
 	echo "Simple help message for the utilization of this script"
 	echo "It takes the jbrowse data path and all the files that shall be plotted from bacannot"
 	echo
-	echo "Syntax: run_jbrowse.sh [-h|p|g|b|s|f|r|B|P|G|m|S|R|d|A]"
+	echo "Syntax: run_jbrowse.sh [-h|p|g|b|s|f|r|B|P|G|m|S|R|d|A|i]"
 	echo "options:"
 	echo
 	echo "h		Print this help"
@@ -32,59 +32,63 @@ Help()
 	echo "R		Path to Resfinder custom GFF"
 	echo "d     Path to digIS custom GFF"
 	echo "A     Path to antismash custom GFF"
+	echo "i     Path to Integron Finder custom GFF"
 	echo ""
 	echo
 }
 
 # Get the options
-while getopts "hp:g:b:s:f:r:B:P:G:m:S:R:d:A:" option; do
-   case $option in
-      h) # display Help
-         Help
-         exit;;
-p) # get genome prefix
-	 PREFIX="$OPTARG"
-	 ;;
-g) # get genome FASTA
-	 GENOME="$OPTARG"
-	 ;;
-b) # get GC bedgraph
-	 BEDGRAPH="$OPTARG"
-	 ;;
-s) # get chr sizes
-	 CHRSIZES="$OPTARG"
-	 ;;
-f) # get prokka gff
-	 PROKKAGFF="$OPTARG"
-	 ;;
-r) # get barrnap gff
-	 rRNAGFF="$OPTARG"
-	 ;;
-B) # get phigaro bed
-	 PHIGAROBED="$OPTARG"
-	 ;;
-P) # get phispy bed
-	 PHISPYBED="$OPTARG"
-	 ;;
-G) # get GIs bed
-	 GIBED="$OPTARG"
-	 ;;
-m) # get nanopolish methylation
-	 NANOMETHYL="$OPTARG"
-	 ;;
-S) # get nanopolish chr sizes
-	 NANOSIZES="$OPTARG"
-	 ;;
-R) # get resfinder GFF
-	 RESFINDERGFF="$OPTARG"
-	 ;;
-d) # get digIS GFF
-	 DIGISGFF="$OPTARG"
-	 ;;
-A) # get antismash GFF
-	 ANTISMASHGFF="$OPTARG"
-	 ;;
-   esac
+while getopts "hp:g:b:s:f:r:B:P:G:m:S:R:d:A:i:" option; do
+	case $option in
+	h) # display Help
+		Help
+		exit;;
+	p) # get genome prefix
+		PREFIX="$OPTARG"
+		;;
+	g) # get genome FASTA
+		GENOME="$OPTARG"
+		;;
+	b) # get GC bedgraph
+		BEDGRAPH="$OPTARG"
+		;;
+	s) # get chr sizes
+		CHRSIZES="$OPTARG"
+		;;
+	f) # get prokka gff
+		PROKKAGFF="$OPTARG"
+		;;
+	r) # get barrnap gff
+		rRNAGFF="$OPTARG"
+		;;
+	B) # get phigaro bed
+		PHIGAROBED="$OPTARG"
+		;;
+	P) # get phispy bed
+		PHISPYBED="$OPTARG"
+		;;
+	G) # get GIs bed
+		GIBED="$OPTARG"
+		;;
+	m) # get nanopolish methylation
+		NANOMETHYL="$OPTARG"
+		;;
+	S) # get nanopolish chr sizes
+		NANOSIZES="$OPTARG"
+		;;
+	R) # get resfinder GFF
+		RESFINDERGFF="$OPTARG"
+		;;
+	d) # get digIS GFF
+		DIGISGFF="$OPTARG"
+		;;
+	A) # get antismash GFF
+		ANTISMASHGFF="$OPTARG"
+		;;
+	i) # get integron finder GFF
+		INTEGRONFINDERGFF="$OPTARG"
+		;;
+	esac
 done
 
 # Main
@@ -313,7 +317,7 @@ remove-track.pl --trackLabel "${PREFIX} CARD-RGI resistance features" --dir data
 --trackLabel "${PREFIX} Resfinder resistance features" --out "data" --nameAttributes "Resfinder_gene,ID,Resfinder_phenotype" ;
 remove-track.pl --trackLabel "${PREFIX} Resfinder resistance features" --dir data &> /tmp/error
 [ ! -s $RESFINDERGFF ] || echo -E " { \"compress\" : 0, \
- 	\"displayMode\" : \"compact\", \
+	\"displayMode\" : \"compact\", \
 	\"key\" : \"${PREFIX} Resfinder resistance features\", \
 	\"category\" : \"Resistance annotation\", \
 	\"label\" : \"${PREFIX} Resfinder resistance features\", \
@@ -343,6 +347,22 @@ remove-track.pl --trackLabel "${PREFIX} ICE genes from ICEberg database" --dir d
 	\"urlTemplate\" : \"tracks/${PREFIX} ICE genes from ICEberg database/{refseq}/trackData.json\" } " | add-track-json.pl  data/trackList.json
 [ $(grep "ICEberg" $PROKKAGFF | wc -l) -eq 0 ] || rm -f iceberg ices ;
 
+## Integron Finder
+[ $(wc -l $INTEGRONFINDERGFF) -eq 0 ] || flatfile-to-json.pl --gff $INTEGRONFINDERGFF --key "${PREFIX} Annotated Integrons - Integron Finder" --trackType CanvasFeatures \
+--trackLabel "${PREFIX} Annotated Integrons - Integron Finder" --out "data" --nameAttributes "ID,integron_type" ;
+remove-track.pl --trackLabel "${PREFIX} Annotated Integrons - Integron Finder" --dir data &> /tmp/error
+[ $(wc -l $INTEGRONFINDERGFF) -eq 0 ] || echo -E " {  \"compress\" : 0, \
+\"displayMode\" : \"compact\", \
+	\"key\" : \"${PREFIX} Annotated Integrons - Integron Finder\", \
+	\"category\" : \"MGEs annotation\", \
+	\"label\" : \"${PREFIX} Annotated Integrons - Integron Finder\", \
+	\"storeClass\" : \"JBrowse/Store/SeqFeature/NCList\", \
+	\"style\" : { \"className\" : \"feature\", \"color\": \"#6db6d9\" }, \
+	\"trackType\" : \"CanvasFeatures\", \
+	\"type\" : \"CanvasFeatures\", \
+	\"nameAttributes\" : \"ID,integron_type\", \
+	\"urlTemplate\" : \"tracks/${PREFIX} Annotated Integrons - Integron Finder/{refseq}/trackData.json\" } " | add-track-json.pl  data/trackList.json
+
 ## PROPHAGES
 ### PHAST
 [ $(grep "PHAST" $PROKKAGFF | wc -l) -eq 0 ] || grep "PHAST" $PROKKAGFF > prophage ;
 
@@ -14,8 +14,9 @@ params {
 
 // Trigger database download and formatting workflow? --> will not run annotation
 // Will download and format a database inside {output} parameter
-  get_dbs      = false
-  force_update = false
+  get_dbs       = false
+  force_update  = false
+  get_zenodo_db = false // download pre-built database
 
     /*
 
@@ -31,6 +32,9 @@ params {
 // It is also documented in the main manual: https://bacannot.readthedocs.io/en/latest/samplesheet
   input = null
 
+// Enable reads deduplication for assembly? (If input has reads)
+  enable_deduplication = false
+
 // path to directory containing databases used by bacannot
 // you can download databases with: 
 // nextflow run fmalmeida/bacannot --get_dbs --output bacannot_dbs -profile <docker/conda/singularity>
@@ -175,13 +179,13 @@ params {
 // Select versions of bioconda quay.io additional tools
 // Tools that are not part of the core of the pipeline,
 // but can eventually be used by users
-  unicycler_version = '0.4.8--py38h8162308_3'
-  flye_version      = '2.9--py39h39abbe0_0'
-  bakta_version     = '1.6.1--pyhdfd78af_0'
+  unicycler_version = '0.5.0--py310h6cc9453_3'
+  flye_version      = '2.9--py39h6935b12_1'
+  bakta_version     = '1.7.0--pyhdfd78af_1'
 
 // Max resource options
   max_memory                 = '20.GB'
   max_cpus                   = 16
   max_time                   = '40.h'
 
-}
+}
Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`"description": "<p>The pipeline</p>\n\n<p>bacannot, is a customisable, easy to use, pipeline that uses state-of-the-art software for comprehensively annotating prokaryotic genomes having only Docker and Nextflow as dependencies. It is able to annotate and detect virulence and resistance genes, plasmids, secondary metabolites, genomic islands, prophages, ICEs, KO, and more, while providing nice and beautiful interactive documents for results exploration.</p>",`
`3`	`3`	`"license": "other-open",`
`4`	`4`	`"title": "fmalmeida/bacannot: A generic but comprehensive bacterial annotation pipeline",`
`5`		`- "version": "v3.2",`
	`5`	`+ "version": "v3.3",`
`6`	`6`	`"upload_type": "software",`
`7`	`7`	`"creators": [`
`8`	`8`	`{`