Skip to content

Commit ddee4ad

Browse files
authored
Improve testing (#19)
* Add script to simulate reads * We use a main branch on this repo, not master * Rename ncbi-acc-download install script in workflow, capture art logs * Install ncbi-acc-download to conda env * Install conda env under opt * Cannot use --name and --prefix together * Give full path to conda env * Use conda directly * Run conda directly from workflow * Rename step and add miniconda to path * Generate simulated reads * Rename art install script in workflow * Re-arrange workflow steps * Create file as placeholder * Add ART to PATH * Install rename * Use alternate rename syntax * Build Kraken & Bracken DBs * Build Bracken DB and cache Kraken2/Bracken DB * Use custom accession2taxid file to avoid running out of disk space * Limit disk usage during kraken db build * Download example InterOp and use kraken accession list * skip header * do not copy Data directory from example InterOp data * Edit RunInfo.xml to match InterOp data * Touch up pull-request script * Save all results to artifacts dir * Improve caching and artifacts * Could not find v2 * Use v2.1.3 * Separate installation of system deps from miniconda * Adjust caching * Fix artifacts path * Add workflow for pushes to main branch * Adjust cache keys for caches that do not have good hash values * Do not require underscore after R1/R2 in fastq filenames * Use github-specific nextflow config while running tests * Separate ref genome list no longer necessary * Invoke nextflow to ensure all deps installed before caching * Use github config during initial nextflow run * Put config flag in correct location * Fix config file path * Fix config flag placement * Fix config file path * Cache ART installation * limit CPUs to 2 while testing * Set kraken2 cpus in config * Do not require underscore after R1/R2 in fastq filename * Cache ref genomes * Update * Update * Do not assume underscore after sample id * Invalidate ref genome cache * Check if output dir is already named correctly * Invalidate caches for ref genomes and kraken db * Fix if then fi bash syntax * Change read length to match simulated reads
1 parent f780a5c commit ddee4ad

23 files changed

+391
-52
lines changed

.github/config/nextflow.config

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
params {
2+
kraken2_db = ".github/data/kraken2_db"
3+
bracken_db = ".github/data/kraken2_db"
4+
instrument_type = "miseq"
5+
}
6+
7+
profiles {
8+
conda {
9+
process.conda = "$baseDir/environments/environment.yml"
10+
if (params.cache){
11+
conda.cacheDir = params.cache
12+
}
13+
}
14+
}
15+
16+
17+
// Capture exit codes from upstream processes when piping
18+
process.shell = ['/bin/bash', '-euo', 'pipefail']
19+
20+
process.executor = 'local'
21+
process.cpus = 2
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
accession accession.version taxid gi
2+
NC_002695 NC_002695.2 386585 1447699251
3+
NC_016845 NC_016845.1 1125630 378976159
4+
NZ_CP033744 NZ_CP033744.1 546 1520496619
5+
NC_003197 NC_003197.2 99287 1109557564

.github/data/mock_runs/210101_M00000_0000_000000000-A1B2C/Data/Intensities/BaseCalls/placeholder

Whitespace-only changes.

.github/data/mock_runs/210101_M00000_0000_000000000-A1B2C/SampleSheet.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ Assay,Illumina DNA Prep
1010
Index Adapters,IDT-Ilmn DNA-RNA UD Indexes SetA Tagmentation
1111
Chemistry,Amplicon
1212
[Reads]
13-
151
14-
151
13+
251
14+
251
1515
[Settings]
1616
ReverseComplement,0
1717
Adapter,CTGTCTCTTATACACATCT

.github/data/ref_genome_list.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

.github/scripts/create_bracken_db.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
2+
3+
set -eo pipefail
4+
5+
export PATH=/opt/miniconda3/bin:$PATH
6+
7+
pushd ${PWD}/.github/data
8+
9+
bracken-build -d kraken2_db -l 250 > bracken_build.log
10+
11+
kraken2-build --clean --db kraken2_db
12+
13+
popd
14+
15+
cp ${PWD}/.github/data/bracken_build.log artifacts

.github/scripts/create_kraken2_db.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash
2+
3+
set -eo pipefail
4+
5+
export PATH=/opt/miniconda3/bin:$PATH
6+
7+
pushd ${PWD}/.github/data/kraken2_db/taxonomy
8+
9+
rsync --no-motd rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz .
10+
11+
tar -xzf taxdump.tar.gz && rm taxdump.tar.gz
12+
13+
pushd ../..
14+
15+
for file in ref_genomes/*.fa; do
16+
kraken2-build --add-to-library ${file} --db kraken2_db
17+
done
18+
19+
kraken2-build --build --db kraken2_db > kraken_build.log
20+
21+
popd && popd
22+
23+
cp ${PWD}/.github/data/kraken_build.log artifacts
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
2+
3+
set -eo pipefail
4+
5+
pushd ${PWD}/.github/data
6+
7+
# Only publicly-available InterOp data I've found...
8+
wget http://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz
9+
10+
tar -xzf cellranger-tiny-bcl-1.2.0.tar.gz && rm cellranger-tiny-bcl-1.2.0.tar.gz
11+
12+
mv cellranger-tiny-bcl-1.2.0/InterOp mock_runs/210101_M00000_0000_000000000-A1B2C
13+
mv cellranger-tiny-bcl-1.2.0/RunInfo.xml mock_runs/210101_M00000_0000_000000000-A1B2C
14+
mv cellranger-tiny-bcl-1.2.0/runParameters.xml mock_runs/210101_M00000_0000_000000000-A1B2C
15+
16+
rm -r cellranger-tiny-bcl-1.2.0
17+
18+
# RunInfo.xml doesn't match InterOp data for some reason, so
19+
# edit RunInfo.xml to match
20+
cat mock_runs/210101_M00000_0000_000000000-A1B2C/RunInfo.xml | \
21+
sed 's/LaneCount="1"/LaneCount="2"/' | \
22+
sed 's/SurfaceCount="1"/SurfaceCount="2"/' | \
23+
sed 's/SwathCount="1"/SwathCount="2"/' | \
24+
sed 's/TileCount="1"/TileCount="32"/' \
25+
> RunInfo.edited.xml
26+
27+
mv RunInfo.edited.xml mock_runs/210101_M00000_0000_000000000-A1B2C/RunInfo.xml
28+
29+
popd

.github/scripts/download_reference_genomes.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22

33
set -eo pipefail
44

5+
export PATH=/opt/miniconda3/bin:${PATH}
6+
57
echo "Download Reference Genomes..." >> artifacts/test_artifact.log
68

79
mkdir -p $PWD/.github/data/ref_genomes
810

911
pushd $PWD/.github/data/ref_genomes
1012

11-
while read -r accession; do
12-
ncbi-acc-download --format fasta ${accession}
13+
while IFS=$'\t' read -r accession accession_version taxid gi ; do
14+
ncbi-acc-download --format fasta ${accession_version}
1315
sleep 5
14-
done < ../ref_genome_list.txt
16+
done < <(tail -n+2 ../kraken2_db/taxonomy/nucl_gb.accession2taxid)
1517

1618
popd
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
3+
set -eo pipefail
4+
5+
export PATH=/opt/art/bin/:${PATH}
6+
7+
pushd ${PWD}/.github/data/mock_runs/210101_M00000_0000_000000000-A1B2C/Data/Intensities/BaseCalls/
8+
9+
art_illumina --seqSys MSv3 --paired -i ../../../../../ref_genomes/NC_002695.2.fa --fcov 5 --mflen 500 --sdev 25 --len 250 --noALN -o test-01_R > ../../../../../../../artifacts/test-01_read_generation_log.txt
10+
11+
art_illumina --seqSys MSv3 --paired -i ../../../../../ref_genomes/NC_016845.1.fa --fcov 5 --mflen 500 --sdev 25 --len 250 --noALN -o test-02_R > ../../../../../../../artifacts/test-02_read_generation_log.txt
12+
13+
art_illumina --seqSys MSv3 --paired -i ../../../../../ref_genomes/NZ_CP033744.1.fa --fcov 5 --mflen 500 --sdev 25 --len 250 --noALN -o test-03_R > ../../../../../../../artifacts/test-03_read_generation_log.txt
14+
15+
art_illumina --seqSys MSv3 --paired -i ../../../../../ref_genomes/NC_003197.2.fa --fcov 5 --mflen 500 --sdev 25 --len 250 --noALN -o negative-control_R > ../../../../../../../artifacts/negative-control_read_generation_log.txt
16+
17+
rename s/fq/fastq/ *.fq
18+
19+
gzip *.fastq
20+
21+
popd

0 commit comments

Comments
 (0)