Skip to content

feat: add aria2c wrapper #2725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
dd418a5
Add aria2c wrapper
fgvieira Mar 7, 2024
8fb9bce
Add examples for other checksum types
fgvieira Mar 12, 2024
dfd58ab
Fix issue with file name matching
fgvieira Mar 12, 2024
4eb64cd
Remove debug code
fgvieira Mar 12, 2024
281c31b
Allow specification of hash function through parameters
fgvieira Mar 20, 2024
2dafecc
Add example with remote checksum file
fgvieira Mar 20, 2024
a1ce42c
Revert env
fgvieira Mar 20, 2024
d45d1b1
Disable test and reformat
fgvieira Mar 20, 2024
536843e
Disable test
fgvieira Mar 20, 2024
b203b8e
Use helper functions
fgvieira May 29, 2024
e17809a
Add missing parenthesis
fgvieira May 29, 2024
ca49cc0
Fix import
fgvieira May 29, 2024
35a7a58
Place functions in separate file
fgvieira May 29, 2024
ab0081e
Switch to pandas
fgvieira May 29, 2024
bc43342
Fix typo
fgvieira May 29, 2024
f868cbf
Update with new input function
fgvieira Mar 25, 2025
a7962d7
Fix typos
fgvieira Mar 25, 2025
b34abff
Add linting dependency
fgvieira Mar 25, 2025
8bfee64
Fix and enable test
fgvieira Mar 26, 2025
51e4a4c
Fix typo
fgvieira Mar 26, 2025
91362db
Add missing lambda function
fgvieira Mar 26, 2025
00cccf7
Remove function
fgvieira Mar 26, 2025
a2108d3
Fix missing path
fgvieira Mar 27, 2025
cb6df6e
Merge branch 'master' into aria2c
fgvieira Apr 24, 2025
c63e3bf
Merge branch 'master' into aria2c
fgvieira Apr 24, 2025
a67c66d
Update min snakemake version
fgvieira Apr 25, 2025
f3057ad
Clean-up
fgvieira Apr 25, 2025
76716d3
Rename rules
fgvieira Apr 25, 2025
24e2ed8
Add minimum snakemake version
fgvieira Apr 25, 2025
5538fe4
Fix rules
fgvieira Apr 25, 2025
500e02e
Fix tests
fgvieira May 5, 2025
0c8bf12
Merge branch 'master' into aria2c
fgvieira May 13, 2025
a05aa86
Merge branch 'master' into aria2c
fgvieira May 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/qc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
shell: bash -el {0}
run: |
conda config --set channel_priority strict
conda install -n snakemake -y snakemake-minimal snakemake
conda install -n snakemake -y snakemake snakemake-minimal snakemake-storage-plugin-http

- name: Fetch master
run: |
Expand Down
21 changes: 21 additions & 0 deletions test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,27 @@ def _run(wrapper, cmd, check_log=None, compare_results_with_expected=None):
return _run


def test_aria2c(run):
run(
"utils/aria2c",
[
"snakemake",
"--cores",
"2",
"--use-conda",
"-F",
"results/file.fas.gz",
"results/file.md5.fas.gz",
"results/file.md5file.fas.gz",
"results/file.sha1file.fas.gz",
"results/file.sha224file.fas.gz",
"results/file.sha256file.fas.gz",
"results/file.sha384file.fas.gz",
"results/file.sha512file.fas.gz",
"results/file.md5fileH.fas.gz",
],
)

def test_miller(run):
run(
"utils/miller",
Expand Down
22 changes: 22 additions & 0 deletions utils/aria2c/environment.linux-64.pin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
# created-by: conda 25.3.1
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.4.26-hbd8a1cb_0.conda#95db94f75ba080a22eb623590993167b
https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h767d61c_2.conda#06d02030237f4d5b3d9a7e7d348fe3c6
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h767d61c_2.conda#ef504d1acbd74b7cc6849ef8af47dd03
https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.5-hb9d3cd8_0.conda#f7f0d6cc2dc986d42ac2689ec88192be
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_2.conda#a2222a6ada71fb478682efe483ce0f92
https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.18-h4ce23a2_1.conda#e796ff8ddc598affdf7c173d6145f087
https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_0.conda#0e87378639676987af32fee53ba32258
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-h8f9b012_2.conda#a78c856b6dc6bf4ea8daeb9beaaa3fb0
https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.0-h7b32b05_0.conda#bb539841f2a3fde210f387d00ed4bb9d
https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.49.1-hee588c1_2.conda#962d6ac93c30b1dfc54c9cccafd1003e
https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_2.conda#c75da67f045c2627f59e6fcb5f4e3a9b
https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.13.7-h81593ed_1.conda#0619e8fc4c8025a908ea3a3422d3b775
https://conda.anaconda.org/conda-forge/linux-64/aria2-1.37.0-hbc8128a_2.conda#03b8874fa70df577f3eee53085d025cf
5 changes: 5 additions & 0 deletions utils/aria2c/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- nodefaults
dependencies:
- aria2 =1.37.0
15 changes: 15 additions & 0 deletions utils/aria2c/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: aria2
url: https://github.com/aria2/aria2/
description: >
aria2 is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink.
authors:
- Filipe G. Vieira
output:
- Path to downloaded file
params:
- url: URL to download from
- extra: Optional arguments for `aria2c`
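- type: expected checksum digest, given as a parameter named after the hash function, i.e. `type in ["sha1", "sha224", "sha256", "sha384", "sha512", "md5", "adler32"]` (e.g. `md5="<digest>"`); only the first such parameter is used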
notes: |
* Checksum input file only supported for single-file downloads
* Requires `snakemake >=9.3.1`
13 changes: 13 additions & 0 deletions utils/aria2c/test/GCF_000869925.1_ViralProj17181.md5
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
04c1275ff9c9d0fb595b7482a1d54438 ./annotation_hashes.txt
3413f40db67f8ea3b3a193c2fd663a6e ./GCF_000869925.1_ViralProj17181_assembly_report.txt
7d45362bb87770fac4716b60055fd72d ./GCF_000869925.1_ViralProj17181_assembly_stats.txt
3e2e82ee2bd94c18d92891211eafdf18 ./GCF_000869925.1_ViralProj17181_cds_from_genomic.fna.gz
e673fed3417f2f694b99f9cab1dad83e ./GCF_000869925.1_ViralProj17181_feature_count.txt
c5a292890d71b35ddd4b2366d06cdeb6 ./GCF_000869925.1_ViralProj17181_feature_table.txt.gz
42aa93c5bfdba6ac09a4822a4407b572 ./GCF_000869925.1_ViralProj17181_genomic.fna.gz
a2e1b9686fcbdd4c4059c0ee4c03851a ./GCF_000869925.1_ViralProj17181_genomic.gbff.gz
4276f72895f3436e6826424d1b908d20 ./GCF_000869925.1_ViralProj17181_genomic.gff.gz
81499b53906a29cebea4e472e8ffe842 ./GCF_000869925.1_ViralProj17181_genomic.gtf.gz
a3f486d02206a33e0d17f79d11807f0d ./GCF_000869925.1_ViralProj17181_protein.faa.gz
7c30a6c03dbc7402ce0872afb0ec9e94 ./GCF_000869925.1_ViralProj17181_protein.gpff.gz
cdbfa4db0d86580a730f0829b9ca2151 ./GCF_000869925.1_ViralProj17181_translated_cds.faa.gz
1 change: 1 addition & 0 deletions utils/aria2c/test/GCF_000869925.1_ViralProj17181.sha-1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
30004da6fc9f681d59c6c92cc99c9331622fb1f5 GCF_000869925.1_ViralProj17181_genomic.fna.gz
1 change: 1 addition & 0 deletions utils/aria2c/test/GCF_000869925.1_ViralProj17181.sha-224
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ac2d83823e2adc6b7b38e8dda0b7ff9c2536e62d96dec77e68cf0147 GCF_000869925.1_ViralProj17181_genomic.fna.gz
1 change: 1 addition & 0 deletions utils/aria2c/test/GCF_000869925.1_ViralProj17181.sha-256
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
337dad2a0047dde05c24d5ae83fe175f762212e2e50a9494e54f43f9ebd508bd GCF_000869925.1_ViralProj17181_genomic.fna.gz
1 change: 1 addition & 0 deletions utils/aria2c/test/GCF_000869925.1_ViralProj17181.sha-384
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0171910ac0f8c881e24ac5054c734eb295fe73c3a6ad0857eab9349446949a96c45095241ae8d63f25c16a4c1e37c30a GCF_000869925.1_ViralProj17181_genomic.fna.gz
1 change: 1 addition & 0 deletions utils/aria2c/test/GCF_000869925.1_ViralProj17181.sha-512
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
265fd46dea811ddebf549bb38fe7f5532308a6f97b62a93cccc6cbdf2fd09e0f3e928745a1b775889f43717593ae9afb9658821be684cdfe42006b9c6592ad41 GCF_000869925.1_ViralProj17181_genomic.fna.gz
195 changes: 195 additions & 0 deletions utils/aria2c/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@

# Plain download: fetch `params.url` into the single output file. No hash
# parameter is given, so no checksum option is passed to aria2c.
rule test_aria2:
output:
"results/file.fas.gz",
log:
"logs/aria2.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Download with an inline checksum: the expected MD5 digest is given directly
# as the `md5` parameter (no checksum input file).
rule test_aria2_md5:
output:
"results/file.md5.fas.gz",
log:
"logs/aria2.md5.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
md5="42aa93c5bfdba6ac09a4822a4407b572",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Download with the MD5 digest taken from a remote checksum file: the input is
# `md5checksums.txt` declared via `storage.http(...)`, and the `md5` parameter
# is produced by `parse_input` using the `extract_checksum` parser with
# `file=` naming the downloaded file.
rule test_aria2_md5fileH:
input:
storage.http(
"https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/md5checksums.txt"
),
output:
"results/file.md5fileH.fas.gz",
log:
"logs/aria2.md5fileH.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
md5=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Download with the MD5 digest taken from a local multi-entry checksum file:
# the `md5` parameter is produced by `parse_input` using the
# `extract_checksum` parser with `file=` naming the downloaded file.
rule test_aria2_md5file:
input:
checksum="GCF_000869925.1_ViralProj17181.md5",
output:
"results/file.md5file.fas.gz",
log:
"logs/aria2.md5file.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
md5=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Same as the local checksum-file example, but with a SHA-1 digest: the
# `sha1` parameter is filled from the `.sha-1` input file via `parse_input`
# and the `extract_checksum` parser.
rule test_aria2_sha1file:
input:
checksum="GCF_000869925.1_ViralProj17181.sha-1",
output:
"results/file.sha1file.fas.gz",
log:
"logs/aria2.sha1file.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
sha1=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Same as the local checksum-file example, but with a SHA-224 digest: the
# `sha224` parameter is filled from the `.sha-224` input file via
# `parse_input` and the `extract_checksum` parser.
rule test_aria2_sha224file:
input:
checksum="GCF_000869925.1_ViralProj17181.sha-224",
output:
"results/file.sha224file.fas.gz",
log:
"logs/aria2.sha224file.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
sha224=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Same as the local checksum-file example, but with a SHA-256 digest: the
# `sha256` parameter is filled from the `.sha-256` input file via
# `parse_input` and the `extract_checksum` parser.
rule test_aria2_sha256file:
input:
checksum="GCF_000869925.1_ViralProj17181.sha-256",
output:
"results/file.sha256file.fas.gz",
log:
"logs/aria2.sha256file.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
sha256=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Same as the local checksum-file example, but with a SHA-384 digest: the
# `sha384` parameter is filled from the `.sha-384` input file via
# `parse_input` and the `extract_checksum` parser.
rule test_aria2_sha384file:
input:
checksum="GCF_000869925.1_ViralProj17181.sha-384",
output:
"results/file.sha384file.fas.gz",
log:
"logs/aria2.sha384file.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
sha384=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"


# Same as the local checksum-file example, but with a SHA-512 digest: the
# `sha512` parameter is filled from the `.sha-512` input file via
# `parse_input` and the `extract_checksum` parser.
rule test_aria2_sha512file:
input:
checksum="GCF_000869925.1_ViralProj17181.sha-512",
output:
"results/file.sha512file.fas.gz",
log:
"logs/aria2.sha512file.log",
params:
url="https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/869/925/GCF_000869925.1_ViralProj17181/GCF_000869925.1_ViralProj17181_genomic.fna.gz",
extra="--file-allocation none --retry-wait 5 --console-log-level warn --log-level notice",
sha512=parse_input(
input[0],
parser=extract_checksum,
file="GCF_000869925.1_ViralProj17181_genomic.fna.gz",
),
threads: 2
resources:
mem_mb=1024,
runtime=30,
wrapper:
"master/utils/aria2c"
32 changes: 32 additions & 0 deletions utils/aria2c/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
__author__ = "Filipe G. Vieira"
__copyright__ = "Copyright 2023, Filipe G. Vieira"
__license__ = "MIT"

from snakemake.shell import shell

# Free-form extra arguments forwarded verbatim to `aria2c`.
extra = snakemake.params.get("extra", "")

# Supported checksum parameter names mapped to the hash type names used in
# `aria2c --checksum <type>=<digest>` (the SHA variants take a dash there).
HASH_TYPES = {
    "sha1": "sha-1",
    "sha224": "sha-224",
    "sha256": "sha-256",
    "sha384": "sha-384",
    "sha512": "sha-512",
    "md5": "md5",
    "adler32": "adler32",
}

# Only one checksum is passed on: the first named parameter whose name is a
# supported hash function wins, and its value is used as the digest.
for param_name, digest in snakemake.params.items():
    hash_type = HASH_TYPES.get(param_name)
    if hash_type is not None:
        extra += f" --checksum {hash_type}={digest}"
        break

# The `:q` flag shell-quotes the interpolated values, so URLs or paths that
# contain shell metacharacters (`&`, `?`, `;`, spaces, ...) are passed to
# aria2c as single arguments instead of being interpreted by the shell.
# `extra` is intentionally left unquoted: it holds several separate options.
# stdout is discarded; aria2c writes its own log file via `--log`.
shell(
    "aria2c"
    " --max-concurrent-downloads {snakemake.threads}"
    " {extra}"
    " --log {snakemake.log:q}"
    " --out {snakemake.output[0]:q}"
    " {snakemake.params.url:q}"
    " > /dev/null"
)
Loading