Skip to content

Commit 0eb1211

Browse files
agudys and aziele authored
Several updates
* Remove --bin* options in vclust.py
* Submodules updated to latest revisions.

Co-authored-by: aziele <[email protected]>
1 parent c5058fc commit 0eb1211

File tree

11 files changed

+36
-67
lines changed

11 files changed

+36
-67
lines changed

.github/workflows/deploy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252

5353
steps:
5454
- name: make
55-
run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true
55+
run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true
5656
- name: tar artifacts
5757
run: |
5858
mkdir ${DIR}

.github/workflows/main.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,24 @@ jobs:
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
machine: [ubuntu-latest, macOS-12]
18+
machine: [ubuntu-latest]
19+
gmake_install_command: ['gmake --version']
1920
compiler: [12]
21+
include:
22+
- {machine: macOS-13, gmake_install_command: 'brew install make && gmake --version', compiler: 12}
2023
runs-on: ['${{ matrix.machine }}']
2124

2225
steps:
2326
- uses: actions/checkout@v4
2427
with:
2528
submodules: recursive
29+
30+
- name: install gmake
31+
run: ${{ matrix.gmake_install_command }}
2632

2733
- name: make
2834
run: |
29-
make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}}
35+
gmake -j CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} STATIC_LINK=true
3036
- name: tar artifacts
3137
run: tar -cvf vclust.tar ./vclust.py ./test.py ./example ./bin/kmer-db ./bin/lz-ani ./bin/clusty ./bin/multi-fasta-split
3238

@@ -42,7 +48,7 @@ jobs:
4248
strategy:
4349
fail-fast: false
4450
matrix:
45-
machine: [ubuntu-latest, macOS-12]
51+
machine: [ubuntu-latest, macOS-13]
4652

4753
runs-on: ['${{ matrix.machine }}']
4854

.github/workflows/self-hosted.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050

5151
steps:
5252
- name: make
53-
run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true
53+
run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true
5454
- name: print info
5555
run: python3 vclust.py info
5656

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
[![Build and tests](../../workflows/Build%20and%20tests/badge.svg)](../../actions/workflows/main.yml)
66
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
77

8+
9+
[![PyPI - Total Downloads](https://static.pepy.tech/personalized-badge/vclust?period=total&units=abbreviation&left_color=grey&right_color=green&left_text=PyPI%20total%20downloads)](https://www.pepy.tech/projects/vclust)
810
[![PyPI - Downloads](https://img.shields.io/pypi/dm/vclust?label=PyPI%20downloads)](https://pypi.org/project/vclust/)
911
[![GitHub downloads](https://img.shields.io/github/downloads/refresh-bio/vclust/total.svg?style=flag&label=GitHub%20downloads)](https://github.com/refresh-bio/vclust/releases)
1012
[![Bioconda downloads](https://img.shields.io/conda/dn/bioconda/vclust.svg?style=flag&label=Bioconda%20downloads)](https://anaconda.org/bioconda/vclust)

makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ prep:
1414
cd 3rd_party/clusty && $(MAKE) -j
1515
cd 3rd_party/ref-utils && $(MAKE) -j
1616
mkdir -p bin
17-
cp 3rd_party/kmer-db/kmer-db ./bin/
18-
cp 3rd_party/lz-ani/lz-ani ./bin/
19-
cp 3rd_party/clusty/clusty ./bin/
20-
cp 3rd_party/ref-utils/multi-fasta-split/multi-fasta-split ./bin/
17+
cp 3rd_party/kmer-db/bin/kmer-db ./bin/
18+
cp 3rd_party/lz-ani/bin/lz-ani ./bin/
19+
cp 3rd_party/clusty/bin/clusty ./bin/
20+
cp 3rd_party/ref-utils/bin/multi-fasta-split ./bin/
2121

2222
clean:
2323
cd 3rd_party/kmer-db && $(MAKE) clean

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ py-modules = ["vclust"]
99
where = ["./"]
1010

1111
[project]
12-
name = "vclust-test"
12+
name = "vclust"
1313
description = """Fast and accurate tool for calculating \
1414
Average Nucleotide Identity (ANI) and clustering virus \
1515
genomes and metagenomic contigs"""

vclust.py

Lines changed: 14 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import typing
1717
import uuid
1818

19-
__version__ = '1.2.8'
19+
__version__ = '1.2.9'
2020

2121
DEFAULT_THREAD_COUNT = min(multiprocessing.cpu_count(), 64)
2222

@@ -95,7 +95,7 @@ def ranged_float_type(value):
9595
metavar='<file>',
9696
type=input_path_type,
9797
dest='input_path',
98-
help='Input FASTA file or directory with FASTA files',
98+
help='Input FASTA file or directory of files (gzipped or uncompressed)',
9999
required=True
100100
)
101101
prefilter_required.add_argument(
@@ -119,16 +119,15 @@ def ranged_float_type(value):
119119
metavar="<int>",
120120
type=int,
121121
default=20,
122-
help='Filter genome pairs based on minimum number of shared k-mers '
123-
'[%(default)s]'
122+
help='Minimum number of shared k-mers between two genomes [%(default)s]'
124123
)
125124
prefilter_parser.add_argument(
126125
'--min-ident',
127126
metavar="<float>",
128127
type=ranged_float_type,
129128
default=0.7,
130-
help='Filter genome pairs based on minimum sequence identity of '
131-
'the shorter sequence (0-1) [%(default)s]'
129+
help='Minimum sequence identity (0-1) between two genomes. Calculated '
130+
'based on the shorter sequence [%(default)s]'
132131
)
133132
prefilter_parser.add_argument(
134133
'--batch-size',
@@ -144,9 +143,9 @@ def ranged_float_type(value):
144143
metavar="<float>",
145144
type=ranged_float_type,
146145
default=1.0,
147-
help='Fraction of k-mers to analyze for each genome (0-1). A lower '
148-
'value reduces RAM usage and speeds up processing (affects sensitivity) '
149-
'[%(default)s]'
146+
help='Fraction of k-mers to analyze in each genome (0-1). A lower '
147+
'value reduces RAM usage and speeds up processing. By default, all '
148+
'k-mers [%(default)s]'
150149
)
151150
prefilter_parser.add_argument(
152151
'--max-seqs',
@@ -164,22 +163,6 @@ def ranged_float_type(value):
164163
action="store_true",
165164
help='Keep temporary Kmer-db files [%(default)s]'
166165
)
167-
prefilter_parser.add_argument(
168-
'--bin',
169-
metavar='<file>',
170-
type=pathlib.Path,
171-
dest="bin_kmerdb",
172-
default=f'{BIN_KMERDB}',
173-
help='Path to the Kmer-db binary [%(default)s]'
174-
)
175-
prefilter_parser.add_argument(
176-
'--bin-fasta',
177-
metavar='<file>',
178-
type=pathlib.Path,
179-
dest="bin_fastasplit",
180-
default=f'{BIN_FASTASPLIT}',
181-
help='Path to the multi-fasta-split binary [%(default)s]'
182-
)
183166
prefilter_parser.add_argument(
184167
'-t', '--threads',
185168
metavar="<int>",
@@ -214,7 +197,7 @@ def ranged_float_type(value):
214197
metavar='<file>',
215198
type=input_path_type,
216199
dest='input_path',
217-
help='Input FASTA file or directory with FASTA files',
200+
help='Input FASTA file or directory of files (gzipped or uncompressed)',
218201
required=True
219202
)
220203
align_required.add_argument(
@@ -298,14 +281,6 @@ def ranged_float_type(value):
298281
help='Min. reference coverage (aligned fraction) to output (0-1) '
299282
'[%(default)s]'
300283
)
301-
align_parser.add_argument(
302-
'--bin',
303-
metavar='<file>',
304-
type=pathlib.Path,
305-
dest='bin_lzani',
306-
default=f'{BIN_LZANI}',
307-
help='Path to the LZ-ANI binary [%(default)s]'
308-
)
309284
align_parser.add_argument(
310285
'--mal',
311286
metavar='<int>',
@@ -528,14 +503,6 @@ def ranged_float_type(value):
528503
default=2,
529504
help='Number of iterations for the Leiden algorithm [%(default)s]'
530505
)
531-
cluster_parser.add_argument(
532-
'--bin',
533-
metavar='<file>',
534-
type=pathlib.Path,
535-
dest="bin_clusty",
536-
default=f'{BIN_CLUSTY}',
537-
help='Path to the Clusty binary [%(default)s]'
538-
)
539506
cluster_parser.add_argument(
540507
'-v', '--verbose',
541508
action="store_true",
@@ -1215,7 +1182,7 @@ def vclust_info() -> None:
12151182
output_lines.append(f'{RED}Status: error{RESET}')
12161183
output_lines.extend(f" - {name}: {error}" for name, error in errors)
12171184
else:
1218-
output_lines.append(f'{GREEN}Status: ok{RESET}')
1185+
output_lines.append(f'{GREEN}Status: ready{RESET}')
12191186

12201187
# Output the complete information.
12211188
print('\n'.join(output_lines))
@@ -1263,7 +1230,7 @@ def main():
12631230
vclust_info()
12641231
# Prefilter
12651232
elif args.command == 'prefilter':
1266-
args.bin_kmerdb = validate_binary(args.bin_kmerdb)
1233+
validate_binary(BIN_KMERDB)
12671234
args = validate_args_prefilter(args, parser)
12681235
args = validate_args_fasta_input(args, parser)
12691236

@@ -1278,13 +1245,12 @@ def main():
12781245
else:
12791246
# Split multi-fasta file.
12801247
if args.batch_size:
1281-
args.bin_fastasplit = validate_binary(args.bin_fastasplit)
1248+
validate_binary(BIN_FASTASPLIT)
12821249
cmd = cmd_fastasplit(
12831250
input_fasta=args.input_path,
12841251
out_dir=out_dir,
12851252
n=args.batch_size,
12861253
verbose=args.verbose,
1287-
bin_path=args.bin_fastasplit,
12881254
)
12891255
p = run(cmd, args.verbose, logger)
12901256
for f in out_dir.glob('part_*'):
@@ -1311,7 +1277,6 @@ def main():
13111277
kmer_size=args.k,
13121278
kmers_fraction=args.kmers_fraction,
13131279
num_threads=args.num_threads,
1314-
bin_path=args.bin_kmerdb,
13151280
)
13161281
p = run(cmd, args.verbose, logger)
13171282
db_paths.append(db_path)
@@ -1333,7 +1298,6 @@ def main():
13331298
min_ident=args.min_ident,
13341299
max_seqs=args.max_seqs,
13351300
num_threads=args.num_threads,
1336-
bin_path=args.bin_kmerdb,
13371301
)
13381302
p = run(cmd, args.verbose, logger)
13391303

@@ -1342,7 +1306,6 @@ def main():
13421306
outfile_distance=args.output_path,
13431307
min_ident=args.min_ident,
13441308
num_threads=args.num_threads,
1345-
bin_path=args.bin_kmerdb,
13461309
)
13471310
p = run(cmd, args.verbose, logger)
13481311

@@ -1353,7 +1316,7 @@ def main():
13531316

13541317
# Align
13551318
elif args.command == 'align':
1356-
args.bin_lzani = validate_binary(args.bin_lzani)
1319+
validate_binary(BIN_LZANI)
13571320
args = validate_args_fasta_input(args, parser)
13581321

13591322
out_dir = args.output_path.parent / get_uuid()
@@ -1386,7 +1349,6 @@ def main():
13861349
ar=args.ar,
13871350
num_threads=args.num_threads,
13881351
verbose=args.verbose,
1389-
bin_path=args.bin_lzani,
13901352
)
13911353
p = run(cmd, args.verbose, logger)
13921354

@@ -1396,7 +1358,7 @@ def main():
13961358

13971359
# Cluster
13981360
elif args.command == 'cluster':
1399-
args.bin_clusty = validate_binary(args.bin_clusty)
1361+
validate_binary(BIN_CLUSTY)
14001362
args = validate_args_cluster(args, parser)
14011363

14021364
cmd = cmd_clusty(
@@ -1416,7 +1378,6 @@ def main():
14161378
leiden_resolution=args.leiden_resolution,
14171379
leiden_beta=args.leiden_beta,
14181380
leiden_iterations=args.leiden_iterations,
1419-
bin_path=args.bin_clusty,
14201381
)
14211382
p = run(cmd, args.verbose, logger)
14221383

0 commit comments

Comments
 (0)