Skip to content

Commit e60a35a

Browse files
agudys and aziele authored
Installation through PyPi, Set default --min-kmers to 20
Co-authored-by: aziele <[email protected]>
1 parent 928b415 commit e60a35a

File tree

8 files changed

+170
-36
lines changed

8 files changed

+170
-36
lines changed

MANIFEST.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
include README.md
2+
include LICENSE
3+
recursive-include bin *
4+
recursive-exclude 3rd_party *

README.md

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
# <img src="./images/logo.svg" alt="Vclust logo" /> Vclust
22

3-
![version](https://img.shields.io/badge/version-1.2.7-blue.svg)
4-
[![GitHub downloads](https://img.shields.io/github/downloads/refresh-bio/vclust/total.svg?style=flag&label=GitHub%20downloads)](https://github.com/refresh-bio/vclust/releases)
5-
[![Bioconda downloads](https://img.shields.io/conda/dn/bioconda/vclust.svg?style=flag&label=Bioconda%20downloads)](https://anaconda.org/bioconda/vclust)
3+
![version](https://img.shields.io/badge/version-1.2.8-blue.svg)
4+
![PyPI - Version](https://img.shields.io/pypi/v/vclust?label=PyPI%20version&color=blue)
65
[![Build and tests](../../workflows/Build%20and%20tests/badge.svg)](../../actions/workflows/main.yml)
76
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
87

8+
![PyPI - Downloads](https://img.shields.io/pypi/dm/vclust?label=PyPI%20downloads)
9+
[![GitHub downloads](https://img.shields.io/github/downloads/refresh-bio/vclust/total.svg?style=flag&label=GitHub%20downloads)](https://github.com/refresh-bio/vclust/releases)
10+
[![Bioconda downloads](https://img.shields.io/conda/dn/bioconda/vclust.svg?style=flag&label=Bioconda%20downloads)](https://anaconda.org/bioconda/vclust)
11+
912
![x86-64](https://img.shields.io/static/v1?label=%E2%80%8B&message=x86-64&color=yellow&logo=PCGamingWiki&logoColor=white)
1013
![ARM](https://img.shields.io/static/v1?label=%E2%80%8B&message=ARM&color=yellow&logo=Raspberry%20Pi&logoColor=white)
1114
![Apple M](https://img.shields.io/static/v1?label=%E2%80%8B&message=Apple%20M&color=yellow&logo=Apple&logoColor=white)
@@ -51,18 +54,17 @@ For datasets containing up to 1000 viral genomes, Vclust is available at [http:/
5154
## Quick start
5255

5356
```bash
54-
# Clone repository and build Vclust
55-
git clone --recurse-submodules https://github.com/refresh-bio/vclust
56-
cd vclust && make -j
57+
# Install Vclust (requires Python >= 3.7)
58+
pip install vclust
5759

5860
# Prefilter similar genome sequence pairs before conducting pairwise alignments.
59-
./vclust.py prefilter -i example/multifasta.fna -o fltr.txt
61+
vclust prefilter -i example/multifasta.fna -o fltr.txt
6062

6163
# Align similar genome sequence pairs and calculate pairwise ANI measures.
62-
./vclust.py align -i example/multifasta.fna -o ani.tsv --filter fltr.txt
64+
vclust align -i example/multifasta.fna -o ani.tsv --filter fltr.txt
6365

6466
# Cluster genome sequences based on given ANI measure and minimum threshold.
65-
./vclust.py cluster -i ani.tsv -o clusters.tsv --ids ani.ids.tsv --metric ani --ani 0.95
67+
vclust cluster -i ani.tsv -o clusters.tsv --ids ani.ids.tsv --metric ani --ani 0.95
6668
```
6769
## Documentation
6870

pyproject.toml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
[build-system]
2+
requires = ["setuptools>=61.0.0", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[tool.setuptools]
6+
py-modules = ["vclust"]
7+
8+
[tool.setuptools.packages.find]
9+
where = ["./"]
10+
11+
[project]
12+
name = "vclust"
13+
description = """Fast and accurate tool for calculating \
14+
Average Nucleotide Identity (ANI) and clustering virus \
15+
genomes and metagenomic contigs"""
16+
readme = "README.md"
17+
license = { file = "LICENSE" }
18+
authors = [
19+
{ name = "Andrzej Zielezinski", email = "[email protected]" },
20+
{ name = "Adam Gudyś", email = "[email protected]" },
21+
{ name = "Sebastian Deorowicz", email = "[email protected]" },
22+
]
23+
requires-python = ">=3.7"
24+
dynamic = ["version"]
25+
classifiers = [
26+
"Development Status :: 5 - Production/Stable",
27+
"Natural Language :: English",
28+
"Intended Audience :: Developers",
29+
"Intended Audience :: Science/Research",
30+
"Topic :: Scientific/Engineering",
31+
"Topic :: Scientific/Engineering :: Bio-Informatics",
32+
"Operating System :: POSIX :: Linux",
33+
"Operating System :: MacOS",
34+
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
35+
"Programming Language :: Python :: 3",
36+
"Programming Language :: Python :: 3.7",
37+
"Programming Language :: Python :: 3.8",
38+
"Programming Language :: Python :: 3.9",
39+
"Programming Language :: Python :: 3.10",
40+
"Programming Language :: Python :: 3.11",
41+
"Programming Language :: Python :: 3.12",
42+
]
43+
44+
[tool.setuptools.dynamic]
45+
version = { attr = "vclust.__version__" }
46+
47+
[tool.setuptools.package-data]
48+
"*" = ["bin/*"]
49+
50+
[project.scripts]
51+
vclust = "vclust:main"
52+
53+
[project.urls]
54+
Homepage = "https://github.com/refresh-bio/vclust"
55+
Documentation = "https://github.com/refresh-bio/vclust/wiki"
56+
Website = "http://vclust.org"

vclust.py

Lines changed: 95 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
import multiprocessing
1010
import os
1111
import pathlib
12+
import platform
1213
import shutil
1314
import subprocess
1415
import sys
1516
import typing
1617
import uuid
1718

18-
__version__ = '1.2.7'
19+
__version__ = '1.2.8'
1920

2021
DEFAULT_THREAD_COUNT = min(multiprocessing.cpu_count(), 64)
2122

@@ -59,7 +60,7 @@ def ranged_float_type(value):
5960
return f
6061

6162
parser = argparse.ArgumentParser(
62-
description=f'%(prog)s v.{__version__}: calculate ANI and cluster '
63+
description=f'%(prog)s v{__version__}: calculate ANI and cluster '
6364
'virus (meta)genome sequences',
6465
add_help=False,
6566
)
@@ -117,7 +118,7 @@ def ranged_float_type(value):
117118
'--min-kmers',
118119
metavar="<int>",
119120
type=int,
120-
default=10,
121+
default=20,
121122
help='Filter genome pairs based on minimum number of shared k-mers '
122123
'[%(default)s]'
123124
)
@@ -531,7 +532,7 @@ def ranged_float_type(value):
531532
'--bin',
532533
metavar='<file>',
533534
type=pathlib.Path,
534-
dest="BIN_CLUSTY",
535+
dest="bin_clusty",
535536
default=f'{BIN_CLUSTY}',
536537
help='Path to the Clusty binary [%(default)s]'
537538
)
@@ -603,8 +604,8 @@ def get_uuid() -> str:
603604
return f'vclust-{str(uuid.uuid4().hex)[:10]}'
604605

605606

606-
def validate_binary(bin_path: pathlib.Path) -> pathlib.Path:
607-
"""Validates the existence and executability of a binary file.
607+
def _validate_binary(bin_path: pathlib.Path) -> pathlib.Path:
608+
"""Validates the presence and executability of a binary file.
608609
609610
This function checks if the provided path points to an existing binary file
610611
and if it is executable. It also attempts to run the binary to ensure it
@@ -618,16 +619,16 @@ def validate_binary(bin_path: pathlib.Path) -> pathlib.Path:
618619
pathlib.Path: The resolved path to the binary file.
619620
620621
Raises:
621-
SystemExit: If the binary file does not exist, is not executable, or
622-
if running the binary encounters an error.
622+
RuntimeError: If the binary file does not exist, is not executable,
623+
or if running the binary encounters an error.
623624
"""
624625
bin_path = bin_path.resolve()
625626

626627
if not bin_path.exists():
627-
exit(f'error: Executable not found: {bin_path}')
628+
raise RuntimeError(f'File not found: {bin_path}')
628629

629630
if not bin_path.is_file() or not os.access(bin_path, os.X_OK):
630-
exit(f'error: Binary file not executable: {bin_path}')
631+
raise RuntimeError(f'Binary file not executable: {bin_path}')
631632

632633
try:
633634
subprocess.run(
@@ -638,14 +639,21 @@ def validate_binary(bin_path: pathlib.Path) -> pathlib.Path:
638639
check=True
639640
)
640641
except subprocess.CalledProcessError as e:
641-
exit(f'error: Running {bin_path} failed with message: {e.stderr}')
642+
raise RuntimeError(f'Running {bin_path} failed with message: {e.stderr}')
642643
except OSError as e:
643-
exit(f'error: OSError in {bin_path} - {e}')
644+
raise RuntimeError(f'OSError in {bin_path} - {e}')
644645
except Exception as e:
645-
exit(f'error: Unexpected error in binary {bin_path} - {e}')
646+
raise RuntimeError(f'Unexpected error in binary {bin_path} - {e}')
646647
return bin_path
647648

648649

650+
def validate_binary(bin_path: pathlib.Path) -> pathlib.Path:
    """Validate a binary for command-line use, exiting on failure.

    Delegates the actual checks to ``_validate_binary`` and converts any
    ``RuntimeError`` it raises into a process exit carrying an
    ``error: ...`` message.

    Args:
        bin_path: Path to the binary to validate.

    Returns:
        pathlib.Path: The value returned by ``_validate_binary``.

    Raises:
        SystemExit: If ``_validate_binary`` raises ``RuntimeError``.
    """
    try:
        checked_path = _validate_binary(bin_path)
    except RuntimeError as err:
        sys.exit(f'error: {err}')
    return checked_path
655+
656+
649657
def validate_args_fasta_input(args, parser) -> argparse.Namespace:
650658
"""Validates the arguments for FASTA input."""
651659
args.is_multifasta = True
@@ -732,13 +740,13 @@ def run(
732740
)
733741
except subprocess.CalledProcessError as e:
734742
logger.error(f'Process {" ".join(cmd)} failed with message: {e.stderr}')
735-
exit(1)
743+
sys.exit(1)
736744
except OSError as e:
737745
logger.error(f'OSError: {" ".join(cmd)} failed with message: {e}')
738-
exit(1)
746+
sys.exit(1)
739747
except Exception as e:
740748
logger.error(f'Unexpected: {" ".join(cmd)} failed with message: {e}')
741-
exit(1)
749+
sys.exit(1)
742750
logger.info(f'Done')
743751
return process
744752

@@ -1145,11 +1153,75 @@ def cmd_clusty(
11451153
return cmd
11461154

11471155

1148-
def vclust_info():
1149-
print(f'Vclust {__version__}')
1150-
for bin_path in [BIN_KMERDB, BIN_FASTASPLIT, BIN_LZANI, BIN_CLUSTY]:
1151-
validate_binary(bin_path)
1152-
print(f'{bin_path.name:<20} ok')
1156+
def vclust_info() -> None:
    """Print the Vclust version, install locations, and dependency status.

    Each required binary is checked with ``_validate_binary`` and then asked
    for its version string. Failures are collected instead of raised, so the
    report always lists every dependency before the function exits.

    ANSI colors are emitted only when standard output is a terminal, which
    keeps redirected or piped output free of escape sequences.

    Returns:
        None

    Raises:
        SystemExit: With status 1 if any binary fails validation or fails
            to report its version.
    """
    # ANSI color codes; left empty when stdout is not an interactive terminal
    # so that log files and pipes do not receive raw escape sequences.
    use_color = sys.stdout.isatty()
    GREEN = '\033[92m' if use_color else ''
    RED = '\033[91m' if use_color else ''
    RESET = '\033[0m' if use_color else ''

    binaries = {
        'Kmer-db': BIN_KMERDB,
        'LZ-ANI': BIN_LZANI,
        'Clusty': BIN_CLUSTY,
        'multi-fasta-split': BIN_FASTASPLIT,
    }

    output_lines = [
        f'Vclust version {__version__} (Python {platform.python_version()})',
        '',
        'Installed at:',
        f' {pathlib.Path(__file__).resolve()}',
        f' {BIN_DIR.resolve()}',
        '',
        'Binary dependencies:',
    ]

    errors = []  # (name, exception) pairs collected during binary checks.

    # Check each binary's presence and version.
    for name, path in binaries.items():
        # Kmer-db is queried with a single-dash flag; the others use --version.
        version_flag = '-version' if name == 'Kmer-db' else '--version'
        try:
            _validate_binary(path)
            # The version text is taken from the process's stderr stream.
            version = subprocess.run(
                [str(path), version_flag],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=True
            ).stderr.strip()
            output_lines.append(f' {name:<20} v{version:<10}')
        except Exception as e:
            # Deliberately broad: any failure is reported in the summary
            # rather than aborting the remaining checks.
            output_lines.append(f' {name:<20} [error]')
            errors.append((name, e))

    # Append the status summary based on any encountered errors.
    output_lines.append('')

    if errors:
        output_lines.append(f'{RED}Status: error{RESET}')
        output_lines.extend(f" - {name}: {error}" for name, error in errors)
    else:
        output_lines.append(f'{GREEN}Status: ok{RESET}')

    # Output the complete information.
    print('\n'.join(output_lines))

    if errors:
        sys.exit(1)
11531225

11541226

11551227
class CustomHelpFormatter(argparse.HelpFormatter):
@@ -1324,7 +1396,7 @@ def main():
13241396

13251397
# Cluster
13261398
elif args.command == 'cluster':
1327-
args.BIN_CLUSTY = validate_binary(args.BIN_CLUSTY)
1399+
args.bin_clusty = validate_binary(args.bin_clusty)
13281400
args = validate_args_cluster(args, parser)
13291401

13301402
cmd = cmd_clusty(
@@ -1344,7 +1416,7 @@ def main():
13441416
leiden_resolution=args.leiden_resolution,
13451417
leiden_beta=args.leiden_beta,
13461418
leiden_iterations=args.leiden_iterations,
1347-
bin_path=args.BIN_CLUSTY,
1419+
bin_path=args.bin_clusty,
13481420
)
13491421
p = run(cmd, args.verbose, logger)
13501422

0 commit comments

Comments
 (0)