Skip to content

Commit 2736eb6

Browse files
Merge pull request #109 from RIVM-bioinformatics/IDSBIO-472-refactor-amplicon-covs
refactor: amplicon_covs.py
2 parents 28464ac + 8635472 commit 2736eb6

File tree

3 files changed

+701
-275
lines changed

3 files changed

+701
-275
lines changed

ViroConstrictor/workflow/scripts/__init__.py

Whitespace-only changes.
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
"""
2+
This module provides functions for parsing and validating command-line arguments for the amplicon_covs script.
3+
4+
The script requires the following input files:
5+
- A BED file with primers as given by AmpliGone.
6+
- A TSV file with coverages as given by TrueConsense.
7+
- A sample ID.
8+
- An output CSV file for average coverage per amplicon.
9+
10+
The module includes the following functions:
11+
- _common_file_checks: Perform common checks on a file to ensure it exists, is not empty, and is readable.
12+
- check_primers_file: Validate the primers file to ensure it meets the required criteria.
13+
- check_coverages_file: Validate the coverages file to ensure it meets the required criteria.
14+
- check_output_file: Validate the output file to ensure it meets the required criteria.
15+
- parse_args: Parse and validate command-line arguments for the script.
16+
17+
Each function raises an ArgumentTypeError if the input does not meet the required criteria.
18+
19+
Example usage:
20+
--------------
21+
To use this module, import it and call the parse_args function with the appropriate arguments.
22+
23+
from amplicon_arg_parser import parse_args
24+
25+
args = parse_args()
26+
print(args.primers)
27+
print(args.coverages)
28+
print(args.key)
29+
print(args.output)
30+
"""
31+
32+
import os
33+
from argparse import ArgumentParser, ArgumentTypeError, Namespace
34+
35+
36+
def _common_file_checks(filename: str) -> None:
    """
    Run the sanity checks shared by every input file of the script.

    The checks are evaluated in a fixed order and the first one that fails
    determines the error message:

    1. The path must point to an existing regular file.
    2. The file must have a size greater than zero bytes.
    3. The file must be readable by the current process.

    Parameters
    ----------
    filename : str
        Path of the file to inspect.

    Raises
    ------
    ArgumentTypeError
        If the path is not an existing file, the file is empty, or the file
        cannot be read.
    """
    # The chain is ordered on purpose: the size is only queried once the path
    # is known to be a file, so the size lookup never runs on a missing path.
    if not os.path.isfile(filename):
        complaint = f"File '{filename}' does not exist."
    elif os.path.getsize(filename) == 0:
        complaint = f"File '{filename}' is empty."
    elif not os.access(filename, os.R_OK):
        complaint = f"File '{filename}' is not readable."
    else:
        return

    raise ArgumentTypeError(complaint)
63+
64+
65+
def check_primers_file(filename: str) -> str:
    """
    Check that the given path is usable as the primers BED file.

    Only the path and its metadata are inspected; the file is never opened
    and its contents are not parsed.

    Checks performed, in order:

    - The generic file checks of _common_file_checks (exists, not empty,
      readable).
    - The filename ends in ``.bed`` (compared case-insensitively).

    Parameters
    ----------
    filename : str
        Path of the primers file.

    Returns
    -------
    str
        The same path that was passed in, once it passed every check.

    Raises
    ------
    ArgumentTypeError
        If one of the generic file checks fails or the filename does not end
        in ``.bed``.
    """
    _common_file_checks(filename)

    has_bed_suffix = filename.lower().endswith(".bed")
    if has_bed_suffix:
        return filename

    raise ArgumentTypeError("Primers file must be a BED file.")
94+
95+
96+
def check_coverages_file(filename: str) -> str:
    """
    Check that the given path is usable as the coverages TSV file.

    Only the path and its metadata are inspected; the file is never opened
    and its contents are not parsed.

    Checks performed, in order:

    - The generic file checks of _common_file_checks (exists, not empty,
      readable).
    - The filename ends in ``.tsv`` (compared case-insensitively).

    Parameters
    ----------
    filename : str
        Path of the coverages file.

    Returns
    -------
    str
        The same path that was passed in, once it passed every check.

    Raises
    ------
    ArgumentTypeError
        If one of the generic file checks fails or the filename does not end
        in ``.tsv``.
    """
    _common_file_checks(filename)

    has_tsv_suffix = filename.lower().endswith(".tsv")
    if has_tsv_suffix:
        return filename

    raise ArgumentTypeError("Coverages file must be a TSV file.")
126+
127+
128+
def check_output_file(filename: str) -> str:
    """
    Validate the output file path to ensure it meets the required criteria.
    Does not open or create the file.

    This function performs the following checks, in order:
    - The filename ends in .csv (compared case-insensitively).
    - The file does not already exist.
    - The directory that will contain the file is writable. A filename
      without a directory component (for example ``out.csv``) is resolved
      against the current working directory.

    Parameters
    ----------
    filename : str
        The path to the output file to be checked.

    Returns
    -------
    str
        The validated filename, unchanged.

    Raises
    ------
    ArgumentTypeError
        If the file does not have a .csv extension, already exists, or the
        target directory is not writable (this includes a target directory
        that does not exist).
    """
    if not filename.lower().endswith(".csv"):
        raise ArgumentTypeError("Output file must be a CSV file.")

    if os.path.isfile(filename):
        raise ArgumentTypeError(
            f"Output file '{filename}' already exists. Please choose another name."
        )

    # os.path.dirname() yields "" for a bare filename, and os.access("") is
    # always False, which would reject every output written to the current
    # directory. Fall back to the current directory in that case.
    target_dir = os.path.dirname(filename) or os.curdir
    if not os.access(target_dir, os.W_OK):
        raise ArgumentTypeError(f"Directory '{target_dir}' is not writable.")

    return filename
167+
168+
169+
def parse_args(args: list[str] | None = None) -> Namespace:
    """
    Build the argument parser for the amplicon_covs script and parse arguments.

    All four options are required. The file options are validated while
    parsing by the check function registered as their ``type``.

    Parameters
    ----------
    args : list[str], optional
        The command-line arguments to parse. When None, argparse reads them
        from sys.argv. Passing an explicit list is mainly useful for testing.

    Returns
    -------
    Namespace
        An argparse.Namespace with the attributes ``primers``, ``coverages``,
        ``key`` and ``output``.

    Arguments
    ---------
    --primers : File
        Input BED file with primers as given by AmpliGone.
        Validated by check_primers_file.

    --coverages : File
        Input file with coverages as given by TrueConsense.
        Validated by check_coverages_file.

    --key : String
        Sample ID.

    --output : File
        Output file with average coverage per amplicon.
        Validated by check_output_file.

    Raises
    ------
    SystemExit
        If a required argument is missing or a check function rejects its
        value. argparse catches the ArgumentTypeError raised by a check
        function, prints the message as a usage error and exits.
    """
    # One row per option: (flag, metavar, type callable, help text).
    option_table = (
        (
            "--primers",
            "File",
            check_primers_file,
            "input BED file with primers as given by AmpliGone",
        ),
        (
            "--coverages",
            "File",
            check_coverages_file,
            "Input file with coverages as given by TrueConsense",
        ),
        (
            "--key",
            "String",
            str,
            "Sample ID",
        ),
        (
            "--output",
            "File",
            check_output_file,
            "Output file with average coverage per amplicon",
        ),
    )

    cli = ArgumentParser()
    for flag, placeholder, converter, description in option_table:
        cli.add_argument(
            flag,
            metavar=placeholder,
            type=converter,
            help=description,
            required=True,
        )

    return cli.parse_args(args)

0 commit comments

Comments
 (0)