Skip to content

Commit a15f1b5

Browse files
authored
Merge pull request #14 from ressy/release-0.0.6
Release 0.0.6
2 parents d940e15 + 1d03e9d commit a15f1b5

File tree

8 files changed

+194
-153
lines changed

8 files changed

+194
-153
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# Changelog
22

3+
## 0.0.6 - 2021-03-12
4+
5+
### Changed
6+
7+
* Refactored top-level code into separate modules ([#13])
8+
9+
### Fixed
10+
11+
* Avoid trying to import dependencies at install time ([#13])
12+
13+
[#13]: https://github.com/ressy/vquest/pull/13
14+
315
## 0.0.5 - 2021-03-10
416

517
### Added

test_vquest/test_vquest.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
from contextlib import redirect_stdout, redirect_stderr
1616
from pathlib import Path
1717
from io import StringIO
18-
import vquest
19-
import vquest.__main__
18+
from vquest.request import vquest
19+
from vquest.util import VquestError
20+
from vquest.__main__ import main
2021

2122
class TestVquestBase(unittest.TestCase):
2223
"""Base class for supporting code. No actual tests here."""
@@ -85,7 +86,7 @@ def test_vquest(self):
8586
GAGCCTCTTGGATAGTGACGGGTACACCTGTTTGGACTGGTACCTGCAGAAGCCAGGCCAGTCTCCACAGCTCCTGATCT
8687
ATGAGGTTTCCAACCGGGTCTCTGGAGTCCCTGACAGGTTCAGTGGCAGTGGGTCAGNCACTGATTTCACACTGAAAATC
8788
AGCCGGGTGGAAGCTGAGGATGTTGGGGTGTATTACTGTATGCAAAGTATAGAGTTTCCTCC"""}
88-
result = vquest.vquest(config)
89+
result = vquest(config)
8990
# requests.post should have been called once, with this input.
9091
self.assertEqual(self.post.call_count, 1)
9192
self.assertEqual(
@@ -145,7 +146,7 @@ def test_vquest_main(self):
145146
config_path = str((self.path / "config.yml").resolve())
146147
with tempfile.TemporaryDirectory() as tempdir:
147148
os.chdir(tempdir)
148-
vquest.__main__.main([config_path])
149+
main([config_path])
149150
self.assertTrue(Path("vquest_airr.tsv").exists())
150151
self.assertTrue(Path("Parameters.txt").exists())
151152

@@ -164,7 +165,7 @@ def test_vquest_main_alignment(self):
164165
with redirect_stdout(out), redirect_stderr(err):
165166
with tempfile.TemporaryDirectory() as tempdir:
166167
os.chdir(tempdir)
167-
vquest.__main__.main([config_path, "--align"])
168+
main([config_path, "--align"])
168169
self.assertFalse(Path("vquest_airr.tsv").exists())
169170
self.assertFalse(Path("Parameters.txt").exists())
170171
self.assertEqual(out.getvalue(), expected)
@@ -177,7 +178,7 @@ class TestVquestEmpty(TestVquestSimple):
177178
def test_vquest(self):
178179
"""Test that an empty config fails as expected."""
179180
with self.assertRaises(ValueError):
180-
vquest.vquest({})
181+
vquest({})
181182

182183
def test_vquest_main(self):
183184
"""Test how the command-line interface handles no arguments.
@@ -191,7 +192,7 @@ def test_vquest_main(self):
191192
with self.assertRaises(SystemExit):
192193
with tempfile.TemporaryDirectory() as tempdir:
193194
os.chdir(tempdir)
194-
vquest.__main__.main([])
195+
main([])
195196
self.assertNotEqual(out.getvalue(), "")
196197
self.assertEqual(err.getvalue(), "")
197198

@@ -206,7 +207,7 @@ def test_vquest_main_alignment(self):
206207
with self.assertRaises(SystemExit):
207208
with tempfile.TemporaryDirectory() as tempdir:
208209
os.chdir(tempdir)
209-
vquest.__main__.main(["--align"])
210+
main(["--align"])
210211
self.assertNotEqual(out.getvalue(), "")
211212
self.assertEqual(err.getvalue(), "")
212213

@@ -233,7 +234,7 @@ def test_vquest(self):
233234
GAGCCTCTTGGATAGTGACGGGTACACCTGTTTGGACTGGTACCTGCAGAAGCCAGGCCAGTCTCCACAGCTCCTGATCT
234235
ATGAGGTTTCCAACCGGGTCTCTGGAGTCCCTGACAGGTTCAGTGGCAGTGGGTCAGNCACTGATTTCACACTGAAAATC
235236
AGCCGGGTGGAAGCTGAGGATGTTGGGGTGTATTACTGTATGCAAAGTATAGAGTTTCCTCC"""}
236-
result = vquest.vquest(config)
237+
result = vquest(config)
237238
parameters = [("Date", "Wed Dec 02 19:18:14 CET 2020"),
238239
("IMGT/V-QUEST program version", "3.5.21"),
239240
("IMGT/V-QUEST reference directory release", "202049-2"),
@@ -272,7 +273,7 @@ def test_vquest_main(self):
272273
config_path = str((self.path / "config.yml").resolve())
273274
with tempfile.TemporaryDirectory() as tempdir:
274275
os.chdir(tempdir)
275-
vquest.__main__.main(["--imgtrefdirset", "1", config_path])
276+
main(["--imgtrefdirset", "1", config_path])
276277
self.assertTrue(Path("vquest_airr.tsv").exists())
277278
self.assertTrue(Path("Parameters.txt").exists())
278279

@@ -296,8 +297,8 @@ def test_vquest(self):
296297
GAGCCTCTTGGATAGTGACGGGTACACCTGTTTGGACTGGTACCTGCAGAAGCCAGGCCAGTCTCCACAGCTCCTGATCT
297298
ATGAGGTTTCCAACCGGGTCTCTGGAGTCCCTGACAGGTTCAGTGGCAGTGGGTCAGNCACTGATTTCACACTGAAAATC
298299
AGCCGGGTGGAAGCTGAGGATGTTGGGGTGTATTACTGTATGCAAAGTATAGAGTTTCCTCC"""}
299-
with self.assertRaises(vquest.VquestError) as context:
300-
vquest.vquest(config)
300+
with self.assertRaises(VquestError) as context:
301+
vquest(config)
301302
self.assertEqual(
302303
context.exception.server_messages,
303304
["The receptor type or locus is not available for this species"])

vquest/__init__.py

Lines changed: 1 addition & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -4,141 +4,8 @@
44
http://www.imgt.org/IMGT_vquest/analysis
55
"""
66

7-
import sys
8-
import csv
97
import logging
10-
import time
11-
from io import StringIO, BytesIO
12-
from pathlib import Path
13-
import yaml
14-
import requests
15-
from requests_html import HTML
16-
from Bio import SeqIO
17-
from .util import unzip, chunker
18-
19-
__version__ = "0.0.5"
20-
URL = "http://www.imgt.org/IMGT_vquest/analysis"
21-
DELAY = 1 # for rate-limiting multiple requests
22-
CHUNK_SIZE = 50 # to stay within V-QUEST's limit on sequences in one go
8+
from .version import __version__
239
LOGGER = logging.getLogger(__name__)
2410
LOGGER.propagate = False
2511
LOGGER.addHandler(logging.StreamHandler())
26-
27-
class VquestError(Exception):
    """Error raised for a failed V-QUEST request.

    Attributes:
        message: summary text, also handed to Exception as its only argument.
        server_messages: the messages provided by the server, or None when
            none were given.
    """

    def __init__(self, message, server_messages=None):
        super().__init__(message)
        self.message = message
        self.server_messages = server_messages
34-
35-
def _parse_records(config):
36-
"""Extract Seq records for sequences given in config"""
37-
records = []
38-
if "sequences" in config and config["sequences"]:
39-
with StringIO(config["sequences"]) as seqs_stream:
40-
records.extend(list(SeqIO.parse(seqs_stream, "fasta")))
41-
if "fileSequences" in config and config["fileSequences"]:
42-
with open(config["fileSequences"]) as f_in:
43-
records.extend(list(SeqIO.parse(f_in, "fasta")))
44-
return records
45-
46-
def vquest(config):
    """Submit a request to V-QUEST.

    The sequences named by the config are split into batches with
    chunker(records, CHUNK_SIZE) and posted one batch at a time, sleeping
    DELAY seconds before every request after the first.

    Args:
        config: dictionary of V-QUEST options. "species",
            "receptorOrLocusType", and at least one of "fileSequences" or
            "sequences" must be set. Only resultType="excel" together with
            xv_outputtype=3 is supported.

    Returns:
        A dictionary with two text entries: "Parameters.txt" (taken from the
        first batch only) and "vquest_airr.tsv" (the first batch's table
        followed by the data rows, minus header, of every later batch).

    Raises:
        ValueError: if a required option is missing or no sequences are found.
        NotImplementedError: if any other output format is requested.
        VquestError: if the server answers with an HTML page that contains
            "div.form_error" elements.
    """
    if not all([
            config.get("species"),
            config.get("receptorOrLocusType"),
            config.get("fileSequences") or config.get("sequences")]):
        raise ValueError(
            "species, receptorOrLocusType, and fileSequences "
            "and/or sequences are required options")
    supported = [("resultType", "excel"), ("xv_outputtype", 3)]
    if not all(config.get(key) == val for key, val in supported):
        needed = " ".join("%s=%s" % (key, val) for key, val in supported)
        observed = " ".join(
            "%s=%s" % (key, config.get(key)) for key, _ in supported)
        raise NotImplementedError(
            "Only " + needed + " currently supported, not " + observed)
    output = {}
    records = _parse_records(config)
    if not records:
        raise ValueError("No sequences supplied")
    LOGGER.info("Starting request batch for %d sequences total", len(records))
    for chunk in chunker(records, CHUNK_SIZE):
        if output:
            # Something was already received, so this is a follow-up request:
            # pause first to rate-limit.
            time.sleep(DELAY)
        LOGGER.info("Sending request with %d sequences...", len(chunk))
        out_handle = StringIO()
        SeqIO.write(chunk, out_handle, "fasta")
        # Each batch is always sent as inline FASTA text, whether the
        # sequences came from the config text or from a file.
        config_chunk = config.copy()
        config_chunk["sequences"] = out_handle.getvalue()
        config_chunk["inputType"] = "inline"
        response = requests.post(URL, data=config_chunk)
        ctype = response.headers.get("Content-Type")
        LOGGER.debug("Received data of type %s", ctype)
        if ctype and "text/html" in ctype:
            html = HTML(html=response.content)
            errors = [div.text for div in html.find("div.form_error")]
            if errors:
                raise VquestError("; ".join(errors), errors)
        files = unzip(response.content)
        # Only keep one copy of the Parameters.txt data, but append rows
        # (minus header) of vquest_airr.tsv together
        if "Parameters.txt" not in output:
            output["Parameters.txt"] = files["Parameters.txt"].decode()
        airr = files["vquest_airr.tsv"].decode()
        if "vquest_airr.tsv" not in output:
            output["vquest_airr.tsv"] = airr
        else:
            output["vquest_airr.tsv"] = _append_airr_rows(
                output["vquest_airr.tsv"], airr)
    return output


def _append_airr_rows(combined, airr):
    """Return combined AIRR text with the data rows of airr appended.

    The first line of airr (its header) is dropped. The existing text is
    newline-terminated before appending and every appended row ends with a
    newline, so rows from consecutive batches never share a line.
    """
    rows = airr.splitlines()[1:]
    if not rows:
        return combined
    if combined and not combined.endswith("\n"):
        combined += "\n"
    return combined + "\n".join(rows) + "\n"
94-
95-
def airr_to_fasta(
        airr_txt,
        seqid_col="sequence_id", aln_col="sequence_alignment", fallback_col="sequence"):
    """Convert AIRR TSV table to FASTA, both as strings.

    Each table row becomes one FASTA entry: a ">" line holding the value of
    seqid_col, then a line holding the sequence from aln_col. If the
    alignment column is empty for a given row, the sequence will be taken
    from fallback_col, if provided.

    Args:
        airr_txt: tab-separated table text whose first line is the header.
        seqid_col: name of the column holding sequence IDs.
        aln_col: name of the column holding the aligned sequence.
        fallback_col: name of the column to use when aln_col is empty, or a
            false value to disable the fallback.

    Returns:
        The FASTA text, empty if the table has no data rows.
    """
    reader = csv.DictReader(StringIO(airr_txt), delimiter="\t")
    # Collect the entries and join once instead of growing a string per row.
    entries = []
    for row in reader:
        seq = row[aln_col]
        if fallback_col:
            seq = seq or row[fallback_col]
        entries.append(">%s\n%s\n" % (row[seqid_col], seq))
    return "".join(entries)
111-
112-
def load_config(path):
    """Load YAML config file.

    Args:
        path: location of the YAML file to read.

    Returns:
        Whatever the YAML document parses to using the safe loader.
    """
    LOGGER.debug("Loading config file: %s", path)
    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default text encoding.
    with open(path, encoding="utf-8") as f_in:
        config = yaml.load(f_in, Loader=yaml.SafeLoader)
    return config
118-
119-
def layer_configs(*configs):
    """Merge dictionaries one after the other.

    Keys in later dictionaries override the same keys in earlier ones. The
    result is a shallow copy of the pairs in each input dictionary, built
    from a copy of the first one, so no input is modified. Calling with no
    dictionaries at all gives an empty dictionary.
    """
    if not configs:
        return {}
    config_full = configs[0].copy()
    for config in configs[1:]:
        config_full.update(config)
    return config_full
128-
129-
def __load_options():
    """Load the bundled options.yml, turning type names into Python types.

    For every option of every section, a "values" entry of "int", "bool", or
    "str" is replaced by the matching built-in type. Any other entry is kept
    as loaded.
    """
    type_names = {"int": int, "bool": bool, "str": str}
    sections = load_config(Path(__file__).parent / "data" / "options.yml")
    for section in sections:
        for option in section["options"].values():
            try:
                declared = option["values"]
                option["values"] = type_names.get(declared, declared)
            except TypeError:
                # A TypeError here (for instance from an unhashable entry)
                # leaves the option unchanged.
                pass
    return sections
139-
140-
def __load_default_config():
    """Load defaults.yml from the data directory next to this module."""
    defaults_path = Path(__file__).parent / "data" / "defaults.yml"
    return load_config(defaults_path)
142-
143-
DEFAULTS = __load_default_config()
144-
OPTIONS = __load_options()

vquest/__main__.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
import logging
77
import argparse
88
import vquest
9-
from . import LOGGER, DEFAULTS, OPTIONS
10-
from . import load_config
11-
from . import layer_configs
9+
from vquest import LOGGER
10+
from . import request
11+
from .config import DEFAULTS, OPTIONS, load_config, layer_configs
12+
from .util import airr_to_fasta
13+
from .version import __version__
1214

1315
def main(arglist=None):
1416
"""Command-line interface for V-QUEST requests"""
@@ -46,10 +48,10 @@ def main(arglist=None):
4648
LOGGER.debug("final config: %s",
4749
" ".join(["%s=%s" % (key, val) for key, val in config_full.items()]))
4850
LOGGER.info("Configuration prepared")
49-
output = vquest.vquest(config_full)
51+
output = request.vquest(config_full)
5052
if args.align:
5153
LOGGER.info("Writing FASTA to stdout")
52-
print(vquest.airr_to_fasta(output["vquest_airr.tsv"]), end="")
54+
print(airr_to_fasta(output["vquest_airr.tsv"]), end="")
5355
else:
5456
LOGGER.info("Writing vquest_airr.tsv")
5557
with open("vquest_airr.tsv", "wt") as f_out:
@@ -68,7 +70,7 @@ def __setup_arg_parser():
6870
"--verbose", "-v", action="count", default=0,
6971
help="increase logging verbosity")
7072
parser.add_argument(
71-
"--version", "-V", action="version", version=vquest.__version__)
73+
"--version", "-V", action="version", version=__version__)
7274
parser.add_argument(
7375
"--align", "-a", action="store_true",
7476
help=("Instead of writing results to files, "

vquest/config.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""
2+
Tools to manage configurations and default options.
3+
"""
4+
5+
from pathlib import Path
6+
import logging
7+
import yaml
8+
9+
LOGGER = logging.getLogger(__name__)
10+
11+
def load_config(path):
    """Load YAML config file.

    Args:
        path: location of the YAML file to read.

    Returns:
        Whatever the YAML document parses to using the safe loader.
    """
    LOGGER.debug("Loading config file: %s", path)
    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default text encoding.
    with open(path, encoding="utf-8") as f_in:
        config = yaml.load(f_in, Loader=yaml.SafeLoader)
    return config
17+
18+
def layer_configs(*configs):
    """Merge dictionaries one after the other.

    Keys in later dictionaries override the same keys in earlier ones. The
    result is a shallow copy of the pairs in each input dictionary, built
    from a copy of the first one, so no input is modified. Calling with no
    dictionaries at all gives an empty dictionary.
    """
    if not configs:
        return {}
    config_full = configs[0].copy()
    for config in configs[1:]:
        config_full.update(config)
    return config_full
27+
28+
def __load_options():
    """Load the bundled options.yml, turning type names into Python types.

    For every option of every section, a "values" entry of "int", "bool", or
    "str" is replaced by the matching built-in type. Any other entry is kept
    as loaded.
    """
    type_names = {"int": int, "bool": bool, "str": str}
    sections = load_config(Path(__file__).parent / "data" / "options.yml")
    for section in sections:
        for option in section["options"].values():
            try:
                declared = option["values"]
                option["values"] = type_names.get(declared, declared)
            except TypeError:
                # A TypeError here (for instance from an unhashable entry)
                # leaves the option unchanged.
                pass
    return sections
38+
39+
def __load_default_config():
    """Load defaults.yml from the data directory next to this module."""
    defaults_path = Path(__file__).parent / "data" / "defaults.yml"
    return load_config(defaults_path)
41+
42+
DEFAULTS = __load_default_config()
43+
OPTIONS = __load_options()

0 commit comments

Comments
 (0)