|
4 | 4 | http://www.imgt.org/IMGT_vquest/analysis |
5 | 5 | """ |
6 | 6 |
|
7 | | -import sys |
8 | | -import csv |
9 | 7 | import logging |
10 | | -import time |
11 | | -from io import StringIO, BytesIO |
12 | | -from pathlib import Path |
13 | | -import yaml |
14 | | -import requests |
15 | | -from requests_html import HTML |
16 | | -from Bio import SeqIO |
17 | | -from .util import unzip, chunker |
18 | | - |
19 | | -__version__ = "0.0.5" |
20 | | -URL = "http://www.imgt.org/IMGT_vquest/analysis" |
21 | | -DELAY = 1 # for rate-limiting multiple requests |
22 | | -CHUNK_SIZE = 50 # to stay within V-QUEST's limit on sequences in one go |
| 8 | +from .version import __version__ |
23 | 9 | LOGGER = logging.getLogger(__name__) |
24 | 10 | LOGGER.propagate = False |
25 | 11 | LOGGER.addHandler(logging.StreamHandler()) |
26 | | - |
class VquestError(Exception):
    """Error raised for problems reported while talking to V-QUEST.

    Attributes:
        message: Summary text; also used as the exception's string form.
        server_messages: Optional collection of individual messages that
            came back from the server, or None when not supplied.
    """

    def __init__(self, message, server_messages=None):
        # Initialise the base Exception first so str(exc) yields the message.
        super().__init__(message)
        self.message = message
        self.server_messages = server_messages
34 | | - |
def _parse_records(config):
    """Collect the FASTA records referenced by a config dictionary.

    Records are read first from the inline FASTA text under "sequences"
    and then from the FASTA file whose path is under "fileSequences".
    Either entry may be missing or empty, in which case it is skipped.

    Returns:
        A list of the parsed records, inline ones first.
    """
    parsed = []
    inline_fasta = config.get("sequences")
    if inline_fasta:
        with StringIO(inline_fasta) as stream:
            parsed += SeqIO.parse(stream, "fasta")
    fasta_path = config.get("fileSequences")
    if fasta_path:
        with open(fasta_path) as handle:
            parsed += SeqIO.parse(handle, "fasta")
    return parsed
45 | | - |
def _append_airr_rows(combined, addition):
    """Append the data rows of one AIRR TSV text (header dropped) to another.

    A newline is always kept between the existing text and the new rows, and
    after the last new row, so repeated appends never fuse two rows together.
    """
    rows = addition.splitlines()[1:]
    if not rows:
        return combined
    if combined and not combined.endswith("\n"):
        combined += "\n"
    return combined + "\n".join(rows) + "\n"


def vquest(config):
    """Submit a request to V-QUEST.

    The sequences named by the config are sent in batches of at most
    CHUNK_SIZE records, sleeping DELAY seconds between consecutive requests.
    Each response is unpacked with unzip() and the per-batch results are
    merged into a single output.

    Args:
        config: Dictionary of V-QUEST options.  "species",
            "receptorOrLocusType", and at least one of "fileSequences" /
            "sequences" are required.  Only resultType="excel" with
            xv_outputtype=3 is supported.

    Returns:
        A dictionary with the decoded text of "Parameters.txt" (taken from
        the first batch only) and "vquest_airr.tsv" (the first batch's table
        followed by the data rows of every later batch).

    Raises:
        ValueError: If a required option is missing or no sequences are found.
        NotImplementedError: If an unsupported result type is requested.
        VquestError: If the server answers with an HTML page that contains
            form error messages.
    """
    if not all([
            config.get("species"),
            config.get("receptorOrLocusType"),
            config.get("fileSequences") or config.get("sequences")]):
        raise ValueError(
            "species, receptorOrLocusType, and fileSequences "
            "and/or sequences are required options")
    supported = [("resultType", "excel"), ("xv_outputtype", 3)]
    if not all(config.get(key) == val for key, val in supported):
        needed = " ".join(key + "=" + str(val) for key, val in supported)
        observed = " ".join(
            key + "=" + str(config.get(key)) for key, _ in supported)
        raise NotImplementedError(
            "Only " + needed + " currently supported, not " + observed)
    records = _parse_records(config)
    if not records:
        raise ValueError("No sequences supplied")
    output = {}
    LOGGER.info("Starting request batch for %d sequences total", len(records))
    for chunk in chunker(records, CHUNK_SIZE):
        # output is only non-empty once a batch has completed, so this
        # rate-limits every request after the first.
        if output:
            time.sleep(DELAY)
        LOGGER.info("Sending request with %d sequences...", len(chunk))
        out_handle = StringIO()
        SeqIO.write(chunk, out_handle, "fasta")
        # Each batch is always sent as inline FASTA text, whatever the
        # original input was.
        config_chunk = config.copy()
        config_chunk["sequences"] = out_handle.getvalue()
        config_chunk["inputType"] = "inline"
        response = requests.post(URL, data=config_chunk)
        ctype = response.headers.get("Content-Type")
        LOGGER.debug("Received data of type %s", ctype)
        if ctype and "text/html" in ctype:
            html = HTML(html=response.content)
            errors = [div.text for div in html.find("div.form_error")]
            if errors:
                raise VquestError("; ".join(errors), errors)
        unzipped = unzip(response.content)
        # Only keep one copy of the Parameters.txt data, but append rows
        # (minus header) of vquest_airr.tsv together
        if "Parameters.txt" not in output:
            output["Parameters.txt"] = unzipped["Parameters.txt"].decode()
        airr = unzipped["vquest_airr.tsv"].decode()
        if "vquest_airr.tsv" not in output:
            output["vquest_airr.tsv"] = airr
        else:
            output["vquest_airr.tsv"] = _append_airr_rows(
                output["vquest_airr.tsv"], airr)
    return output
94 | | - |
def airr_to_fasta(
        airr_txt,
        seqid_col="sequence_id", aln_col="sequence_alignment", fallback_col="sequence"):
    """Convert AIRR TSV table to FASTA, both as strings.

    If the alignment column is empty for a given row, the sequence will be
    taken from fallback_col, if provided.

    Args:
        airr_txt: Tab-separated AIRR table text, including its header row.
        seqid_col: Name of the column holding each sequence ID.
        aln_col: Name of the column holding the preferred sequence text.
        fallback_col: Name of the column to use when aln_col is empty for a
            row; pass a false value (e.g. None) to disable the fallback.

    Returns:
        FASTA text with one two-line entry per table row, or "" if the
        table has no data rows.

    Raises:
        KeyError: If a needed column is not present in the table header.
    """
    reader = csv.DictReader(StringIO(airr_txt), delimiter="\t")
    entries = []
    for row in reader:
        seq = row[aln_col]
        if not seq and fallback_col:
            seq = row[fallback_col]
        # A row shorter than the header yields None for its missing columns;
        # emit an empty sequence rather than the literal text "None".
        entries.append(">%s\n%s\n" % (row[seqid_col], seq or ""))
    return "".join(entries)
111 | | - |
def load_config(path):
    """Read a YAML config file and return its parsed contents.

    The file is parsed with PyYAML's safe loader.
    """
    LOGGER.debug("Loading config file: %s", path)
    with open(path) as handle:
        return yaml.safe_load(handle)
118 | | - |
def layer_configs(*configs):
    """Combine several dictionaries, later ones overriding earlier ones.

    The first dictionary is shallow-copied and every following one is
    applied on top of it in order, so none of the inputs is modified and
    values are shared rather than duplicated.
    """
    base, overrides = configs[0], configs[1:]
    merged = base.copy()
    for layer in overrides:
        merged.update(layer)
    return merged
128 | | - |
def __load_options():
    """Load data/options.yml and swap type names for the types themselves.

    For every option in every section, a "values" entry equal to "int",
    "bool" or "str" is replaced by the matching built-in type; any other
    entry is left as it is.
    """
    type_names = {"int": int, "bool": bool, "str": str}
    sections = load_config(Path(__file__).parent / "data" / "options.yml")
    for section in sections:
        for option in section["options"].values():
            try:
                declared = option["values"]
                option["values"] = type_names.get(declared, declared)
            except TypeError:
                # Raised e.g. when the entry is unhashable and so cannot be
                # looked up in the mapping (presumably a list of allowed
                # values -- confirm against options.yml); keep it unchanged.
                pass
    return sections
139 | | - |
def __load_default_config():
    """Load the defaults.yml file stored in this module's data directory."""
    defaults_path = Path(__file__).parent / "data" / "defaults.yml"
    return load_config(defaults_path)
142 | | - |
143 | | -DEFAULTS = __load_default_config() |
144 | | -OPTIONS = __load_options() |
0 commit comments