Skip to content

Commit 809130d

Browse files
authored
Merge pull request #64 from ShawHahnLab/release-0.5.1
Version 0.5.1
2 parents 30ae20e + e678bbf commit 809130d

File tree

1,289 files changed

+228
-70
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,289 files changed

+228
-70
lines changed

CHANGELOG.md

Lines changed: 24 additions & 0 deletions

conda/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html
2-
{% set version = "0.5.0" %}
2+
{% set version = "0.5.1" %}
33
{% set build = "0" %}
44

55
package:

igseq/convert.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,12 @@ def convert(path_in, path_out, fmt_in=None, fmt_out=None, colmap=None, dummyqual
2020
with RecordReader(path_in, fmt_in, colmap, dry_run=dry_run) as reader, \
2121
RecordWriter(path_out, fmt_out, colmap, dummyqual=dummyqual, dry_run=dry_run) as writer:
2222
for record in reader:
23+
# special case for descriptions: they may or may not exist on any
24+
# particular record for seq input, but for tabular output, we have
25+
# to have consistent columns. So in that case make sure to include
26+
# a description column by forcing one for the first record to be
27+
# written.
28+
if not writer.writer and writer.tabular and not reader.tabular:
29+
key = reader.colmap["sequence_description"]
30+
record[key] = record.get(key, "")
2331
writer.write(record)
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
sequence_id,sequence
2-
seqid,ACTGACTGACTGACTG
1+
sequence_id,sequence,sequence_description
2+
seqid,ACTGACTGACTGACTG,

igseq/getreads.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
those options.
1616
"""
1717

18+
import sys
1819
import logging
1920
import subprocess
2021
from tempfile import NamedTemporaryFile
@@ -98,10 +99,6 @@ def getreads(path_input, dir_out, path_counts="", extra_args=None, threads_load=
9899
"--sample-sheet", sample_sheet.name,
99100
# parallel processing during loading can help a bit in my tests
100101
"--loading-threads", threads_load,
101-
# parallel processing does *not* help during the bcl2fastq
102-
# demultiplexing step, go figure, when we don't have any
103-
# demultiplexing to perform here
104-
"--demultiplexing-threads", 1,
105102
# parallel processing in the processing step helps quite a bit
106103
"--processing-threads", threads_proc,
107104
# help text says "this must not be higher than number of
@@ -165,4 +162,6 @@ def _run_bcl2fastq(args, extra_args=None):
165162
raise util.IgSeqError(f"bcl2fastq arg collision from extra arguments: {shared}")
166163
args += extra_args
167164
LOGGER.info("bcl2fastq command: %s", args)
168-
subprocess.run(args, check=True)
165+
proc = subprocess.run(args, check=True, capture_output=True, text=True)
166+
sys.stdout.write(proc.stdout)
167+
sys.stderr.write(proc.stderr)

igseq/identity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
1212
The scoring is based on a simple global pairwise alignment, with matches scored
1313
as 1, mismatches and gaps 0. Any existing gaps are removed before comparing
14-
sequences, and differeces in case (lower/upper) are disregarded.
14+
sequences, and differences in case (lower/upper) are disregarded.
1515
"""
1616

1717
import logging

igseq/igblast.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
command, so you can configure things like the output format and file path. See
1111
igblastn -help for those options. Any igblastn argument can be given with two
1212
dashes if needed to force igseq to handle it correctly (for example,
13-
-num_alignments_V will be interprted as -n um_alignments_V, but
13+
-num_alignments_V will be interpreted as -n um_alignments_V, but
1414
--num_alignments_V will work).
1515
"""
1616

igseq/msa.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ def msa(path_in, path_out, fmt_in=None, fmt_out=None, colmap=None, dry_run=False
3737
def run_muscle(records):
3838
"""Align a set of records with MUSCLE."""
3939
# muscle crashes with empty input, so we'll just do a noop for that case
40-
if not records:
41-
LOGGER.warning("no records provided to align; skipping MUSCLE")
40+
if len(records) < 2:
41+
detail = "only one record" if len(records) else "no records"
42+
LOGGER.warning("%s provided to align; skipping MUSCLE", detail)
4243
return records
4344
args = ["muscle", "-align", "/dev/stdin", "-output", "/dev/stdout"]
4445
with Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True) as proc:

igseq/record.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ def infer_fmt(self, fmt=None):
9494
fmt = fmt_inferred
9595
return fmt
9696

97+
@property
98+
def tabular(self):
99+
return self.fmt in ["csv", "tsv", "csvgz", "tsvgz"]
100+
97101
@staticmethod
98102
def _infer_fmt(path):
99103
try:
@@ -141,7 +145,7 @@ def decode_record(self, obj):
141145
if quals:
142146
record[self.colmap["sequence_quality"]] = self.encode_phred(quals)
143147
if seq_desc is not None:
144-
record["sequence_description"] = seq_desc
148+
record[self.colmap["sequence_description"]] = seq_desc
145149
else:
146150
record = obj
147151
return record
@@ -267,7 +271,7 @@ def _write_fa(self, record):
267271
seq = record[self.colmap["sequence"]]
268272
defline = record[self.colmap["sequence_id"]]
269273
desc = record.get(self.colmap["sequence_description"])
270-
if desc is not None:
274+
if desc:
271275
defline += f" {desc}"
272276
if not self.dry_run:
273277
self.handle.write(f">{defline}\n{seq}\n")
@@ -283,7 +287,7 @@ def _write_fq(self, record):
283287
"No quality scores available, using default dummy value: %s",
284288
DEFAULT_DUMMY_QUAL)
285289
quals = "".join(DEFAULT_DUMMY_QUAL * len(seq))
286-
if desc is not None:
290+
if desc:
287291
defline += f" {desc}"
288292
if not self.dry_run:
289293
self.handle.write(f"@{defline}\n{seq}\n+\n{quals}\n")

igseq/tree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def make_seq_set_colors(seq_sets):
314314
# adapted from SONAR
315315
# this stretches across COLORS in even increments for as many as we need here
316316
num = len(colors.COLORS)
317-
subset = [int( a * (num-1) / (len(seq_sets)-1) ) for a in range(num)]
317+
subset = [int( a * (num-1) / max(1, (len(seq_sets)-1)) ) for a in range(num)]
318318
try:
319319
seq_set_colors[set_name] = colors.color_str_to_trio(colors.COLORS[subset[idx]])
320320
except IndexError:

0 commit comments

Comments
 (0)