ShawHahnLab
diff --git a/‎CHANGELOG.md‎
Lines changed: 24 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎conda/meta.yaml‎
Lines changed: 1 addition & 1 deletion b/‎conda/meta.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎igseq/convert.py‎
Lines changed: 8 additions & 0 deletions b/‎igseq/convert.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎igseq/data/examples/outputs/convert/unwrapped.csv‎
Lines changed: 2 additions & 2 deletions b/‎igseq/data/examples/outputs/convert/unwrapped.csv‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎igseq/getreads.py‎
Lines changed: 4 additions & 5 deletions b/‎igseq/getreads.py‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎igseq/identity.py‎
Lines changed: 1 addition & 1 deletion b/‎igseq/identity.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎igseq/igblast.py‎
Lines changed: 1 addition & 1 deletion b/‎igseq/igblast.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎igseq/msa.py‎
Lines changed: 3 additions & 2 deletions b/‎igseq/msa.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎igseq/record.py‎
Lines changed: 7 additions & 3 deletions b/‎igseq/record.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎igseq/tree.py‎
Lines changed: 1 addition & 1 deletion b/‎igseq/tree.py‎
Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html
-{% set version = "0.5.0" %}
+{% set version = "0.5.1" %}
 {% set build = "0" %}
 
 package:
 
@@ -20,4 +20,12 @@ def convert(path_in, path_out, fmt_in=None, fmt_out=None, colmap=None, dummyqual
     with RecordReader(path_in, fmt_in, colmap, dry_run=dry_run) as reader, \
         RecordWriter(path_out, fmt_out, colmap, dummyqual=dummyqual, dry_run=dry_run) as writer:
         for record in reader:
+            # special case for descriptions: they may or may not exist on any
+            # particular record for seq input, but for tabular output, we have
+            # to have consistent columns.  So in that case make sure to include
+            # a description column by forcing one for the first record to be
+            # written.
+            if not writer.writer and writer.tabular and not reader.tabular:
+                key = reader.colmap["sequence_description"]
+                record[key] = record.get(key, "")
             writer.write(record)
@@ -1,2 +1,2 @@
-sequence_id,sequence
-seqid,ACTGACTGACTGACTG
+sequence_id,sequence,sequence_description
+seqid,ACTGACTGACTGACTG,
@@ -15,6 +15,7 @@
 those options.
 """
 
+import sys
 import logging
 import subprocess
 from tempfile import NamedTemporaryFile
@@ -98,10 +99,6 @@ def getreads(path_input, dir_out, path_counts="", extra_args=None, threads_load=
                 "--sample-sheet", sample_sheet.name,
                 # parallel processing during loading can help a bit in my tests
                 "--loading-threads", threads_load,
-                # parallel processing does *not* help during the bcl2fastq
-                # demultiplexing step, go figure, when we don't have any
-                # demultiplexing to perform here
-                "--demultiplexing-threads", 1,
                 # parallel processing in the processing step helps quite a bit
                 "--processing-threads", threads_proc,
                 # help text says "this must not be higher than number of
@@ -165,4 +162,6 @@ def _run_bcl2fastq(args, extra_args=None):
             raise util.IgSeqError(f"bcl2fastq arg collision from extra arguments: {shared}")
         args += extra_args
     LOGGER.info("bcl2fastq command: %s", args)
-    subprocess.run(args, check=True)
+    proc = subprocess.run(args, check=True, capture_output=True, text=True)
+    sys.stdout.write(proc.stdout)
+    sys.stderr.write(proc.stderr)
@@ -11,7 +11,7 @@
 
 The scoring is based on a simple global pairwise alignment, with matches scored
 as 1, mismatches and gaps 0.  Any existing gaps are removed before comparing
-sequences, and differeces in case (lower/upper) are disregarded.
+sequences, and differences in case (lower/upper) are disregarded.
 """
 
 import logging
 
@@ -10,7 +10,7 @@
 command, so you can configure things like the output format and file path.  See
 igblastn -help for those options.  Any igblastn argument can be given with two
 dashes if needed to force igseq to handle it correctly (for example,
--num_alignments_V will be interprted as -n um_alignments_V, but
+-num_alignments_V will be interpreted as -n um_alignments_V, but
 --num_alignments_V will work).
 """
 
 
@@ -37,8 +37,9 @@ def msa(path_in, path_out, fmt_in=None, fmt_out=None, colmap=None, dry_run=False
 def run_muscle(records):
     """Align a set of records with MUSCLE."""
     # muscle crashes with empty input, and a lone record needs no aligning, so noop for both
-    if not records:
-        LOGGER.warning("no records provided to align; skipping MUSCLE")
+    if len(records) < 2:
+        detail = "only one record" if len(records) else "no records"
+        LOGGER.warning("%s provided to align; skipping MUSCLE", detail)
         return records
     args = ["muscle", "-align", "/dev/stdin", "-output", "/dev/stdout"]
     with Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True) as proc:
 
@@ -94,6 +94,10 @@ def infer_fmt(self, fmt=None):
             fmt = fmt_inferred
         return fmt
 
+    @property
+    def tabular(self):
+        return self.fmt in ["csv", "tsv", "csvgz", "tsvgz"]
+
     @staticmethod
     def _infer_fmt(path):
         try:
@@ -141,7 +145,7 @@ def decode_record(self, obj):
             if quals:
                 record[self.colmap["sequence_quality"]] = self.encode_phred(quals)
             if seq_desc is not None:
-                record["sequence_description"] = seq_desc
+                record[self.colmap["sequence_description"]] = seq_desc
         else:
             record = obj
         return record
@@ -267,7 +271,7 @@ def _write_fa(self, record):
         seq = record[self.colmap["sequence"]]
         defline = record[self.colmap["sequence_id"]]
         desc = record.get(self.colmap["sequence_description"])
-        if desc is not None:
+        if desc:
             defline += f" {desc}"
         if not self.dry_run:
             self.handle.write(f">{defline}\n{seq}\n")
@@ -283,7 +287,7 @@ def _write_fq(self, record):
                 "No quality scores available, using default dummy value: %s",
                 DEFAULT_DUMMY_QUAL)
             quals = "".join(DEFAULT_DUMMY_QUAL * len(seq))
-        if desc is not None:
+        if desc:
             defline += f" {desc}"
         if not self.dry_run:
             self.handle.write(f"@{defline}\n{seq}\n+\n{quals}\n")
@@ -314,7 +314,7 @@ def make_seq_set_colors(seq_sets):
         # adapted from SONAR
         # this stretches across COLORS in even increments for as many as we need here
         num = len(colors.COLORS)
-        subset = [int( a * (num-1) / (len(seq_sets)-1) ) for a in range(num)]
+        subset = [int( a * (num-1) / max(1, (len(seq_sets)-1)) ) for a in range(num)]
         try:
             seq_set_colors[set_name] = colors.color_str_to_trio(colors.COLORS[subset[idx]])
         except IndexError: