Skip to content

Commit 30ae20e

Browse files
authored
Merge pull request #52 from ShawHahnLab/release-0.5.0
Version 0.5.0
2 parents 75f7d62 + 36bec98 commit 30ae20e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+1745
-82
lines changed

CHANGELOG.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
11
# Changelog
22

3+
## 0.5.0 - 2023-01-04
4+
5+
### Added
6+
7+
* `summarize` command will automatically use all available references for a
8+
given species if a species is given but no references ([#50])
9+
* `tree` command for creating and formatting phylogenetic trees ([#44])
10+
* support for additional arguments for `getreads` command passed through to
11+
bcl2fastq ([#43])
12+
* `msa` command for building multiple sequence alignments with
13+
[MUSCLE](https://drive5.com/muscle5/) ([#41])
14+
15+
### Fixed
16+
17+
* `convert` command and underlying input/output features now handle sequence
18+
descriptions ([#51])
19+
* `identity` command now uses a custom sequence ID column if one is given
20+
([#49])
21+
22+
[#51]: https://github.com/ShawHahnLab/igseq/pull/51
23+
[#50]: https://github.com/ShawHahnLab/igseq/pull/50
24+
[#49]: https://github.com/ShawHahnLab/igseq/pull/49
25+
[#44]: https://github.com/ShawHahnLab/igseq/pull/44
26+
[#43]: https://github.com/ShawHahnLab/igseq/pull/43
27+
[#41]: https://github.com/ShawHahnLab/igseq/pull/41
28+
329
## 0.4.0 - 2022-09-17
430

531
### Added

conda/meta.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html
2-
{% set version = "0.4.0" %}
2+
{% set version = "0.5.0" %}
33
{% set build = "0" %}
44

55
package:
@@ -43,13 +43,15 @@ requirements:
4343
# should follow the package match specifications."
4444
run:
4545
- python>=3.9.7,<4
46-
- biopython>=1.79,<2
46+
- biopython>=1.80,<2
4747
- python-newick>=1.3.2,<2
4848
- bwa>=0.7.17,<0.8
49-
- samtools>=1.15,<2
49+
- samtools>=1.16,<2
5050
- cutadapt>=3.7,<4
5151
- pear>=0.9.6,<1
5252
- igblast>=1.19,<2
53+
- muscle>=5.1,<6
54+
- fasttree>=2.1,<3
5355

5456
test:
5557
source_files:

igseq/__main__.py

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from . import vdj_match
2020
from . import convert
2121
from . import identity
22+
from . import msa
23+
from . import tree
2224
from . import show
2325
from .util import IgSeqError
2426
from .version import __version__
@@ -77,10 +79,9 @@ def main(arglist=None):
7779
try:
7880
if args_extra:
7981
# If there were unparsed arguments, see if we're in one of the
80-
# commands (currently just igblast) that can take extra
81-
# pass-through arguments. If so pass them along, but if not,
82-
# error out.
83-
if args.func in [_main_igblast]:
82+
# commands that can take extra pass-through arguments. If so
83+
# pass them along, but if not, error out.
84+
if args.func in [_main_igblast, _main_getreads]:
8485
args.func(args, args_extra)
8586
else:
8687
parser.parse_args(args_extra)
@@ -111,13 +112,14 @@ def main(arglist=None):
111112
except BrokenPipeError:
112113
os.dup2(devnull, sys.stderr.fileno())
113114

114-
def _main_getreads(args):
115+
def _main_getreads(args, extra_args=None):
    """Dispatch the parsed command-line arguments to getreads.getreads.

    args is the parsed argument namespace; extra_args holds any unparsed
    command-line arguments (or None) and is handed through unchanged as the
    extra_args keyword.  When args.no_counts is set, args.countsfile is
    overwritten with None before the call.
    """
    if args.no_counts:
        args.countsfile = None
    options = {
        "path_input": args.input,
        "dir_out": args.outdir,
        "path_counts": args.countsfile,
        "extra_args": extra_args,
        "threads_load": args.threads_load,
        "threads_proc": args.threads,
        "dry_run": args.dry_run,
    }
    getreads.getreads(**options)
@@ -240,6 +242,30 @@ def _main_identity(args):
240242
colmap=colmap,
241243
dry_run=args.dry_run)
242244

245+
def _main_msa(args):
    """Dispatch the parsed command-line arguments to msa.msa.

    The column-name mapping is built first with args_to_colmap, then the
    input/output paths, formats, and dry-run flag are passed along by
    keyword.
    """
    column_names = args_to_colmap(args)
    options = {
        "path_in": args.input,
        "path_out": args.output,
        "fmt_in": args.input_format,
        "fmt_out": args.output_format,
        "colmap": column_names,
        "dry_run": args.dry_run,
    }
    msa.msa(**options)
254+
255+
def _main_tree(args):
    """Dispatch the parsed command-line arguments to tree.tree.

    The column-name mapping is built first with args_to_colmap, then the
    input paths, output path, formats, alignment flag, set-definition
    options (pattern, lists, colors), and dry-run flag are passed along by
    keyword.
    """
    column_names = args_to_colmap(args)
    options = {
        "paths_in": args.input,
        "path_out": args.output,
        "fmt_in": args.input_format,
        "fmt_out": args.output_format,
        "aligned": args.aligned,
        "pattern": args.set_pattern,
        "lists": args.set_list,
        "colors": args.set_color,
        "colmap": column_names,
        "dry_run": args.dry_run,
    }
    tree.tree(**options)
268+
243269
def _setup_log(verbose, quiet, prefix):
244270
# Handle warnings via logging
245271
logging.captureWarnings(True)
@@ -306,6 +332,14 @@ def __setup_arg_parser():
306332
help="Calculate pairwise identities",
307333
description=rewrap(identity.__doc__),
308334
formatter_class=argparse.RawDescriptionHelpFormatter)
335+
p_msa = subps.add_parser("msa",
336+
help="Create multiple sequence alignments",
337+
description=rewrap(msa.__doc__),
338+
formatter_class=argparse.RawDescriptionHelpFormatter)
339+
p_tree = subps.add_parser("tree",
340+
help="Create and format phylogenetic trees",
341+
description=rewrap(tree.__doc__),
342+
formatter_class=argparse.RawDescriptionHelpFormatter)
309343
p_show = subps.add_parser("show",
310344
help="show file contents",
311345
description=rewrap(show.__doc__),
@@ -519,6 +553,54 @@ def __setup_arg_parser():
519553
help="Name of column containing sequences (for tabular input/output)")
520554
p_identity.set_defaults(func=_main_identity)
521555

556+
__add_common_args(p_msa)
557+
p_msa.add_argument("input",
558+
help="input file path, or a literal '-' for standard input")
559+
p_msa.add_argument("output",
560+
help="output file path, or a literal '-' for standard output")
561+
p_msa.add_argument("--input-format",
562+
help="format of input "
563+
"(default: detect from input filename if possible)")
564+
p_msa.add_argument("--output-format",
565+
help="format of output "
566+
"(default: detect from output filename if possible)")
567+
p_msa.add_argument("--col-seq-id",
568+
help="Name of column containing sequence IDs (for tabular input/output)")
569+
p_msa.add_argument("--col-seq",
570+
help="Name of column containing sequences (for tabular input/output)")
571+
p_msa.add_argument("--col-seq-desc",
572+
help="Name of column containing sequence descriptions (for tabular input/output)")
573+
p_msa.set_defaults(func=_main_msa)
574+
575+
__add_common_args(p_tree)
576+
p_tree.add_argument("input", nargs="+",
577+
help="input file path, or a literal '-' for standard input")
578+
p_tree.add_argument("output",
579+
help="output file path, or a literal '-' for standard output")
580+
p_tree.add_argument("--input-format",
581+
help="format of input "
582+
"(default: detect from input filename if possible)")
583+
p_tree.add_argument("--output-format",
584+
help="format of output "
585+
"(default: detect from output filename if possible)")
586+
p_tree.add_argument("--aligned", action=argparse.BooleanOptionalAction,
587+
help="Explicitly specify if input is aligned or not, for sequence input "
588+
"(default: guess from lengths)")
589+
p_tree.add_argument("--col-seq-id",
590+
help="Name of column containing sequence IDs (for tabular input)")
591+
p_tree.add_argument("--col-seq",
592+
help="Name of column containing sequences (for tabular input)")
593+
p_tree.add_argument("--set-pattern", "-P",
594+
help="regular expression to define set membership, with zero or one capture groups. "
595+
"If a capture group is given, only that text is used to define the set names.")
596+
p_tree.add_argument("--set-list", "-L", action="append",
597+
help="filename containing a list of sequence IDs for a set. "
598+
"This can be given multiple times for multiple set/filename pairs.")
599+
p_tree.add_argument("--set-color", "-C", action="append",
600+
help="setname=colorcode, like set1=#ff0000, to override automatic set colors. "
601+
"This can be given multiple times for multiple set/color pairs.")
602+
p_tree.set_defaults(func=_main_tree)
603+
522604
return parser
523605

524606
def __add_common_args(obj):

igseq/colors.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
2+
# from SONAR's display_tree, which in turn got these from the iwanthue tool
3+
# Palette of 30 colors, each a "#RRGGBB" hex string.
# NOTE(review): presumably the automatic per-set colors for trees -- confirm
# against the code that consumes this list.
COLORS = ["#BE4229", "#E74721", "#8C431C", "#CB6A27", "#E98C25", "#946E13", "#D0A620", "#8B8A22", "#A9B71D", "#555C10", "#839A21", "#82B531", "#4D8426", "#55C42A", "#34992A", "#2B6B2E", "#4FC456", "#33B66D", "#4296CB", "#5A8CE2", "#3E5988", "#656CE2", "#524EA0", "#8F83CC", "#A57CE4", "#8E46AD", "#C056EB", "#CA6BE4", "#7B4D87", "#D186D7"]
4+
5+
def merge_colors(colors, scale=0):
    """Take an average of a list of colors and shift toward black.

    colors is a list of color trios (indexable, first three items used).
    More colors results in a darker result, up to the integer value given for
    scale. If scale is less than the number of colors this scaling is skipped.

    Returns a new list of three values:
      * [0, 0, 0] if colors is empty
      * a copy of the sole color, unscaled, if only one color is given
      * otherwise the per-channel mean, multiplied by the scaling factor and
        truncated to int
    """
    if not colors:
        return [0, 0, 0]
    if len(colors) == 1:
        # Return a copy rather than the caller's own object, so that the
        # result is always a fresh list like in the other cases and later
        # changes to it can't silently alter the input.
        return list(colors[0])
    count = len(colors)
    # not quite right, should rotate, really, not move directly toward the
    # middle... but it'll do for now
    if scale < count:
        scaling = 1
    else:
        # scale >= count >= 2 here, so the division is safe.  Note that
        # scale == count gives a factor of zero (pure black).
        scaling = ((scale - count) / scale) ** 0.3
    totals = [sum(color[idx] for color in colors) for idx in range(3)]
    return [int(total / count * scaling) for total in totals]
29+
30+
def color_str_to_trio(color_txt):
    """Convert hex color string to trio of 0:255 ints.

    Accepts six hex digits (e.g. "ff0000") or the three-digit shorthand
    (e.g. "f00", equivalent to "ff0000"), each with an optional leading "#".

    Raises ValueError if the text is not exactly three or six hex digits.
    """
    hex_digits = "0123456789abcdefABCDEF"
    digits = color_txt.removeprefix("#")
    # Check the characters explicitly: int(..., 16) on its own would also
    # accept things like signs and whitespace ("+f" parses as 15).
    if len(digits) not in (3, 6) or any(ch not in hex_digits for ch in digits):
        raise ValueError(f"not a 3- or 6-digit hex color: {color_txt!r}")
    if len(digits) == 3:
        # e.g. "f00" = "ff0000"
        digits = "".join(ch * 2 for ch in digits)
    return [int(digits[idx:idx + 2], 16) for idx in range(0, 6, 2)]
42+
43+
def color_trio_to_str(color):
    """Render a trio of 0:255 ints as a "#"-prefixed lowercase hex string."""
    hex_pairs = (format(channel, "02x") for channel in color)
    return "#" + "".join(hex_pairs)

igseq/data/environment.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@ channels:
66
- defaults
77
dependencies:
88
- python>=3.9.7,<4
9-
- biopython>=1.79,<2
9+
- biopython>=1.80,<2
1010
- python-newick>=1.3.2,<2
1111
- bwa>=0.7.17,<0.8
12-
- samtools>=1.15,<2
12+
- samtools>=1.16,<2
1313
- cutadapt>=3.7,<4
1414
- pear>=0.9.6,<1
1515
- igblast>=1.19,<2
16+
- muscle>=5.1,<6
17+
- fasttree>=2.1,<3
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
>seq1
2+
ATCGCAAGACCCCTTGCTTGAGTATATCCC
3+
>seq2
4+
ATCGCAAGACCCCTTCTTGAGTATATCCC
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
>wk16-001
2+
AGGGTCGTCACCATTTCCGCTAAGATCGTAACGACCATCGATGCAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGGCCTGGATCTGCC
3+
>wk16-002
4+
AGGGTCGTCACCATTGCCGCTAAGATCGTAACGACCATCGATGCAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGGCCTGGATCTGCC
5+
>wk16-003
6+
AGGGTCGTCACCATTTCCGCTAAGATCGTAACGACCATCGATGCAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGTCCTGGATCTGCC
7+
>wk20-001
8+
AGGGTCGTCACCATTTCCGCTAAGATCGTAACGACCATCGATGTAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGGCCTGGATCTGCC
9+
>wk20-002
10+
AGGGTCGTCACCATTGCCGCTAAGATCGTAACGACCATCGATGCAGACGTATGGTATATGTATTCGCCGACCAGAAGCTCTTGCAGGGCCTGGATCTGCC
11+
>wk20-003
12+
AGGGTCGTCACCATTTCCGCTACGATCGTAACGACCATCGATGCAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGTCCTGGATCTGCC
13+
>somethingelse
14+
AGGGCCGTCAGCATTACCGCTAAGGTCGTAATGACCATCTATGCACACGTATGGTATAGTTATTCGCCCACCAGCAGCTCGTGCAGAGCCTGGTTCTGCC
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
>wk24-001
2+
AGGGTCGTCACTATTTCCGCTAAGATCGTAACGACCATCGATGTAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGGCCTGGATCTGCC
3+
>wk24-002
4+
AGGGTCGTCACCATTTCCGCTAAGATCGTAACGACCATCGATGTAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGGCCTGCATCTGCC
5+
>wk24-003
6+
AGGCTCGTCACCATTGCCGCTAAGATCGTAACGACCATCGATGCAGACGTATGGTATATGTATTCGCCGACCAGAAGCTCATGCAGGGCCTGGATCTGCC
7+
>wk24-004
8+
AGGCTCGTCACCATTTCCGCTACGATCGTAACGACCATCGATGCAGACGTATGGTATATTTATTCGCCGACCAGAAGCTCTTGCAGGTCCTGGATCTGCC
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
wk16-001
2+
wk16-002
3+
wk16-003
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
wk20-001
2+
wk20-002
3+
wk20-003

0 commit comments

Comments
 (0)