Skip to content

Commit 75f7d62

Browse files
authored
Merge pull request #40 from ShawHahnLab/release-0.4.0
Release 0.4.0
2 parents 72257a6 + a174ded commit 75f7d62

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+669
-60
lines changed

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,29 @@
11
# Changelog
22

3+
## 0.4.0 - 2022-09-17
4+
5+
### Added
6+
7+
* Automatic usage of all available references for a given species in `igblast`
8+
command ([#39])
9+
* `identity` command for calculating pairwise identity between arbitrary
10+
queries and references ([#31], [#37])
11+
* Support for showing basic tree topology for Newick-format files in `show`
12+
command ([#33])
13+
14+
### Fixed
15+
16+
* Uppercase file extensions are now supported by the `show` command ([#35])
17+
* Broken pipes (such as from `igseq something | something else`) are now
18+
handled gracefully ([#30])
19+
20+
[#39]: https://github.com/ShawHahnLab/igseq/pull/39
21+
[#37]: https://github.com/ShawHahnLab/igseq/pull/37
22+
[#35]: https://github.com/ShawHahnLab/igseq/pull/35
23+
[#33]: https://github.com/ShawHahnLab/igseq/pull/33
24+
[#31]: https://github.com/ShawHahnLab/igseq/pull/31
25+
[#30]: https://github.com/ShawHahnLab/igseq/pull/30
26+
327
## 0.3.0 - 2022-07-14
428

529
### Added

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# IgSeq Utilities
22

3+
[![CircleCI Build Status](https://circleci.com/gh/ShawHahnLab/igseq/tree/dev.svg?style=svg)](https://circleci.com/gh/ShawHahnLab/igseq/tree/dev)
4+
35
**Work in Progress**
46

57
A command-line tool for various common Ig-Seq tasks. These are heavily biased

conda/meta.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html
2-
{% set version = "0.3.0" %}
2+
{% set version = "0.4.0" %}
33
{% set build = "0" %}
44

55
package:
@@ -44,11 +44,12 @@ requirements:
4444
run:
4545
- python>=3.9.7,<4
4646
- biopython>=1.79,<2
47+
- python-newick>=1.3.2,<2
4748
- bwa>=0.7.17,<0.8
48-
- samtools>=1.13,<2
49-
- cutadapt>=3.5,<4
49+
- samtools>=1.15,<2
50+
- cutadapt>=3.7,<4
5051
- pear>=0.9.6,<1
51-
- igblast>=1.17.1,<2
52+
- igblast>=1.19,<2
5253

5354
test:
5455
source_files:

igseq/__main__.py

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Utilities for common IgSeq tasks.
33
"""
44

5+
import os
56
import sys
67
import argparse
78
import logging
@@ -17,6 +18,7 @@
1718
from . import vdj_gather
1819
from . import vdj_match
1920
from . import convert
21+
from . import identity
2022
from . import show
2123
from .util import IgSeqError
2224
from .version import __version__
@@ -72,22 +74,42 @@ def main(arglist=None):
7274
prefix = "[DRYRUN] "
7375
_setup_log(args.verbose, args.quiet, prefix)
7476
try:
75-
if args_extra:
76-
# If there were unparsed arguments, see if we're in one of the
77-
# commands (currently just igblast) that can take extra
78-
# pass-through arguments. If so pass them along, but if not, error
79-
# out.
80-
if args.func in [_main_igblast]:
81-
args.func(args, args_extra)
77+
try:
78+
if args_extra:
79+
# If there were unparsed arguments, see if we're in one of the
80+
# commands (currently just igblast) that can take extra
81+
# pass-through arguments. If so pass them along, but if not,
82+
# error out.
83+
if args.func in [_main_igblast]:
84+
args.func(args, args_extra)
85+
else:
86+
parser.parse_args(args_extra)
8287
else:
83-
parser.parse_args(args_extra)
84-
else:
85-
args.func(args)
86-
except IgSeqError as err:
87-
sys.stderr.write(
88-
f"\nigseq failed because: {err.message}\n"
89-
"Considering adding -v or -vv to the command if the problem isn't clear.\n")
90-
sys.exit(1)
88+
args.func(args)
89+
except IgSeqError as err:
90+
sys.stderr.write(
91+
f"\nigseq failed because: {err.message}\n"
92+
"Considering adding -v or -vv to the command if the problem isn't clear.\n")
93+
sys.exit(1)
94+
sys.stdout.flush()
95+
sys.stderr.flush()
96+
except BrokenPipeError:
97+
# If stdout and/or stderr were writing to a pipe and that pipe is now
98+
# closed, we'll swap in /dev/null for whichever it is to handle this
99+
# quietly and to prevent it from arising again when Python tries to
100+
# flush file handles on exit.
101+
# Adapted from
102+
# https://stackoverflow.com/questions/26692284
103+
# https://docs.python.org/3/library/signal.html#note-on-sigpipe
104+
devnull = os.open(os.devnull, os.O_WRONLY)
105+
try:
106+
sys.stdout.flush()
107+
except BrokenPipeError:
108+
os.dup2(devnull, sys.stdout.fileno())
109+
try:
110+
sys.stderr.flush()
111+
except BrokenPipeError:
112+
os.dup2(devnull, sys.stderr.fileno())
91113

92114
def _main_getreads(args):
93115
if args.no_counts:
@@ -207,6 +229,17 @@ def _main_convert(args):
207229
dummyqual=args.dummy_qual,
208230
dry_run=args.dry_run)
209231

232+
def _main_identity(args):
233+
colmap = args_to_colmap(args)
234+
identity.identity(
235+
path_in=args.input,
236+
path_out=args.output,
237+
path_ref=args.reference,
238+
fmt_in=args.input_format,
239+
fmt_in_ref=args.ref_format,
240+
colmap=colmap,
241+
dry_run=args.dry_run)
242+
210243
def _setup_log(verbose, quiet, prefix):
211244
# Handle warnings via logging
212245
logging.captureWarnings(True)
@@ -269,6 +302,10 @@ def __setup_arg_parser():
269302
help="Convert FASTA/FASTQ/CSV/TSV",
270303
description=rewrap(convert.__doc__),
271304
formatter_class=argparse.RawDescriptionHelpFormatter)
305+
p_identity = subps.add_parser("identity",
306+
help="Calculate pairwise identities",
307+
description=rewrap(identity.__doc__),
308+
formatter_class=argparse.RawDescriptionHelpFormatter)
272309
p_show = subps.add_parser("show",
273310
help="show file contents",
274311
description=rewrap(show.__doc__),
@@ -463,6 +500,25 @@ def __setup_arg_parser():
463500
'as text (e.g. use "I" for 40)')
464501
p_convert.set_defaults(func=_main_convert)
465502

503+
__add_common_args(p_identity)
504+
p_identity.add_argument("input",
505+
help="input file path, or a literal '-' for standard input")
506+
p_identity.add_argument("output",
507+
help="output file path, or a literal '-' for standard output")
508+
p_identity.add_argument("-r", "--reference",
509+
help="optional reference file path (default: use first query as ref)")
510+
p_identity.add_argument("--input-format",
511+
help="format of input "
512+
"(default: detect from input filename if possible)")
513+
p_identity.add_argument("--ref-format",
514+
help="format of reference "
515+
"(default: detect from reference filename if possible)")
516+
p_identity.add_argument("--col-seq-id",
517+
help="Name of column containing sequence IDs (for tabular input/output)")
518+
p_identity.add_argument("--col-seq",
519+
help="Name of column containing sequences (for tabular input/output)")
520+
p_identity.set_defaults(func=_main_identity)
521+
466522
return parser
467523

468524
def __add_common_args(obj):

igseq/data/environment.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ channels:
77
dependencies:
88
- python>=3.9.7,<4
99
- biopython>=1.79,<2
10+
- python-newick>=1.3.2,<2
1011
- bwa>=0.7.17,<0.8
11-
- samtools>=1.13,<2
12-
- cutadapt>=3.5,<4
12+
- samtools>=1.15,<2
13+
- cutadapt>=3.7,<4
1314
- pear>=0.9.6,<1
14-
- igblast>=1.17.1,<2
15+
- igblast>=1.19,<2

igseq/data/examples/identity.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env bash
2+
3+
[ -v EXAMPLES ] || EXAMPLES=$(python -c 'import igseq.util; print(igseq.util.DATA)')/examples
4+
5+
# A few example query sequences derived from the igblast example
6+
QUERY=$EXAMPLES/inputs/identity/query.fasta
7+
# AIRR TSV table for those sequences
8+
QUERY_AIRR=$EXAMPLES/inputs/identity/query_airr.tsv
9+
# A contrived reference (slightly altered version of the first query)
10+
REF=$EXAMPLES/inputs/identity/ref.fasta
11+
# Just the junction region from that sequence, as NT and AA
12+
REF_JUNCTION=$EXAMPLES/inputs/identity/ref_junction.fasta
13+
REF_JUNCTION_AA=$EXAMPLES/inputs/identity/ref_junction_aa.fasta
14+
# AIRR TSV for the reference
15+
REF_AIRR=$EXAMPLES/inputs/identity/ref_airr.tsv
16+
17+
# Check identity of each query to the one reference, with each row in the CSV
18+
# output giving query ID, reference ID, and fraction of matching bases
19+
igseq identity -r $REF $QUERY idents.csv
20+
# use "igseq show" command for a human-readable view of the CSV file
21+
igseq show idents.csv
22+
23+
# With no reference specified the first query sequence will be used as the reference
24+
igseq identity $QUERY idents_first_query.csv
25+
26+
# To compare all-to-all, explicitly give the query path as the reference too
27+
igseq identity -r $QUERY $QUERY idents_all.csv
28+
29+
# We can use a "-" to read from standard input and/or write to standard output,
30+
# but we need to specify the input format for that case
31+
igseq identity -r $REF --input-format fa - - < $QUERY > idents_stdout.csv
32+
33+
# We can work with the formats supported by the convert command, like AIRR TSV
34+
# for example. As with that command, the defaults are to use the sequence_id
35+
# and sequence columns.
36+
igseq identity -r $REF $QUERY_AIRR idents_from_airr.csv
37+
38+
# We can specify other columns to use instead
39+
igseq identity -r $REF_JUNCTION --col-seq junction $QUERY_AIRR idents_from_airr_junctions.csv
40+
41+
# AA works too
42+
igseq identity -r $REF_JUNCTION_AA --col-seq junction_aa $QUERY_AIRR idents_from_airr_junctions_aa.csv
43+
44+
# We can also give the reference in a format other than FASTA, though the same
45+
# column settings are used for query and reference
46+
igseq identity -r $REF_AIRR --col-seq junction_aa $QUERY_AIRR idents_from_airr_junctions_aa_2.csv
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
>query1
2+
CAGCTGCAGCTGCAGGAGTCGGGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTC
3+
ACCTGCGCTGTCTCTGGTGGCTCCATCAGCAGTAACTACTGGAGCTGGATCCGCCAGCCC
4+
CCAGGGAAGGGACTGGAGTGGATTGGACGTATCTCTGGTAGTGGTGGGAGCCCCGACTAC
5+
AACCCCTCCCTCAAGAGTCGAGTCACCATTTCAACAGACACGTCCAAGAGCCAGTTCTCC
6+
CTGAAGCTGAGCTCTGTGACCGCCGCGGACACGGCCGTGTATTACTGTGCGAGAGATACT
7+
TACAGTAACATCCCACCCAACTTTGACTACTGGGGCCAGGGAGTCCTGGTCACCGTCTCC
8+
TCAG
9+
>query2
10+
CAGCTGCAGCTGCAGGAGTCGGGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTC
11+
ACCTGCGCTGTCTCTGTTGGCTCCATCAGCAGTAACTACTGGAGCTGGATCCGCCAGCCC
12+
CCAGGGAAGGGACTGGAGTGGATTGCACGTATCTCTGGTAGTGGTGGGAGCCCCGACTAC
13+
AACCCCTCCCGCAAGAGTCGAGTCACCATTTCAACAGACACGTCCAAGAGCCAGTTCTCC
14+
CTGAAGCTGAGCTCTGTGACCGCCGCGGACACGGCCGTGTATTACTATGCGAGAGATACT
15+
TACAGTACCATCCCACCCAACTTTGACTACTGGGGCCAGGGAGTCCTGGTCACCGTGTCC
16+
TCAG
17+
>query3
18+
CAGCTGCAGCTGCAGGAGTAGGGCCCAGGACTGGTGAAGCCTTCGGAGACGCTGTCCCTC
19+
ACCTGCGCTGTCTCTGGTGGCTCCATCAGCAGTAAGTACTGGAGCTGGATCCGCCAGCCC
20+
CCAGGGAATGGACTGGAGTGGATTGGACGTATCTCTGGTAGTGGTGGGAGCCCGGACTAC
21+
AACCCCTCCCTCAAGAGTCGAGTCACCATTTCAACAGACACGTCCATGAGCCAGTTCTCC
22+
CTGAAGCTGCGCTCTGTGACCGCCGCGGACACGGCCGTGTATTACTGTGCGAGAGATACT
23+
TACAGTAACATCCCACCCAACTTTGACTACTGGGGCCAGGGAGTCCTGGTCAGCGTCTCC
24+
TCAG

0 commit comments

Comments
 (0)