Skip to content

Commit fb30178

Browse files
committed
v0.3.8
1 parent 58db66a commit fb30178

File tree

4 files changed

+59
-31
lines changed

4 files changed

+59
-31
lines changed

doc/docs/download.md

+9-6
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,10 @@ SeqKit is implemented in [Go](https://golang.org/) programming language,
66

77
## Latest Version
88

9-
[SeqKit v0.3.7](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
10-
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.7/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
9+
[SeqKit v0.3.8](https://github.com/shenwei356/seqkit/releases/tag/v0.3.8)
10+
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.8/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.8)
1111

12-
- fix bug in `seqkit split --by-id` when sequence ID contains invalid characters for system path.
13-
- add more flags validation for `seqkit replace`.
14-
- enhancement: raise error when key pattern matches multiple targets in cases of replacing with key-value files and more controls are added.
15-
- changes: do not wrap sequence and quality in output for FASTQ format.
12+
- enhancement of `seqkit common`: better handling of files containing replicated sequences
1613

1714
***64-bit versions are highly recommended.***
1815

@@ -94,6 +91,12 @@ For Go developer, just one command:
9491

9592
## Release History
9693

94+
- [SeqKit v0.3.7](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
95+
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.7/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.7)
96+
- fix bug in `seqkit split --by-id` when sequence ID contains invalid characters for system path.
97+
- add more flags validation for `seqkit replace`.
98+
- enhancement: raise error when key pattern matches multiple targets in cases of replacing with key-value files and more controls are added.
99+
- changes: do not wrap sequence and quality in output for FASTQ format.
97100
- [SeqKit v0.3.6](https://github.com/shenwei356/seqkit/releases/tag/v0.3.6)
98101
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v0.3.6/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v0.3.6)
99102
- add new feature for `seqkit grep`: new flag `-R` (`--region`) for specifying sequence region for searching.

seqkit/cmd/common.go

+48-23
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,6 @@ var commonCmd = &cobra.Command{
4242
4343
`,
4444
Run: func(cmd *cobra.Command, args []string) {
45-
if len(args) < 2 {
46-
checkError(errors.New("at least 2 files needed"))
47-
}
4845
config := getConfigs(cmd)
4946
alphabet := config.Alphabet
5047
idRegexp := config.IDRegexp
@@ -68,21 +65,34 @@ var commonCmd = &cobra.Command{
6865
}
6966

7067
files := getFileList(args)
68+
if len(files) < 2 {
69+
checkError(errors.New("at least 2 files needed"))
70+
}
7171

7272
outfh, err := xopen.Wopen(outFile)
7373
checkError(err)
7474
defer outfh.Close()
7575

76-
counter := make(map[string]map[string]int)
77-
names := make(map[string]map[string]string)
76+
// target -> file -> struct{}
77+
counter := make(map[string]map[string]struct{})
78+
// target -> file -> seqname
79+
names := make(map[string]map[string][]string)
80+
7881
var fastxReader *fastx.Reader
7982

8083
// read all files
8184
var subject string
85+
var checkFirstFile = true
86+
var firstFile string
8287
for _, file := range files {
8388
if !quiet {
8489
log.Infof("read file: %s", file)
8590
}
91+
if checkFirstFile {
92+
firstFile = file
93+
checkFirstFile = false
94+
}
95+
8696
fastxReader, err = fastx.NewReader(alphabet, file, idRegexp)
8797
checkError(err)
8898
for {
@@ -124,42 +134,56 @@ var commonCmd = &cobra.Command{
124134
}
125135

126136
if _, ok := counter[subject]; !ok {
127-
counter[subject] = make(map[string]int)
137+
counter[subject] = make(map[string]struct{})
128138
}
129-
counter[subject][file] = counter[subject][file] + 1
139+
counter[subject][file] = struct{}{}
130140

131141
if _, ok := names[subject]; !ok {
132-
names[subject] = make(map[string]string)
142+
names[subject] = make(map[string][]string)
133143
}
134-
names[subject][file] = string(record.Name)
144+
names[subject][file] = append(names[subject][file], string(record.Name))
135145
}
136146
}
137147

138148
// find common seqs
139149
if !quiet {
140150
log.Info("find common seqs ...")
141151
}
142-
fileNum := len(args)
143-
firstFile := args[0]
144-
namesOK := make(map[string]int)
145-
n := 0
146-
for subject, count := range counter {
147-
if len(count) != fileNum {
152+
fileNum := len(files)
153+
namesOK := make(map[string]struct{})
154+
var n, n2 int
155+
var seqname string
156+
for subject, presence := range counter {
157+
if len(presence) != fileNum {
148158
continue
149159
}
150-
namesOK[names[subject][firstFile]] = counter[subject][firstFile]
160+
151161
n++
152-
}
153-
if !quiet {
154-
log.Infof("%d common seqs found", n)
162+
for _, seqname = range names[subject][firstFile] {
163+
n2++
164+
namesOK[seqname] = struct{}{}
165+
}
155166
}
156167

168+
var t string
169+
if byName {
170+
t = "sequence headers"
171+
} else if bySeq {
172+
t = "sequences"
173+
} else {
174+
t = "sequence IDs"
175+
}
157176
if n == 0 {
177+
log.Infof("no common %s found", t)
158178
return
159179
}
180+
if !quiet {
181+
log.Infof("%d unique %s found in %d files, which belong to %d records in the first file: %s",
182+
n, t, fileNum, len(namesOK), firstFile)
183+
}
160184

161185
if !quiet {
162-
log.Infof("extract common seqs from first file: %s", firstFile)
186+
log.Infof("extract seqs from the first file: %s", firstFile)
163187
}
164188

165189
// extract
@@ -174,11 +198,12 @@ var commonCmd = &cobra.Command{
174198
checkError(err)
175199
break
176200
}
201+
if fastxReader.IsFastq {
202+
config.LineWidth = 0
203+
}
177204

178-
name := string(record.Name)
179-
if _, ok := namesOK[name]; ok && namesOK[name] > 0 {
205+
if _, ok := namesOK[string(record.Name)]; ok {
180206
record.FormatToWriter(outfh, lineWidth)
181-
namesOK[name] = 0
182207
}
183208
}
184209
},

seqkit/cmd/helper.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ import (
4343
)
4444

4545
// VERSION of seqkit
46-
const VERSION = "0.3.7"
46+
const VERSION = "0.3.8"
4747

4848
func checkError(err error) {
4949
if err != nil {

seqkit/download_all_binaries.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/sh
22

33
rm seqkit_*.tar.gz
4-
version="0.3.5"
4+
version="0.3.8"
55

66
wget https://github.com/shenwei356/seqkit/releases/download/v$version/seqkit_linux_386.tar.gz
77
wget https://github.com/shenwei356/seqkit/releases/download/v$version/seqkit_linux_amd64.tar.gz

0 commit comments

Comments
 (0)