@@ -42,9 +42,6 @@ var commonCmd = &cobra.Command{
42
42
43
43
` ,
44
44
Run : func (cmd * cobra.Command , args []string ) {
45
- if len (args ) < 2 {
46
- checkError (errors .New ("at least 2 files needed" ))
47
- }
48
45
config := getConfigs (cmd )
49
46
alphabet := config .Alphabet
50
47
idRegexp := config .IDRegexp
@@ -68,21 +65,34 @@ var commonCmd = &cobra.Command{
68
65
}
69
66
70
67
files := getFileList (args )
68
+ if len (files ) < 2 {
69
+ checkError (errors .New ("at least 2 files needed" ))
70
+ }
71
71
72
72
outfh , err := xopen .Wopen (outFile )
73
73
checkError (err )
74
74
defer outfh .Close ()
75
75
76
- counter := make (map [string ]map [string ]int )
77
- names := make (map [string ]map [string ]string )
76
+ // target -> file -> struct{}
77
+ counter := make (map [string ]map [string ]struct {})
78
+ // target -> file -> seqname
79
+ names := make (map [string ]map [string ][]string )
80
+
78
81
var fastxReader * fastx.Reader
79
82
80
83
// read all files
81
84
var subject string
85
+ var checkFirstFile = true
86
+ var firstFile string
82
87
for _ , file := range files {
83
88
if ! quiet {
84
89
log .Infof ("read file: %s" , file )
85
90
}
91
+ if checkFirstFile {
92
+ firstFile = file
93
+ checkFirstFile = false
94
+ }
95
+
86
96
fastxReader , err = fastx .NewReader (alphabet , file , idRegexp )
87
97
checkError (err )
88
98
for {
@@ -124,42 +134,56 @@ var commonCmd = &cobra.Command{
124
134
}
125
135
126
136
if _ , ok := counter [subject ]; ! ok {
127
- counter [subject ] = make (map [string ]int )
137
+ counter [subject ] = make (map [string ]struct {} )
128
138
}
129
- counter [subject ][file ] = counter [ subject ][ file ] + 1
139
+ counter [subject ][file ] = struct {}{}
130
140
131
141
if _ , ok := names [subject ]; ! ok {
132
- names [subject ] = make (map [string ]string )
142
+ names [subject ] = make (map [string ][] string )
133
143
}
134
- names [subject ][file ] = string (record .Name )
144
+ names [subject ][file ] = append ( names [ subject ][ file ], string (record .Name ) )
135
145
}
136
146
}
137
147
138
148
// find common seqs
139
149
if ! quiet {
140
150
log .Info ("find common seqs ..." )
141
151
}
142
- fileNum := len (args )
143
- firstFile := args [ 0 ]
144
- namesOK := make ( map [ string ] int )
145
- n := 0
146
- for subject , count := range counter {
147
- if len (count ) != fileNum {
152
+ fileNum := len (files )
153
+ namesOK := make ( map [ string ] struct {})
154
+ var n , n2 int
155
+ var seqname string
156
+ for subject , presence := range counter {
157
+ if len (presence ) != fileNum {
148
158
continue
149
159
}
150
- namesOK [ names [ subject ][ firstFile ]] = counter [ subject ][ firstFile ]
160
+
151
161
n ++
152
- }
153
- if ! quiet {
154
- log .Infof ("%d common seqs found" , n )
162
+ for _ , seqname = range names [subject ][firstFile ] {
163
+ n2 ++
164
+ namesOK [seqname ] = struct {}{}
165
+ }
155
166
}
156
167
168
+ var t string
169
+ if byName {
170
+ t = "sequence headers"
171
+ } else if bySeq {
172
+ t = "sequences"
173
+ } else {
174
+ t = "sequence IDs"
175
+ }
157
176
if n == 0 {
177
+ log .Infof ("no common %s found" , t )
158
178
return
159
179
}
180
+ if ! quiet {
181
+ log .Infof ("%d unique %s found in %d files, which belong to %d records in the first file: %s" ,
182
+ n , t , fileNum , len (namesOK ), firstFile )
183
+ }
160
184
161
185
if ! quiet {
162
- log .Infof ("extract common seqs from first file: %s" , firstFile )
186
+ log .Infof ("extract seqs from the first file: %s" , firstFile )
163
187
}
164
188
165
189
// extract
@@ -174,11 +198,12 @@ var commonCmd = &cobra.Command{
174
198
checkError (err )
175
199
break
176
200
}
201
+ if fastxReader .IsFastq {
202
+ config .LineWidth = 0
203
+ }
177
204
178
- name := string (record .Name )
179
- if _ , ok := namesOK [name ]; ok && namesOK [name ] > 0 {
205
+ if _ , ok := namesOK [string (record .Name )]; ok {
180
206
record .FormatToWriter (outfh , lineWidth )
181
- namesOK [name ] = 0
182
207
}
183
208
}
184
209
},
0 commit comments