Skip to content

Commit 5b43ccc

Browse files
committed
seqkit: add a global flag: --skip-file-check
1 parent 89f35b0 commit 5b43ccc

38 files changed

+41
-35
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
- [SeqKit v2.10.0](https://github.com/shenwei356/seqkit/releases/tag/v2.10.0) - 2025-xx-xx
22
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v2.10.0/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v2.10.0)
3+
- `seqkit`:
4+
- add a global flag `--skip-file-check`: skip the input file check for files given via a file list (`-X/--infile-list`), for use when you are sure these files exist.
5+
It helps to reduce file checking time when given a huge number of sequence files.
36
- `seqkit split2`:
47
- fix prefix checking when paired-end files are given. [#512](https://github.com/shenwei356/seqkit/issues/512)
58
- `seqkit stat`:

seqkit/cmd/amplicon.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ Examples:
121121
runtime.GOMAXPROCS(config.Threads)
122122
bwt.CheckEndSymbol = false
123123

124-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
124+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
125125

126126
outfh, err := xopen.Wopen(outFile)
127127
checkError(err)

seqkit/cmd/bam.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ var bamCmd = &cobra.Command{
501501
outFile := config.OutFile
502502
runtime.GOMAXPROCS(config.Threads)
503503

504-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
504+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
505505

506506
mapQual := getFlagInt(cmd, "map-qual")
507507
field := getFlagString(cmd, "field")

seqkit/cmd/common.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ Note:
9696
checkError(fmt.Errorf("flag -s (--by-seq) needed when using -e (--check-embedded-seqs)"))
9797
}
9898

99-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
99+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
100100

101101
if len(files) < 2 {
102102
checkError(errors.New("at least 2 files needed"))

seqkit/cmd/concat.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Attention:
7070
fill := fillBase != ""
7171
separator := getFlagString(cmd, "separator")
7272

73-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
73+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7474

7575
if len(files) < 2 {
7676
checkError(errors.New("at least 2 files needed"))

seqkit/cmd/convert.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ var convertCmd = &cobra.Command{
6565
seq.NMostCommonThreshold = getFlagPositiveInt(cmd, "thresh-B-in-n-most-common")
6666
threshIllumina1p5Frac := getFlagFloat64(cmd, "thresh-illumina1.5-frac")
6767

68-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
68+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
6969

7070
outfh, err := xopen.Wopen(outFile)
7171
checkError(err)

seqkit/cmd/dup.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ You may need "seqkit rename" to make the sequence IDs unique.
5454

5555
times := getFlagPositiveInt(cmd, "times")
5656

57-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
57+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
5858

5959
outfh, err := xopen.Wopen(outFile)
6060
checkError(err)

seqkit/cmd/fa2fq.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Attention:
6262
}
6363
onlyPositiveStrand := getFlagBool(cmd, "only-positive-strand")
6464

65-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
65+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
6666

6767
records, err := fastx.GetSeqsMap(fileFasta, seq.Unlimit, config.Threads, 10, "")
6868
checkError(err)

seqkit/cmd/fish.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ Attention:
162162
seq.ValidateSeq = true
163163
}
164164

165-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
165+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
166166

167167
var alns []*AlignedSeq
168168
if len(files) == 0 {

seqkit/cmd/fq2fa.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ var fq2faCmd = &cobra.Command{
4949
seq.ValidateSeq = false
5050
runtime.GOMAXPROCS(config.Threads)
5151

52-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
52+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
5353

5454
outfh, err := xopen.Wopen(outFile)
5555
checkError(err)

seqkit/cmd/fx2tab.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Attention:
6060
seq.ValidateSeq = false
6161
runtime.GOMAXPROCS(config.Threads)
6262

63-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
63+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
6464

6565
onlyID := getFlagBool(cmd, "only-id")
6666
printLength := getFlagBool(cmd, "length")

seqkit/cmd/grep.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ Examples:
9898

9999
bwt.CheckEndSymbol = false
100100

101-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
101+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
102102

103103
justCount := getFlagBool(cmd, "count")
104104
pattern := getFlagStringSlice(cmd, "pattern")

seqkit/cmd/head-genome.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Attention:
7070

7171
minWords := getFlagPositiveInt(cmd, "mini-common-words")
7272

73-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
73+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7474

7575
outfh, err := xopen.Wopen(outFile)
7676
checkError(err)

seqkit/cmd/head.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ For returning the last N records, use:
5454

5555
number := getFlagPositiveInt(cmd, "number")
5656

57-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
57+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
5858

5959
outfh, err := xopen.Wopen(outFile)
6060
checkError(err)

seqkit/cmd/helper.go

+2
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ type Config struct {
245245
AlphabetGuessSeqLength int
246246
ValidateSeqLength int
247247
CompressionLevel int
248+
SkipFileCheck bool
248249
}
249250

250251
func getConfigs(cmd *cobra.Command) Config {
@@ -294,6 +295,7 @@ func getConfigs(cmd *cobra.Command) Config {
294295
Quiet: getFlagBool(cmd, "quiet"),
295296
AlphabetGuessSeqLength: getFlagAlphabetGuessSeqLength(cmd, "alphabet-guess-seq-length"),
296297
CompressionLevel: level,
298+
SkipFileCheck: getFlagBool(cmd, "skip-file-check"),
297299
}
298300

299301
}

seqkit/cmd/locate.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ Attention:
7777

7878
bwt.CheckEndSymbol = false
7979

80-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
80+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
8181

8282
pattern := getFlagStringSlice(cmd, "pattern")
8383
patternFile := getFlagString(cmd, "pattern-file")

seqkit/cmd/merge-slides.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ Output (BED3 format):
9494
reQuery, err := regexp.Compile(reQueryStr)
9595
checkError(err)
9696

97-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
97+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
9898

9999
if !config.Quiet {
100100
if len(files) == 1 && isStdin(files[0]) {

seqkit/cmd/mutate.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Examples:
7070
runtime.GOMAXPROCS(config.Threads)
7171
quiet := config.Quiet
7272

73-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
73+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7474
var err error
7575

7676
mPoints := []_mutatePoint{}

seqkit/cmd/range.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ Examples:
9090
if start > 0 && end < 0 && end != -1 {
9191
checkError(fmt.Errorf("not supported range: %d:%d, the end needs to be -1 when start > 0 and end < 0", start, end))
9292
}
93-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
93+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
9494

9595
outfh, err := xopen.Wopen(outFile)
9696
checkError(err)

seqkit/cmd/rename.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ Example:
7272
seq.ValidateSeq = false
7373
runtime.GOMAXPROCS(config.Threads)
7474

75-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
75+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7676

7777
byName := getFlagBool(cmd, "by-name")
7878
mOutputs := getFlagBool(cmd, "multiple-outfiles")

seqkit/cmd/replace.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ Filtering records to edit:
324324

325325
// -------------------
326326

327-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
327+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
328328

329329
outfh, err := xopen.Wopen(outFile)
330330
checkError(err)

seqkit/cmd/restart.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ Examples
6666
seq.ValidateSeq = false
6767
runtime.GOMAXPROCS(config.BufferSize)
6868

69-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
69+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7070

7171
newstart := getFlagInt(cmd, "new-start")
7272
if newstart == 0 {

seqkit/cmd/rmdup.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ Attention:
8181
checkError(fmt.Errorf("flag -s (--by-seq) needed when using -P (--only-positive-strand)"))
8282
}
8383

84-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
84+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
8585

8686
outfh, err := xopen.Wopen(outFile)
8787
checkError(err)

seqkit/cmd/root.go

+1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ func init() {
133133
RootCmd.PersistentFlags().BoolP("quiet", "", false, "be quiet and do not show extra information")
134134
RootCmd.PersistentFlags().IntP("alphabet-guess-seq-length", "", 10000, "length of sequence prefix of the first FASTA record based on which seqkit guesses the sequence type (0 for whole seq)")
135135
RootCmd.PersistentFlags().StringP("infile-list", "X", "", "file of input files list (one file per line), if given, they are appended to files from cli arguments")
136+
RootCmd.PersistentFlags().BoolP("skip-file-check", "", false, `skip input file checking when given a file list if you believe these files do exist`)
136137
RootCmd.PersistentFlags().IntP("compress-level", "", -1, `compression level for gzip, zstd, xz and bzip2. type "seqkit -h" for the range and default value for each format`)
137138

138139
RootCmd.CompletionOptions.DisableDefaultCmd = true

seqkit/cmd/sample.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Attention:
6060
seq.ValidateSeq = false
6161
runtime.GOMAXPROCS(config.Threads)
6262

63-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
63+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
6464

6565
seed := getFlagInt64(cmd, "rand-seed")
6666
twoPass := getFlagBool(cmd, "two-pass")

seqkit/cmd/sana.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ Sana currently supports this FASTQ dialect:
8787
allowGaps := getFlagBool(cmd, "allow-gaps")
8888
runtime.GOMAXPROCS(config.Threads)
8989

90-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
90+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
9191

9292
outfh, err := xopen.Wopen(outFile)
9393
checkError(err)

seqkit/cmd/seq.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ var seqCmd = &cobra.Command{
131131
checkError(fmt.Errorf("could not give both flags -l (--lower-case) and -u (--upper-case)"))
132132
}
133133

134-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
134+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
135135

136136
var seqCol *SeqColorizer
137137
if color {

seqkit/cmd/shuffle.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ Attention:
7171
fai.MapWholeFile = false
7272
runtime.GOMAXPROCS(config.Threads)
7373

74-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
74+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7575

7676
seed := getFlagInt64(cmd, "rand-seed")
7777
twoPass := getFlagBool(cmd, "two-pass")

seqkit/cmd/sliding.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ var slidingCmd = &cobra.Command{
5050
seq.ValidateSeq = false
5151
runtime.GOMAXPROCS(config.BufferSize)
5252

53-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
53+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
5454

5555
greedy := getFlagBool(cmd, "greedy")
5656
circular := getFlagBool(cmd, "circular-genome") || getFlagBool(cmd, "circular")

seqkit/cmd/sort.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Attention:
7474
fai.MapWholeFile = false
7575
runtime.GOMAXPROCS(config.Threads)
7676

77-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
77+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
7878

7979
inNaturalOrder := getFlagBool(cmd, "natural-order")
8080
bySeq := getFlagBool(cmd, "by-seq")

seqkit/cmd/split.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ Examples:
8787
fai.MapWholeFile = false
8888
runtime.GOMAXPROCS(config.Threads)
8989

90-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
90+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
9191

9292
if len(files) > 1 {
9393
checkError(fmt.Errorf("no more than one file should be given"))

seqkit/cmd/split2.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ If you want to cut a sequence into multiple segments.
8989
fai.MapWholeFile = false
9090
runtime.GOMAXPROCS(config.Threads)
9191

92-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
92+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
9393

9494
if len(files) > 1 {
9595
checkError(fmt.Errorf("no more than one file should be given"))

seqkit/cmd/stat.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ Tips:
136136
}
137137
}
138138

139-
files := getFileListFromArgsAndFile(cmd, args, !skipFileCheck, "infile-list", !skipFileCheck)
139+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !(skipFileCheck || config.SkipFileCheck))
140140

141141
style := &stable.TableStyle{
142142
Name: "plain",

seqkit/cmd/subseq.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ Examples:
7777
Threads = config.Threads
7878
runtime.GOMAXPROCS(config.Threads)
7979

80-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
80+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
8181
chrs := getFlagStringSlice(cmd, "chr")
8282
chrs2 := make([]string, len(chrs))
8383
for i, chr := range chrs {

seqkit/cmd/sum.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ Examples:
109109
rna2dna := getFlagBool(cmd, "rna2dna")
110110
singleStrand := getFlagBool(cmd, "single-strand")
111111

112-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
112+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
113113

114114
// process bar
115115
var pbs *mpb.Progress

seqkit/cmd/tab2fx.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ var tab2faCmd = &cobra.Command{
4646
outFile := config.OutFile
4747
runtime.GOMAXPROCS(config.Threads)
4848

49-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
49+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
5050

5151
commentPrefixes := getFlagStringSlice(cmd, "comment-line-prefix")
5252
bufferSizeS := getFlagString(cmd, "buffer-size")

seqkit/cmd/translate.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ Translate Tables/Genetic Codes:
162162
return
163163
}
164164

165-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
165+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
166166

167167
var record *fastx.Record
168168
var _seq *seq.Seq

seqkit/cmd/watch.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ var watchCmd = &cobra.Command{
154154
seq.ValidateSeq = true
155155
}
156156

157-
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", true)
157+
files := getFileListFromArgsAndFile(cmd, args, true, "infile-list", !config.SkipFileCheck)
158158

159159
outfh, err := xopen.Wopen(outFile)
160160
checkError(err)

0 commit comments

Comments
 (0)