@@ -124,6 +124,8 @@ For example: "\w" will be wrongly converted to "\[AT]".
124124
125125 outfh .WriteString ("seqID\t patternName\t pattern\t strand\t start\t end\t matched\n " )
126126 var seqRP * seq.Seq
127+ var offset , l int
128+ var loc []int
127129 for _ , file := range files {
128130
129131 fastxReader , err := fastx .NewReader (alphabet , file , idRegexp )
@@ -138,43 +140,56 @@ For example: "\w" will be wrongly converted to "\[AT]".
138140 break
139141 }
140142
143+ l = len (record .Seq .Seq )
141144 if ! onlyPositiveStrand {
142145 seqRP = record .Seq .RevCom ()
143146 }
144147 for pName , re := range regexps {
145- found := re .FindAllSubmatchIndex (record .Seq .Seq , - 1 )
146- if len (found ) > 0 {
147- for _ , loc := range found {
148- outfh .WriteString (fmt .Sprintf ("%s\t %s\t %s\t %s\t %d\t %d\t %s\n " ,
149- record .ID ,
150- pName ,
151- patterns [pName ],
152- "+" ,
153- loc [0 ]+ 1 ,
154- loc [1 ],
155- record .Seq .Seq [loc [0 ]:loc [1 ]]))
148+ offset = 0
149+ for {
150+ loc = re .FindSubmatchIndex (record .Seq .Seq [offset :])
151+ if loc == nil {
152+ break
153+ }
154+ outfh .WriteString (fmt .Sprintf ("%s\t %s\t %s\t %s\t %d\t %d\t %s\n " ,
155+ record .ID ,
156+ pName ,
157+ patterns [pName ],
158+ "+" ,
159+ offset + loc [0 ]+ 1 ,
160+ offset + loc [1 ],
161+ record .Seq .Seq [offset + loc [0 ]:offset + loc [1 ]]))
162+
163+ offset = offset + loc [0 ] + 1
164+ if offset >= len (record .Seq .Seq ) {
165+ break
156166 }
157167 }
158168
159169 if onlyPositiveStrand {
160170 continue
161171 }
162- found = re . FindAllSubmatchIndex ( seqRP . Seq , - 1 )
163- if len ( found ) > 0 {
164- l := len ( seqRP . Seq )
165- tlocs := make ([][] int , len ( found ) )
166- for i , loc := range found {
167- tlocs [ i ] = [] int { l - loc [ 1 ], l - loc [ 0 ]}
172+
173+ offset = 0
174+ for {
175+ loc = re . FindSubmatchIndex ( seqRP . Seq [ offset :] )
176+ if loc == nil {
177+ break
168178 }
169- for _ , loc := range tlocs {
179+ if len ( loc ) > 0 {
170180 outfh .WriteString (fmt .Sprintf ("%s\t %s\t %s\t %s\t %d\t %d\t %s\n " ,
171181 record .ID ,
172182 pName ,
173183 patterns [pName ],
174184 "-" ,
175- loc [0 ]+ 1 ,
176- loc [1 ],
177- record .Seq .SubSeq (loc [0 ]+ 1 , loc [1 ]).RevCom ().Seq ))
185+ l - offset - loc [1 ]+ 1 ,
186+ l - offset - loc [0 ],
187+ record .Seq .SubSeq (l - offset - loc [1 ]+ 1 , l - offset - loc [0 ]).RevCom ().Seq ))
188+ }
189+
190+ offset = offset + loc [0 ] + 1
191+ if offset >= len (record .Seq .Seq ) {
192+ break
178193 }
179194 }
180195 }
0 commit comments