# Create a bgzf file
#   fn -- handed unchanged to the native routine SEQ_bgzip_create
#         (presumably the output file name; confirm against the C code)
# The value returned by the native routine is returned as is.
.bgzf_create <- function(fn)
{
    .Call(SEQ_bgzip_create, fn)
}
1212
# Quote a string for use as a VCF header value, if it is needed
#   s -- a vector; only its first element is used, and a zero-length
#        input (e.g. NULL) gives an empty string
# Returns a single string. The value is wrapped in double quotes when it
# contains a comma, a semicolon, a single or double quote, a backslash or
# a space. Backslashes and quotes are escaped with a backslash, so that a
# quoted value can be read back unambiguously.
.dquote <- function(s)
{
    if (length(s) == 0L) return("")
    if (length(s) > 1L) s <- s[1L]
    # decide on the raw value, before any escape character is inserted;
    # the backslash is tested with a fixed pattern to stay clear of the
    # regex rules for backslashes inside a character class
    q <- grepl("[,;\"\' ]", s) || grepl("\\", s, fixed=TRUE)
    # escape the existing backslashes first, otherwise the backslashes
    # added for the quotes below would be escaped a second time
    s <- gsub("\\", "\\\\", s, fixed=TRUE)
    s <- gsub('"', '\\"', s, fixed=TRUE)
    s <- gsub("'", "\\'", s, fixed=TRUE)
    if (q) s <- paste0('"', s, '"')
    s  # output
}
24+
1325
1426# ######################################################################
1527# Convert a SeqArray GDS file to a VCF file
@@ -72,10 +84,6 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
7284 }
7385
7486
75- # # double quote text if needed
76- dq <- function (s , text = FALSE ) .Call(SEQ_Quote , s , text )
77-
78-
7987 # #####################################################
8088 # create an output text file
8189
@@ -168,17 +176,17 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
168176
169177 # reference
170178 if (length(z $ reference ) > 0L )
171- txt <- c(txt , paste0(" ##reference=" , z $ reference [ 1L ] ))
179+ txt <- c(txt , paste0(" ##reference=" , .dquote( z $ reference ) ))
172180
173181 # assembly
174182 if (! is.null(a $ vcf.assembly ))
175- txt <- c(txt , paste0(" ##assembly=" , dq (a $ vcf.assembly )))
183+ txt <- c(txt , paste0(" ##assembly=" , .dquote (a $ vcf.assembly )))
176184
177185 # ALT=<ID=type,Description=description>
178186 for (i in seq_len(nrow(z $ allele )))
179187 {
180188 s <- sprintf(" ##ALT=<ID=%s,Description=%s>" ,
181- as.character(z $ allele [i ,1L ]), dq (z $ allele [i ,2L ]))
189+ as.character(z $ allele [i ,1L ]), .dquote (z $ allele [i ,2L ]))
182190 txt <- c(txt , s )
183191 }
184192
@@ -192,7 +200,7 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
192200 {
193201 s <- NULL
194202 for (j in seq_len(ncol(dat )))
195- s [j ] <- paste(nm [j ], " =" , dq (dat [i ,j ]), sep = " " )
203+ s [j ] <- paste(nm [j ], " =" , .dquote (dat [i ,j ]), sep = " " )
196204 s <- paste(s , collapse = " ," )
197205 txt <- c(txt , paste0(" ##contig=<" , s , " >" ))
198206 }
@@ -203,12 +211,18 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
203211 {
204212 a <- get.attr.gdsn(index.gdsn(gdsfile ,
205213 paste(" annotation/info/" , nm , sep = " " )))
214+ if (is.null(a $ Number ))
215+ {
216+ }
217+ if (is.null(a $ Type ))
218+ {
219+ }
206220 s <- sprintf(" ID=%s,Number=%s,Type=%s,Description=%s" ,
207- nm , dq (a $ Number ), dq (a $ Type ), dq (a $ Description , TRUE ))
221+ nm , .dquote (a $ Number ), .dquote (a $ Type ), .dquote (a $ Description ))
208222 if (! is.null(a $ Source ))
209- s <- paste (s , " ,Source=" , dq (a $ Source , TRUE ), sep = " " )
223+ s <- paste0 (s , " ,Source=" , .dquote (a $ Source ) )
210224 if (! is.null(a $ Version ))
211- s <- paste (s , " ,Version=" , dq (a $ Version , TRUE ), sep = " " )
225+ s <- paste0 (s , " ,Version=" , .dquote (a $ Version ) )
212226 txt <- c(txt , paste0(" ##INFO=<" , s , " >" ))
213227 }
214228
@@ -221,22 +235,23 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
221235 for (i in seq_along(id ))
222236 {
223237 txt <- c(txt , sprintf(" ##FILTER=<ID=%s,Description=%s>" ,
224- dq (id [i ]), dq (dp [i ], TRUE )))
238+ .dquote (id [i ]), .dquote (dp [i ])))
225239 }
226240 }
227241
228242 # FORMAT field
229243 a <- get.attr.gdsn(index.gdsn(gdsfile , " genotype" ))
230244 txt <- c(txt , sprintf(
231245 " ##FORMAT=<ID=%s,Number=1,Type=String,Description=%s>" ,
232- a $ VariableName , dq (a $ Description , TRUE )))
246+ a $ VariableName , .dquote (a $ Description )))
233247 for (nm in z $ format $ ID )
234248 {
235249 a <- get.attr.gdsn(index.gdsn(gdsfile ,
236250 paste(" annotation/format/" , nm , sep = " " )))
251+ # ToDo
237252 txt <- c(txt , sprintf(
238253 " ##FORMAT=<ID=%s,Number=%s,Type=%s,Description=%s>" ,
239- nm , dq (a $ Number ), dq (a $ Type ), dq (a $ Description , TRUE )))
254+ nm , .dquote (a $ Number ), .dquote (a $ Type ), .dquote (a $ Description )))
240255 }
241256
242257 # others
@@ -248,7 +263,7 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
248263 {
249264 s <- dat [i ,1L ]
250265 if (! (s %in% c(" fileDate" , " source" )))
251- txt <- c(txt , paste0(" ##" , s , " =" , dq (dat [i ,2L ])))
266+ txt <- c(txt , paste0(" ##" , s , " =" , .dquote (dat [i ,2L ])))
252267 }
253268 }
254269
0 commit comments