Skip to content

Commit 00aa28b

Browse files
committed
better .dquote()
1 parent d621c2e commit 00aa28b

File tree

3 files changed

+31
-81
lines changed

3 files changed

+31
-81
lines changed

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ useDynLib(SeqArray,
1212
SEQ_Summary, SEQ_System,
1313
SEQ_VCF_NumLines, SEQ_VCF_Split, SEQ_VCF_Parse,
1414
SEQ_ToVCF_Init, SEQ_ToVCF_Done,
15-
SEQ_Quote, SEQ_GetData, SEQ_ListVarData,
15+
SEQ_GetData, SEQ_ListVarData,
1616
SEQ_Apply_Variant, SEQ_Apply_Sample, SEQ_BApply_Variant,
1717
SEQ_ConvBED2GDS,
1818
SEQ_SelectFlag, SEQ_ResetChrom,

R/Conversion.R

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@
1010
# Create a bgzf file
1111
# Hand `fn` to the native routine SEQ_bgzip_create through .Call() and
# return whatever that routine returns.
.bgzf_create <- function(fn)
{
    .Call(SEQ_bgzip_create, fn)
}
1212

13+
# Quote a value for a VCF meta-information (header) line if it is needed.
#
# s: the value to write. Only the first element is used; a zero-length
#    input (including NULL) gives "".
#
# Returns a single string. If the value contains a comma, semicolon,
# double quote, single quote or space, every backslash, double quote and
# single quote in it is prefixed with a backslash and the result is wrapped
# in double quotes. Otherwise the value is returned as a character string
# without modification.
.dquote <- function(s)
{
    if (length(s) == 0L) return("")
    s <- as.character(s[1L])

    # grepl() gives FALSE for NA, so a missing value is passed through as NA
    if (!grepl("[,;\"' ]", s)) return(s)

    # The backslash has to be escaped first: doing it after the quotes would
    # double the backslashes that were just inserted in front of them.
    # Without this step a value ending in a backslash would escape the
    # closing double quote and leave the field unterminated.
    for (ch in c("\\", "\"", "'"))
        s <- gsub(ch, paste0("\\", ch), s, fixed=TRUE)

    paste0("\"", s, "\"")
}
24+
1325

1426
#######################################################################
1527
# Convert a SeqArray GDS file to a VCF file
@@ -72,10 +84,6 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
7284
}
7385

7486

75-
## double quote text if needed
76-
dq <- function(s, text=FALSE) .Call(SEQ_Quote, s, text)
77-
78-
7987
######################################################
8088
# create an output text file
8189

@@ -168,17 +176,17 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
168176

169177
# reference
170178
if (length(z$reference) > 0L)
171-
txt <- c(txt, paste0("##reference=", z$reference[1L]))
179+
txt <- c(txt, paste0("##reference=", .dquote(z$reference)))
172180

173181
# assembly
174182
if (!is.null(a$vcf.assembly))
175-
txt <- c(txt, paste0("##assembly=", dq(a$vcf.assembly)))
183+
txt <- c(txt, paste0("##assembly=", .dquote(a$vcf.assembly)))
176184

177185
# ALT=<ID=type,Description=description>
178186
for (i in seq_len(nrow(z$allele)))
179187
{
180188
s <- sprintf("##ALT=<ID=%s,Description=%s>",
181-
as.character(z$allele[i,1L]), dq(z$allele[i,2L]))
189+
as.character(z$allele[i,1L]), .dquote(z$allele[i,2L]))
182190
txt <- c(txt, s)
183191
}
184192

@@ -192,7 +200,7 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
192200
{
193201
s <- NULL
194202
for (j in seq_len(ncol(dat)))
195-
s[j] <- paste(nm[j], "=", dq(dat[i,j]), sep="")
203+
s[j] <- paste(nm[j], "=", .dquote(dat[i,j]), sep="")
196204
s <- paste(s, collapse=",")
197205
txt <- c(txt, paste0("##contig=<", s, ">"))
198206
}
@@ -203,12 +211,18 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
203211
{
204212
a <- get.attr.gdsn(index.gdsn(gdsfile,
205213
paste("annotation/info/", nm, sep="")))
214+
if (is.null(a$Number))
215+
{
216+
}
217+
if (is.null(a$Type))
218+
{
219+
}
206220
s <- sprintf("ID=%s,Number=%s,Type=%s,Description=%s",
207-
nm, dq(a$Number), dq(a$Type), dq(a$Description, TRUE))
221+
nm, .dquote(a$Number), .dquote(a$Type), .dquote(a$Description))
208222
if (!is.null(a$Source))
209-
s <- paste(s, ",Source=", dq(a$Source, TRUE), sep="")
223+
s <- paste0(s, ",Source=", .dquote(a$Source))
210224
if (!is.null(a$Version))
211-
s <- paste(s, ",Version=", dq(a$Version, TRUE), sep="")
225+
s <- paste0(s, ",Version=", .dquote(a$Version))
212226
txt <- c(txt, paste0("##INFO=<", s, ">"))
213227
}
214228

@@ -221,22 +235,23 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
221235
for (i in seq_along(id))
222236
{
223237
txt <- c(txt, sprintf("##FILTER=<ID=%s,Description=%s>",
224-
dq(id[i]), dq(dp[i], TRUE)))
238+
.dquote(id[i]), .dquote(dp[i])))
225239
}
226240
}
227241

228242
# FORMAT field
229243
a <- get.attr.gdsn(index.gdsn(gdsfile, "genotype"))
230244
txt <- c(txt, sprintf(
231245
"##FORMAT=<ID=%s,Number=1,Type=String,Description=%s>",
232-
a$VariableName, dq(a$Description, TRUE)))
246+
a$VariableName, .dquote(a$Description)))
233247
for (nm in z$format$ID)
234248
{
235249
a <- get.attr.gdsn(index.gdsn(gdsfile,
236250
paste("annotation/format/", nm, sep="")))
251+
# ToDo
237252
txt <- c(txt, sprintf(
238253
"##FORMAT=<ID=%s,Number=%s,Type=%s,Description=%s>",
239-
nm, dq(a$Number), dq(a$Type), dq(a$Description, TRUE)))
254+
nm, .dquote(a$Number), .dquote(a$Type), .dquote(a$Description)))
240255
}
241256

242257
# others
@@ -248,7 +263,7 @@ seqGDS2VCF <- function(gdsfile, vcf.fn, info.var=NULL, fmt.var=NULL,
248263
{
249264
s <- dat[i,1L]
250265
if (!(s %in% c("fileDate", "source")))
251-
txt <- c(txt, paste0("##", s, "=", dq(dat[i,2L])))
266+
txt <- c(txt, paste0("##", s, "=", .dquote(dat[i,2L])))
252267
}
253268
}
254269

src/ConvGDS2VCF.cpp

Lines changed: 0 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -30,40 +30,6 @@ using namespace std;
3030
namespace SeqArray
3131
{
3232

33-
// double quote the text if it is needed
34-
static string QuoteText(const char *p)
35-
{
36-
string rv;
37-
38-
rv.clear();
39-
bool flag = false;
40-
for (; *p != 0; p ++)
41-
{
42-
switch (*p)
43-
{
44-
case ',': case ';':
45-
flag = true; rv.push_back(*p); break;
46-
case '\"':
47-
flag = true; rv.append("\\\""); break;
48-
case '\'':
49-
flag = true; rv.append("\\\'"); break;
50-
case ' ':
51-
flag = true; rv.push_back(' '); break;
52-
default:
53-
rv.push_back(*p);
54-
}
55-
}
56-
if (flag) // add double quote
57-
{
58-
rv.insert(0, "\"");
59-
rv.push_back('\"');
60-
}
61-
62-
return rv;
63-
}
64-
65-
66-
6733
// ========================================================================
6834

6935
/// used in SEQ_OutVCF4
@@ -551,37 +517,6 @@ using namespace SeqArray;
551517
// Convert to VCF4: GDS -> VCF4
552518
// ========================================================================
553519

554-
/// double quote text if needed
555-
COREARRAY_DLL_EXPORT SEXP SEQ_Quote(SEXP text, SEXP dQuote)
556-
{
557-
SEXP NewText, ans;
558-
PROTECT(NewText = AS_CHARACTER(text));
559-
PROTECT(ans = NEW_CHARACTER(Rf_length(NewText)));
560-
561-
for (int i=0; i < Rf_length(NewText); i++)
562-
{
563-
string tmp = QuoteText(CHAR(STRING_ELT(NewText, i)));
564-
if (LOGICAL(dQuote)[0] == TRUE)
565-
{
566-
if ((tmp[0] != '\"') || (tmp[tmp.size()-1] != '\"'))
567-
{
568-
tmp.insert(0, "\"");
569-
tmp.push_back('\"');
570-
}
571-
}
572-
SET_STRING_ELT(ans, i, Rf_mkChar(tmp.c_str()));
573-
}
574-
575-
UNPROTECT(2);
576-
return ans;
577-
}
578-
579-
580-
581-
582-
// ========================================================================
583-
584-
585520
/// initialize
586521
COREARRAY_DLL_EXPORT SEXP SEQ_ToVCF_Init(SEXP SelDim, SEXP ChrPrefix, SEXP Info,
587522
SEXP Format, SEXP File, SEXP Verbose)

0 commit comments

Comments
 (0)