Skip to content

Commit c2e8464

Browse files
committed
[escaping] process concat
1 parent 78fdd88 commit c2e8464

File tree

4 files changed

+141
-49
lines changed

4 files changed

+141
-49
lines changed

driver/escaping/escape_sequences.cpp

Lines changed: 86 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ using namespace std;
55

66
namespace {
77

8-
string convertFunctionByType(const std::string& typeName) {
8+
string processEscapeSequencesImpl(const StringView seq, Lexer& lex);
9+
10+
string convertFunctionByType(const StringView& typeName) {
911
if (typeName == "SQL_BIGINT") {
1012
return "toInt64";
1113
}
@@ -16,29 +18,53 @@ string convertFunctionByType(const std::string& typeName) {
1618
}
1719

1820
string processFunction(const StringView seq, Lexer& lex) {
19-
if (!lex.Match(Token::CONVERT)) {
20-
return seq.to_string();
21-
}
22-
if (!lex.Match(Token::LPARENT)) {
23-
return seq.to_string();
24-
}
21+
const Token fn(lex.Consume());
2522

26-
Token num = lex.Consume();
27-
if (num.type != Token::NUMBER) {
28-
return seq.to_string();
29-
}
30-
if (!lex.Match(Token::COMMA)) {
31-
return seq.to_string();
32-
}
33-
Token type = lex.Consume();
34-
if (type.type != Token::IDENT) {
35-
return seq.to_string();
36-
}
23+
if (fn.type == Token::CONVERT) {
24+
if (!lex.Match(Token::LPARENT)) {
25+
return seq.to_string();
26+
}
27+
28+
Token num = lex.Consume();
29+
if (num.type != Token::NUMBER) {
30+
return seq.to_string();
31+
}
32+
if (!lex.Match(Token::COMMA)) {
33+
return seq.to_string();
34+
}
35+
Token type = lex.Consume();
36+
if (type.type != Token::IDENT) {
37+
return seq.to_string();
38+
}
3739

38-
string func = convertFunctionByType(type.literal.to_string());
40+
string func = convertFunctionByType(type.literal.to_string());
3941

40-
if (!func.empty()) {
41-
return func + "(" + num.literal.to_string() + ")";
42+
if (!func.empty()) {
43+
if (!lex.Match(Token::RPARENT)) {
44+
return seq.to_string();
45+
}
46+
return func + "(" + num.literal.to_string() + ")";
47+
}
48+
49+
} else if (fn.type == Token::CONCAT) {
50+
string result = "concat";
51+
52+
while (true) {
53+
const Token tok(lex.Peek());
54+
55+
if (tok.type == Token::RCURLY) {
56+
break;
57+
} else if (tok.type == Token::LCURLY) {
58+
result += processEscapeSequencesImpl(seq, lex);
59+
} else if (tok.type == Token::EOS || tok.type == Token::INVALID) {
60+
break;
61+
} else {
62+
result += tok.literal.to_string();
63+
lex.Consume();
64+
}
65+
}
66+
67+
return result;
4268
}
4369

4470
return seq.to_string();
@@ -49,7 +75,7 @@ string processDate(const StringView seq, Lexer& lex) {
4975
if (data.isInvalid()) {
5076
return seq.to_string();
5177
} else {
52-
return string("toDate('") + data.literal.to_string() + "')";
78+
return string("toDate(") + data.literal.to_string() + ")";
5379
}
5480
}
5581

@@ -58,29 +84,47 @@ string processDateTime(const StringView seq, Lexer& lex) {
5884
if (data.isInvalid()) {
5985
return seq.to_string();
6086
} else {
61-
return string("toDateTime('") + data.literal.to_string() + "')";
87+
return string("toDateTime(") + data.literal.to_string() + ")";
6288
}
6389
}
6490

65-
string processEscapeSequences(const StringView seq) {
66-
Lexer lex(seq);
91+
string processEscapeSequencesImpl(const StringView seq, Lexer& lex) {
92+
string result;
6793

68-
Token cmd = lex.Consume();
69-
switch (cmd.type) {
70-
case Token::FN:
71-
return processFunction(seq, lex);
72-
case Token::D:
73-
return processDate(seq, lex);
74-
case Token::TS:
75-
return processDateTime(seq, lex);
76-
77-
// Unimplemented
78-
case Token::T:
79-
default:
80-
break;
94+
if (!lex.Match(Token::LCURLY)) {
95+
return seq.to_string();
8196
}
8297

83-
return seq.to_string();
98+
while (true) {
99+
const Token tok(lex.Consume());
100+
101+
switch (tok.type) {
102+
case Token::FN:
103+
result += processFunction(seq, lex);
104+
break;
105+
106+
case Token::D:
107+
result += processDate(seq, lex);
108+
break;
109+
case Token::TS:
110+
result += processDateTime(seq, lex);
111+
break;
112+
113+
// End of escape sequence
114+
case Token::RCURLY:
115+
return result;
116+
117+
// Unimplemented
118+
case Token::T:
119+
default:
120+
return seq.to_string();
121+
}
122+
};
123+
}
124+
125+
string processEscapeSequences(const StringView seq) {
126+
Lexer lex(seq);
127+
return processEscapeSequencesImpl(seq, lex);
84128
}
85129

86130
} // namespace
@@ -95,12 +139,12 @@ std::string replaceEscapeSequences(const std::string & query)
95139

96140
while (p != end) {
97141
switch (*p) {
98-
case '{': // TODO {fn
142+
case '{':
99143
if (level == 0) {
100144
if (st < p) {
101145
ret += std::string(st, p);
102146
}
103-
st = p + 1;
147+
st = p;
104148
}
105149
level++;
106150
break;
@@ -111,7 +155,7 @@ std::string replaceEscapeSequences(const std::string & query)
111155
return query;
112156
}
113157
if (--level == 0) {
114-
ret += processEscapeSequences(StringView(st, p));
158+
ret += processEscapeSequences(StringView(st, p + 1));
115159
st = p + 1;
116160
}
117161
break;

driver/escaping/lexer.cpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ static const std::unordered_map<std::string, Token::Type> KEYWORDS = {
1010
{"D", Token::D},
1111
{"T", Token::T},
1212
{"TS", Token::TS},
13+
{"CONCAT", Token::CONCAT},
1314
{"CONVERT", Token::CONVERT}
1415
};
1516

@@ -60,6 +61,14 @@ Token Lexer::Consume(Token::Type expected) {
6061
return Token{Token::INVALID, StringView()};
6162
}
6263

64+
Token Lexer::LookAhead(size_t n) {
65+
while (readed_.size() < n + 1) {
66+
readed_.push_back(NextToken());
67+
}
68+
69+
return readed_[n];
70+
}
71+
6372
bool Lexer::Match(Token::Type expected) {
6473
if (readed_.empty()) {
6574
readed_.push_back(NextToken());
@@ -84,6 +93,10 @@ Token Lexer::MakeToken(const Token::Type type, size_t len) {
8493
return token;
8594
}
8695

96+
Token Lexer::Peek() {
97+
return LookAhead(0);
98+
}
99+
87100
Token Lexer::NextToken() {
88101
for (; cur_ < end_; ++cur_) {
89102
switch (*cur_) {
@@ -111,18 +124,16 @@ Token Lexer::NextToken() {
111124
return MakeToken(Token::COMMA, 1);
112125

113126
case '\'': {
114-
const char* st = ++cur_;
127+
const char* st = cur_;
115128
bool has_slash = false;
116129

117-
for (; cur_ < end_; ++cur_) {
130+
for (++cur_; cur_ < end_; ++cur_) {
118131
if (*cur_ == '\\' && !has_slash) {
119132
has_slash = true;
120133
continue;
121134
}
122135
if (*cur_ == '\'' && !has_slash) {
123-
return Token{
124-
Token::STRING,
125-
StringView(st, ++cur_ - st - 1)};
136+
return Token{Token::STRING, StringView(st, ++cur_)};
126137
}
127138

128139
has_slash = false;
@@ -134,6 +145,20 @@ Token Lexer::NextToken() {
134145
default: {
135146
const char* st = cur_;
136147

148+
if (*cur_ == '`') {
149+
for (++cur_; cur_ < end_; ++cur_) {
150+
if (*cur_ == '`') {
151+
return Token{Token::IDENT, StringView(st, ++cur_)};
152+
}
153+
if (!isalpha(*cur_) && !isdigit(*cur_) && *cur_ != '_')
154+
{
155+
return Token{Token::INVALID, StringView(st, cur_)};
156+
}
157+
}
158+
159+
break;
160+
}
161+
137162
if (isalpha(*cur_) || *cur_ == '_') {
138163
for (++cur_; cur_ < end_; ++cur_) {
139164
if (!isalpha(*cur_) && !isdigit(*cur_) && *cur_ != '_')

driver/escaping/lexer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ struct Token {
2020
D,
2121
T,
2222
TS,
23+
CONCAT,
2324
CONVERT,
2425

2526
// Delimiters
@@ -57,10 +58,16 @@ class Lexer {
5758
/// Returns next token if its type is equal to expected or error otherwise.
5859
Token Consume(Token::Type expected);
5960

61+
/// Returns the token n positions ahead without consuming it.
62+
Token LookAhead(size_t n);
63+
6064
/// Checks whether type of next token is equal to expected.
6165
/// Skips token if true.
6266
bool Match(Token::Type expected);
6367

68+
/// Returns the next token without consuming it.
69+
Token Peek();
70+
6471
private:
6572
/// Makes token of length len againts current position.
6673
Token MakeToken(const Token::Type type, size_t len);  // Makes token of length len against current position.

driver/ut/escape_sequences_ut.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,23 @@ TEST(EscapeSequencesCase, ParseConvert) {
99
);
1010
}
1111

12+
TEST(EscapeSequencesCase, ParseConcat) {
13+
ASSERT_EQ(
14+
replaceEscapeSequences("SELECT {fn CONCAT('a', 'b')}"),
15+
"SELECT concat('a','b')"
16+
);
17+
18+
ASSERT_EQ(
19+
replaceEscapeSequences("SELECT {fn CONCAT(`table`.`field1`, `table`.`field1`)}"),
20+
"SELECT concat(`table`.`field1`,`table`.`field1`)"
21+
);
22+
23+
ASSERT_EQ(
24+
replaceEscapeSequences("SELECT {fn CONCAT({fn CONCAT(`table`.`field1`, '.')}, `table`.`field1`)}"),
25+
"SELECT concat(concat(`table`.`field1`,'.'),`table`.`field1`)"
26+
);
27+
}
28+
1229
TEST(EscapeSequencesCase, DateTime) {
1330
ASSERT_EQ(
1431
replaceEscapeSequences("SELECT {d '2017-01-01'}"),
@@ -21,10 +38,9 @@ TEST(EscapeSequencesCase, DateTime) {
2138
);
2239
}
2340

24-
2541
TEST(LexerCase, ParseString) {
2642
Token tok = Lexer("'2017-01-01'").Consume();
2743

2844
ASSERT_EQ(tok.type, Token::STRING);
29-
ASSERT_EQ(tok.literal, "2017-01-01");
45+
ASSERT_EQ(tok.literal, "'2017-01-01'");
3046
}

0 commit comments

Comments
 (0)