Skip to content

Commit 1c961c6

Browse files
committed
SQL: allow identifier with placeholder such as '${a_1}'
Close #3169. Some dialects support shell-like variable substitution; HiveQL is one such dialect. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution With this change, the SQL parser accepts '${var}' as part of an identifier. TODO: `var` itself could be extracted as a reference tag. Signed-off-by: Masatake YAMATO <[email protected]>
1 parent c31d572 commit 1c961c6

File tree

4 files changed

+116
-8
lines changed

4 files changed

+116
-8
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--sort=no
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
tb_name${dt} input.sql /^create table database.tb_name${dt} as$/;" t
2+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}
3+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}
4+
tb_name${dt}${dt0} input.sql /^create table database.tb_name${dt}${dt0} as$/;" t
5+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0}
6+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:tb_name${dt}${dt0}
7+
${dt1}tb_name${dt}${dt0} input.sql /^create table database.${dt1}tb_name${dt}${dt0} as$/;" t
8+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}
9+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}
10+
${dt1}tb_name${dt}${dt0}Z input.sql /^create table database.${dt1}tb_name${dt}${dt0}Z as$/;" t
11+
col_a input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z
12+
col_b input.sql /^select col_a, col_b from database.tb_name;$/;" E table:${dt1}tb_name${dt}${dt0}Z
13+
tb_${dt2}_name input.sql /^create table database.tb_${dt2}_name as$/;" t
14+
col_${key0} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name
15+
col_${key1} input.sql /^select col_${key0}, col_${key1} from database.tb_name;$/;" E table:tb_${dt2}_name
16+
tb_${${d}${t:h}${i}}_name input.sql /^create table database.tb_${${d}${t:h}${i}}_name as$/;" t
17+
col_${key${n}${m}a} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name
18+
col_${key${m}${n}b} input.sql /^select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;$/;" E table:tb_${${d}${t:h}${i}}_name
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Based on issue #3169 opened by @Appalled
2+
3+
create table database.tb_name${dt} as
4+
select col_a, col_b from database.tb_name;
5+
6+
create table database.tb_name${dt}${dt0} as
7+
select col_a, col_b from database.tb_name;
8+
9+
create table database.${dt1}tb_name${dt}${dt0} as
10+
select col_a, col_b from database.tb_name;
11+
12+
create table database.${dt1}tb_name${dt}${dt0}Z as
13+
select col_a, col_b from database.tb_name;
14+
15+
create table database.tb_${dt2}_name as
16+
select col_${key0}, col_${key1} from database.tb_name;
17+
18+
create table database.tb_${${d}${t:h}${i}}_name as
19+
select col_${key${n}${m}a}, col_${key${m}${n}b} from database.tb_name;

parsers/sql.c

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -684,16 +684,66 @@ static void parseString (vString *const string, const int delimiter, int *promis
684684
}
685685
}
686686

687+
/* Parsing ${foo}.
688+
*
689+
* HiveQL is one of the implementations that has the variable substitution feature.
690+
* https://cwiki.apache.org/confluence/display/Hive/LanguageManual+VariableSubstitution
691+
*/
692+
static int parseVarSubstSequence (vString *const string, const int firstChar);
693+
/* Consume a single "${...}" variable substitution and append its text,
 * including the "${" and "}" delimiters, to STRING.
 *
 * FIRSTCHAR must be '$' (asserted); it has already been read by the caller.
 *
 * Return value: the first character read from the input that does not
 * belong to the substitution:
 *   - if '$' is not followed by '{', only '$' is appended and the character
 *     that followed it is returned (it is NOT appended);
 *   - if the closing '}' is found, the character after '}' is returned;
 *   - if the input ends before a closing '}', EOF is returned.
 */
static int parseVarSubst (vString *const string, const int firstChar)
{
	int ch;

	Assert (firstChar == '$');
	vStringPut (string, firstChar);

	ch = getcFromInputFile ();
	if (ch != '{')
		return ch;
	vStringPut (string, ch);

	for (;;)
	{
		ch = getcFromInputFile ();
		if (ch == EOF)
			break;

		switch (ch)
		{
		case '}':
			/* End of this substitution: hand back the look-ahead char. */
			vStringPut (string, ch);
			return getcFromInputFile ();
		case '$':
			/* Nested substitution(s) such as "${a${b}}". The character
			 * that terminated the nested run is pushed back so that the
			 * next iteration of this loop examines it. */
			ungetcToInputFile (parseVarSubstSequence (string, ch));
			break;
		default:
			vStringPut (string, ch);
			break;
		}
	}

	return ch;
}
723+
724+
/* Consume a run of back-to-back variable substitutions such as
 * "${a}${b}${c}", appending their text to STRING.
 *
 * FIRSTCHAR is the '$' that the caller has already read; parseVarSubst()
 * asserts that the character it is given is '$'.
 *
 * Return value: the first character following the run that is not '$'
 * (this may be EOF).  The returned character has been read from the input
 * but has not been appended to STRING.
 */
static int parseVarSubstSequence (vString *const string, const int firstChar)
{
	/* Seed the loop with the caller-supplied '$'.  Leaving this variable
	 * uninitialized would pass an indeterminate value to parseVarSubst(). */
	int c = firstChar;

	do
		c = parseVarSubst (string, c);
	while (c == '$');

	return c;
}
732+
687733
/* Reads an identifier beginning with "firstChar" and places it into "string".
688734
*/
689735
static void parseIdentifier (vString *const string, const int firstChar)
690736
{
691737
int c = firstChar;
692-
Assert (isIdentChar1 (c));
738+
Assert (vStringLength (string) > 0 || isIdentChar1 (c));
693739
do
694740
{
695741
vStringPut (string, c);
696742
c = getcFromInputFile ();
743+
744+
/* Handle ${var} in HiveQL. */
745+
if (c == '$')
746+
c = parseVarSubstSequence (string, c);
697747
} while (isIdentChar (c));
698748
if (!isspace (c))
699749
ungetcToInputFile (c); /* unget non-identifier character */
@@ -937,15 +987,23 @@ static void readToken (tokenInfo *const token)
937987
}
938988

939989
case '$':
940-
token->type = parseDollarQuote (token->string, c, &token->promise);
941-
token->lineNumber = getInputLineNumber ();
942-
token->filePosition = getInputFilePosition ();
943-
break;
990+
{
991+
int c0 = getcFromInputFile ();
992+
ungetcToInputFile (c0);
993+
if (c0 != '{')
994+
{
995+
token->type = parseDollarQuote (token->string, c, &token->promise);
996+
token->lineNumber = getInputLineNumber ();
997+
token->filePosition = getInputFilePosition ();
998+
break;
999+
}
1000+
c = parseVarSubstSequence (token->string, c);
1001+
/* FALL THROUGH */
1002+
}
9441003

9451004
default:
946-
if (! isIdentChar1 (c))
947-
token->type = TOKEN_UNDEFINED;
948-
else
1005+
if ( isIdentChar1 (c)
1006+
|| (vStringLength (token->string) > 0 && isIdentChar (c)))
9491007
{
9501008
parseIdentifier (token->string, c);
9511009
token->lineNumber = getInputLineNumber ();
@@ -962,6 +1020,18 @@ static void readToken (tokenInfo *const token)
9621020
else
9631021
token->type = TOKEN_KEYWORD;
9641022
}
1023+
else if (vStringLength (token->string) > 0)
1024+
{
1025+
ungetcToInputFile (c);
1026+
1027+
/* token->string may be ${var}.
1028+
* We regard ${var} as an identifier. */
1029+
token->type = TOKEN_IDENTIFIER;
1030+
token->lineNumber = getInputLineNumber ();
1031+
token->filePosition = getInputFilePosition ();
1032+
}
1033+
else
1034+
token->type = TOKEN_UNDEFINED;
9651035
break;
9661036
}
9671037
}

0 commit comments

Comments
 (0)