Skip to content

Commit 052ad4a

Browse files
Fix: parsing ident starting with underscore in certain dialects (#1835)
1 parent 2182f7e commit 052ad4a

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed

src/tokenizer.rs

+52
Original file line numberDiff line numberDiff line change
@@ -1191,6 +1191,22 @@ impl<'a> Tokenizer<'a> {
11911191
}
11921192
// numbers and period
11931193
'0'..='9' | '.' => {
1194+
// Special case: when `._` follows a word, that word is a table name
1195+
// and the `_` starts the column name (e.g. `table._col`).
1196+
// If the previous token is not a word, the input is neither a valid
1197+
// SQL word nor a valid number, so tokenizing fails.
1198+
if ch == '.' && chars.peekable.clone().nth(1) == Some('_') {
1199+
if let Some(Token::Word(_)) = prev_token {
1200+
chars.next();
1201+
return Ok(Some(Token::Period));
1202+
}
1203+
1204+
return self.tokenizer_error(
1205+
chars.location(),
1206+
"Unexpected character '_'".to_string(),
1207+
);
1208+
}
1209+
11941210
// Some dialects support underscore as number separator
11951211
// There can only be one at a time and it must be followed by another digit
11961212
let is_number_separator = |ch: char, next_char: Option<char>| {
@@ -4018,4 +4034,40 @@ mod tests {
40184034
],
40194035
);
40204036
}
4037+
4038+
#[test]
fn tokenize_period_underscore() {
    // PostgreSQL is a dialect that supports underscores in numeric literals.
    let dialect = PostgreSqlDialect {};

    // `._` directly after a word: expect the word, a period, and then an
    // identifier that begins with the underscore.
    let qualified_column = "SELECT table._col";
    let actual = Tokenizer::new(&dialect, qualified_column)
        .tokenize()
        .unwrap();

    let table_word = Token::Word(Word {
        value: "table".to_string(),
        quote_style: None,
        keyword: Keyword::TABLE,
    });
    let column_word = Token::Word(Word {
        value: "_col".to_string(),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });

    compare(
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            table_word,
            Token::Period,
            column_word,
        ],
        actual,
    );

    // `._` with no word immediately before it (here it follows whitespace)
    // must make the tokenizer return an error.
    for sql in ["SELECT ._123", "SELECT ._abc"] {
        if let Ok(tokens) = Tokenizer::new(&dialect, sql).tokenize() {
            panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
        }
    }
}
40214073
}

0 commit comments

Comments
 (0)