From 311deaa5123876d144196e52172d971c0959461a Mon Sep 17 00:00:00 2001 From: lovasoa Date: Wed, 7 May 2025 01:02:09 +0200 Subject: [PATCH 1/3] Add support for MATCH and REGEXP see https://www.sqlite.org/lang_expr.html#the_like_glob_regexp_match_and_extract_operators --- src/ast/operator.rs | 7 +++++++ src/dialect/mod.rs | 2 ++ src/dialect/sqlite.rs | 24 +++++++++++++++++++++++- tests/sqlparser_sqlite.rs | 28 ++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 73fe9cf42..d0bb05e3c 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -139,6 +139,11 @@ pub enum BinaryOperator { DuckIntegerDivide, /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, + /// MATCH operator, e.g. `a MATCH b` (SQLite-specific) + /// See <https://www.sqlite.org/lang_expr.html#the_like_glob_regexp_match_and_extract_operators> + Match, + /// REGEXP operator, e.g. `a REGEXP b` (SQLite-specific) + Regexp, /// Support for custom operators (such as Postgres custom operators) Custom(String), /// Bitwise XOR, e.g. 
`a # b` (PostgreSQL-specific) @@ -350,6 +355,8 @@ impl fmt::Display for BinaryOperator { BinaryOperator::BitwiseXor => f.write_str("^"), BinaryOperator::DuckIntegerDivide => f.write_str("//"), BinaryOperator::MyIntegerDivide => f.write_str("DIV"), + BinaryOperator::Match => f.write_str("MATCH"), + BinaryOperator::Regexp => f.write_str("REGEXP"), BinaryOperator::Custom(s) => f.write_str(s), BinaryOperator::PGBitwiseXor => f.write_str("#"), BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"), diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b754a04f1..6fbbc7a23 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -619,6 +619,7 @@ pub trait Dialect: Debug + Any { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), _ => Ok(self.prec_unknown()), }, @@ -630,6 +631,7 @@ pub trait Dialect: Debug + Any { Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)), Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)), diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 138c4692c..5a1987f8e 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-use crate::ast::Statement;
+use crate::ast::BinaryOperator;
+use crate::ast::{Expr, Statement};
 use crate::dialect::Dialect;
 use crate::keywords::Keyword;
 use crate::parser::{Parser, ParserError};
@@ -70,6 +71,27 @@ impl Dialect for SQLiteDialect {
         }
     }
 
+    fn parse_infix(
+        &self,
+        parser: &mut crate::parser::Parser,
+        expr: &crate::ast::Expr,
+        precedence: u8,
+    ) -> Option<Result<crate::ast::Expr, ParserError>> {
+        // Parse infix MATCH/REGEXP; the operand binds at `precedence` and its errors are returned
+        // See <https://www.sqlite.org/lang_expr.html#the_like_glob_regexp_match_and_extract_operators>
+        for (keyword, op) in [
+            (Keyword::REGEXP, BinaryOperator::Regexp),
+            (Keyword::MATCH, BinaryOperator::Match),
+        ] {
+            if parser.parse_keyword(keyword) {
+                let left = Box::new(expr.clone());
+                let right = parser.parse_subexpr(precedence).map(Box::new);
+                return Some(right.map(|right| Expr::BinaryOp { left, op, right }));
+            }
+        }
+        None
+    }
+
     fn supports_in_empty_list(&self) -> bool {
         true
     }
diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs
index 361c9b051..9bf2a9d1a 100644
--- a/tests/sqlparser_sqlite.rs
+++ b/tests/sqlparser_sqlite.rs
@@ -562,6 +562,34 @@ fn test_dollar_identifier_as_placeholder() {
     }
 }
 
+#[test]
+fn test_match_operator() {
+    assert_eq!(
+        sqlite().verified_expr("col MATCH 'pattern'"),
+        Expr::BinaryOp {
+            op: BinaryOperator::Match,
+            left: Box::new(Expr::Identifier(Ident::new("col"))),
+            right: Box::new(Expr::Value(
+                (Value::SingleQuotedString("pattern".to_string())).with_empty_span()
+            ))
+        }
+    );
+}
+
+#[test]
+fn test_regexp_operator() {
+    assert_eq!(
+        sqlite().verified_expr("col REGEXP 'pattern'"),
+        Expr::BinaryOp {
+            op: BinaryOperator::Regexp,
+            left: Box::new(Expr::Identifier(Ident::new("col"))),
+            right: Box::new(Expr::Value(
+                (Value::SingleQuotedString("pattern".to_string())).with_empty_span()
+            ))
+        }
+    );
+}
+
 fn sqlite() -> TestedDialects {
     TestedDialects::new(vec![Box::new(SQLiteDialect {})])
 }
From c4fc3e5486d7a085da02f243284794aa357f933b Mon Sep 17 00:00:00 2001
From: lovasoa
Date: Wed, 7 May 2025 11:38:21 +0200
Subject: [PATCH 2/3] nostd fix

---
 src/dialect/sqlite.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 5a1987f8e..847e0d135 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +#[cfg(not(feature = "std"))] +use alloc::boxed::Box; + use crate::ast::BinaryOperator; use crate::ast::{Expr, Statement}; use crate::dialect::Dialect; From 98c5e0d2c84dc92614ca188aca3b5d0e53cf630f Mon Sep 17 00:00:00 2001 From: lovasoa Date: Wed, 7 May 2025 11:44:07 +0200 Subject: [PATCH 3/3] add entire queries in tests --- tests/sqlparser_sqlite.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 9bf2a9d1a..b759065f3 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -574,6 +574,7 @@ fn test_match_operator() { )) } ); + sqlite().verified_only_select("SELECT * FROM email WHERE email MATCH 'fts5'"); } #[test] @@ -588,6 +589,7 @@ fn test_regexp_operator() { )) } ); + sqlite().verified_only_select(r#"SELECT count(*) FROM messages WHERE msg_text REGEXP '\d+'"#); } fn sqlite() -> TestedDialects {