Skip to content

Commit b9af09c

Browse files
authored
Translate MySQL CONVERT() expressions to SQLite (#356)
## Summary

Fixes #344. MySQL's `CONVERT()` function was passed through to SQLite unchanged, causing queries like `SELECT CONVERT('Customer' USING utf8mb4) COLLATE utf8mb4_bin` to fail with a syntax error.

This PR has two parts:

1. **Extract `simpleExprBody` as a named grammar rule.** The `%simpleExpr_factored` fragment is promoted to a real `simpleExprBody` rule so it creates its own AST node. This separates the core expression (CONVERT, CAST, literals, etc.) from trailing modifiers (COLLATE, CONCAT_PIPES) that remain in the parent `simpleExpr` node, making individual expression handlers simpler.
2. **Translate `CONVERT()` expressions.** Adds explicit handling for both forms of `CONVERT()` in the AST-based driver:
   - **`CONVERT(expr, type)`** is translated to `CAST(expr AS type)`, reusing the existing `castType` translation.
   - **`CONVERT(expr USING charset)`** is reduced to just the expression, as SQLite stores all text as UTF-8 and charset conversions are not needed.

## Test plan

- [x] Added `testConvert` translation test — verifies SQL-to-SQL translation for both CONVERT forms and COLLATE.
- [x] Added `testConvertExpression` — verifies CONVERT with type casting (BINARY, CHAR, SIGNED, UNSIGNED, DECIMAL, DATE).
- [x] Added `testConvertUsingExpression` — verifies CONVERT with charset conversion (utf8mb4, utf8, latin1).
- [x] Added `testConvertUsingWithCollate` — verifies the exact query from #344.
- [x] Added `testConvertWithColumnReferences` — verifies CONVERT with column references in SELECT, WHERE, and ORDER BY.
- [x] Full test suite passes (640 tests, 0 failures).
- [x] PHPCS passes.
1 parent ee854d2 commit b9af09c

File tree

6 files changed

+157
-16
lines changed

6 files changed

+157
-16
lines changed

.github/workflows/wp-tests-phpunit-run.js

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,6 @@ const expectedFailures = [
5858
'Tests_DB_Charset::test_strip_invalid_text with data set #32',
5959
'Tests_DB_Charset::test_strip_invalid_text with data set #33',
6060
'Tests_DB_Charset::test_strip_invalid_text with data set #34',
61-
'Tests_DB_Charset::test_strip_invalid_text with data set #35',
62-
'Tests_DB_Charset::test_strip_invalid_text with data set #36',
63-
'Tests_DB_Charset::test_strip_invalid_text with data set #37',
6461
'Tests_DB_Charset::test_strip_invalid_text with data set #39',
6562
'Tests_DB_Charset::test_strip_invalid_text with data set #40',
6663
'Tests_DB_Charset::test_strip_invalid_text with data set #41',

grammar-tools/MySQLParser.g4

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2961,13 +2961,13 @@ bitExpr: simpleExpr %bitExpr_rr*;
29612961

29622962
/*
29632963
* @CHANGED:
2964-
* Factored left recursion.
2964+
* Factored left recursion and introduced "simpleExprBody" for easier processing.
29652965
*/
29662966
simpleExpr: %simpleExpr_collate (CONCAT_PIPES_SYMBOL %simpleExpr_collate)*;
29672967

2968-
%simpleExpr_collate: %simpleExpr_factored (COLLATE_SYMBOL textOrIdentifier)?;
2968+
%simpleExpr_collate: simpleExprBody (COLLATE_SYMBOL textOrIdentifier)?;
29692969

2970-
%simpleExpr_factored:
2970+
simpleExprBody:
29712971
literal # simpleExprLiteral
29722972
| sumExpr # simpleExprSum
29732973
| variable (equal expr)? # simpleExprVariable

packages/mysql-on-sqlite/src/mysql/mysql-grammar.php

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

packages/mysql-on-sqlite/src/sqlite/class-wp-pdo-mysql-on-sqlite.php

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3691,8 +3691,8 @@ private function translate( $node ): ?string {
36913691
return null;
36923692
}
36933693
return $this->translate_sequence( $node->get_children() );
3694-
case 'simpleExpr':
3695-
return $this->translate_simple_expr( $node );
3694+
case 'simpleExprBody':
3695+
return $this->translate_simple_expr_body( $node );
36963696
case 'predicateOperations':
36973697
$token = $node->get_first_child_token();
36983698
if ( WP_MySQL_Lexer::LIKE_SYMBOL === $token->id ) {
@@ -3790,6 +3790,8 @@ private function translate( $node ): ?string {
37903790
return 'TEXT';
37913791
case WP_MySQL_Lexer::SIGNED_SYMBOL:
37923792
case WP_MySQL_Lexer::UNSIGNED_SYMBOL:
3793+
// @TODO: Emulate UNSIGNED semantics. MySQL wraps negative
3794+
// values, but SQLite has no unsigned integer type.
37933795
return 'INTEGER';
37943796
case WP_MySQL_Lexer::DECIMAL_SYMBOL:
37953797
case WP_MySQL_Lexer::FLOAT_SYMBOL:
@@ -4204,13 +4206,13 @@ private function translate_query_specification( WP_Parser_Node $node ): string {
42044206
}
42054207

42064208
/**
4207-
* Translate a MySQL simple expression to SQLite.
4209+
* Translate a MySQL simple expression body to SQLite.
42084210
*
4209-
* @param WP_Parser_Node $node The "simpleExpr" AST node.
4211+
* @param WP_Parser_Node $node The "simpleExprBody" AST node.
42104212
* @return string The translated value.
42114213
* @throws WP_SQLite_Driver_Exception When the translation fails.
42124214
*/
4213-
private function translate_simple_expr( WP_Parser_Node $node ): string {
4215+
private function translate_simple_expr_body( WP_Parser_Node $node ): string {
42144216
$token = $node->get_first_child_token();
42154217

42164218
// Translate "VALUES(col)" to "excluded.col" in ON DUPLICATE KEY UPDATE.
@@ -4221,6 +4223,28 @@ private function translate_simple_expr( WP_Parser_Node $node ): string {
42214223
);
42224224
}
42234225

4226+
/**
4227+
* Translate MySQL CONVERT() expression.
4228+
*
4229+
* MySQL supports two forms of CONVERT():
4230+
* 1. CONVERT(expr, type): Equivalent to CAST(expr AS type).
4231+
* 2. CONVERT(expr USING charset): Converts the character set.
4232+
*/
4233+
if ( null !== $token && WP_MySQL_Lexer::CONVERT_SYMBOL === $token->id ) {
4234+
$expr = $this->translate( $node->get_first_child_node( 'expr' ) );
4235+
$cast_type = $node->get_first_child_node( 'castType' );
4236+
4237+
if ( null !== $cast_type ) {
4238+
// CONVERT(expr, type): Translate to cast expression.
4239+
// TODO: Emulate UNSIGNED cast. SQLite has no unsigned integer type.
4240+
return sprintf( 'CAST(%s AS %s)', $expr, $this->translate( $cast_type ) );
4241+
} else {
4242+
// CONVERT(expr USING charset): Keep "expr" as is (no SQLite support).
4243+
// TODO: Consider rejecting UTF-8-incompatible charsets.
4244+
return $expr;
4245+
}
4246+
}
4247+
42244248
return $this->translate_sequence( $node->get_children() );
42254249
}
42264250

@@ -5350,12 +5374,12 @@ private function store_last_column_meta_from_statement( PDOStatement $stmt ): vo
53505374
private function unnest_parenthesized_expression( WP_Parser_Node $node ): WP_Parser_Node {
53515375
$children = $node->get_children();
53525376

5353-
// Descend the "expr -> boolPri -> predicate -> bitExpr -> simpleExpr" tree,
5354-
// when on each level we have only a single child node (expression nesting).
5377+
// Descend the "expr -> boolPri -> predicate -> bitExpr -> simpleExpr -> simpleExprBody"
5378+
// tree, when on each level we have only a single child node (expression nesting).
53555379
if (
53565380
1 === count( $children )
53575381
&& $children[0] instanceof WP_Parser_Node
5358-
&& in_array( $children[0]->rule_name, array( 'expr', 'boolPri', 'predicate', 'bitExpr', 'simpleExpr' ), true )
5382+
&& in_array( $children[0]->rule_name, array( 'expr', 'boolPri', 'predicate', 'bitExpr', 'simpleExpr', 'simpleExprBody' ), true )
53595383
) {
53605384
$unnested = $this->unnest_parenthesized_expression( $children[0] );
53615385
return $unnested === $children[0] ? $node : $unnested;

packages/mysql-on-sqlite/tests/WP_SQLite_Driver_Tests.php

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9971,7 +9971,8 @@ public function testCastExpression(): void {
99719971
'expr_5' => 'abc',
99729972
'expr_6' => 'abc', // 'ab' In MySQL
99739973
'expr_7' => '-10',
9974-
'expr_8' => '-10', // 18446744073709551606 in MySQL
9974+
// @TODO: Emulate UNSIGNED cast. MySQL returns 18446744073709551606 (2^64 - 10).
9975+
'expr_8' => '-10',
99759976
'expr_9' => '2025-10-05 14:05:28', // 2025-10-05 in MySQL
99769977
'expr_10' => '2025-10-05 14:05:28', // 14:05:28 in MySQL
99779978
'expr_11' => '2025-10-05 14:05:28',
@@ -9986,6 +9987,90 @@ public function testCastExpression(): void {
99869987
);
99879988
}
99889989

9990+
public function testConvertExpression(): void {
9991+
// CONVERT(expr, type) should behave like CAST(expr AS type).
9992+
$result = $this->assertQuery(
9993+
"SELECT
9994+
CONVERT('abc', BINARY) AS expr_1,
9995+
CONVERT('abc', CHAR) AS expr_2,
9996+
CONVERT('-10', SIGNED) AS expr_3,
9997+
CONVERT('-10', UNSIGNED) AS expr_4,
9998+
CONVERT('123.456', DECIMAL) AS expr_5,
9999+
CONVERT('2025-10-05', DATE) AS expr_6
10000+
"
10001+
);
10002+
10003+
$this->assertEquals(
10004+
array(
10005+
(object) array(
10006+
'expr_1' => 'abc',
10007+
'expr_2' => 'abc',
10008+
'expr_3' => '-10',
10009+
// @TODO: Emulate UNSIGNED cast. MySQL returns 18446744073709551606 (2^64 - 10).
10010+
'expr_4' => '-10',
10011+
'expr_5' => '123.456',
10012+
'expr_6' => '2025-10-05',
10013+
),
10014+
),
10015+
$result
10016+
);
10017+
}
10018+
10019+
public function testConvertUsingExpression(): void {
10020+
// CONVERT(expr USING charset) converts character set.
10021+
// In SQLite, all text is UTF-8 — the conversion is a no-op.
10022+
$result = $this->assertQuery(
10023+
"SELECT
10024+
CONVERT('Customer' USING utf8mb4) AS expr_1,
10025+
CONVERT('test' USING utf8) AS expr_2,
10026+
CONVERT('data' USING latin1) AS expr_3
10027+
"
10028+
);
10029+
10030+
$this->assertEquals(
10031+
array(
10032+
(object) array(
10033+
'expr_1' => 'Customer',
10034+
'expr_2' => 'test',
10035+
'expr_3' => 'data',
10036+
),
10037+
),
10038+
$result
10039+
);
10040+
}
10041+
10042+
public function testConvertUsingWithCollate(): void {
10043+
$result = $this->assertQuery(
10044+
"SELECT CONVERT('Customer' USING utf8mb4) COLLATE utf8mb4_bin AS val"
10045+
);
10046+
10047+
$this->assertEquals(
10048+
array(
10049+
(object) array( 'val' => 'Customer' ),
10050+
),
10051+
$result
10052+
);
10053+
}
10054+
10055+
public function testConvertWithColumnReferences(): void {
10056+
$this->assertQuery( 'CREATE TABLE t (val VARCHAR(255), num VARCHAR(255))' );
10057+
$this->assertQuery( "INSERT INTO t (val, num) VALUES ('hello', '-42')" );
10058+
10059+
$result = $this->assertQuery(
10060+
'SELECT CONVERT(val, BINARY) AS v1, CONVERT(val USING utf8mb4) AS v2
10061+
FROM t WHERE CONVERT(num, SIGNED) < 0 ORDER BY CONVERT(val USING utf8mb4)'
10062+
);
10063+
$this->assertEquals(
10064+
array(
10065+
(object) array(
10066+
'v1' => 'hello',
10067+
'v2' => 'hello',
10068+
),
10069+
),
10070+
$result
10071+
);
10072+
}
10073+
998910074
public function testInsertWithoutInto(): void {
999010075
$this->assertQuery( 'CREATE TABLE t (id INT PRIMARY KEY, name VARCHAR(255))' );
999110076

packages/mysql-on-sqlite/tests/WP_SQLite_Driver_Translation_Tests.php

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,41 @@ public function testSelect(): void {
101101
);
102102
}
103103

104+
public function testConvert(): void {
105+
// CONVERT(expr, type) → CAST(expr AS type)
106+
$this->assertQuery(
107+
"SELECT CAST('abc' AS BLOB) AS `CONVERT('abc', BINARY)`",
108+
"SELECT CONVERT('abc', BINARY)"
109+
);
110+
111+
$this->assertQuery(
112+
"SELECT CAST('abc' AS TEXT) AS `CONVERT('abc', CHAR)`",
113+
"SELECT CONVERT('abc', CHAR)"
114+
);
115+
116+
$this->assertQuery(
117+
"SELECT CAST('-10' AS INTEGER) AS `CONVERT('-10', SIGNED)`",
118+
"SELECT CONVERT('-10', SIGNED)"
119+
);
120+
121+
// CONVERT(expr USING charset) → expr
122+
$this->assertQuery(
123+
"SELECT 'Customer' AS `Customer`",
124+
"SELECT CONVERT('Customer' USING utf8mb4)"
125+
);
126+
127+
$this->assertQuery(
128+
"SELECT 'test' AS `test`",
129+
"SELECT CONVERT('test' USING utf8)"
130+
);
131+
132+
// CONVERT(expr USING charset) COLLATE collation → expr COLLATE collation
133+
$this->assertQuery(
134+
"SELECT 'Customer' COLLATE `utf8mb4_bin` AS `CONVERT('Customer' USING utf8mb4) COLLATE utf8mb4_bin`",
135+
"SELECT CONVERT('Customer' USING utf8mb4) COLLATE utf8mb4_bin"
136+
);
137+
}
138+
104139
public function testInsert(): void {
105140
$this->driver->query( 'CREATE TABLE t (c INT, c1 INT, c2 INT)' );
106141
$this->driver->query( 'CREATE TABLE t1 (c1 INT, c2 INT)' );

0 commit comments

Comments
 (0)