Skip to content

Commit 70d642d

Browse files
committed
Add the test suite and a CI job
Cover the new logic with PHPUnit tests: the token stream (@ splitting, empty host names, WITH ROLLUP contraction, NOT2, end markers, partial streams, keyword gating), the parser runtime (AST root, unit-production inlining, error handling on invalid and partial input), token value and name resolution, generated grammar-data invariants, and a corpus regression test pinning the exact acceptance tally (69,491 of 69,577). Run the suite on the oldest and newest supported PHP versions in CI.
1 parent d89b277 commit 70d642d

10 files changed

Lines changed: 588 additions & 2 deletions
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
name: MySQL Parser Tests
2+
3+
on:
4+
push:
5+
branches:
6+
- trunk
7+
paths:
8+
- '.github/workflows/mysql-parser-tests.yml'
9+
- 'packages/mysql-parser/**'
10+
pull_request:
11+
paths:
12+
- '.github/workflows/mysql-parser-tests.yml'
13+
- 'packages/mysql-parser/**'
14+
workflow_dispatch:
15+
16+
concurrency:
17+
group: ${{ github.workflow }}-${{ github.ref }}
18+
cancel-in-progress: true
19+
20+
# Disable permissions for all available scopes by default.
21+
# Any needed permissions should be configured at the job level.
22+
permissions: {}
23+
24+
jobs:
25+
test:
26+
# The runtime supports PHP 7.2+; test the oldest and the latest.
27+
name: PHP ${{ matrix.php }}
28+
runs-on: ubuntu-latest
29+
timeout-minutes: 10
30+
permissions:
31+
contents: read # Required to clone the repo.
32+
strategy:
33+
fail-fast: false
34+
matrix:
35+
php: [ '7.2', '8.5' ]
36+
37+
steps:
38+
- name: Checkout repository
39+
uses: actions/checkout@v4
40+
41+
- name: Set up PHP
42+
uses: shivammathur/setup-php@v2
43+
with:
44+
php-version: ${{ matrix.php }}
45+
coverage: none
46+
47+
- name: Install dependencies
48+
working-directory: packages/mysql-parser
49+
run: composer install --no-interaction --no-progress
50+
51+
- name: Run tests
52+
working-directory: packages/mysql-parser
53+
run: composer run test

packages/mysql-parser/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,14 @@ runs Bison in Docker, and rewrites `src/grammar/parse-table.php` and
107107
byte for byte. Both artifacts are plain PHP arrays. The fetched sources and the (large) automaton dump land in
108108
`build/`, which is gitignored.
109109

110-
## Benchmark
110+
## Tests and benchmark
111111

112112
```bash
113+
composer run test # PHPUnit suite (includes a corpus regression test)
113114
composer run benchmark # corpus throughput, without and with the tracing JIT
114115
```
115116

116-
The benchmark runs a ~69.5k-query corpus of MySQL server test queries from
117+
The corpus regression test and the benchmark both run a ~69.5k-query corpus of MySQL server test queries from
117118
the monorepo's shared test data. The parser accepts **99.88%** of it; the 0.12%
118119
it rejects is syntax removed in MySQL 8.4 (e.g. `RESET MASTER`),
119120
multi-statement input, statements needing non-default session SQL modes, and a

packages/mysql-parser/composer.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
"require": {
77
"php": ">=7.2"
88
},
9+
"require-dev": {
10+
"phpunit/phpunit": "^8.5"
11+
},
912
"scripts": {
13+
"test": "phpunit",
1014
"build-grammar": [
1115
"./bin/build-grammar"
1216
],
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<phpunit
2+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:noNamespaceSchemaLocation="http://schema.phpunit.de/9.2/phpunit.xsd"
4+
bootstrap="tests/bootstrap.php"
5+
backupGlobals="false"
6+
colors="true"
7+
beStrictAboutTestsThatDoNotTestAnything="true"
8+
beStrictAboutOutputDuringTests="true"
9+
convertErrorsToExceptions="true"
10+
convertWarningsToExceptions="true"
11+
convertNoticesToExceptions="true"
12+
convertDeprecationsToExceptions="true"
13+
>
14+
<php>
15+
<ini name="memory_limit" value="512M"/>
16+
</php>
17+
<testsuites>
18+
<!-- Default test suite to run all tests. -->
19+
<testsuite name="default">
20+
<directory suffix="_Tests.php">tests/</directory>
21+
</testsuite>
22+
</testsuites>
23+
</phpunit>
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
<?php
2+
3+
use PHPUnit\Framework\TestCase;
4+
5+
/**
6+
* Tests for the Bison token stream produced by WP_MySQL_Lexer::remaining_tokens().
7+
*/
8+
class WP_MySQL_Lexer_Tests extends TestCase {
9+
/**
10+
* Get the Bison terminal names of a tokenized input.
11+
*
12+
* @param string $sql The SQL payload to tokenize.
13+
* @param string[] $sql_modes The SQL modes to activate.
14+
* @return string[] Terminal names, in input order.
15+
*/
16+
private static function token_names( string $sql, array $sql_modes = array() ): array {
17+
$tokens = ( new WP_MySQL_Lexer( $sql, 80400, $sql_modes ) )->remaining_tokens();
18+
$names = array();
19+
foreach ( $tokens as $token ) {
20+
$names[] = $token->get_name();
21+
}
22+
return $names;
23+
}
24+
25+
public function test_emits_bison_terminals_with_end_markers(): void {
26+
$this->assertSame(
27+
array( 'SELECT_SYM', 'IDENT', 'FROM', 'IDENT', 'END_OF_INPUT', '$end' ),
28+
self::token_names( 'SELECT id FROM users' )
29+
);
30+
}
31+
32+
public function test_at_name_splits_into_at_and_ident(): void {
33+
$tokens = ( new WP_MySQL_Lexer( 'SELECT @var1' ) )->remaining_tokens();
34+
$this->assertSame( "'@'", $tokens[1]->get_name() );
35+
$this->assertSame( 'IDENT', $tokens[2]->get_name() );
36+
$this->assertSame( 'var1', $tokens[2]->get_value() );
37+
$this->assertSame( 7, $tokens[1]->start );
38+
$this->assertSame( 1, $tokens[1]->length );
39+
$this->assertSame( 8, $tokens[2]->start );
40+
$this->assertSame( 4, $tokens[2]->length );
41+
}
42+
43+
public function test_at_at_splits_into_two_at_signs(): void {
44+
$this->assertSame(
45+
array( 'SELECT_SYM', "'@'", "'@'", 'IDENT', 'END_OF_INPUT', '$end' ),
46+
self::token_names( 'SELECT @@sql_mode' )
47+
);
48+
}
49+
50+
public function test_bare_at_emits_empty_name(): void {
51+
// MySQL's lexer emits an empty LEX_HOSTNAME after a bare "@", making
52+
// "user1@" (an empty host part) and "SELECT @" valid.
53+
$tokens = ( new WP_MySQL_Lexer( 'SELECT @' ) )->remaining_tokens();
54+
$this->assertSame( "'@'", $tokens[1]->get_name() );
55+
$this->assertSame( 'IDENT', $tokens[2]->get_name() );
56+
$this->assertSame( 0, $tokens[2]->length );
57+
$this->assertSame( '', $tokens[2]->get_value() );
58+
59+
$this->assertSame(
60+
array( 'CREATE', 'USER', 'IDENT', "'@'", 'IDENT', 'END_OF_INPUT', '$end' ),
61+
self::token_names( 'CREATE USER user1@' )
62+
);
63+
}
64+
65+
public function test_bare_at_before_quote_stands_alone(): void {
66+
// In "@'name'" the quoted text supplies the name itself.
67+
$this->assertSame(
68+
array( 'SET_SYM', "'@'", 'TEXT_STRING', 'EQ', 'NUM', 'END_OF_INPUT', '$end' ),
69+
self::token_names( "SET @'v' = 1" )
70+
);
71+
}
72+
73+
public function test_with_rollup_is_contracted(): void {
74+
$names = self::token_names( 'SELECT 1 FROM t GROUP BY a WITH ROLLUP' );
75+
$this->assertContains( 'WITH_ROLLUP_SYM', $names );
76+
$this->assertNotContains( 'WITH', $names );
77+
}
78+
79+
public function test_with_rollup_contracts_across_comments(): void {
80+
$tokens = ( new WP_MySQL_Lexer( 'SELECT 1 FROM t GROUP BY a WITH /* c */ ROLLUP' ) )->remaining_tokens();
81+
$rollup = null;
82+
foreach ( $tokens as $token ) {
83+
if ( 'WITH_ROLLUP_SYM' === $token->get_name() ) {
84+
$rollup = $token;
85+
}
86+
}
87+
$this->assertNotNull( $rollup );
88+
$this->assertSame( 'WITH /* c */ ROLLUP', $rollup->get_bytes() );
89+
}
90+
91+
public function test_lone_with_is_emitted(): void {
92+
$this->assertSame(
93+
array( 'WITH', 'IDENT', 'AS', "'('", 'SELECT_SYM', 'NUM', "')'", 'SELECT_SYM', "'*'", 'FROM', 'IDENT', 'END_OF_INPUT', '$end' ),
94+
self::token_names( 'WITH c AS (SELECT 1) SELECT * FROM c' )
95+
);
96+
97+
// A statement ending on WITH still emits it before the end markers.
98+
$this->assertSame(
99+
array( 'SELECT_SYM', 'NUM', 'WITH', 'END_OF_INPUT', '$end' ),
100+
self::token_names( 'SELECT 1 WITH' )
101+
);
102+
}
103+
104+
public function test_invalid_input_returns_partial_stream_without_end_markers(): void {
105+
$names = self::token_names( "SELECT 1 WITH \x01" );
106+
$this->assertSame( array( 'SELECT_SYM', 'NUM', 'WITH' ), $names );
107+
}
108+
109+
public function test_high_not_precedence_emits_not2(): void {
110+
$names = self::token_names( 'SELECT NOT 1', array( 'HIGH_NOT_PRECEDENCE' ) );
111+
$this->assertContains( 'NOT2_SYM', $names );
112+
113+
$names = self::token_names( 'SELECT NOT 1' );
114+
$this->assertContains( 'NOT_SYM', $names );
115+
}
116+
117+
public function test_end_of_input_word_is_an_identifier(): void {
118+
// "end_of_input" is not a MySQL keyword; it must not truncate the stream.
119+
$this->assertSame(
120+
array( 'SELECT_SYM', 'IDENT', 'FROM', 'IDENT', 'END_OF_INPUT', '$end' ),
121+
self::token_names( 'SELECT end_of_input FROM t' )
122+
);
123+
}
124+
125+
public function test_current_date_is_a_keyword_without_parentheses(): void {
126+
// CURRENT_DATE/CURRENT_TIME are plain reserved keywords in MySQL 8.4
127+
// (lex.h SYM), unlike CURDATE/CURTIME which require parentheses.
128+
$this->assertSame(
129+
array( 'SELECT_SYM', 'CURDATE', 'END_OF_INPUT', '$end' ),
130+
self::token_names( 'SELECT CURRENT_DATE' )
131+
);
132+
$this->assertSame(
133+
array( 'SELECT_SYM', 'IDENT', 'END_OF_INPUT', '$end' ),
134+
self::token_names( 'SELECT curdate' )
135+
);
136+
}
137+
138+
public function test_json_aggregates_are_keywords_only_before_parenthesis(): void {
139+
$this->assertSame(
140+
array( 'SELECT_SYM', 'IDENT', 'FROM', 'IDENT', 'END_OF_INPUT', '$end' ),
141+
self::token_names( 'SELECT json_objectagg FROM t' )
142+
);
143+
$names = self::token_names( 'SELECT JSON_OBJECTAGG(a, b) FROM t' );
144+
$this->assertContains( 'JSON_OBJECTAGG', $names );
145+
}
146+
147+
public function test_number_tokens_follow_mysql_magnitude_classes(): void {
148+
$this->assertSame( array( 'SELECT_SYM', 'NUM', 'END_OF_INPUT', '$end' ), self::token_names( 'SELECT 2147483647' ) );
149+
$this->assertSame( array( 'SELECT_SYM', 'LONG_NUM', 'END_OF_INPUT', '$end' ), self::token_names( 'SELECT 2147483648' ) );
150+
$this->assertSame( array( 'SELECT_SYM', 'ULONGLONG_NUM', 'END_OF_INPUT', '$end' ), self::token_names( 'SELECT 18446744073709551615' ) );
151+
$this->assertSame( array( 'SELECT_SYM', 'DECIMAL_NUM', 'END_OF_INPUT', '$end' ), self::token_names( 'SELECT 18446744073709551616' ) );
152+
}
153+
}

0 commit comments

Comments
 (0)