|
| 1 | +<?php |
| 2 | + |
| 3 | +use PHPUnit\Framework\TestCase; |
| 4 | + |
/**
 * Verifies the names, values, and byte ranges of the Bison-style tokens
 * returned by WP_MySQL_Lexer::remaining_tokens().
 */
class WP_MySQL_Lexer_Tests extends TestCase {
	/**
	 * Tokenize a SQL string and collect the terminal name of every token.
	 *
	 * The lexer is always constructed with 80400 as its second argument
	 * (presumably the MySQL version, 8.4.0 - confirm against the lexer).
	 *
	 * @param string   $sql       The SQL payload to tokenize.
	 * @param string[] $sql_modes The SQL modes to activate.
	 * @return string[] Terminal names, in input order.
	 */
	private static function token_names( string $sql, array $sql_modes = array() ): array {
		$lexer = new WP_MySQL_Lexer( $sql, 80400, $sql_modes );

		// array_values() keeps the result a plain 0-based list, exactly like
		// appending each name to an empty array would.
		return array_values(
			array_map(
				static function ( $token ) {
					return $token->get_name();
				},
				$lexer->remaining_tokens()
			)
		);
	}

	/**
	 * A simple SELECT is reported as Bison terminals, closed by the
	 * END_OF_INPUT and $end markers.
	 */
	public function test_emits_bison_terminals_with_end_markers(): void {
		$expected = array( 'SELECT_SYM', 'IDENT', 'FROM', 'IDENT', 'END_OF_INPUT', '$end' );

		$this->assertSame( $expected, self::token_names( 'SELECT id FROM users' ) );
	}

	/**
	 * "@var1" becomes an '@' token followed by an IDENT token, each with its
	 * own start offset and length.
	 */
	public function test_at_name_splits_into_at_and_ident(): void {
		$lexer  = new WP_MySQL_Lexer( 'SELECT @var1' );
		$stream = $lexer->remaining_tokens();

		$at_sign = $stream[1];
		$this->assertSame( "'@'", $at_sign->get_name() );

		$variable = $stream[2];
		$this->assertSame( 'IDENT', $variable->get_name() );
		$this->assertSame( 'var1', $variable->get_value() );

		// The '@' occupies byte 7; the name covers the four bytes after it.
		$this->assertSame( 7, $at_sign->start );
		$this->assertSame( 1, $at_sign->length );
		$this->assertSame( 8, $variable->start );
		$this->assertSame( 4, $variable->length );
	}

	/**
	 * "@@sql_mode" becomes two '@' tokens followed by an IDENT token.
	 */
	public function test_at_at_splits_into_two_at_signs(): void {
		$expected = array( 'SELECT_SYM', "'@'", "'@'", 'IDENT', 'END_OF_INPUT', '$end' );

		$this->assertSame( $expected, self::token_names( 'SELECT @@sql_mode' ) );
	}

	/**
	 * A bare "@" is followed by a zero-length IDENT token.
	 */
	public function test_bare_at_emits_empty_name(): void {
		// MySQL's lexer emits an empty LEX_HOSTNAME after a bare "@", making
		// "user1@" (an empty host part) and "SELECT @" valid.
		$lexer  = new WP_MySQL_Lexer( 'SELECT @' );
		$stream = $lexer->remaining_tokens();

		$at_sign = $stream[1];
		$this->assertSame( "'@'", $at_sign->get_name() );

		$empty_name = $stream[2];
		$this->assertSame( 'IDENT', $empty_name->get_name() );
		$this->assertSame( 0, $empty_name->length );
		$this->assertSame( '', $empty_name->get_value() );

		$expected = array( 'CREATE', 'USER', 'IDENT', "'@'", 'IDENT', 'END_OF_INPUT', '$end' );
		$this->assertSame( $expected, self::token_names( 'CREATE USER user1@' ) );
	}

	/**
	 * An "@" directly followed by a quoted string gets no extra IDENT token.
	 */
	public function test_bare_at_before_quote_stands_alone(): void {
		// In "@'name'" the quoted text supplies the name itself.
		$expected = array( 'SET_SYM', "'@'", 'TEXT_STRING', 'EQ', 'NUM', 'END_OF_INPUT', '$end' );

		$this->assertSame( $expected, self::token_names( "SET @'v' = 1" ) );
	}

	/**
	 * "WITH ROLLUP" is reported as WITH_ROLLUP_SYM, with no separate WITH.
	 */
	public function test_with_rollup_is_contracted(): void {
		$terminals = self::token_names( 'SELECT 1 FROM t GROUP BY a WITH ROLLUP' );

		$this->assertContains( 'WITH_ROLLUP_SYM', $terminals );
		$this->assertNotContains( 'WITH', $terminals );
	}

	/**
	 * A comment between WITH and ROLLUP does not prevent the contraction; the
	 * merged token's bytes span the comment as well.
	 */
	public function test_with_rollup_contracts_across_comments(): void {
		$lexer   = new WP_MySQL_Lexer( 'SELECT 1 FROM t GROUP BY a WITH /* c */ ROLLUP' );
		$matches = array_filter(
			$lexer->remaining_tokens(),
			static function ( $candidate ) {
				return 'WITH_ROLLUP_SYM' === $candidate->get_name();
			}
		);

		// Use the last matching token, or null when nothing matched.
		$rollup = array() === $matches ? null : end( $matches );

		$this->assertNotNull( $rollup );
		$this->assertSame( 'WITH /* c */ ROLLUP', $rollup->get_bytes() );
	}

	/**
	 * A WITH that is not followed by ROLLUP is reported as a plain WITH token.
	 */
	public function test_lone_with_is_emitted(): void {
		$cte_terminals = array(
			'WITH',
			'IDENT',
			'AS',
			"'('",
			'SELECT_SYM',
			'NUM',
			"')'",
			'SELECT_SYM',
			"'*'",
			'FROM',
			'IDENT',
			'END_OF_INPUT',
			'$end',
		);
		$this->assertSame( $cte_terminals, self::token_names( 'WITH c AS (SELECT 1) SELECT * FROM c' ) );

		// A statement ending on WITH still emits it before the end markers.
		$trailing_with = array( 'SELECT_SYM', 'NUM', 'WITH', 'END_OF_INPUT', '$end' );
		$this->assertSame( $trailing_with, self::token_names( 'SELECT 1 WITH' ) );
	}

	/**
	 * Input containing an invalid byte yields only the tokens lexed before
	 * it, with neither END_OF_INPUT nor $end appended.
	 */
	public function test_invalid_input_returns_partial_stream_without_end_markers(): void {
		$this->assertSame(
			array( 'SELECT_SYM', 'NUM', 'WITH' ),
			self::token_names( "SELECT 1 WITH \x01" )
		);
	}

	/**
	 * NOT is reported as NOT2_SYM under the HIGH_NOT_PRECEDENCE SQL mode and
	 * as NOT_SYM when no SQL mode is active.
	 */
	public function test_high_not_precedence_emits_not2(): void {
		$with_mode = self::token_names( 'SELECT NOT 1', array( 'HIGH_NOT_PRECEDENCE' ) );
		$this->assertContains( 'NOT2_SYM', $with_mode );

		$without_mode = self::token_names( 'SELECT NOT 1' );
		$this->assertContains( 'NOT_SYM', $without_mode );
	}

	/**
	 * The word "end_of_input" in a query is lexed as an ordinary identifier.
	 */
	public function test_end_of_input_word_is_an_identifier(): void {
		// "end_of_input" is not a MySQL keyword; it must not truncate the stream.
		$expected = array( 'SELECT_SYM', 'IDENT', 'FROM', 'IDENT', 'END_OF_INPUT', '$end' );

		$this->assertSame( $expected, self::token_names( 'SELECT end_of_input FROM t' ) );
	}

	/**
	 * CURRENT_DATE is reported as the CURDATE terminal even without
	 * parentheses, while a bare "curdate" is reported as IDENT.
	 */
	public function test_current_date_is_a_keyword_without_parentheses(): void {
		// CURRENT_DATE/CURRENT_TIME are plain reserved keywords in MySQL 8.4
		// (lex.h SYM), unlike CURDATE/CURTIME which require parentheses.
		$keyword_stream = array( 'SELECT_SYM', 'CURDATE', 'END_OF_INPUT', '$end' );
		$this->assertSame( $keyword_stream, self::token_names( 'SELECT CURRENT_DATE' ) );

		$identifier_stream = array( 'SELECT_SYM', 'IDENT', 'END_OF_INPUT', '$end' );
		$this->assertSame( $identifier_stream, self::token_names( 'SELECT curdate' ) );
	}

	/**
	 * "json_objectagg" is an identifier on its own and the JSON_OBJECTAGG
	 * terminal when an opening parenthesis follows it.
	 */
	public function test_json_aggregates_are_keywords_only_before_parenthesis(): void {
		$bare_word = array( 'SELECT_SYM', 'IDENT', 'FROM', 'IDENT', 'END_OF_INPUT', '$end' );
		$this->assertSame( $bare_word, self::token_names( 'SELECT json_objectagg FROM t' ) );

		$call_terminals = self::token_names( 'SELECT JSON_OBJECTAGG(a, b) FROM t' );
		$this->assertContains( 'JSON_OBJECTAGG', $call_terminals );
	}

	/**
	 * Integer literals are classified by magnitude: NUM, LONG_NUM,
	 * ULONGLONG_NUM, and finally DECIMAL_NUM.
	 */
	public function test_number_tokens_follow_mysql_magnitude_classes(): void {
		// Each pair is a query and the terminal expected for its literal.
		$cases = array(
			array( 'SELECT 2147483647', 'NUM' ),
			array( 'SELECT 2147483648', 'LONG_NUM' ),
			array( 'SELECT 18446744073709551615', 'ULONGLONG_NUM' ),
			array( 'SELECT 18446744073709551616', 'DECIMAL_NUM' ),
		);

		foreach ( $cases as $case ) {
			list( $query, $terminal ) = $case;

			$this->assertSame(
				array( 'SELECT_SYM', $terminal, 'END_OF_INPUT', '$end' ),
				self::token_names( $query )
			);
		}
	}
}
0 commit comments