|
| 1 | +<?php |
| 2 | +/** |
| 3 | + * Benchmark the MySQL parser over the MySQL server test corpus. |
| 4 | + * |
| 5 | + * Reports the corpus parse rate and end-to-end (lex + parse) throughput. |
| 6 | + * |
| 7 | + * Methodology: a few warmup passes (discarded — they heat opcache, the tracing |
| 8 | + * JIT, and the CPU caches) followed by N timed passes over the whole corpus. The |
| 9 | + * headline is the BEST pass: parsing is deterministic and CPU-bound, so outside |
| 10 | + * interference only ever makes a pass slower, which makes the fastest pass the |
| 11 | + * most reproducible estimate. A single cold pass badly under-reports the tracing |
| 12 | + * JIT (it pays compilation inside the timed run), so warmup is on by default. |
| 13 | + * |
| 14 | + * Options: |
| 15 | + * --json Machine-readable output. |
| 16 | + * --limit=N Only benchmark the first N queries. |
| 17 | + * --iterations=N Number of timed passes (default 5). |
| 18 | + * --warmup=N Number of discarded warmup passes (default 2). |
| 19 | + * --corpus=PATH Path to the queries CSV (default: the mysql-on-sqlite corpus). |
| 20 | + * --inline-units Parse with unit-production inlining (the collapsed AST). |
| 21 | + */ |
| 22 | + |
// Escalate every error PHP reports to the handler into an ErrorException, so a
// warning or notice raised anywhere in the run becomes a catchable exception
// rather than a line of stray output.
set_error_handler(
	static function ( $errno, $errstr, $errfile, $errline ) {
		// The exception code is always 0; the PHP error level is carried as the severity.
		throw new ErrorException( $errstr, 0, $errno, $errfile, $errline );
	}
);
| 28 | + |
// Boolean switches: present anywhere on the command line means "on".
$json         = in_array( '--json', $argv, true );
$inline_units = in_array( '--inline-units', $argv, true );

// Defaults for the valued options.
$limit      = null;
$iterations = 5;
$warmup     = 2;
$corpus     = __DIR__ . '/../../mysql-on-sqlite/tests/mysql/data/mysql-server-tests-queries.csv';

// Valued options have the form --name=value. Arguments are scanned in order,
// so when an option is repeated the last occurrence wins.
foreach ( $argv as $arg ) {
	// Split on the first "=" only, so a value may itself contain "=".
	$option_parts = explode( '=', $arg, 2 );
	if ( 2 !== count( $option_parts ) ) {
		continue;
	}
	list( $option_name, $option_value ) = $option_parts;

	switch ( $option_name ) {
		case '--limit':
			// At least one query; non-numeric input casts to 0 and is clamped to 1.
			$limit = max( 1, (int) $option_value );
			break;
		case '--iterations':
			// At least one timed pass.
			$iterations = max( 1, (int) $option_value );
			break;
		case '--warmup':
			// Zero is allowed: it disables warmup entirely.
			$warmup = max( 0, (int) $option_value );
			break;
		case '--corpus':
			$corpus = $option_value;
			break;
	}
}
| 46 | + |
// Load the parser sources and build a parser from the parse table file.
require_once __DIR__ . '/../src/load.php';
$parser = new WP_MySQL_Parser( require __DIR__ . '/../src/grammar/parse-table.php', $inline_units );

// Read the whole corpus into memory up front so that file IO never lands
// inside a timed pass.
if ( ! is_readable( $corpus ) ) {
	fwrite( STDERR, "error: corpus not found at {$corpus} (pass --corpus=PATH).\n" );
	exit( 1 );
}

$queries = array();
$handle  = fopen( $corpus, 'r' );

// Keep reading CSV records until the file ends or --limit queries are collected.
while ( null === $limit || count( $queries ) < $limit ) {
	$row = fgetcsv( $handle, null, ',', '"', '\\' );
	if ( false === $row ) {
		break;
	}

	// The query is the first CSV column; records where it is missing or empty are skipped.
	if ( isset( $row[0] ) && '' !== $row[0] ) {
		$queries[] = $row[0];
	}
}

fclose( $handle );
$query_count = count( $queries );
| 69 | + |
// $parse_corpus runs one full end-to-end pass: every query is lexed and then
// parsed. The two counters are captured by reference and reset at the start of
// each pass, so after any call they describe that pass only.
//   $failures   - queries for which parse() returned null.
//   $exceptions - queries for which lexing or parsing threw.
$failures     = 0;
$exceptions   = 0;
$parse_corpus = function () use ( $queries, $parser, &$failures, &$exceptions ) {
	$failures   = 0;
	$exceptions = 0;

	foreach ( $queries as $sql ) {
		try {
			$lexer  = new WP_MySQL_Lexer( $sql );
			$result = $parser->parse( $lexer->remaining_tokens() );
			if ( null === $result ) {
				$failures++;
			}
		} catch ( Throwable $error ) {
			// Counted rather than rethrown, so one bad query cannot abort the pass.
			$exceptions++;
		}
	}
};
| 88 | + |
// Warmup passes: run the corpus without timing it and discard the result.
for ( $pass = 0; $pass < $warmup; $pass++ ) {
	$parse_corpus();
}

// Choose the clock once, outside the timed region. hrtime() (PHP 7.3+) is
// monotonic, so a wall-clock adjustment during a pass cannot distort a sample;
// microtime() is the fallback on older runtimes. Both return seconds as a float.
$now = function_exists( 'hrtime' )
	? static function () {
		return hrtime( true ) / 1e9;
	}
	: static function () {
		return microtime( true );
	};

// Timed passes: each sample is the throughput of one pass, in queries per second.
$samples = array();
for ( $pass = 0; $pass < $iterations; $pass++ ) {
	$start = $now();
	$parse_corpus();
	$elapsed = $now() - $start;

	// An empty corpus (or a pass shorter than the clock resolution) can measure
	// as zero elapsed time. Dividing by it would abort the run with a
	// division-by-zero error, so record 0 QPS for such a pass instead.
	$samples[] = $elapsed > 0 ? $query_count / $elapsed : 0.0;
}
// Summarise the per-pass throughput samples. After the ascending sort the
// slowest pass is first and the fastest is last.
sort( $samples );
$sample_count = count( $samples );

$worst = $samples[0];
$best  = $samples[ $sample_count - 1 ];
$mean  = array_sum( $samples ) / $sample_count;

// Median: the middle sample, or the average of the two middle samples when
// the number of samples is even.
$half = intdiv( $sample_count, 2 );
if ( 1 === $sample_count % 2 ) {
	$median = $samples[ $half ];
} else {
	$median = ( $samples[ $half - 1 ] + $samples[ $half ] ) / 2;
}

// Spread: how far the worst pass falls below the best, as a fraction of the best.
if ( $best > 0 ) {
	$spread = ( $best - $worst ) / $best;
} else {
	$spread = 0.0;
}
| 109 | + |
// Detect the runtime configuration the numbers were measured under.
// opcache counts as "on" when opcache_get_status() exists and returns an
// array; the JIT counts as "on" when that status has a truthy ['jit']['on'].
$opcache_status = false;
if ( function_exists( 'opcache_get_status' ) ) {
	$opcache_status = opcache_get_status( false );
}
$opcache_on = is_array( $opcache_status );
$jit_on     = $opcache_on && isset( $opcache_status['jit']['on'] ) && $opcache_status['jit']['on'];
| 113 | + |
// --json: emit the full result set as one pretty-printed JSON document and
// stop, skipping the human-readable report.
if ( $json ) {
	$report = array(
		'benchmark'    => 'mysql-parser',
		'inline_units' => $inline_units,
		'opcache'      => $opcache_on,
		'jit'          => $jit_on,
		'queries'      => $query_count,
		'warmup'       => $warmup,
		'iterations'   => $iterations,
		// "qps" is the headline figure and carries the same value as "qps_best".
		'qps'          => $best,
		'qps_best'     => $best,
		'qps_median'   => $median,
		'qps_mean'     => $mean,
		'qps_worst'    => $worst,
		'spread'       => $spread,
		'failures'     => $failures,
		'exceptions'   => $exceptions,
		'php_version'  => PHP_VERSION,
	);

	echo json_encode( $report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES ) . "\n";
	exit;
}
| 138 | + |
// Human-readable report.

// Header line: grammar variant and the runtime configuration detected above.
if ( $jit_on ) {
	$config = 'opcache + tracing JIT';
} elseif ( $opcache_on ) {
	$config = 'opcache, no JIT';
} else {
	$config = 'no opcache';
}
$grammar_suffix = $inline_units ? ', unit inlining' : '';
printf( "MySQL parser (official 8.4 grammar%s) — %s\n", $grammar_suffix, $config );

// Warn when the opcache.jit ini value is anything other than empty/0/off/disable
// but the JIT was not reported as active.
$jit_setting   = strtolower( (string) ini_get( 'opcache.jit' ) );
$jit_requested = ! in_array( $jit_setting, array( '', '0', 'off', 'disable' ), true );
if ( $jit_requested && ! $jit_on ) {
	echo " warning: opcache.jit is set but the JIT is NOT active here — check that opcache is enabled and jit_buffer_size > 0.\n";
}

// Run parameters and throughput figures.
printf( "%s queries, %d warmup + %d timed passes (end-to-end lex+parse)\n", number_format( $query_count ), $warmup, $iterations );
printf( " best: %s QPS\n", number_format( $best ) );
printf( " median: %s QPS\n", number_format( $median ) );
printf( " spread: %.1f%% (best vs worst)\n", $spread * 100 );

// Failure rate is a percentage of all queries; an empty corpus reports 0.
$failure_rate = $query_count > 0 ? $failures / $query_count * 100 : 0.0;
printf( " failures: %d (%.2f%%) | exceptions: %d\n", $failures, $failure_rate, $exceptions );

// Flag runs whose best and worst passes differ by more than 10%.
if ( $spread > 0.10 ) {
	printf( " note: >10%% spread — the machine is noisy; close other apps for a steadier number.\n" );
}
0 commit comments