Skip to content

Commit e4ad9eb

Browse files
committed
perf: optimize RST parsing with instance and pattern caching
Add caching optimizations for hot paths in RST parsing:

- InlineParser: reuse single InlineLexer instance instead of creating new one per parse call (lexer state fully reset via setInput())
- InlineLexer: cache expensive hyperlink pattern built from SUPPORTED_SCHEMAS (5600+ chars) as static variable
- LineChecker: add static caches for isDirective(), isLink(), and isAnnotation() regex results with proper cache key handling
- Buffer: ensure unindented flag is reset in all mutators (set, pop, clear) for consistent cache invalidation
- CachableInlineRule: simplify type annotations

Note: Lexer reuse assumes single-threaded parsing. Concurrent parsing would require separate instances.

See https://cybottm.github.io/render-guides/ for benchmark data.
1 parent 5820131 commit e4ad9eb

File tree

5 files changed

+72
-9
lines changed

5 files changed

+72
-9
lines changed

packages/guides-restructured-text/src/RestructuredText/Parser/Buffer.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727

2828
final class Buffer
2929
{
30+
/** @var bool Whether the current lines have already been unindented; reset to false by push(), set(), pop() and clear() */
31+
private bool $unindented = false;
32+
3033
/** @param string[] $lines */
3134
public function __construct(
3235
private array $lines = [],
@@ -56,11 +59,13 @@ public function get(int $key): string
5659

5760
public function push(string $line): void
5861
{
62+
$this->unindented = false;
5963
$this->lines[] = $line;
6064
}
6165

6266
public function set(int $key, string $line): void
6367
{
68+
$this->unindented = false;
6469
$this->lines[$key] = $line;
6570
}
6671

@@ -81,6 +86,8 @@ public function getLinesString(): string
8186

8287
public function pop(): string|null
8388
{
89+
$this->unindented = false;
90+
8491
return array_pop($this->lines);
8592
}
8693

@@ -97,6 +104,7 @@ public function getLastLine(): string|null
97104

98105
public function clear(): void
99106
{
107+
$this->unindented = false;
100108
$this->lines = [];
101109
}
102110

@@ -109,12 +117,18 @@ public function trimLines(): void
109117

110118
private function unIndent(): void
111119
{
120+
if ($this->unindented) {
121+
return;
122+
}
123+
112124
if ($this->unindentStrategy === UnindentStrategy::NONE) {
113125
return;
114126
}
115127

116128
$indentation = $this->detectIndentation();
117129
if ($indentation === 0) {
130+
$this->unindented = true;
131+
118132
return;
119133
}
120134

@@ -125,6 +139,8 @@ private function unIndent(): void
125139

126140
$this->lines[$i] = substr($line, $indentation);
127141
}
142+
143+
$this->unindented = true;
128144
}
129145

130146
private function detectIndentation(): int

packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ final class InlineLexer extends AbstractLexer
5555
public const VARIABLE_DELIMITER = 24;
5656
public const ESCAPED_SIGN = 25;
5757

58+
/** @var string|null Cached hyperlink pattern (built once from SUPPORTED_SCHEMAS) */
59+
private static string|null $hyperlinkPattern = null;
60+
5861
/**
5962
* Map between string position and position in token list.
6063
*
@@ -162,7 +165,12 @@ protected function getType(string &$value)
162165
return self::LITERAL;
163166
}
164167

165-
if (preg_match('/' . ExternalReferenceResolver::SUPPORTED_SCHEMAS . ':[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*[-a-zA-Z0-9()@%_\\+~#&\\/=]/', $value) && parse_url($value, PHP_URL_SCHEME) !== null) {
168+
// Cache the expensive hyperlink pattern (5600+ chars from SUPPORTED_SCHEMAS)
169+
if (self::$hyperlinkPattern === null) {
170+
self::$hyperlinkPattern = '/' . ExternalReferenceResolver::SUPPORTED_SCHEMAS . ':[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*[-a-zA-Z0-9()@%_\\+~#&\\/=]/';
171+
}
172+
173+
if (preg_match(self::$hyperlinkPattern, $value) && parse_url($value, PHP_URL_SCHEME) !== null) {
166174
return self::HYPERLINK;
167175
}
168176

packages/guides-restructured-text/src/RestructuredText/Parser/InlineParser.php

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,18 @@ class InlineParser
2929
/** @var InlineRule[] */
3030
private array $rules;
3131

32-
/** @var array<InlineLexer::*, CachableInlineRule> */
32+
/** @var array<int, CachableInlineRule> */
3333
private array $cache = [];
3434

35+
/**
36+
* Reusable lexer instance to avoid repeated instantiation.
37+
*
38+
* Note: This assumes single-threaded parsing. The lexer state is fully
39+
* reset via setInput() before each parse, but concurrent parsing would
40+
* cause race conditions.
41+
*/
42+
private InlineLexer $lexer;
43+
3544
/** @param iterable<InlineRule> $inlineRules */
3645
public function __construct(iterable $inlineRules)
3746
{
@@ -44,11 +53,13 @@ public function __construct(iterable $inlineRules)
4453

4554
$this->cache[$rule->getToken()] = $rule;
4655
}
56+
57+
$this->lexer = new InlineLexer();
4758
}
4859

4960
public function parse(string $content, BlockContext $blockContext): InlineCompoundNode
5061
{
51-
$lexer = new InlineLexer();
62+
$lexer = $this->lexer;
5263
$lexer->setInput($content);
5364
$lexer->moveNext();
5465
$lexer->moveNext();

packages/guides-restructured-text/src/RestructuredText/Parser/LineChecker.php

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@
2020

2121
final class LineChecker
2222
{
23+
/** @var array<string, bool> Cache for isDirective results */
24+
private static array $directiveCache = [];
25+
26+
/** @var array<string, bool> Cache for isLink results */
27+
private static array $linkCache = [];
28+
29+
/** @var array<string, bool> Cache for isAnnotation results */
30+
private static array $annotationCache = [];
31+
2332
private const HEADER_LETTERS = [
2433
'!',
2534
'"',
@@ -79,16 +88,38 @@ public static function isSpecialLine(string $line, int $minimumLength = 2): stri
7988

8089
public static function isDirective(string $line): bool
8190
{
82-
return preg_match('/^\.\.\s+(\|(.+)\| |)([^\s]+)::( (.*)|)$/mUsi', $line) > 0;
91+
if (isset(self::$directiveCache[$line])) {
92+
return self::$directiveCache[$line];
93+
}
94+
95+
$result = preg_match('/^\.\.\s+(\|(.+)\| |)([^\s]+)::( (.*)|)$/mUsi', $line) > 0;
96+
self::$directiveCache[$line] = $result;
97+
98+
return $result;
8399
}
84100

85101
public static function isLink(string $line): bool
86102
{
87-
return preg_match('/^\.\.\s+_(.+):.*$/mUsi', trim($line)) > 0;
103+
$trimmedLine = trim($line);
104+
if (isset(self::$linkCache[$trimmedLine])) {
105+
return self::$linkCache[$trimmedLine];
106+
}
107+
108+
$result = preg_match('/^\.\.\s+_(.+):.*$/mUsi', $trimmedLine) > 0;
109+
self::$linkCache[$trimmedLine] = $result;
110+
111+
return $result;
88112
}
89113

90114
public static function isAnnotation(string $line): bool
91115
{
92-
return preg_match('/^\.\.\s+\[([#a-zA-Z0-9]*)\]\s(.*)$$/mUsi', $line) > 0;
116+
if (isset(self::$annotationCache[$line])) {
117+
return self::$annotationCache[$line];
118+
}
119+
120+
$result = preg_match('/^\.\.\s+\[([#a-zA-Z0-9]*)\]\s(.*)$$/mUsi', $line) > 0;
121+
self::$annotationCache[$line] = $result;
122+
123+
return $result;
93124
}
94125
}

packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/CachableInlineRule.php

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,7 @@
1313

1414
namespace phpDocumentor\Guides\RestructuredText\Parser\Productions\InlineRules;
1515

16-
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
17-
1816
interface CachableInlineRule extends InlineRule
1917
{
20-
/** @return InlineLexer::* */
2118
public function getToken(): int;
2219
}

0 commit comments

Comments
 (0)