Skip to content

Commit 4f63341

Browse files
authored
Merge pull request #25 from BackEndTea/feat/named-back-reference
Support named back references
2 parents 7c27ed1 + c0f267f commit 4f63341

11 files changed

+243
-6
lines changed

composer.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424
}
2525
},
2626
"require-dev": {
27-
"phpstan/phpstan": "^0.12.64",
27+
"phpstan/phpstan": "^0.12.70",
2828
"phpunit/phpunit": "^9.5",
29-
"vimeo/psalm": "^4.3",
30-
"infection/infection": "^0.20.2",
29+
"vimeo/psalm": "^4.4",
30+
"infection/infection": "^0.21",
3131
"doctrine/coding-standard": "^8.2"
3232
}
3333
}

phpstan_baseline.neon

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ parameters:
22
ignoreErrors:
33
-
44
message: "#^Parameter \\#1 \\$char of static method BackEndTea\\\\Regexer\\\\Token\\\\Exception\\\\MissingEnd\\:\\:fromDelimiter\\(\\) expects string, string\\|null given\\.$#"
5-
count: 1
5+
count: 2
66
path: src/Lexer/Lexer.php

psalm_baseline.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<MixedArgument occurrences="1">
88
<code>$this-&gt;delimiter</code>
99
</MixedArgument>
10-
<PossiblyNullArgument occurrences="2">
10+
<PossiblyNullArgument occurrences="3">
1111
<code>$this-&gt;delimiter</code>
1212
<code>$input-&gt;next()</code>
1313
</PossiblyNullArgument>

src/Lexer/Lexer.php

+47
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
use BackEndTea\Regexer\Token\Dot;
1212
use BackEndTea\Regexer\Token\Escaped;
1313
use BackEndTea\Regexer\Token\Exception\InvalidDelimiter;
14+
use BackEndTea\Regexer\Token\Exception\InvalidReference;
1415
use BackEndTea\Regexer\Token\Exception\MissingEnd;
1516
use BackEndTea\Regexer\Token\Exception\MissingStart;
1617
use BackEndTea\Regexer\Token\Exception\UnclosedBracketList;
@@ -111,6 +112,20 @@ public function regexToTokenStream(Stream $input): iterable
111112
$token = Token\Anchor\End::create();
112113
break;
113114
case '(':
115+
if ($input->getBetween($input->currentIndex(), $input->currentIndex() + 3) === '(?P=') {
116+
$referenceTo = '';
117+
while ($input->next() !== null) {
118+
$current = $input->current();
119+
$referenceTo .= $current;
120+
if ($current === ')') {
121+
break;
122+
}
123+
}
124+
125+
$token = SubPattern\Reference::forPNotation($referenceTo);
126+
break;
127+
}
128+
114129
++$this->subPatternCount;
115130
$token = SubPattern\Start::create();
116131
$currentIndex = $input->currentIndex();
@@ -375,6 +390,38 @@ private function createEscapeSequence(Stream $input): Token
375390
return Escaped\EscapedCharacter::fromCharacter('-');
376391
}
377392

393+
if ($current === 'k') {
394+
$next = $input->next();
395+
switch ($next) {
396+
case '{':
397+
$closing = '}';
398+
break;
399+
case '\'':
400+
$closing = '\'';
401+
break;
402+
case '<':
403+
$closing = '>';
404+
break;
405+
case null:
406+
throw MissingEnd::fromDelimiter($this->delimiter);
407+
408+
default:
409+
throw InvalidReference::fromKNotation($next);
410+
}
411+
412+
$start = 'k' . $next;
413+
$referenceTo = '';
414+
while ($input->next() !== null) {
415+
$current = $input->current();
416+
$referenceTo .= $current;
417+
if ($current === $closing) {
418+
break;
419+
}
420+
}
421+
422+
return SubPattern\Reference::create($start . $referenceTo);
423+
}
424+
378425
if ($current === 'g' && $input->at($input->currentIndex() + 1) === '{') {
379426
$current = 'g{';
380427
$input->next();

src/Node/SubPattern/Reference.php

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ final class Reference extends Node
1717
'\\' => '',
1818
'\g' => '',
1919
'\g{' => '}',
20+
'\k{' => '}',
21+
'\k<' => '>',
22+
'\k\'' => '\'',
23+
'(?P=' => ')',
2024
];
2125

2226
private string $referenceTo;

src/Parser/TokenParser.php

+5-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,11 @@ private function parseFromToken(Node\NodeWithChildren $parent, array $tokens, in
209209
if ($token instanceof Token\SubPattern\Reference) {
210210
$tokenString = $token->asString();
211211

212-
if ($tokenString[1] !== 'g') {
212+
if ($tokenString[1] === 'k') {
213+
$node = new Node\SubPattern\Reference('\\k' . $tokenString[2], substr($token->asString(), 3, -1));
214+
} elseif ($tokenString[0] === '(') {
215+
$node = new Node\SubPattern\Reference('(?P=', substr($token->asString(), 4, -1));
216+
} elseif ($tokenString[1] !== 'g') {
213217
$node = new Node\SubPattern\Reference('\\', substr($token->asString(), 1));
214218
} elseif ($tokenString[2] === '{') {
215219
$node = new Node\SubPattern\Reference('\\g{', substr($token->asString(), 3, -1));
+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace BackEndTea\Regexer\Token\Exception;
6+
7+
use function sprintf;
8+
9+
/**
 * Syntax error for a `\k` escape that is followed by an unsupported character.
 */
final class InvalidReference extends SyntaxException
{
    /**
     * Creates the exception for the character found directly after `\k`.
     *
     * @param string $character The offending character; it is quoted in the message.
     */
    public static function fromKNotation(string $character): self
    {
        $message = sprintf('Only \',{ or < are allowed after \k, got "%s"', $character);

        return new self($message);
    }
}

src/Token/SubPattern/Reference.php

+10
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,20 @@
66

77
use BackEndTea\Regexer\Token;
88

9+
use function sprintf;
10+
911
/**
 * Token for a reference to a sub pattern, stored as the raw text it was written with.
 */
final class Reference extends Token
{
    /**
     * Builds a backslash-style reference token.
     *
     * @param string $characters Everything that follows the backslash, e.g. `g{foo}` or `k<foo>`.
     */
    public static function create(string $characters): self
    {
        return new self(sprintf('\\%s', $characters));
    }

    /**
     * Builds a reference token for the `(?P=name)` notation.
     *
     * @param string $name Everything that follows the opening parenthesis, e.g. `?P=foo)`.
     */
    public static function forPNotation(string $name): self
    {
        return new self(sprintf('(%s', $name));
    }
}

tests/E2E/ParserLexer/NamedSubPatternTest.php

+130
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,135 @@ public function provideTestCases(): Generator
5151
],
5252
new Node\RootNode('/', [new Node\SubPattern([new Node\LiteralCharacters('ab')], true, new Node\SubPattern\Name('?P<', 'foo'))], ''),
5353
];
54+
55+
yield 'Named back reference with \g{ notation' => [
56+
'/(?P<foo>ab)=\g{foo}/',
57+
[
58+
Token\Delimiter::create('/'),
59+
Token\SubPattern\Start::create(),
60+
Token\SubPattern\Named::fromName('?P<foo>'),
61+
Token\LiteralCharacters::create('ab'),
62+
Token\SubPattern\End::create(),
63+
Token\LiteralCharacters::create('='),
64+
Token\SubPattern\Reference::create('g{foo}'),
65+
Token\Delimiter::create('/'),
66+
],
67+
new Node\RootNode('/', [
68+
new Node\SubPattern(
69+
[
70+
new Node\LiteralCharacters('ab'),
71+
],
72+
true,
73+
new Node\SubPattern\Name('?P<', 'foo')
74+
),
75+
new Node\LiteralCharacters('='),
76+
new Node\SubPattern\Reference('\g{', 'foo'),
77+
], ''),
78+
['ab=ab'],
79+
];
80+
81+
yield 'Named back reference with \k{ notation' => [
82+
'/(?P<foo>ab)=\k{foo}/',
83+
[
84+
Token\Delimiter::create('/'),
85+
Token\SubPattern\Start::create(),
86+
Token\SubPattern\Named::fromName('?P<foo>'),
87+
Token\LiteralCharacters::create('ab'),
88+
Token\SubPattern\End::create(),
89+
Token\LiteralCharacters::create('='),
90+
Token\SubPattern\Reference::create('k{foo}'),
91+
Token\Delimiter::create('/'),
92+
],
93+
new Node\RootNode('/', [
94+
new Node\SubPattern(
95+
[
96+
new Node\LiteralCharacters('ab'),
97+
],
98+
true,
99+
new Node\SubPattern\Name('?P<', 'foo')
100+
),
101+
new Node\LiteralCharacters('='),
102+
new Node\SubPattern\Reference('\k{', 'foo'),
103+
], ''),
104+
['ab=ab'],
105+
];
106+
107+
yield 'Named back reference with \k\' notation' => [
108+
'/(?P<foo>ab)=\k\'foo\'/',
109+
[
110+
Token\Delimiter::create('/'),
111+
Token\SubPattern\Start::create(),
112+
Token\SubPattern\Named::fromName('?P<foo>'),
113+
Token\LiteralCharacters::create('ab'),
114+
Token\SubPattern\End::create(),
115+
Token\LiteralCharacters::create('='),
116+
Token\SubPattern\Reference::create('k\'foo\''),
117+
Token\Delimiter::create('/'),
118+
],
119+
new Node\RootNode('/', [
120+
new Node\SubPattern(
121+
[
122+
new Node\LiteralCharacters('ab'),
123+
],
124+
true,
125+
new Node\SubPattern\Name('?P<', 'foo')
126+
),
127+
new Node\LiteralCharacters('='),
128+
new Node\SubPattern\Reference('\k\'', 'foo'),
129+
], ''),
130+
['ab=ab'],
131+
];
132+
133+
yield 'Named back reference with \k< notation' => [
134+
'/(?P<foo>ab)=\k<foo>/',
135+
[
136+
Token\Delimiter::create('/'),
137+
Token\SubPattern\Start::create(),
138+
Token\SubPattern\Named::fromName('?P<foo>'),
139+
Token\LiteralCharacters::create('ab'),
140+
Token\SubPattern\End::create(),
141+
Token\LiteralCharacters::create('='),
142+
Token\SubPattern\Reference::create('k<foo>'),
143+
Token\Delimiter::create('/'),
144+
],
145+
new Node\RootNode('/', [
146+
new Node\SubPattern(
147+
[
148+
new Node\LiteralCharacters('ab'),
149+
],
150+
true,
151+
new Node\SubPattern\Name('?P<', 'foo')
152+
),
153+
new Node\LiteralCharacters('='),
154+
new Node\SubPattern\Reference('\k<', 'foo'),
155+
], ''),
156+
['ab=ab'],
157+
];
158+
159+
yield 'Named back reference with (?P=) notation' => [
160+
'/(?P<foo>ab)=(?P=foo)/',
161+
[
162+
Token\Delimiter::create('/'),
163+
Token\SubPattern\Start::create(),
164+
Token\SubPattern\Named::fromName('?P<foo>'),
165+
Token\LiteralCharacters::create('ab'),
166+
Token\SubPattern\End::create(),
167+
Token\LiteralCharacters::create('='),
168+
Token\SubPattern\Reference::forPNotation('?P=foo)'),
169+
Token\Delimiter::create('/'),
170+
],
171+
new Node\RootNode('/', [
172+
new Node\SubPattern(
173+
[
174+
new Node\LiteralCharacters('ab'),
175+
],
176+
true,
177+
new Node\SubPattern\Name('?P<', 'foo')
178+
),
179+
new Node\LiteralCharacters('='),
180+
new Node\SubPattern\Reference('(?P=', 'foo'),
181+
], ''),
182+
['ab=ab'],
183+
];
54184
}
55185
}

tests/Lexer/LexerFailureTest.php

+17
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
use BackEndTea\Regexer\StringStream;
88
use BackEndTea\Regexer\Token\Exception\InvalidDelimiter;
9+
use BackEndTea\Regexer\Token\Exception\InvalidReference;
910
use BackEndTea\Regexer\Token\Exception\InvalidSubPattern;
1011
use BackEndTea\Regexer\Token\Exception\MissingEnd;
1112
use BackEndTea\Regexer\Token\Exception\MissingStart;
@@ -111,4 +112,20 @@ public function testCantEndOnBackslash(): void
111112
$this->expectException(MissingEnd::class);
112113
Util::iterableToArray($lexer->regexToTokenStream(new StringStream('/foo\\')));
113114
}
115+
116+
/**
 * `\k` followed by a character other than `{`, `'` or `<` must raise InvalidReference.
 */
public function testInvalidKReference(): void
{
    $lexer  = new Lexer();
    $tokens = $lexer->regexToTokenStream(new StringStream('/\kfoo/'));

    $this->expectException(InvalidReference::class);
    // The token stream is only consumed here, after the expectation is registered.
    Util::iterableToArray($tokens);
}
123+
124+
/**
 * A pattern that stops right after `\k` must raise MissingEnd.
 */
public function testCantEndOnUnescapedK(): void
{
    $lexer  = new Lexer();
    $tokens = $lexer->regexToTokenStream(new StringStream('/\k'));

    $this->expectException(MissingEnd::class);
    // The token stream is only consumed here, after the expectation is registered.
    Util::iterableToArray($tokens);
}
114131
}

tests/StringStreamTest.php

+7
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,11 @@ public function testCanGetBetween(): void
2828

2929
$this->assertSame('{123}', $stream->getBetween(3, 7));
3030
}
31+
32+
/**
 * Index 4 is one past the last character of the four-character stream, so at() must return null.
 */
public function testAtWillReturnNullIfOutOfBounds(): void
{
    $outOfBounds = (new StringStream('0123'))->at(4);

    $this->assertNull($outOfBounds);
}
3138
}

0 commit comments

Comments
 (0)