Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/Exception/InvalidTagContentException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

/*
* This file is part of the Behat Gherkin Parser.
* (c) Konstantin Kudryashov <ever.zet@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Behat\Gherkin\Exception;

class InvalidTagContentException extends ParserException
{
public function __construct(string $tag, ?string $file)
{
parent::__construct(
sprintf(
'Tags cannot include whitespace, found "%s"%s',
$tag,
is_string($file)
? "in file {$file}"
: ''
),
);
}
}
10 changes: 8 additions & 2 deletions src/Filter/TagFilter.php
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,18 @@ protected function isTagsMatchCondition(array $tags)
return true;
}

// If the file was parsed in legacy mode, the `@` prefix will have been removed from the individual tags on the
// parsed node. The tags in the filter expression still have their @ so we add the prefix back here if required.
// This can be removed once legacy parsing mode is removed.
$tags = array_map(
static fn (string $tag) => str_starts_with($tag, '@') ? $tag : '@' . $tag,
$tags
);

foreach (explode('&&', $this->filterString) as $andTags) {
$satisfiesComma = false;

foreach (explode(',', $andTags) as $tag) {
$tag = str_replace('@', '', trim($tag));

Comment on lines -124 to -125
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We no longer need to remove the @ from the tag filter expression, because the tags we're comparing to will now always have their leading @.

if ($tag[0] === '~') {
$tag = mb_substr($tag, 1, mb_strlen($tag, 'utf8') - 1, 'utf8');
$satisfiesComma = !in_array($tag, $tags, true) || $satisfiesComma;
Expand Down
24 changes: 24 additions & 0 deletions src/GherkinCompatibilityMode.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,28 @@ public function allowWhitespaceInLanguageTag(): bool
default => true,
};
}

/**
* @internal
*/
public function shouldRemoveTagPrefixChar(): bool
{
// Note: When this is removed we can also remove the code in TagFilter that handles tags with no leading @
return match ($this) {
self::LEGACY => true,
default => false,
};
}

/**
* @internal
*/
public function shouldThrowOnWhitespaceInTag(): bool
{
return match ($this) {
// Note, although we don't throw we have triggered an E_USER_DEPRECATED in Parser::guardTags since v4.9.0
self::LEGACY => false,
default => true,
};
}
}
20 changes: 17 additions & 3 deletions src/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -825,9 +825,23 @@ protected function scanTags()
}

$token = $this->takeToken('Tag');
$tags = explode('@', mb_substr($line, 1, mb_strlen($line, 'utf8') - 1, 'utf8'));
$tags = array_map(trim(...), $tags);
$token['tags'] = $tags;

if ($this->compatibilityMode->shouldRemoveTagPrefixChar()) {
// Legacy behaviour
$tags = explode('@', mb_substr($line, 1, mb_strlen($line, 'utf8') - 1, 'utf8'));
$tags = array_map(trim(...), $tags);
$token['tags'] = $tags;

return $token;
}

$tags = preg_split('/(?=@)/u', $line);
assert($tags !== false);
// Remove the empty content before the first tag prefix
array_shift($tags);

// Note: checking for whitespace in tags is done in the Parser to fit with existing logic
$token['tags'] = array_map(trim(...), $tags);
Comment on lines +838 to +844
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This broadly follows the logic in cucumber/gherkin except I have used a lookahead regex to save having to add the @ back onto the split parts. It also omits checking for whitespace here as it seemed more logical to put that alongside the existing deprecation in the Parser, at least until we remove that.


return $token;
}
Expand Down
7 changes: 6 additions & 1 deletion src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
namespace Behat\Gherkin;

use Behat\Gherkin\Exception\FilesystemException;
use Behat\Gherkin\Exception\InvalidTagContentException;
use Behat\Gherkin\Exception\LexerException;
use Behat\Gherkin\Exception\NodeException;
use Behat\Gherkin\Exception\ParserException;
Expand Down Expand Up @@ -584,8 +585,12 @@ protected function guardTags(array $tags)
{
foreach ($tags as $tag) {
if (preg_match('/\s/', $tag)) {
if ($this->compatibilityMode->shouldThrowOnWhitespaceInTag()) {
throw new InvalidTagContentException($tag, $this->file);
}

trigger_error(
sprintf('Whitespace in tags is deprecated, found "%s"', $tag),
sprintf('Whitespace in tags is deprecated, found "%s" in %s', $tag, $this->file ?? 'unknown file'),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed the deprecation message did not include the name of the file (if available) - I've added that to help users trace any of these before enabling the new mode.

E_USER_DEPRECATED
);
}
Expand Down
2 changes: 1 addition & 1 deletion tests/Cucumber/CompatibilityTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ class CompatibilityTest extends TestCase
'whitespace_in_tags.feature' => '/Whitespace in tags is deprecated/',
],
'gherkin-32' => [
'whitespace_in_tags.feature' => '/Whitespace in tags is deprecated/',
],
];

Expand Down Expand Up @@ -142,6 +141,7 @@ public function testFeaturesParseTheSameAsCucumber(GherkinCompatibilityMode $mod
self::$featureNodeComparator->setGherkinCompatibilityMode($mode);
self::$stepNodeComparator->setGherkinCompatibilityMode($mode);
$this->parser->setGherkinCompatibilityMode($mode);
$this->ndJsonAstParser->setGherkinCompatibilityMode($mode);

$gherkinFile = $file->getPathname();
$actual = $this->parser->parseFile($gherkinFile);
Expand Down
62 changes: 38 additions & 24 deletions tests/Cucumber/NDJsonAstParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
namespace Tests\Behat\Gherkin\Cucumber;

use Behat\Gherkin\Exception\NodeException;
use Behat\Gherkin\GherkinCompatibilityMode;
use Behat\Gherkin\Node\ArgumentInterface;
use Behat\Gherkin\Node\BackgroundNode;
use Behat\Gherkin\Node\ExampleTableNode;
Expand Down Expand Up @@ -49,6 +50,13 @@
*/
class NDJsonAstParser
{
private GherkinCompatibilityMode $compatibilityMode = GherkinCompatibilityMode::LEGACY;

public function setGherkinCompatibilityMode(GherkinCompatibilityMode $mode): void
{
$this->compatibilityMode = $mode;
}

/**
* @return list<FeatureNode>
*/
Expand All @@ -57,10 +65,10 @@ public function load(string $resource): array
return array_values(
array_filter(
array_map(
static function ($line) use ($resource) {
function ($line) use ($resource) {
// As we load data from the official Cucumber project, we assume the data matches the JSON schema.
// @phpstan-ignore argument.type
return self::getFeature(json_decode($line, true, 512, \JSON_THROW_ON_ERROR), $resource);
return $this->getFeature(json_decode($line, true, 512, \JSON_THROW_ON_ERROR), $resource);
},
file($resource)
?: throw new RuntimeException("Could not load Cucumber json file: $resource."),
Expand All @@ -72,7 +80,7 @@ static function ($line) use ($resource) {
/**
* @phpstan-param TEnvelope $json
*/
private static function getFeature(array $json, string $filePath): ?FeatureNode
private function getFeature(array $json, string $filePath): ?FeatureNode
{
if (!isset($json['gherkinDocument']['feature'])) {
return null;
Expand All @@ -83,9 +91,9 @@ private static function getFeature(array $json, string $filePath): ?FeatureNode
return new FeatureNode(
$featureJson['name'],
$featureJson['description'],
self::getTags($featureJson),
self::getBackground($featureJson),
self::getScenarios($featureJson),
$this->getTags($featureJson),
$this->getBackground($featureJson),
$this->getScenarios($featureJson),
$featureJson['keyword'],
$featureJson['language'],
preg_replace('/(?<=\\.feature).*$/', '', $filePath),
Expand All @@ -98,10 +106,16 @@ private static function getFeature(array $json, string $filePath): ?FeatureNode
*
* @return list<string>
*/
private static function getTags(array $json): array
private function getTags(array $json): array
{
return array_map(
static fn (array $tag) => preg_replace('/^@/', '', $tag['name']) ?? $tag['name'],
fn (array $tag) => match ($this->compatibilityMode->shouldRemoveTagPrefixChar()) {
// The cucumber/gherkin testdata contains the @ prefix. We need to remove that to match our legacy
// parser. It's not ideal to modify the expected parser result here, but the custom comparator approach
// we have used for other parity variations is tricky because of the variety taggable nodes.
true => preg_replace('/^@/', '', $tag['name']) ?? $tag['name'],
false => $tag['name'],
},
$json['tags']
);
}
Expand All @@ -111,18 +125,18 @@ private static function getTags(array $json): array
*
* @return list<ScenarioInterface>
*/
private static function getScenarios(array $json): array
private function getScenarios(array $json): array
{
return array_values(
array_map(
static function ($child) {
$tables = self::getTables($child['scenario']['examples']);
function ($child) {
$tables = $this->getTables($child['scenario']['examples']);

if ($tables) {
return new OutlineNode(
$child['scenario']['name'],
self::getTags($child['scenario']),
self::getSteps($child['scenario']['steps']),
$this->getTags($child['scenario']),
$this->getSteps($child['scenario']['steps']),
$tables,
$child['scenario']['keyword'],
$child['scenario']['location']['line'],
Expand All @@ -132,8 +146,8 @@ static function ($child) {

return new ScenarioNode(
$child['scenario']['name'],
self::getTags($child['scenario']),
self::getSteps($child['scenario']['steps']),
$this->getTags($child['scenario']),
$this->getSteps($child['scenario']['steps']),
$child['scenario']['keyword'],
$child['scenario']['location']['line'],
$child['scenario']['description']
Expand All @@ -152,7 +166,7 @@ static function ($child) {
/**
* @phpstan-param TFeature $json
*/
private static function getBackground(array $json): ?BackgroundNode
private function getBackground(array $json): ?BackgroundNode
{
$backgrounds = array_filter(
$json['children'],
Expand All @@ -167,7 +181,7 @@ private static function getBackground(array $json): ?BackgroundNode

return new BackgroundNode(
$background['background']['name'],
self::getSteps($background['background']['steps']),
$this->getSteps($background['background']['steps']),
$background['background']['keyword'],
$background['background']['location']['line'],
$background['background']['description'],
Expand All @@ -179,13 +193,13 @@ private static function getBackground(array $json): ?BackgroundNode
*
* @return list<StepNode>
*/
private static function getSteps(array $items): array
private function getSteps(array $items): array
{
return array_map(
static fn (array $item) => new StepNode(
fn (array $item) => new StepNode(
$item['keyword'],
$item['text'],
self::getStepArguments($item),
$this->getStepArguments($item),
$item['location']['line'],
trim($item['keyword'])
),
Expand All @@ -198,7 +212,7 @@ private static function getSteps(array $items): array
*
* @return list<ArgumentInterface>
*/
private static function getStepArguments(array $step): array
private function getStepArguments(array $step): array
{
$args = [];

Expand All @@ -225,10 +239,10 @@ private static function getStepArguments(array $step): array
*
* @return list<ExampleTableNode>
*/
private static function getTables(array $items): array
private function getTables(array $items): array
{
return array_map(
static function ($tableJson): ExampleTableNode {
function ($tableJson): ExampleTableNode {
$headerRow = $tableJson['tableHeader'] ?? null;
$tableBody = $tableJson['tableBody'];

Expand All @@ -253,7 +267,7 @@ static function ($tableJson): ExampleTableNode {
return new ExampleTableNode(
$table,
$tableJson['keyword'],
self::getTags($tableJson),
$this->getTags($tableJson),
$tableJson['name'],
$tableJson['description'],
);
Expand Down
35 changes: 35 additions & 0 deletions tests/Filter/TagFilterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use Behat\Gherkin\Node\OutlineNode;
use Behat\Gherkin\Node\ScenarioNode;
use ErrorException;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;

class TagFilterTest extends TestCase
Expand Down Expand Up @@ -300,6 +301,40 @@ public function testTagFilterThatIsAllWhitespaceIsIgnored(): void
$this->assertTrue($result);
}

/**
* @phpstan-return list<array{string, list<string>, bool}>
*/
public static function providerMatchWithoutRemovingPrefix(): array
{
// These cases rely on the bulk of the coverage being provided by the other tests, and the knowledge that the
// implementation ultimately uses the same logic to compare tags from all types of nodes. They are only intended
// to be temporary until we drop legacy parsing mode, at which point we can add the `@` to all the tags in the
// other tests in this file.
return [
['@wip', [], false],
['@wip', ['@slow'], false],
['@wip', ['@wip'], true],
['@wip', ['@slow', '@wip'], true],
['@tag1&&~@tag2&&@tag3', [], false],
['@tag1&&~@tag2&&@tag3', ['@tag1'], false],
['@tag1&&~@tag2&&@tag3', ['@tag1', '@tag3'], true],
['@tag1&&~@tag2&&@tag3', ['@tag1', '@tag2'], false],
['@tag1&&~@tag2&&@tag3', ['@tag1', '@tag4'], false],
['@tag1&&~@tag2&&@tag3', ['@tag1', '@tag2', '@tag3'], false],
];
}

/**
* @phpstan-param list<string> $tags
*/
#[DataProvider('providerMatchWithoutRemovingPrefix')]
public function testItMatchesTagsParsedWithoutRemovingPrefix(string $filter, array $tags, bool $expect): void
{
$feature = new FeatureNode(null, null, $tags, null, [], '', '', null, 1);
$tagFilter = new TagFilter($filter);
$this->assertSame($expect, $tagFilter->isFeatureMatch($feature));
}

private function expectDeprecationError(): void
{
set_error_handler(
Expand Down