diff --git a/java/Makefile b/java/Makefile index 2a0a66c59..a2ed500ab 100644 --- a/java/Makefile +++ b/java/Makefile @@ -7,8 +7,8 @@ SOURCE_FILES = $(shell find . -name "*.java" | grep -v $(GHERKIN_PARSER)) GHERKIN = bin/gherkin GHERKIN_GENERATE_TOKENS = bin/gherkin-generate-tokens -GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature") -BAD_FEATURE_FILES = $(shell find ../testdata/bad -name "*.feature") +GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature" -o -name "*.md") +BAD_FEATURE_FILES = $(shell find ../testdata/bad -name "*.feature" -o -name "*.md") TOKENS = $(patsubst ../testdata/%,acceptance/testdata/%.tokens,$(GOOD_FEATURE_FILES)) ASTS = $(patsubst ../testdata/%,acceptance/testdata/%.ast.ndjson,$(GOOD_FEATURE_FILES)) diff --git a/java/gherkin-java.razor b/java/gherkin-java.razor index 099f2ba9f..c6b84d8ff 100644 --- a/java/gherkin-java.razor +++ b/java/gherkin-java.razor @@ -65,7 +65,7 @@ class @Model.ParserClassName { } private final Builder builder; - + private final ITokenMatcher tokenMatcher; static class ParserContext { final ITokenScanner tokenScanner; final ITokenMatcher tokenMatcher; @@ -80,8 +80,9 @@ class @Model.ParserClassName { } } - Parser(Builder builder) { + Parser(Builder builder, ITokenMatcher tokenMatcher) { this.builder = builder; + this.tokenMatcher = tokenMatcher; } T parse(String source, String uri) { @@ -93,7 +94,7 @@ class @Model.ParserClassName { } T parse(ITokenScanner tokenScanner, String uri) { - return parse(tokenScanner, new TokenMatcher(), uri); + return parse(tokenScanner, this.tokenMatcher, uri); } T parse(String source, ITokenMatcher tokenMatcher, String uri) { diff --git a/java/src/main/java/io/cucumber/gherkin/GherkinInMarkdownTokenMatcher.java b/java/src/main/java/io/cucumber/gherkin/GherkinInMarkdownTokenMatcher.java new file mode 100644 index 000000000..a88e71fac --- /dev/null +++ b/java/src/main/java/io/cucumber/gherkin/GherkinInMarkdownTokenMatcher.java @@ -0,0 +1,313 @@ +package 
io.cucumber.gherkin;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.List;
+import java.util.stream.Collectors;
+import io.cucumber.messages.types.StepKeywordType;
+import java.util.ArrayList;
+
+import static io.cucumber.gherkin.Parser.TokenType;
+import static io.cucumber.gherkin.Parser.ITokenMatcher;
+import static io.cucumber.gherkin.StringUtils.ltrim;
+
+/**
+ * Token matcher for Gherkin embedded in Markdown documents.
+ *
+ * <p>Keywords are recognised behind Markdown prefixes: title keywords
+ * (Feature, Rule, Background, Scenario, Examples) behind an ATX header
+ * ({@code #} to {@code ######}) and step keywords behind a list bullet
+ * ({@code *}, {@code +} or {@code -}). Tags are written in backticks, doc
+ * strings are delimited by the alternative doc string separator and table
+ * rows are indented pipe-delimited lines.
+ *
+ * <p>The matcher is stateful (current dialect, open doc string, whether a
+ * feature line was already seen); call {@link #reset()} before matching a new
+ * document.
+ */
+class GherkinInMarkdownTokenMatcher implements ITokenMatcher {
+
+    /** Markdown prefixes that may precede a Gherkin keyword. */
+    private enum KeywordPrefix {
+        BULLET("^(\\s*[*+-]\\s)"),
+        HEADER("^(#{1,6}\\s)");
+
+        private final String pattern;
+
+        KeywordPrefix(String pattern) {
+            this.pattern = pattern;
+        }
+
+        public String getPattern() {
+            return pattern;
+        }
+    }
+
+    // Patterns are immutable and thread-safe, so they are compiled once per class
+    // instead of once per instance or once per call.
+    private static final Pattern TABLE_ROW_PATTERN = Pattern.compile("^\\s{2,5}\\|.*\\S.*\\|$");
+    private static final Pattern LANGUAGE_PATTERN = Pattern.compile("^\\s*#\\s*language\\s*:\\s*([a-zA-Z\\-]+)\\s*$");
+    // A tag is an '@'-prefixed word wrapped in backticks; group 1 is the tag itself.
+    private static final Pattern TAG_PATTERN = Pattern.compile("`(@[^\\s`]+)`");
+    // Cell of a GFM table separator row, e.g. ---, :---, ---: or :---:
+    private static final Pattern GFM_SEPARATOR_CELL_PATTERN = Pattern.compile("^:?-+:?$");
+
+    private final GherkinDialectProvider dialectProvider;
+    private GherkinDialect currentDialect;
+    private int indentToRemove = 0;
+    private boolean activeDocStringSeparator = false;
+    private boolean matchedFeatureLine = false;
+
+    public GherkinInMarkdownTokenMatcher(GherkinDialectProvider dialectProvider) {
+        this.dialectProvider = dialectProvider;
+        reset();
+    }
+
+    public GherkinInMarkdownTokenMatcher() {
+        this(new GherkinDialectProvider());
+    }
+
+    public GherkinInMarkdownTokenMatcher(String defaultDialectName) {
+        this(new GherkinDialectProvider(defaultDialectName));
+    }
+
+    private GherkinDialect getCurrentDialect() {
+        return currentDialect;
+    }
+
+    /** Matches the end-of-file token. */
+    @Override
+    public boolean match_EOF(Token token) {
+        if (token.isEOF()) {
+            setTokenMatched(token, TokenType.EOF, null, null, null, null, null);
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Matches the feature line. A header such as {@code # Feature: name} is
+     * preferred, but the first non-empty line that reaches this method is
+     * accepted as the feature line even without a feature keyword.
+     */
+    @Override
+    public boolean match_FeatureLine(Token token) {
+        // A feature line was already seen: the token is marked with a null type and
+        // reported as matched (behaviour kept from the original implementation).
+        if (matchedFeatureLine) {
+            setTokenMatched(token, null, null, null, null, null, null);
+            return true;
+        }
+
+        if (token.line == null || token.line.isEmpty()) return false;
+
+        // First try to match "# Feature: blah"; otherwise the whole line becomes the feature line.
+        if (!matchTitleLine(token, TokenType.FeatureLine, currentDialect.getFeatureKeywords())) {
+            setTokenMatched(token, TokenType.FeatureLine, ltrim(token.line.getLineText(0)), null, null, null, null);
+        }
+
+        // Remember that we've matched a feature line
+        matchedFeatureLine = true;
+        return true;
+    }
+
+    @Override
+    public boolean match_RuleLine(Token token) {
+        return matchTitleLine(token, TokenType.RuleLine, currentDialect.getRuleKeywords());
+    }
+
+    @Override
+    public boolean match_BackgroundLine(Token token) {
+        return matchTitleLine(token, TokenType.BackgroundLine, currentDialect.getBackgroundKeywords());
+    }
+
+    @Override
+    public boolean match_ExamplesLine(Token token) {
+        return matchTitleLine(token, TokenType.ExamplesLine, currentDialect.getExamplesKeywords());
+    }
+
+    @Override
+    public boolean match_ScenarioLine(Token token) {
+        return matchTitleLine(token, TokenType.ScenarioLine, currentDialect.getScenarioKeywords()) ||
+                matchTitleLine(token, TokenType.ScenarioLine, currentDialect.getScenarioOutlineKeywords());
+    }
+
+    /**
+     * Matches a Markdown header followed by one of {@code keywords}, a colon,
+     * whitespace and a title, e.g. {@code ## Scenario: title}.
+     *
+     * @param token     token to inspect and, on success, to populate
+     * @param tokenType type recorded on the token when the line matches
+     * @param keywords  dialect keywords accepted after the header prefix
+     * @return {@code true} if the line matched
+     */
+    private boolean matchTitleLine(Token token, TokenType tokenType, List<String> keywords) {
+        if (token.line == null) return false;
+
+        // Keywords are literal text, so they are quoted just like the step keywords are.
+        String alternatives = keywords.stream().map(Pattern::quote).collect(Collectors.joining("|"));
+        Pattern headerPattern = Pattern.compile(
+                KeywordPrefix.HEADER.getPattern() + "(" + alternatives + "):(\\s+)(.*)");
+        Matcher matcher = headerPattern.matcher(token.line.getLineText(-1));
+        if (!matcher.find()) {
+            return false;
+        }
+        String keyword = matcher.group(2);
+        String text = matcher.group(4).trim();
+        int indent = matcher.group(1).length(); // length of the '#' run plus the following whitespace
+        setTokenMatched(token, tokenType, text, keyword, indent, null, null);
+        return true;
+    }
+
+    /**
+     * Matches a list bullet followed by a step keyword and a non-empty step
+     * text, e.g. {@code * Given something}.
+     */
+    @Override
+    public boolean match_StepLine(Token token) {
+        if (token.line == null) return false;
+
+        List<String> stepKeywords = currentDialect.getStepKeywords();
+
+        // Build pattern: bullet point followed by one of the (quoted) step keywords
+        String alternatives = stepKeywords.stream().map(Pattern::quote).collect(Collectors.joining("|"));
+        Pattern stepPattern = Pattern.compile(KeywordPrefix.BULLET.getPattern() + "(" + alternatives + ")" + "(.*)");
+        Matcher matcher = stepPattern.matcher(token.line.getLineText(0));
+        if (!matcher.find()) {
+            return false;
+        }
+
+        String keyword = matcher.group(2); // The step keyword
+        List<StepKeywordType> keywordTypes = currentDialect.getStepKeywordTypes(keyword);
+        // A keyword that maps to several types is ambiguous and reported as UNKNOWN.
+        StepKeywordType keywordType = (keywordTypes.size() > 1) ? StepKeywordType.UNKNOWN : keywordTypes.get(0);
+        String text = matcher.group(3).trim(); // The step text
+        int indent = matcher.group(1).length(); // Length of bullet + whitespace
+
+        if (text.isEmpty()) {
+            return false; // a keyword without step text is not a step
+        }
+        setTokenMatched(token, TokenType.StepLine, text, keyword, indent, keywordType, null);
+        return true;
+    }
+
+    /**
+     * Matches an indented, pipe-delimited table row. GFM separator rows
+     * (such as {@code | --- | --- |}) are rejected here; see
+     * {@link #match_Comment(Token)}.
+     */
+    @Override
+    public boolean match_TableRow(Token token) {
+        if (token.line == null) return false;
+
+        Matcher matcher = TABLE_ROW_PATTERN.matcher(token.line.getLineText(0));
+        if (matcher.find()) {
+            List<GherkinLineSpan> tableCells = token.line.getTableCells();
+            if (isGfmTableSeparator(tableCells)) return false;
+
+            setTokenMatched(token, TokenType.TableRow, null, "|", null, null, tableCells);
+            return true;
+        }
+        return false;
+    }
+
+    @Override
+    public boolean match_Empty(Token token) {
+        if (token.line == null) return false;
+        if (token.line.isEmpty()) {
+            setTokenMatched(token, TokenType.Empty, null, null, null, null, null);
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Matches a line containing at least one backtick-wrapped tag such as
+     * {@code `@smoke`}; every tag found on the line is recorded on the token.
+     */
+    @Override
+    public boolean match_TagLine(Token token) {
+        if (token.line == null) return false;
+
+        String lineText = token.line.getLineText(-1);
+        List<GherkinLineSpan> items = new ArrayList<>();
+
+        Matcher matcher = TAG_PATTERN.matcher(lineText);
+        while (matcher.find()) {
+            String tag = matcher.group(1); // Group 1 is the actual tag without backticks
+            // NOTE(review): the column is relative to the text returned by getLineText(-1);
+            // if that text has its indentation stripped, columns of indented tag lines
+            // will not include the indent - confirm against the expected test data.
+            int column = matcher.start(1) + 1; // +1 for 1-based column indexing
+            items.add(new GherkinLineSpan(column, tag));
+        }
+
+        if (items.isEmpty()) {
+            return false;
+        }
+        setTokenMatched(token, TokenType.TagLine, null, null, null, null, items);
+        return true;
+    }
+
+    /**
+     * Records the match result on {@code token}.
+     *
+     * @param indent explicit indent, or {@code null} to use the indent of the
+     *               token's line (0 when the token has no line)
+     */
+    private void setTokenMatched(Token token, TokenType matchedType, String text, String keyword, Integer indent,
+                                 StepKeywordType keywordType, List<GherkinLineSpan> items) {
+        token.matchedType = matchedType;
+        token.matchedKeyword = keyword;
+        token.matchedText = text;
+        token.keywordType = keywordType;
+        token.matchedIndent = indent != null ? indent : token.line == null ? 0 : token.line.indent();
+        token.matchedItems = items;
+        token.matchedGherkinDialect = getCurrentDialect();
+        // Columns are 1-based, hence indent + 1.
+        token.location = new Location(token.location.getLine(), token.matchedIndent + 1);
+    }
+
+    /**
+     * Matches a doc string separator. The first separator opens a doc string,
+     * the next one closes it.
+     */
+    @Override
+    public boolean match_DocStringSeparator(Token token) {
+        if (token.line == null) return false;
+        // No doc string is active -> this would be an opening separator, otherwise a closing one.
+        return match_DocStringSeparator(token, !activeDocStringSeparator);
+    }
+
+    private boolean match_DocStringSeparator(Token token, boolean isOpen) {
+        String separator = GherkinLanguageConstants.DOCSTRING_ALTERNATIVE_SEPARATOR;
+        String lineText = token.line.getLineText(0);
+        // NOTE(review): the line must consist of the separator only, so a separator followed by
+        // a media type on the same line is not recognised and the media type read below is
+        // presumably always empty - confirm whether media types are meant to be supported.
+        if (!lineText.trim().equals(separator)) {
+            return false;
+        }
+
+        String mediaType = null;
+        if (isOpen) {
+            mediaType = token.line.getRestTrimmed(separator.length());
+            activeDocStringSeparator = true;
+            // Content lines of the doc string get the separator's indent removed (see match_Other).
+            indentToRemove = token.line.indent();
+        } else {
+            activeDocStringSeparator = false;
+            indentToRemove = 0;
+        }
+
+        setTokenMatched(token, TokenType.DocStringSeparator, mediaType, separator, null, null, null);
+        return true;
+    }
+
+    /**
+     * Matches a GFM table separator row (for example {@code | --- | --- |}) as
+     * a comment. No other line is treated as a comment by this matcher.
+     */
+    @Override
+    public boolean match_Comment(Token token) {
+        if (token.line == null) return false;
+
+        if (token.line.startsWith("|")) {
+            List<GherkinLineSpan> tableCells = token.line.getTableCells();
+            if (isGfmTableSeparator(tableCells)) {
+                setTokenMatched(token, TokenType.Comment, token.line.getLineText(0), null, 0, null, null);
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Tells whether every cell matches the GFM separator pattern {@code :?-+:?}
+     * (valid separators: {@code ---}, {@code :---}, {@code ---:}, {@code :---:}).
+     * A {@code null} or empty cell list is not a separator.
+     */
+    private boolean isGfmTableSeparator(List<GherkinLineSpan> tableCells) {
+        if (tableCells == null || tableCells.isEmpty()) return false;
+
+        return tableCells.stream()
+                .map(cell -> cell.text)
+                .allMatch(text -> GFM_SEPARATOR_CELL_PATTERN.matcher(text.trim()).matches());
+    }
+
+    /** Matches any line that has text; used as the fallback for free-form content. */
+    @Override
+    public boolean match_Other(Token token) {
+        if (token.line == null) return false;
+
+        String text = token.line.getLineText(indentToRemove); // take the entire line, except removing DocString indents
+        setTokenMatched(token, TokenType.Other, text, null, 0, null, null);
+        return true;
+    }
+
+    /**
+     * Matches a {@code # language: xx} line and switches the current dialect.
+     *
+     * @throws ParserException.NoSuchLanguageException if the language is unknown
+     */
+    @Override
+    public boolean match_Language(Token token) {
+        if (token.line == null) return false;
+
+        Matcher matcher = LANGUAGE_PATTERN.matcher(token.line.getLineText(0));
+        if (matcher.matches()) {
+            String language = matcher.group(1);
+            setTokenMatched(token, TokenType.Language, language, null, null, null, null);
+
+            // Look the dialect up through the injected provider rather than a throwaway one.
+            currentDialect = dialectProvider.getDialect(language)
+                    .orElseThrow(() -> new ParserException.NoSuchLanguageException(language, token.location));
+
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Restores the initial state so the same matcher instance can be used for
+     * another document: no open doc string, no feature line seen yet and the
+     * provider's default dialect.
+     */
+    @Override
+    public void reset() {
+        activeDocStringSeparator = false;
+        indentToRemove = 0;
+        // Without this a reused matcher would never recognise the next document's feature line.
+        matchedFeatureLine = false;
+        currentDialect = dialectProvider.getDefaultDialect();
+    }
+
+}
\ No newline at end of file
diff --git a/java/src/main/java/io/cucumber/gherkin/GherkinParser.java b/java/src/main/java/io/cucumber/gherkin/GherkinParser.java index 21e25ea2f..7b2cf0790 100644 --- a/java/src/main/java/io/cucumber/gherkin/GherkinParser.java +++ b/java/src/main/java/io/cucumber/gherkin/GherkinParser.java @@ -6,7 +6,9 @@ import io.cucumber.messages.types.GherkinDocument; import io.cucumber.messages.types.ParseError; import io.cucumber.messages.types.Source; +import io.cucumber.messages.types.SourceMediaType; import io.cucumber.messages.types.SourceReference; +import io.cucumber.gherkin.Parser.ITokenMatcher; import java.io.File; import
java.io.IOException; @@ -19,10 +21,12 @@ import java.util.stream.Stream; import static io.cucumber.gherkin.EncodingParser.readWithEncodingFromSource; +import static io.cucumber.messages.types.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN; import static io.cucumber.messages.types.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toCollection; + /** * Main entry point for the Gherkin library */ @@ -99,6 +103,9 @@ public Stream parse(String uri, byte[] source) { requireNonNull(uri); requireNonNull(source); String withEncodingFromSource = readWithEncodingFromSource(source); + if (uri.endsWith(".md")) { + return parse(Envelope.of(new Source(uri, withEncodingFromSource, TEXT_X_CUCUMBER_GHERKIN_MARKDOWN))); + } return parse(Envelope.of(new Source(uri, withEncodingFromSource, TEXT_X_CUCUMBER_GHERKIN_PLAIN))); } @@ -119,13 +126,14 @@ public Stream parse(Envelope envelope) { } private List parse(Source source) { - return parse(source.getUri(), source.getData()); + return parse(source.getUri(), source.getData(), source.getMediaType()); } - private List parse(String uri, String data) { + private List parse(String uri, String data, SourceMediaType mediaType) { List messages = new ArrayList<>(); GherkinDocumentBuilder documentBuilder = new GherkinDocumentBuilder(idGenerator, uri); - Parser parser = new Parser<>(documentBuilder); + ITokenMatcher tokenMatcher = mediaType == TEXT_X_CUCUMBER_GHERKIN_MARKDOWN ? 
new GherkinInMarkdownTokenMatcher() : new TokenMatcher(); + Parser parser = new Parser<>(documentBuilder, tokenMatcher); try { GherkinDocument gherkinDocument = parser.parse(data, uri); diff --git a/java/src/main/java/io/cucumber/gherkin/Parser.java b/java/src/main/java/io/cucumber/gherkin/Parser.java index a4f2153df..08f42b402 100644 --- a/java/src/main/java/io/cucumber/gherkin/Parser.java +++ b/java/src/main/java/io/cucumber/gherkin/Parser.java @@ -78,7 +78,7 @@ static RuleType cast(TokenType tokenType) { } private final Builder builder; - + private final ITokenMatcher tokenMatcher; static class ParserContext { final ITokenScanner tokenScanner; final ITokenMatcher tokenMatcher; @@ -93,8 +93,9 @@ static class ParserContext { } } - Parser(Builder builder) { + Parser(Builder builder, ITokenMatcher tokenMatcher) { this.builder = builder; + this.tokenMatcher = tokenMatcher; } T parse(String source, String uri) { @@ -106,7 +107,7 @@ T parse(Reader source, String uri) { } T parse(ITokenScanner tokenScanner, String uri) { - return parse(tokenScanner, new TokenMatcher(), uri); + return parse(tokenScanner, this.tokenMatcher, uri); } T parse(String source, ITokenMatcher tokenMatcher, String uri) { diff --git a/java/src/test/java/io/cucumber/gherkin/GenerateTokens.java b/java/src/test/java/io/cucumber/gherkin/GenerateTokens.java index e358dfd10..39ba52ab2 100644 --- a/java/src/test/java/io/cucumber/gherkin/GenerateTokens.java +++ b/java/src/test/java/io/cucumber/gherkin/GenerateTokens.java @@ -9,11 +9,19 @@ public final class GenerateTokens { public static void main(String[] args) throws FileNotFoundException { TokenFormatterBuilder builder = new TokenFormatterBuilder(); - Parser parser = new Parser<>(builder); - TokenMatcher matcher = new TokenMatcher(); + TokenMatcher classicTokenMatcher = new TokenMatcher(); + GherkinInMarkdownTokenMatcher gherkinInMarkdownTokenMatcher = new GherkinInMarkdownTokenMatcher(); + Parser parser = null; + for (String fileName : args) { + + 
if(fileName.endsWith(".feature")) { + parser = new Parser<>(builder, classicTokenMatcher); + } else { + parser = new Parser<>(builder, gherkinInMarkdownTokenMatcher); + } Reader in = new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8); - String result = parser.parse(in, matcher, fileName); + String result = parser.parse(in, fileName); Stdio.out.print(result); Stdio.out.flush(); // print doesn't autoflush } diff --git a/java/src/test/java/io/cucumber/gherkin/GherkinDocumentBuilderTest.java b/java/src/test/java/io/cucumber/gherkin/GherkinDocumentBuilderTest.java index 1843a6b9e..cbda19036 100644 --- a/java/src/test/java/io/cucumber/gherkin/GherkinDocumentBuilderTest.java +++ b/java/src/test/java/io/cucumber/gherkin/GherkinDocumentBuilderTest.java @@ -17,9 +17,9 @@ public class GherkinDocumentBuilderTest { @Test public void is_reusable() { - Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature")); TokenMatcher matcher = new TokenMatcher(); - + Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature"), matcher); + GherkinDocument d1 = parser.parse("Feature: 1", matcher, "1.feature"); GherkinDocument d2 = parser.parse("Feature: 2", matcher, "2.feature"); @@ -29,7 +29,8 @@ public void is_reusable() { @Test public void parses_rules() { - Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature")); + TokenMatcher matcher = new TokenMatcher(); + Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature"),matcher); String data = "" + "Feature: Some rules\n" + "\n" + @@ -67,7 +68,8 @@ public void parses_rules() { @Test public void parses_just_comments() { - Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature")); + TokenMatcher matcher = new TokenMatcher(); + Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature"),matcher); GherkinDocument doc = parser.parse("# Just a comment", 
"test.feature"); List children = doc.getComments(); assertEquals(1, children.size()); @@ -75,7 +77,8 @@ public void parses_just_comments() { @Test public void sets_empty_table_cells() { - Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature")); + TokenMatcher matcher = new TokenMatcher(); + Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature"),matcher); GherkinDocument doc = parser.parse("" + "Feature:\n" + " Scenario:\n" + diff --git a/java/src/test/java/io/cucumber/gherkin/GherkinInMarkdownTokenMatcherTest.java b/java/src/test/java/io/cucumber/gherkin/GherkinInMarkdownTokenMatcherTest.java new file mode 100644 index 000000000..c272fef16 --- /dev/null +++ b/java/src/test/java/io/cucumber/gherkin/GherkinInMarkdownTokenMatcherTest.java @@ -0,0 +1,603 @@ +package io.cucumber.gherkin; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import java.util.stream.Stream; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import java.lang.reflect.Method; +import io.cucumber.gherkin.Parser.TokenType; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import java.util.List; +import java.util.Arrays; +import java.util.stream.Collectors; +import static org.junit.jupiter.api.Assertions.assertNotNull; + + +class GherkinInMarkdownTokenMatcherTest { + + private enum HeaderType { + FEATURE(TokenType.FeatureLine, "Feature", "match_FeatureLine"), + RULE(TokenType.RuleLine, "Rule", "match_RuleLine"), + BACKGROUND(TokenType.BackgroundLine, "Background", "match_BackgroundLine"), + EXAMPLES(TokenType.ExamplesLine, "Examples", "match_ExamplesLine"), + SCENARIO(TokenType.ScenarioLine, "Scenario", "match_ScenarioLine"), + 
SCENARIO_OUTLINE(TokenType.ScenarioLine, "Scenario Outline", "match_ScenarioLine"); + + private final TokenType tokenType; + private final String keyword; + private final String methodName; + + HeaderType(TokenType tokenType, String keyword, String methodName) { + this.tokenType = tokenType; + this.keyword = keyword; + this.methodName = methodName; + } + } + + @ParameterizedTest(name = "line ''{0}'' should match: {1}") + @MethodSource("headerLineTestCases") + void testHeaderLineMatching(HeaderType headerType, String line, boolean shouldMatch, TokenType expectedType, + String expectedKeyword, String expectedText, Integer expectedIndent) throws Exception { + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(new GherkinLine(line, 1), new Location(1, 1)); + + Method method = matcher.getClass().getMethod(headerType.methodName, Token.class); + boolean result = (boolean) method.invoke(matcher, token); + + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match a feature line", + line, shouldMatch ? 
"" : " not")); + if (shouldMatch) { + assertEquals(expectedKeyword, token.matchedKeyword); + assertEquals(expectedText, token.matchedText); + assertEquals(expectedIndent, token.matchedIndent); + assertEquals("en", token.matchedGherkinDialect.getLanguage()); + assertEquals(expectedType, token.matchedType); + } + } + + @ParameterizedTest(name = "should match {0} feature keyword in {1}") + @MethodSource("featureDialectTestCases") + void testFeatureLineDialectMatching(String keyword, String dialect, boolean shouldMatch) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher(dialect); + String line = "# " + keyword + ": hello"; + Token token = new Token(new GherkinLine(line, 1), new Location(1, 1)); + + // Act + boolean result = matcher.match_FeatureLine(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("'%s' should%s match as feature in dialect '%s'", + line, shouldMatch ? "" : " not", dialect)); + } + + @Test + void testInvalidDialect() { + assertThrows(NullPointerException.class, () -> { + new GherkinInMarkdownTokenMatcher("xx"); + }); + } + + @Test + void testMatchEOF() { + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(null, new Location(1, 0)); + assertTrue(matcher.match_EOF(token), "Should match EOF when line is empty"); + } + + @Test + void testMatchNonEOF() { + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(new GherkinLine("hello", 1), new Location(1, 1)); + assertFalse(matcher.match_EOF(token), "Is not EOF"); + } + + @ParameterizedTest(name = "step ''{0}'' should match: {1}") + @MethodSource("stepLineTestCases") + void testMatchStepLine(String line, boolean shouldMatch, String expectedKeyword, String expectedText) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(new GherkinLine(line, 1), new Location(1, 1)); + 
+ // Act + boolean result = matcher.match_StepLine(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match a step", line, shouldMatch ? "" : " not")); + if (shouldMatch) { + assertEquals(TokenType.StepLine, token.matchedType); + assertEquals(expectedKeyword, token.matchedKeyword); + assertEquals(expectedText, token.matchedText); + assertEquals("en", token.matchedGherkinDialect.getLanguage()); + } + } + + @ParameterizedTest(name = "table row ''{0}'' should match: {1}") + @MethodSource("tableRowTestCases") + void testMatchTableRow(String line, boolean shouldMatch, List expectedCells) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(new GherkinLine(line, 1), new Location(1, 1)); + + // Act + boolean result = matcher.match_TableRow(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match a table row", line, shouldMatch ? "" : " not")); + if (shouldMatch) { + assertEquals(TokenType.TableRow, token.matchedType); + assertNotNull(token.matchedItems, "Table cells should not be null"); + List actualCells = token.matchedItems.stream() + .map(item -> item.text) + .collect(Collectors.toList()); + assertEquals(expectedCells, actualCells, "Table cells should match"); + } + } + + @ParameterizedTest(name = "line ''{0}'' should match empty: {1}") + @MethodSource("emptyLineTestCases") + void testMatchEmpty(String line, boolean shouldMatch) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(line != null ? new GherkinLine(line, 1) : null, new Location(1, 1)); + + // Act + boolean result = matcher.match_Empty(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match empty", + line == null ? "null" : "'" + line + "'", + shouldMatch ? 
"" : " not")); + if (shouldMatch) { + assertEquals(TokenType.Empty, token.matchedType); + } + } + + @ParameterizedTest(name = "tag line ''{0}'' should match: {1}") + @MethodSource("tagLineTestCases") + void testMatchTagLine(String line, boolean shouldMatch, List expectedTags) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(line != null ? new GherkinLine(line, 1) : null, new Location(1, 1)); + + // Act + boolean result = matcher.match_TagLine(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match tags", + line == null ? "null" : "'" + line + "'", + shouldMatch ? "" : " not")); + if (shouldMatch) { + assertEquals(TokenType.TagLine, token.matchedType); + assertNotNull(token.matchedItems, "Tags should not be null"); + List actualTags = token.matchedItems.stream() + .map(item -> item.text) + .collect(Collectors.toList()); + assertEquals(expectedTags, actualTags, "Tags should match"); + } + } + + @ParameterizedTest(name = "language line ''{0}'' should match: {1}") + @MethodSource("languageLineTestCases") + void testMatchLanguage(String line, boolean shouldMatch, String expectedLanguage) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(line != null ? new GherkinLine(line, 1) : null, new Location(1, 1)); + + // Act + boolean result = matcher.match_Language(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match language", + line == null ? "null" : "'" + line + "'", + shouldMatch ? 
"" : " not")); + if (shouldMatch) { + assertEquals(TokenType.Language, token.matchedType); + assertEquals(expectedLanguage, token.matchedText); + } + } + + @Test + void testDocStringSeparatorSequence() { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + + // Test opening separator + Token openingToken = new Token(new GherkinLine("```", 1), new Location(1, 1)); + boolean openingResult = matcher.match_DocStringSeparator(openingToken); + assertTrue(openingResult, "Opening separator should match"); + assertEquals(TokenType.DocStringSeparator, openingToken.matchedType); + assertEquals("", openingToken.matchedText); + + // Test content line (should not match) + Token contentToken = new Token(new GherkinLine("some content", 2), new Location(2, 1)); + boolean contentResult = matcher.match_DocStringSeparator(contentToken); + assertFalse(contentResult, "Content line should not match as separator"); + + // Test closing separator + Token closingToken = new Token(new GherkinLine("```", 3), new Location(3, 1)); + boolean closingResult = matcher.match_DocStringSeparator(closingToken); + assertTrue(closingResult, "Closing separator should match"); + assertEquals(TokenType.DocStringSeparator, closingToken.matchedType); + assertEquals(null, closingToken.matchedText); + } + + @ParameterizedTest(name = "docstring separator ''{0}'' should match: {1}") + @MethodSource("docStringSeparatorTestCases") + void testMatchDocStringSeparator(String line, boolean shouldMatch, String expectedContentType) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(line != null ? new GherkinLine(line, 1) : null, new Location(1, 1)); + + // Act + boolean result = matcher.match_DocStringSeparator(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match doc string separator", + line == null ? "null" : "'" + line + "'", + shouldMatch ? 
"" : " not")); + if (shouldMatch) { + assertEquals(TokenType.DocStringSeparator, token.matchedType); + assertEquals(expectedContentType, token.matchedText); + } + } + + @ParameterizedTest(name = "other line ''{0}'' with indent to remove {1} should match with text: {2}") + @MethodSource("otherLineTestCases") + void testMatchOther(String line, int indentToRemove, String expectedText) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(line != null ? new GherkinLine(line, 1) : null, new Location(1, 1)); + + // Set indentToRemove via DocString sequence if needed + if (indentToRemove > 0) { + // First match the DocString start to set indentToRemove + Token docStart = new Token(new GherkinLine(" ```", 1), new Location(1, 1)); + matcher.match_DocStringSeparator(docStart); + } + + // Act + boolean result = matcher.match_Other(token); + + // Assert + if (line != null) { + assertTrue(result, "Should match any non-null line"); + assertEquals(TokenType.Other, token.matchedType); + assertEquals(expectedText, token.matchedText); + assertEquals(0, token.matchedIndent); + } else { + assertFalse(result, "Should not match null line"); + } + } + + @ParameterizedTest(name = "comment line ''{0}'' should match: {1}") + @MethodSource("commentLineTestCases") + void testMatchComment(String line, boolean shouldMatch, String expectedText) { + // Arrange + GherkinInMarkdownTokenMatcher matcher = new GherkinInMarkdownTokenMatcher("en"); + Token token = new Token(line != null ? new GherkinLine(line, 1) : null, new Location(1, 1)); + + // Act + boolean result = matcher.match_Comment(token); + + // Assert + assertEquals(shouldMatch, result, + String.format("Line '%s' should%s match comment", + line == null ? "null" : "'" + line + "'", + shouldMatch ? 
"" : " not")); + if (shouldMatch) { + assertEquals(TokenType.Comment, token.matchedType); + assertEquals(expectedText, token.matchedText); + } + } + + private static Stream stepLineTestCases() { + return Stream.of( + // Valid Given steps + Arguments.of("* Given there is a test", true, "Given ", "there is a test"), + Arguments.of("- Given I have a step", true, "Given ", "I have a step"), + Arguments.of("+ Given some precondition", true, "Given ", "some precondition"), + + // Valid When steps + Arguments.of("* When something happens", true, "When ", "something happens"), + Arguments.of("- When I do an action", true, "When ", "I do an action"), + Arguments.of("+ When executing the test", true, "When ", "executing the test"), + + // Valid Then steps + Arguments.of("* Then I see results", true, "Then ", "I see results"), + Arguments.of("- Then I verify outcome", true, "Then ", "I verify outcome"), + Arguments.of("+ Then something is done", true, "Then ", "something is done"), + + // Valid And steps + Arguments.of("* And another condition", true, "And ", "another condition"), + Arguments.of("- And one more thing", true, "And ", "one more thing"), + + // Valid But steps + Arguments.of("* But there is an exception", true, "But ", "there is an exception"), + Arguments.of("- But something else", true, "But ", "something else"), + + // Valid with indentation + Arguments.of(" * Given indented step", true, "Given ", "indented step"), + Arguments.of("\t- When tabbed step", true, "When ", "tabbed step"), + + //Valid * steps + Arguments.of("* * indented step", true, "* ", "indented step"), + + // Invalid cases + Arguments.of("Given no bullet point", false, null, null), + Arguments.of("* given lowercase keyword", false, null, null), + Arguments.of("* extra space after * ", false, null, null), + Arguments.of("* Invalid keyword", false, null, null), + Arguments.of("** Given double asterisk", false, null, null), + Arguments.of("*Given no space after bullet", false, null, null), + 
Arguments.of("* ", false, null, null), // Empty step + Arguments.of("", false, null, null), // Empty line + Arguments.of("Some random text", false, null, null) // No keyword + ); + } + + /** Header-line fixtures, generated once for every HeaderType except FEATURE. Each row reads as (headerType, line, shouldMatch, expected token type, expected keyword, expected text, trailing int). The trailing int grows with the number of leading '#' characters; presumably it is the expected indent or column, confirm against the consuming test. NOTE(review): the first two valid rows read identically in this view. */ private static Stream headerLineTestCases() { + return Stream.of(HeaderType.values()).filter(headerType -> headerType != HeaderType.FEATURE).flatMap(headerType -> Stream.of( + // Valid cases + Arguments.of(headerType, "# " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 2), + Arguments.of(headerType, "# " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 2), + Arguments.of(headerType, "## " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 3), + Arguments.of(headerType, "### " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 4), + Arguments.of(headerType, "#### " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 5), + Arguments.of(headerType, "##### " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 6), + Arguments.of(headerType, "###### " + headerType.keyword + ": hello", true, headerType.tokenType, + headerType.keyword, "hello", 7), + // Test cases for testing indentations + Arguments.of(headerType, " # " + headerType.keyword + ": indentation with spaces", true, + headerType.tokenType, headerType.keyword, "indentation with spaces", 2), + Arguments.of(headerType, "\t# " + headerType.keyword + ": indentation with tabs", true, + headerType.tokenType, headerType.keyword, "indentation with tabs", 2), + Arguments.of(headerType, " \t # " + headerType.keyword + ": mixed indentation with spaces", true, + headerType.tokenType, headerType.keyword, "mixed indentation with spaces", 2), + Arguments.of(headerType, " ## " + headerType.keyword + ": indentation with different header level", + true, headerType.tokenType, headerType.keyword, 
"indentation with different header level", 3), + + // Invalid cases + Arguments.of(headerType, "" + headerType.keyword + ": Missing header", false, null, null, null, null), // Missing + // header + Arguments.of(headerType, "####### " + headerType.keyword + ": too many #s", false, null, null, null, + null), // Too many #s + Arguments.of(headerType, "#" + headerType.keyword + ": Missing space after #", false, null, null, null, + null), // No space after # + Arguments.of(headerType, "# " + headerType.keyword + ": Extra space after #", false, null, null, null, + null), // Extra space after # + Arguments.of(headerType, "# " + headerType.keyword.toLowerCase() + ": lower case feature", false, null, + null, null, null), // lowercase keyword + Arguments.of(headerType, "# " + headerType.keyword + "s: plural", false, null, null, null, null), // Plural + Arguments.of(headerType, "# Something: wrong keyword", false, null, null, null, null), // Wrong keyword + Arguments.of(headerType, "#", false, null, null, null, null), // Just a hash + Arguments.of(headerType, "Mising keyword", false, null, null, null, null) // No header keyword at all + + )); + + } + + /** Feature-keyword fixtures. Each row reads as (keyword, language code, shouldMatch); the consuming test is outside this view. The English keyword "Feature" is listed as matching under fr and es alongside each localized keyword. */ private static Stream featureDialectTestCases() { + return Stream.of( + // English + Arguments.of("Feature", "en", true), + // French + Arguments.of("Fonctionnalité", "fr", true), + Arguments.of("Feature", "fr", true), + // Spanish + Arguments.of("Característica", "es", true), + Arguments.of("Feature", "es", true)); + } + + /** Table-row fixtures. Each row reads as (line, shouldMatch, expected cell values, or null when no match is expected). */ private static Stream tableRowTestCases() { + return Stream.of( + // Valid table rows + Arguments.of(" | header1 | header2 |", true, Arrays.asList("header1", "header2")), + Arguments.of(" |col1|col2|", true, Arrays.asList("col1", "col2")), + Arguments.of(" | data 1 | data 2 |", true, Arrays.asList("data 1", "data 2")), + Arguments.of(" | spaced | cells |", true, Arrays.asList("spaced", "cells")), + + // Valid with indentation + Arguments.of(" | indented | row |", true, Arrays.asList("indented", "row")), + 
Arguments.of("\t\t| tabbed | row |", true, Arrays.asList("tabbed", "row")), + + // Valid empty cells + Arguments.of(" | | empty cell |", true, Arrays.asList("", "empty cell")), + Arguments.of(" | cell | |", true, Arrays.asList("cell", "")), + Arguments.of(" |||", true, Arrays.asList("", "")), + + // Invalid table rows: fewer than 2 or more than 5 whitespace characters of indentation + Arguments.of(" | header1 | header2 |", false, null), + Arguments.of("|col1|col2|", false, null), + Arguments.of(" | spaced | cells |", false, null), + // Invalid cases + Arguments.of("not a table row", false, null), + Arguments.of("|incomplete", false, null), + Arguments.of("incomplete|", false, null), + Arguments.of("", false, null), + Arguments.of("|", false, null), + Arguments.of("||", false, null) // Requires at least one character between pipes + + ); + } + + /** Empty-line fixtures. Each row reads as (line, shouldMatch). Blank and whitespace-only lines are expected to match; lines with any visible text, and a null line, are not. */ private static Stream emptyLineTestCases() { + return Stream.of( + // Valid empty lines + Arguments.of("", true), + Arguments.of(" ", true), + Arguments.of(" ", true), + Arguments.of("\t", true), + Arguments.of(" \t ", true), + + // Invalid cases (non-empty lines) + Arguments.of("not empty", false), + Arguments.of(" text ", false), + Arguments.of("\ttext", false), + Arguments.of("# Heading", false), + Arguments.of("* bullet", false), + Arguments.of("|table|", false), + Arguments.of(null, false) + ); + } + + /** Tag-line fixtures. Each row reads as (line, shouldMatch, expected tag names, or null when no match is expected). In the rows expected to match, every tag is wrapped in backticks. */ private static Stream tagLineTestCases() { + return Stream.of( + // Valid tag lines + Arguments.of("`@tag`", true, Arrays.asList("@tag")), + Arguments.of("`@tag1` `@tag2`", true, Arrays.asList("@tag1", "@tag2")), + Arguments.of("`@feature` `@important` `@test`", true, Arrays.asList("@feature", "@important", "@test")), + + // Valid with indentation + Arguments.of(" `@tag`", true, Arrays.asList("@tag")), + Arguments.of("\t`@tag`", true, Arrays.asList("@tag")), + Arguments.of(" `@tag1` `@tag2` ", true, Arrays.asList("@tag1", "@tag2")), + + // Valid complex tags + Arguments.of("`@with.dot`", true, Arrays.asList("@with.dot")), + 
Arguments.of("`@with_underscore`", true, Arrays.asList("@with_underscore")), + Arguments.of("`@with-dash`", true, Arrays.asList("@with-dash")), + Arguments.of("`@with123`", true, Arrays.asList("@with123")), + + //Valid with separated tags + Arguments.of("`@tag1`,`@tag2`", true, Arrays.asList("@tag1", "@tag2")), + Arguments.of("`@tag#invalid`", true , Arrays.asList("@tag#invalid")), + + // Invalid cases + Arguments.of("@tag", false, null), // Missing backticks + Arguments.of("`@tag", false, null), // Missing closing backtick + Arguments.of("@tag`", false, null), // Missing opening backtick + Arguments.of("`@ invalid`", false, null), // Invalid tag format + Arguments.of("`@`", false, null), // Just @ symbol + Arguments.of("` @tag `", false, null), // Space after backtick + Arguments.of("not a tag", false, null), + Arguments.of("", false, null), + Arguments.of(null, false, null), + Arguments.of("`#@tag`", false, null) // Must start with @ + ); + } + + /** Language-header fixtures. Each row reads as (line, shouldMatch, expected language code, or null when no match is expected). */ private static Stream languageLineTestCases() { + return Stream.of( + // Valid language lines + Arguments.of("# language: en", true, "en"), + Arguments.of("# language: fr", true, "fr"), + Arguments.of("# language: es", true, "es"), + Arguments.of("#language:pt", true, "pt"), // No spaces + + // Valid with indentation + Arguments.of(" # language: de", true, "de"), + Arguments.of("\t# language: nl", true, "nl"), + + // Invalid cases + Arguments.of("# Language: en", false, null), // Capital L + Arguments.of("#language en", false, null), // Missing colon + Arguments.of("# language:", false, null), // Missing language code + Arguments.of("# language: ", false, null), // Empty language code + Arguments.of("language: en", false, null), // Missing # + Arguments.of("# something: en", false, null), // Wrong keyword + Arguments.of("", false, null), + Arguments.of(null, false, null) + ); + } + + /** DocString-separator fixtures. Each row reads as (line, shouldMatch, expected content type, or null when no match is expected). */ private static Stream docStringSeparatorTestCases() { + return Stream.of( + // Valid separators + Arguments.of("```", true, ""), // Basic 
separator + Arguments.of(" ``` ", true, ""), // Extra spaces + + // Invalid cases + Arguments.of("``", false, null), // Too few backticks + Arguments.of("`````", false, null), // Too many backticks + Arguments.of("~~~", false, null), // Wrong characters + Arguments.of("``` python", false, null), // Space after backticks + Arguments.of("```python script", false, null), // Space in content type + Arguments.of("", false, null), // Empty line + Arguments.of(null, false, null) // Null line + ); + } + + /** Fixtures for testMatchOther: (line, indentToRemove, expectedText). Rows with a non-zero indentToRemove model DocString content whose leading indent is expected to be stripped; a null line carries a null expected text. */ private static Stream otherLineTestCases() { + return Stream.of( + // Basic text lines + Arguments.of("Simple text", 0, "Simple text"), + Arguments.of(" Indented text", 0, " Indented text"), + Arguments.of("\tTabbed text", 0, "\tTabbed text"), + + // DocString content (with indentToRemove) + Arguments.of(" Some content", 3, "Some content"), + Arguments.of(" More content", 3, " More content"), + Arguments.of(" Less content", 3, "Less content"), + + // Special characters + Arguments.of("Text with * asterisk", 0, "Text with * asterisk"), + Arguments.of("Text with # hash", 0, "Text with # hash"), + Arguments.of("Text with ``` backticks", 0, "Text with ``` backticks"), + + // Edge cases + Arguments.of("", 0, ""), + Arguments.of(" ", 0, " "), + Arguments.of(null, 0, null) + ); + } + + /** Fixtures for testMatchComment: (line, shouldMatch, expectedText). The lines expected to match are GFM table separator rows, and their expected text is the line verbatim, indentation included; a regular "# Comment" line is listed as a non-match. */ private static Stream commentLineTestCases() { + return Stream.of( + // Valid GFM table separators - Basic + Arguments.of("| --- |", true, "| --- |"), + Arguments.of("| --- | --- |", true, "| --- | --- |"), + Arguments.of("|---|---|", true, "|---|---|"), + + // Valid GFM table separators - Alignment syntax + Arguments.of("|:---|", true, "|:---|"), // Left align + Arguments.of("|---:|", true, "|---:|"), // Right align + Arguments.of("|:---:|", true, "|:---:|"), // Center align + Arguments.of("|:--- |:--- |", true, "|:--- |:--- |"), // Multiple columns + Arguments.of("| :---: | :---: |", true, "| :---: | :---: |"), + + // Valid with different dash counts + Arguments.of("| ---- |", true, "| ---- |"), 
// More dashes + Arguments.of("| - |", true, "| - |"), // Single dash + + // Valid with indentation + Arguments.of(" | --- |", true, " | --- |"), + Arguments.of("\t| --- |", true, "\t| --- |"), + + // Invalid cases + Arguments.of("| abc |", false, null), // Not a separator + Arguments.of("| --- abc |", false, null), // Mixed content + Arguments.of("| -- - |", false, null), // Broken separator + Arguments.of("| :-- : |", false, null), // Invalid alignment + Arguments.of("|", false, null), // Just pipe + Arguments.of("---", false, null), // No pipes + Arguments.of("# Comment", false, null), // Regular comment + Arguments.of("", false, null), // Empty line + Arguments.of(null, false, null), // Null line + + // Edge cases + Arguments.of("| :---: | abc |", false, null), // Mixed separator and content + Arguments.of("| --- || --- |", false, null), // Double pipe + Arguments.of("|::---|", false, null), // Multiple colons + Arguments.of("| ---- :", false, null) // Missing end pipe + ); + } +} \ No newline at end of file diff --git a/java/src/test/java/io/cucumber/gherkin/GherkinParserTest.java b/java/src/test/java/io/cucumber/gherkin/GherkinParserTest.java index 996db99c8..d99817353 100644 --- a/java/src/test/java/io/cucumber/gherkin/GherkinParserTest.java +++ b/java/src/test/java/io/cucumber/gherkin/GherkinParserTest.java @@ -17,6 +17,7 @@ import java.util.Optional; import java.util.stream.Stream; +import static io.cucumber.messages.types.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN; import static io.cucumber.messages.types.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -36,6 +37,17 @@ public void use_this_in_readme() { assertEquals(1, pickles.count()); } + /** Parses a minimal Gherkin-in-Markdown document, wrapped in a Source tagged TEXT_X_CUCUMBER_GHERKIN_MARKDOWN, with a default-built GherkinParser and expects exactly one envelope that carries a pickle. */ @Test public void can_parse_markdown_files() { + String feature = "# Feature: Minimal\n" + + "\n" + + " ## Scenario: minimalistic\n" + + " * Given the minimalism\n"; + Envelope mdEnvelope = 
Envelope.of(new Source("minimal.feature.md", feature, TEXT_X_CUCUMBER_GHERKIN_MARKDOWN)); + GherkinParser parser = GherkinParser.builder().build(); + Stream pickles = parser.parse(mdEnvelope).filter(a -> a.getPickle().isPresent()); + /* The document holds one scenario with one step, so exactly one envelope is expected to carry a pickle. */ assertEquals(1, pickles.count()); + } + @Test public void can_parse_streams() throws IOException { try (InputStream is = new ByteArrayInputStream(feature.getBytes(StandardCharsets.UTF_8))){ diff --git a/java/src/test/java/io/cucumber/gherkin/ParserTest.java b/java/src/test/java/io/cucumber/gherkin/ParserTest.java index 2d7f436cf..5528d8d0e 100644 --- a/java/src/test/java/io/cucumber/gherkin/ParserTest.java +++ b/java/src/test/java/io/cucumber/gherkin/ParserTest.java @@ -12,7 +12,7 @@ public class ParserTest { public void change_default_language() { TokenMatcher matcher = new TokenMatcher("no"); IdGenerator idGenerator = new IncrementingIdGenerator(); - Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature")); + /* The Parser constructor now takes the token matcher as a second argument; the parse call below still passes the same matcher explicitly as well. */ Parser parser = new Parser<>(new GherkinDocumentBuilder(idGenerator, "test.feature"), matcher); GherkinDocument gherkinDocument = parser.parse("Egenskap: i18n support\n", matcher, "test.feature"); assertEquals("no", gherkinDocument.getFeature().get().getLanguage());