cucumber · mpkorstanje · Apr 4, 2025 · Feb 11, 2025 · Feb 11, 2025 · Feb 11, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt
 
 ## [Unreleased]
 ### Changed
+- [Java] Optimize GherkinLine performance ([#361](https://github.com/cucumber/gherkin/issues/361))
 - [Java] Optimize number of array copies ([#388](https://github.com/cucumber/gherkin/pull/388))
 - [Java] Optimize Location performance ([#385](https://github.com/cucumber/gherkin/pull/385))
 - [Java] Optimize AstNode performance ([#383](https://github.com/cucumber/gherkin/pull/383))

diff --git a/java/src/main/java/io/cucumber/gherkin/GherkinLanguageConstants.java b/java/src/main/java/io/cucumber/gherkin/GherkinLanguageConstants.java
@@ -2,7 +2,8 @@
 
 interface GherkinLanguageConstants {
     String TAG_PREFIX = "@";
-    String COMMENT_PREFIX = "#";
+    char COMMENT_PREFIX_CHAR = '#';
+    String COMMENT_PREFIX = "" + COMMENT_PREFIX_CHAR;
     String TITLE_KEYWORD_SEPARATOR = ":";
     String TABLE_CELL_SEPARATOR = "|";
     String DOCSTRING_SEPARATOR = "\"\"\"";

diff --git a/java/src/main/java/io/cucumber/gherkin/GherkinLine.java b/java/src/main/java/io/cucumber/gherkin/GherkinLine.java
@@ -4,86 +4,113 @@
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map.Entry;
 import java.util.PrimitiveIterator;
 
-import static io.cucumber.gherkin.GherkinLanguageConstants.COMMENT_PREFIX;
 import static io.cucumber.gherkin.GherkinLanguageConstants.TAG_PREFIX;
-import static io.cucumber.gherkin.StringUtils.ltrim;
-import static io.cucumber.gherkin.StringUtils.ltrimKeepNewLines;
+import static io.cucumber.gherkin.GherkinLanguageConstants.TITLE_KEYWORD_SEPARATOR;
+import static io.cucumber.gherkin.Locations.COLUMN_OFFSET;
+import static io.cucumber.gherkin.StringUtils.containsWhiteSpace;
 import static io.cucumber.gherkin.StringUtils.rtrim;
-import static io.cucumber.gherkin.StringUtils.rtrimKeepNewLines;
-import static io.cucumber.gherkin.StringUtils.symbolCount;
-import static io.cucumber.gherkin.StringUtils.trim;
+import static io.cucumber.gherkin.StringUtils.trimAndIndent;
+import static io.cucumber.gherkin.StringUtils.trimAndIndentKeepNewLines;
+import static java.util.Collections.emptyList;
 import static java.util.Objects.requireNonNull;
 
 class GherkinLine {
-    // TODO: set this to 0 when/if we change to 0-indexed columns
-    private static final int OFFSET = 1;
-    private final String lineText;
-    private final String trimmedLineText;
+
+    /**
+     * The line text, including all leading and trailing whitespace characters.
+     */
+    private final String rawText;
+    private final Location location;
+    private final boolean empty;
+
+    /**
+     * The line text with leading and trailing whitespace characters trimmed.
+     */
+    private final String text;
+
+    /**
+     * The offset in code-points of the first non-whitespace character in this
+     * line.
+     */
     private final int indent;
-    private final Location line;
 
-    public GherkinLine(String lineText, Location line) {
-        this.lineText = requireNonNull(lineText);
-        this.trimmedLineText = trim(lineText);
-        this.line = requireNonNull(line);
-        indent = symbolCount(lineText) - symbolCount(ltrim(lineText));
+    GherkinLine(String rawText, Location location) {
+        this.rawText = requireNonNull(rawText);
+        this.location = requireNonNull(location);
+        Entry<String, Integer> trimmedIndent = trimAndIndent(rawText);
+        this.text = trimmedIndent.getKey();
+        this.indent = trimmedIndent.getValue();
+        this.empty = text.isEmpty();
     }
 
-    public int indent() {
+    int getIndent() {
         return indent;
     }
 
-    public String getLineText(int indentToRemove) {
-        if (indentToRemove < 0 || indentToRemove > indent())
-            return trimmedLineText;
-        return lineText.substring(indentToRemove);
+    String getText() {
+        return text;
+    }
+
+    String getRawText() {
+        return rawText;
     }
 
-    public boolean isEmpty() {
-        return trimmedLineText.isEmpty();
+    String getRawTextSubstring(int beginIndex) {
+        return rawText.substring(beginIndex);
     }
 
-    public boolean startsWith(String prefix) {
-        return trimmedLineText.startsWith(prefix);
+    boolean isEmpty() {
+        return empty;
     }
 
-    public String getRestTrimmed(int length) {
-        return trimmedLineText.substring(length).trim();
+    boolean startsWith(String prefix) {
+        return text.startsWith(prefix);
     }
 
-    public List<GherkinLineSpan> getTags() {
+    String substringTrimmed(int beginIndex) {
+        return text.substring(beginIndex).trim();
+    }
 
-        String uncommentedLine = trimmedLineText.split("\\s" + COMMENT_PREFIX, 2)[0];
-        List<GherkinLineSpan> tags = new ArrayList<>();
+    List<GherkinLineSpan> parseTags() {
+        // In most cases the line contains no tags, so the code is optimized for that situation.
+        if (empty) {
+            return emptyList();
+        }
+        String uncommentedLine = StringUtils.removeComments(text);
         int indexInUncommentedLine = 0;
 
         String[] elements = uncommentedLine.split(TAG_PREFIX);
+        if (elements.length == 0) {
+            return emptyList();
+        }
+        List<GherkinLineSpan> tags = new ArrayList<>(elements.length);
         for (String element : elements) {
             String token = rtrim(element);
             if (token.isEmpty()) {
                 continue;
             }
             int symbolLength = uncommentedLine.codePointCount(0, indexInUncommentedLine);
-            int column = indent() + symbolLength + 1;
-            if (!token.matches("^\\S+$")) {
-                throw new ParserException("A tag may not contain whitespace", Locations.atColumn(line, column));
+            int column = indent + symbolLength + COLUMN_OFFSET;
+            if (containsWhiteSpace(token)) {
+                throw new ParserException("A tag may not contain whitespace", Locations.atColumn(location, column));
             }
             tags.add(new GherkinLineSpan(column, TAG_PREFIX + token));
             indexInUncommentedLine += element.length() + 1;
         }
         return tags;
     }
 
-    public List<GherkinLineSpan> getTableCells() {
+    List<GherkinLineSpan> parseTableCells() {
         List<GherkinLineSpan> lineSpans = new ArrayList<>();
         StringBuilder cellBuilder = new StringBuilder();
         boolean beforeFirst = true;
         int col = 0;
         int cellStart = 0;
         boolean escape = false;
-        PrimitiveIterator.OfInt iterator = lineText.codePoints().iterator();
+        PrimitiveIterator.OfInt iterator = text.codePoints().iterator();
         while (iterator.hasNext()) {
             int c = iterator.next();
             if (escape) {
@@ -112,10 +139,9 @@ public List<GherkinLineSpan> getTableCells() {
                         // Skip the first empty span
                         beforeFirst = false;
                     } else {
-                        String cell = cellBuilder.toString();
-                        String leftTrimmedCell = ltrimKeepNewLines(cell);
-                        int cellIndent = symbolCount(cell) - symbolCount(leftTrimmedCell);
-                        lineSpans.add(new GherkinLineSpan(cellStart + cellIndent + OFFSET, rtrimKeepNewLines(leftTrimmedCell)));
+                        Entry<String, Integer> trimmedCellIndent = trimAndIndentKeepNewLines(cellBuilder.toString());
+                        int column = indent + cellStart + trimmedCellIndent.getValue() + COLUMN_OFFSET;
+                        lineSpans.add(new GherkinLineSpan(column, trimmedCellIndent.getKey()));
                     }
                     cellBuilder = new StringBuilder();
                     cellStart = col + 1;
@@ -128,11 +154,11 @@ public List<GherkinLineSpan> getTableCells() {
         return lineSpans;
     }
 
-    public boolean startsWithTitleKeyword(String text) {
-        int textLength = text.length();
-        return trimmedLineText.length() > textLength &&
-                trimmedLineText.startsWith(text) &&
-                trimmedLineText.startsWith(GherkinLanguageConstants.TITLE_KEYWORD_SEPARATOR, textLength);
+    boolean startsWithTitleKeyword(String keyword) {
+        int keywordLength = keyword.length();
+        return text.length() > keywordLength &&
+                text.startsWith(keyword) &&
+                text.startsWith(TITLE_KEYWORD_SEPARATOR, keywordLength);
     }
 
 }
diff --git a/java/src/main/java/io/cucumber/gherkin/GherkinLineSpan.java b/java/src/main/java/io/cucumber/gherkin/GherkinLineSpan.java
@@ -1,13 +1,17 @@
 package io.cucumber.gherkin;
 
 class GherkinLineSpan {
-    // One-based line position
-    public final int column;
+    /**
+     * One-based column position, measured in code points.
+     */
+    final int column;
 
-    // text part of the line
-    public final String text;
+    /**
+     * The text part of the line.
+     */
+    final String text;
 
-    public GherkinLineSpan(int column, String text) {
+    GherkinLineSpan(int column, String text) {
         this.column = column;
         this.text = text;
     }

diff --git a/java/src/main/java/io/cucumber/gherkin/Locations.java b/java/src/main/java/io/cucumber/gherkin/Locations.java
@@ -6,6 +6,11 @@
 
 class Locations {
 
+    /**
+     * Columns are one-based: the first character of a line is at column 1.
+     */
+    static final int COLUMN_OFFSET = 1;
+
     /**
      * Cache of Long objects for the range 0-4095. This is used
      * to avoid creating a huge amount of Long objects in getLocation().

diff --git a/java/src/main/java/io/cucumber/gherkin/ParserException.java b/java/src/main/java/io/cucumber/gherkin/ParserException.java
@@ -6,6 +6,7 @@
 
 import io.cucumber.messages.types.Location;
 
+import static io.cucumber.gherkin.Locations.COLUMN_OFFSET;
 import static io.cucumber.gherkin.Locations.atColumn;
 
 class ParserException extends RuntimeException {
@@ -58,13 +59,16 @@ static class UnexpectedTokenException extends ParserException {
         private static String getMessage(Token receivedToken, List<String> expectedTokenTypes) {
             return String.format("expected: %s, got '%s'",
                     String.join(", ", expectedTokenTypes),
-                    receivedToken.getTokenValue().trim());
+                    receivedToken.getTokenValue()
+            );
         }
 
         private static Location getLocation(Token receivedToken) {
-            return receivedToken.location.getColumn().isPresent()
-                    ? receivedToken.location
-                    : atColumn(receivedToken.location, receivedToken.line.indent() + 1);
+            if (receivedToken.location.getColumn().isPresent()) {
+                return receivedToken.location;
+            }
+            int column = COLUMN_OFFSET + receivedToken.line.getIndent();
+            return atColumn(receivedToken.location, column);
         }
     }
 

diff --git a/java/src/main/java/io/cucumber/gherkin/PickleCompiler.java b/java/src/main/java/io/cucumber/gherkin/PickleCompiler.java
@@ -27,7 +27,6 @@
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.EnumMap;
 import java.util.List;
 import java.util.Map;