Skip to content

Commit d3c4c69

Browse files
committed
Simplify #re to pass through literally with newline stripping
Remove escape processing from scanRegexContents. All characters now pass through as-is except newlines, which are stripped, allowing text block patterns to be split across lines without trailing backslashes. Update docs to clearly document the behavior: no escape evaluation, newlines stripped, and the \n regex escape is used to match a newline.
1 parent cf7ede2 commit d3c4c69

8 files changed

Lines changed: 56 additions & 46 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"root":["./src/app.stories.tsx","./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/landing-page/serviceexample.tsx","./src/components/landing-page/index.tsx","./src/components/landing-page/usesupportedlanguageshook.tsx","./src/components/landing-page/features/index.stories.tsx","./src/components/landing-page/features/index.tsx","./src/components/landing-page/footer/index.stories.tsx","./src/components/landing-page/footer/index.tsx","./src/components/landing-page/heading/diagram.tsx","./src/components/landing-page/heading/tagline.tsx","./src/components/landing-page/heading/index.tsx","./src/components/landing-page/informationcircles/index.stories.tsx","./src/components/landing-page/informationcircles/index.tsx","./src/components/landing-page/quote/index.stories.tsx","./src/components/landing-page/quote/index.tsx","./src/components/landing-page/subheading/index.stories.tsx","./src/components/landing-page/subheading/index.tsx","./src/components/landing-page/subheading/web.stories.tsx","./src/components/landing-page/subheading/web.tsx","./src/components/navigation/hamburgermenu.tsx","./src/components/navigation/menulinks.tsx","./src/components/navigation/index.tsx","./src/components/navigation/navigation.stories.tsx","./src/components/svg/arrow.stories.tsx","./src/components/svg/arrow.tsx","./src/components/svg/line.stories.tsx","./src/components/svg/line.tsx","./src/components/svg/spider.stories.tsx","./src/components/svg/spider.tsx","./src/components/svg/svg.tsx","./src/components/svg/useresizeobserver.tsx","./src/components/svg/wheel.stories.tsx","./src/components/svg/wheel.tsx","./src/components/ui/circlesection.stories.tsx","./src/components/ui/circlesection.tsx","./src/components/ui/section.stories.tsx","./src/components/ui/section.tsx","./src/components/ui/smithyglow.stories.tsx","./src/components/ui/smithyglow.tsx","./src/components/ui/smithypopgradient.stories.tsx","./src/components/ui/smithypopgradient.tsx","./src/components/ui/brand.sto
ries.tsx","./src/components/ui/brand.tsx","./src/components/ui/button.stories.tsx","./src/components/ui/button.tsx","./src/components/ui/card.stories.tsx","./src/components/ui/card.tsx","./src/components/ui/icons.stories.tsx","./src/components/ui/icons.tsx","./src/components/ui/ide-panel.stories.tsx","./src/components/ui/ide-panel.tsx","./src/components/ui/navigation-menu.tsx","./src/components/ui/sheet.tsx","./src/i18n/client.tsx","./src/i18n/index.tsx","./src/i18n/ssr.tsx","./src/lib/utils.ts"],"version":"6.0.2"}
1+
{"root":["./src/app.stories.tsx","./src/app.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/landing-page/serviceexample.tsx","./src/components/landing-page/index.tsx","./src/components/landing-page/usesupportedlanguageshook.tsx","./src/components/landing-page/features/index.stories.tsx","./src/components/landing-page/features/index.tsx","./src/components/landing-page/footer/index.stories.tsx","./src/components/landing-page/footer/index.tsx","./src/components/landing-page/heading/diagram.tsx","./src/components/landing-page/heading/tagline.tsx","./src/components/landing-page/heading/index.tsx","./src/components/landing-page/informationcircles/index.stories.tsx","./src/components/landing-page/informationcircles/index.tsx","./src/components/landing-page/quote/index.stories.tsx","./src/components/landing-page/quote/index.tsx","./src/components/landing-page/subheading/index.stories.tsx","./src/components/landing-page/subheading/index.tsx","./src/components/landing-page/subheading/web.stories.tsx","./src/components/landing-page/subheading/web.tsx","./src/components/navigation/hamburgermenu.tsx","./src/components/navigation/menulinks.tsx","./src/components/navigation/index.tsx","./src/components/navigation/navigation.stories.tsx","./src/components/svg/arrow.stories.tsx","./src/components/svg/arrow.tsx","./src/components/svg/line.stories.tsx","./src/components/svg/line.tsx","./src/components/svg/spider.stories.tsx","./src/components/svg/spider.tsx","./src/components/svg/svg.tsx","./src/components/svg/useresizeobserver.tsx","./src/components/svg/wheel.stories.tsx","./src/components/svg/wheel.tsx","./src/components/ui/circlesection.stories.tsx","./src/components/ui/circlesection.tsx","./src/components/ui/section.stories.tsx","./src/components/ui/section.tsx","./src/components/ui/smithyglow.stories.tsx","./src/components/ui/smithyglow.tsx","./src/components/ui/smithypopgradient.stories.tsx","./src/components/ui/smithypopgradient.tsx","./src/components/ui/brand.sto
ries.tsx","./src/components/ui/brand.tsx","./src/components/ui/button.stories.tsx","./src/components/ui/button.tsx","./src/components/ui/card.stories.tsx","./src/components/ui/card.tsx","./src/components/ui/icons.stories.tsx","./src/components/ui/icons.tsx","./src/components/ui/ide-panel.stories.tsx","./src/components/ui/ide-panel.tsx","./src/components/ui/navigation-menu.tsx","./src/components/ui/sheet.tsx","./src/i18n/client.tsx","./src/i18n/index.tsx","./src/i18n/ssr.tsx","./src/lib/utils.ts"],"version":"6.0.3"}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"root":["./vite.config.ts"],"version":"6.0.2"}
1+
{"root":["./vite.config.ts"],"version":"6.0.3"}

docs/source-2.0/spec/idl.rst

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2575,8 +2575,9 @@ Smithy defines the following built-in tags:
25752575
* - Tag
25762576
- Description
25772577
* - ``#re``
2578-
- Regular expression literal. Backslash sequences are passed through
2579-
literally, allowing regex patterns to be written without double-escaping.
2578+
- Regular expression literal. Characters are passed through literally
2579+
with no escape evaluation, except that newlines are stripped, allowing
2580+
patterns to span multiple lines in text blocks.
25802581
* - ``#b``
25812582
- Binary literal. Interprets the string as a sequence of bytes using
25822583
hex escapes (``\xHH``), octal escapes (``\OOO``), and named escapes.
@@ -2598,11 +2599,11 @@ tagged string literals require version 2.1 or later.
25982599
``#re`` tag
25992600
-----------
26002601

2601-
The ``#re`` tag treats backslash sequences as literal characters rather than
2602-
escape sequences. This allows regular expression patterns to be written
2603-
naturally without double-escaping. Only ``\"`` (escaped quote) and ``\\``
2604-
(escaped backslash) are interpreted; all other ``\X`` sequences are passed
2605-
through as two literal characters.
2602+
The ``#re`` tag passes all characters through literally without any escape
2603+
evaluation. This allows regular expression patterns to be written naturally
2604+
without double-escaping. Newlines are stripped from the content, so patterns
2605+
in text blocks are concatenated across lines. To match a newline character in
2606+
the regex, use ``\n``.
26062607

26072608
.. code-block:: smithy
26082609
@@ -2616,17 +2617,31 @@ The above is equivalent to:
26162617
@pattern("^\\d{5}(-\\d{4})?$")
26172618
string ZipCode
26182619
2619-
Text blocks can be used with ``#re`` for multiline patterns. Escaped newlines
2620-
(a backslash immediately before a newline) are removed from the output,
2621-
allowing long patterns to be split across lines:
2620+
.. rubric:: Escaping in ``#re``
2621+
2622+
Because the content is passed through literally, the only escape-related handling is
2623+
at the tokenizer level: a ``\"`` sequence prevents the string from being
2624+
terminated but is included in the output as-is. In regular expressions,
2625+
``\"`` is equivalent to ``"`` so this does not change the pattern's meaning.
2626+
In text blocks, quotes do not need escaping.
2627+
2628+
.. rubric:: Text blocks
2629+
2630+
In text blocks, newlines are stripped and lines are concatenated. This lets
2631+
you split long patterns across multiple lines without any special syntax:
26222632

26232633
.. code-block:: smithy
26242634
26252635
@pattern(#re """
2626-
^\d{5}\
2627-
(-\d{4})?$
2636+
[A-Z]:\\\\
2637+
([\w\s\d]+\\)*
2638+
([\w\s\d]+(\.[\w\d]+)?)?
26282639
""")
2629-
string ZipCode
2640+
string WindowsPath
2641+
2642+
The above produces the pattern ``[A-Z]:\\\\([\w\s\d]+\\)*([\w\s\d]+(\.[\w\d]+)?)?``
2643+
with all lines joined together. If you need the regex to match a newline
2644+
character, use the ``\n`` escape.
26302645

26312646

26322647
``#b`` tag

smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ private IdlToken parseRawString() {
426426
}
427427

428428
/**
429-
* Reads raw content between quotes without applying escape processing.
429+
* Reads raw content between quotes without evaluating escape sequences.
430430
*/
431431
private CharSequence parseRawStringContents(boolean triple) {
432432
int start = parser.position();

smithy-model/src/main/java/software/amazon/smithy/model/loader/TaggedStringLiteral.java

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,32 +64,14 @@ static Result scan(String tag, CharSequence lexeme, boolean isTextBlock) {
6464
}
6565

6666
/**
67-
* Scans regex string contents. {@code \"} produces a literal double quote,
68-
* {@code \\} produces a literal backslash, and all other backslash sequences
69-
* are passed through as two literal characters.
67+
* Scans regex string contents. All characters are passed through literally
68+
* except newlines which are stripped, allowing multiline patterns in text blocks.
7069
*/
7170
private static Result scanRegexContents(CharSequence lexeme) {
7271
StringBuilder result = new StringBuilder(lexeme.length());
7372
for (int i = 0; i < lexeme.length(); i++) {
7473
char c = lexeme.charAt(i);
75-
if (c == '\\' && i + 1 < lexeme.length()) {
76-
char next = lexeme.charAt(i + 1);
77-
if (next == '"') {
78-
result.append('"');
79-
i++;
80-
} else if (next == '\\') {
81-
result.append('\\');
82-
i++;
83-
} else if (next == '\n') {
84-
// Escaped newline: skip both backslash and newline.
85-
i++;
86-
} else {
87-
// Pass through literally as two characters.
88-
result.append(c);
89-
result.append(next);
90-
i++;
91-
}
92-
} else {
74+
if (c != '\n') {
9375
result.append(c);
9476
}
9577
}

smithy-model/src/test/java/software/amazon/smithy/model/loader/TaggedStringLiteralTest.java

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ public static Stream<Arguments> reTagTests() {
2727
return Stream.of(
2828
Arguments.of("^\\d{5}$", "^\\d{5}$"),
2929
Arguments.of("\\w+\\s\\d", "\\w+\\s\\d"),
30-
Arguments.of("\\\\", "\\"),
31-
Arguments.of("\\\"", "\""),
32-
Arguments.of("a\\\"b", "a\"b"),
30+
// Backslashes pass through literally (no escape processing)
31+
Arguments.of("\\\\", "\\\\"),
32+
// Escaped quote passes through literally (tokenizer handles termination)
33+
Arguments.of("\\\"", "\\\""),
34+
Arguments.of("a\\\"b", "a\\\"b"),
3335
Arguments.of("hello", "hello"),
3436
Arguments.of("", ""),
3537
Arguments.of("[a-z]+\\.(\\d{1,3}\\.){3}\\d{1,3}", "[a-z]+\\.(\\d{1,3}\\.){3}\\d{1,3}"));
@@ -44,17 +46,28 @@ public void parsesReTaggedStrings(String input, String expected) {
4446
}
4547

4648
@Test
47-
public void regexEscapedNewlineIsRemoved() {
48-
TaggedStringLiteral.Result result = TaggedStringLiteral.scan("re", "abc\\\ndef", false);
49+
public void regexNewlinesAreStripped() {
50+
// Newlines are stripped automatically (no backslash needed)
51+
TaggedStringLiteral.Result result = TaggedStringLiteral.scan("re", "abc\ndef", false);
4952
assertThat(result.stringValue.toString(), equalTo("abcdef"));
5053
}
5154

5255
@Test
53-
public void regexTextBlock() {
56+
public void regexTextBlockStripsNewlinesAndConcatenatesLines() {
5457
// Text block raw content starts with \n (as it comes from the tokenizer).
58+
// After text block normalization, lines are concatenated by stripping newlines.
5559
TaggedStringLiteral.Result result = TaggedStringLiteral.scan("re", "\n ^\\d{5}$\n ", true);
5660
assertThat(result.token, is(IdlToken.STRING));
57-
assertThat(result.stringValue.toString(), equalTo("^\\d{5}$\n"));
61+
assertThat(result.stringValue.toString(), equalTo("^\\d{5}$"));
62+
}
63+
64+
@Test
65+
public void regexTextBlockMultilinePattern() {
66+
// Multiline pattern: lines are joined without needing trailing backslash
67+
TaggedStringLiteral.Result result = TaggedStringLiteral.scan("re",
68+
"\n [A-Z]:\\\\\\\\\n ([\\w\\s\\d]+\\\\)*\n ", true);
69+
assertThat(result.token, is(IdlToken.STRING));
70+
assertThat(result.stringValue.toString(), equalTo("[A-Z]:\\\\\\\\([\\w\\s\\d]+\\\\)*"));
5871
}
5972

6073
// --- #b tag tests ---

smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/2.1/tagged-literals-textblock.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"smithy.example#ZipCode": {
55
"type": "string",
66
"traits": {
7-
"smithy.api#pattern": "^\\d{5}(-\\d{4})?$\n"
7+
"smithy.api#pattern": "^\\d{5}(-\\d{4})?$"
88
}
99
},
1010
"smithy.example#CborExample": {

smithy-model/src/test/resources/software/amazon/smithy/model/loader/valid/2.1/tagged-literals.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"smithy.example#QuoteInRegex": {
1717
"type": "string",
1818
"traits": {
19-
"smithy.api#pattern": "a\"b"
19+
"smithy.api#pattern": "a\\\"b"
2020
}
2121
},
2222
"smithy.example#TaggedLiteralsExample": {

0 commit comments

Comments
 (0)