Skip to content

Commit c458c49

Browse files
committed
Fix a bug in the formatter that was removing newline escapes
Fixes smithy-lang#2524
1 parent cf921a3 commit c458c49

8 files changed

Lines changed: 96 additions & 10 deletions

File tree

smithy-model/src/main/java/software/amazon/smithy/model/loader/DefaultTokenizer.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class DefaultTokenizer implements IdlTokenizer {
2020
private int currentTokenColumn = -1;
2121
private Number currentTokenNumber;
2222
private CharSequence currentTokenStringSlice;
23+
private CharSequence currentTextBlockContents;
2324
private String currentTokenError;
2425

2526
DefaultTokenizer(String filename, CharSequence model) {
@@ -96,6 +97,17 @@ public final CharSequence getCurrentTokenStringSlice() {
9697
}
9798
}
9899

100+
@Override
101+
public final CharSequence getCurrentTextBlockContents() {
102+
getCurrentToken();
103+
if (currentTextBlockContents != null) {
104+
return currentTextBlockContents;
105+
} else {
106+
throw syntax("The current token must be text block: "
107+
+ currentTokenType.getDebug(getCurrentTokenLexeme()), getCurrentTokenLocation());
108+
}
109+
}
110+
99111
@Override
100112
public final Number getCurrentTokenNumberValue() {
101113
getCurrentToken();
@@ -124,6 +136,7 @@ public final boolean hasNext() {
124136
@Override
125137
public IdlToken next() {
126138
currentTokenStringSlice = null;
139+
currentTextBlockContents = null;
127140
currentTokenNumber = null;
128141
currentTokenColumn = parser.column();
129142
currentTokenLine = parser.line();
@@ -362,7 +375,7 @@ private IdlToken parseString() {
362375

363376
try {
364377
// Parse the contents of a quoted string.
365-
currentTokenStringSlice = parseQuotedTextAndTextBlock(false);
378+
currentTokenStringSlice = parseQuotedTextAndTextBlock(null);
366379
currentTokenEnd = parser.position();
367380
return currentTokenType = IdlToken.STRING;
368381
} catch (RuntimeException e) {
@@ -374,7 +387,9 @@ private IdlToken parseString() {
374387

375388
private IdlToken parseTextBlock() {
376389
try {
377-
currentTokenStringSlice = parseQuotedTextAndTextBlock(true);
390+
StringBuilder builder = new StringBuilder();
391+
currentTextBlockContents = builder;
392+
currentTokenStringSlice = parseQuotedTextAndTextBlock(builder);
378393
currentTokenEnd = parser.position();
379394
return currentTokenType = IdlToken.TEXT_BLOCK;
380395
} catch (RuntimeException e) {
@@ -385,8 +400,9 @@ private IdlToken parseTextBlock() {
385400
}
386401

387402
// Parses both quoted_text and text_block
388-
private CharSequence parseQuotedTextAndTextBlock(boolean triple) {
403+
private CharSequence parseQuotedTextAndTextBlock(StringBuilder textBlockContents) {
389404
int start = parser.position();
405+
boolean triple = textBlockContents != null;
390406

391407
while (!parser.eof()) {
392408
char next = parser.peek();
@@ -409,6 +425,6 @@ private CharSequence parseQuotedTextAndTextBlock(boolean triple) {
409425
parser.expect('"');
410426
}
411427

412-
return IdlStringLexer.scanStringContents(result, triple);
428+
return IdlStringLexer.scanStringContents(result, textBlockContents);
413429
}
414430
}

smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlStringLexer.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,16 @@ CharSequence getResult() {
4646
}
4747
}
4848

49-
static CharSequence scanStringContents(CharSequence lexeme, boolean scanningTextBlock) {
49+
static CharSequence scanStringContents(
50+
CharSequence lexeme,
51+
StringBuilder textBlockContents
52+
) {
53+
boolean scanningTextBlock = textBlockContents != null;
5054
lexeme = normalizeLineEndings(lexeme);
5155

5256
// Format the text block and remove incidental whitespace.
5357
if (scanningTextBlock) {
54-
lexeme = formatTextBlock(lexeme);
58+
lexeme = formatTextBlock(lexeme, textBlockContents);
5559
}
5660

5761
//StringBuilder result = new StringBuilder(lexeme.length());
@@ -176,14 +180,13 @@ private static boolean containsCarriageReturn(CharSequence lexeme) {
176180
return false;
177181
}
178182

179-
private static CharSequence formatTextBlock(CharSequence lexeme) {
183+
private static CharSequence formatTextBlock(CharSequence lexeme, StringBuilder buffer) {
180184
if (lexeme.length() == 0) {
181185
throw new RuntimeException("Text block is empty");
182186
} else if (lexeme.charAt(0) != '\n') {
183187
throw new RuntimeException("Text block must start with a new line");
184188
}
185189

186-
StringBuilder buffer = new StringBuilder();
187190
int longestPadding = Integer.MAX_VALUE;
188191
List<CharSequence> lines = lines(lexeme);
189192

smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,17 @@ default CharSequence getCurrentTokenLexeme() {
151151
*/
152152
CharSequence getCurrentTokenStringSlice();
153153

154+
/**
155+
* If the current token is a text block, get the formatted content as a CharSequence,
156+
* with any incidental leading whitespace already removed.
157+
*
158+
* @return Returns the parsed string content associated with the current token.
159+
* @throws ModelSyntaxException if the current token is not a text block.
160+
*/
161+
default CharSequence getCurrentTextBlockContents() {
162+
throw new UnsupportedOperationException();
163+
}
164+
154165
/**
155166
* If the current token is a number, get the associated parsed number.
156167
*

smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturedToken.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public final class CapturedToken implements FromSourceLocation, ToSmithyBuilder<
3333
private final int endColumn;
3434
private final CharSequence lexeme;
3535
private final String stringContents;
36+
private final String textBlockContents;
3637
private final String errorMessage;
3738
private final Number numberValue;
3839

@@ -46,6 +47,7 @@ private CapturedToken(
4647
int endColumn,
4748
CharSequence lexeme,
4849
String stringContents,
50+
String formattedTextBlockContents,
4951
Number numberValue,
5052
String errorMessage
5153
) {
@@ -64,6 +66,7 @@ private CapturedToken(
6466
} else {
6567
this.stringContents = stringContents;
6668
}
69+
this.textBlockContents = formattedTextBlockContents;
6770

6871
if (errorMessage == null && token == IdlToken.ERROR) {
6972
this.errorMessage = "";
@@ -92,6 +95,7 @@ public static final class Builder implements SmithyBuilder<CapturedToken> {
9295
private int endColumn;
9396
private CharSequence lexeme;
9497
private String stringContents;
98+
private String formattedTextBlockContents;
9599
private String errorMessage;
96100
private Number numberValue;
97101

@@ -109,6 +113,7 @@ public CapturedToken build() {
109113
endColumn,
110114
lexeme,
111115
stringContents,
116+
formattedTextBlockContents,
112117
numberValue,
113118
errorMessage);
114119
}
@@ -158,6 +163,11 @@ public Builder stringContents(String stringContents) {
158163
return this;
159164
}
160165

166+
public Builder textBlockContents(String formattedTextBlockContents) {
167+
this.formattedTextBlockContents = formattedTextBlockContents;
168+
return this;
169+
}
170+
161171
public Builder errorMessage(String errorMessage) {
162172
this.errorMessage = errorMessage;
163173
return this;
@@ -200,6 +210,9 @@ public static CapturedToken from(IdlTokenizer tokenizer, Function<CharSequence,
200210
.stringContents(tok == IdlToken.STRING || tok == IdlToken.TEXT_BLOCK || tok == IdlToken.IDENTIFIER
201211
? stringTable.apply(tokenizer.getCurrentTokenStringSlice())
202212
: null)
213+
.textBlockContents(tok == IdlToken.TEXT_BLOCK
214+
? stringTable.apply(tokenizer.getCurrentTextBlockContents())
215+
: null)
203216
.numberValue(tok == IdlToken.NUMBER ? tokenizer.getCurrentTokenNumberValue() : null)
204217
.errorMessage(tok == IdlToken.ERROR ? tokenizer.getCurrentTokenError() : null)
205218
.build();
@@ -218,7 +231,8 @@ public Builder toBuilder() {
218231
.lexeme(lexeme)
219232
.errorMessage(errorMessage)
220233
.numberValue(numberValue)
221-
.stringContents(stringContents);
234+
.stringContents(stringContents)
235+
.textBlockContents(textBlockContents);
222236
}
223237

224238
/**
@@ -277,10 +291,21 @@ public CharSequence getLexeme() {
277291
*
278292
* @return Returns the string contents of the lexeme, or null if not a string|text block|identifier.
279293
*/
294+
// Can we capture the formatted block as well?
280295
public String getStringContents() {
281296
return stringContents;
282297
}
283298

299+
/**
300+
* Get the String contents of a TEXT_BLOCK token with any incidental leading whitespace
301+
* already removed.
302+
*
303+
* @return Returns the string contents of a TEXT_BLOCK lexeme, or null if not a text block.
304+
*/
305+
public String getTextBlockContents() {
306+
return textBlockContents;
307+
}
308+
284309
/**
285310
* Gets the associated error message with the token if it's an error.
286311
*

smithy-syntax/src/main/java/software/amazon/smithy/syntax/CapturingTokenizer.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ public int getCurrentTokenEnd() {
9696
return getPosition();
9797
}
9898

99+
@Override
100+
public CharSequence getCurrentTextBlockContents() {
101+
return getToken().getTextBlockContents();
102+
}
103+
99104
@Override
100105
public CharSequence getCurrentTokenStringSlice() {
101106
return getToken().getStringContents();

smithy-syntax/src/main/java/software/amazon/smithy/syntax/FormatVisitor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ Doc visit(TreeCursor cursor) {
462462
.tokens()
463463
.findFirst()
464464
.orElseThrow(() -> new RuntimeException("TEXT_BLOCK cursor does not have an IDL token"))
465-
.getStringContents();
465+
.getTextBlockContents();
466466

467467
// If the last character is a newline, then the closing triple quote must be on the next line.
468468
boolean endQuoteOnNextLine = stringValue.endsWith("\n") || stringValue.endsWith("\r");

smithy-syntax/src/test/resources/software/amazon/smithy/syntax/formatter/trait-textblock.formatted.smithy

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,16 @@ string ExtraTrailingNewlines
8686
"""
8787
)
8888
string EmptyTextBlock
89+
90+
// Ensure that the escaped new lines are preserved and the contents are aligned with the opening quote.
91+
@pattern(
92+
"""
93+
^[a-z\
94+
A-Z]+\
95+
[a-z\
96+
A-Z\
97+
0-9]+\
98+
$
99+
"""
100+
)
101+
string identifier

smithy-syntax/src/test/resources/software/amazon/smithy/syntax/formatter/trait-textblock.smithy

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,16 @@ string ExtraTrailingNewlines
7676
@documentation("""
7777
""")
7878
string EmptyTextBlock
79+
80+
// Ensure that the escaped new lines are preserved and the contents are aligned with the opening quote.
81+
@pattern(
82+
"""
83+
^[a-z\
84+
A-Z]+\
85+
[a-z\
86+
A-Z\
87+
0-9]+\
88+
$
89+
"""
90+
)
91+
string identifier

0 commit comments

Comments
 (0)