Skip to content

Commit 37cdf9c

Browse files
authored
[RTG][circt-tblgen] Improve whitespace handling (#9887)
1 parent fa297ef commit 37cdf9c

File tree

4 files changed

+94
-18
lines changed

4 files changed

+94
-18
lines changed

test/Tools/circt-tblgen/rtg-instruction-methods-errors.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def ErrorMissingSpaceOp : TestOp<"error_missing_space"> {
209209
let arguments = (ins TestRegType:$reg1, TestRegType:$reg2);
210210
let isaAssemblyFormat = "$reg1$reg2";
211211
}
212-
// MISSING_SPACE: [[@LINE-2]]:33: error: expected ' '
212+
// MISSING_SPACE: [[@LINE-2]]:33: error: expected whitespace
213213
#endif
214214

215215
// RUN: not circt-tblgen -gen-rtg-instruction-methods -I %S/../../../include -I %S/../../../llvm/mlir/include %s -DTEST_EMPTY_OPERAND_NAME 2>&1 | FileCheck %s --check-prefix=EMPTY_OPERAND_NAME

test/Tools/circt-tblgen/rtg-instruction-methods.td

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,65 @@ def Test22_AnyImmediateOp : TestOp<"any_immediate"> {
354354
// CHECK: ::llvm::SmallVector<char> strBuf;
355355
// CHECK-NEXT: ::llvm::cast<::circt::rtg::ImmediateAttr>(adaptor.getImm()).getValue().toStringSigned(strBuf);
356356
// CHECK-NEXT: os << strBuf;
357+
358+
def Test23_MultipleSpacesOp : TestOp<"multiple_spaces"> {
359+
let isaAssemblyFormat = "` ` \t `, `";
360+
}
361+
// CHECK-LABEL: void MultipleSpacesOp::printInstructionAssembly
362+
// CHECK-NEXT: os << " ";
363+
// CHECK-NEXT: os << ", ";
364+
365+
def Test24_MultiLineFormatOp : TestOp<"multi_line_format"> {
366+
let isaBinaryFormat = [{
367+
0b001
368+
0b010
369+
0b011
370+
}];
371+
let isaAssemblyFormat = "`a`\n`b`\n";
372+
}
373+
// CHECK-LABEL: void MultiLineFormatOp::printInstructionAssembly
374+
// CHECK-NEXT: os << "a";
375+
// CHECK-NEXT: os << "b";
376+
377+
// CHECK-LABEL: void MultiLineFormatOp::printInstructionBinary
378+
// CHECK: binary = binary.concat(llvm::APInt(3, 1));
379+
// CHECK-NEXT: binary = binary.concat(llvm::APInt(3, 2));
380+
// CHECK-NEXT: binary = binary.concat(llvm::APInt(3, 3));
381+
382+
// Test: Binary literal followed by tab
383+
def Test25_BinaryLiteralTabOp : TestOp<"binary_literal_tab"> {
384+
let isaBinaryFormat = "0b111 0b000";
385+
let isaAssemblyFormat = "` `";
386+
}
387+
// CHECK-LABEL: void BinaryLiteralTabOp::printInstructionBinary
388+
// CHECK: binary = binary.concat(llvm::APInt(3, 7));
389+
// CHECK-NEXT: binary = binary.concat(llvm::APInt(3, 0));
390+
391+
def Test26_IfThenElseNewlinesOp : TestOp<"if_then_else_newlines"> {
392+
let arguments = (ins ImmediateOfWidth<1>:$cond);
393+
let isaAssemblyFormat = "if\n$cond\nthen\n(`a`\n`b`)\nelse\n( `c` )";
394+
}
395+
// CHECK-LABEL: void IfThenElseNewlinesOp::printInstructionAssembly
396+
// CHECK: ::llvm::APInt cond = ::llvm::cast<::circt::rtg::ImmediateAttr>(adaptor.getCond()).getValue();
397+
// CHECK: if (cond.isOne()) {
398+
// CHECK-NEXT: os << "a";
399+
// CHECK-NEXT: os << "b";
400+
// CHECK-NEXT: } else {
401+
// CHECK-NEXT: os << "c";
402+
403+
def Test27_CustomDirectiveNewlinesOp : TestOp<"custom_directive_newlines"> {
404+
let arguments = (ins Arg<TestRegType, "", [SourceReg]>:$rs1,
405+
Arg<TestRegType, "", [SourceReg]>:$rs2);
406+
let isaAssemblyFormat = "custom<myFunc>($rs1\n$rs2)";
407+
}
408+
// CHECK-LABEL: void CustomDirectiveNewlinesOp::printInstructionAssembly
409+
// CHECK: myFunc(adaptor.getRs1(), adaptor.getRs2(), os);
410+
411+
def Test28_SignednessNewlineOp : TestOp<"signedness_newline"> {
412+
let arguments = (ins ImmediateOfWidth<12>:$imm);
413+
let isaAssemblyFormat = "` `\nsigned($imm)";
414+
}
415+
// CHECK-LABEL: void SignednessNewlineOp::printInstructionAssembly
416+
// CHECK-NEXT: os << " ";
417+
// CHECK: ::llvm::SmallVector<char> strBuf;
418+
// CHECK-NEXT: ::llvm::cast<::circt::rtg::ImmediateAttr>(adaptor.getImm()).getValue().toStringSigned(strBuf);

tools/circt-tblgen/RTGInstructionFormat.cpp

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,33 @@ SMLoc FormatParser::getLoc() {
4040
return SMLoc::getFromPointer(loc.getPointer() + pos);
4141
}
4242

43+
bool FormatParser::isWhitespace(char c) {
44+
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
45+
}
46+
4347
void FormatParser::skipWhitespace() {
44-
while (pos < input.size() && (input[pos] == ' ' || input[pos] == '\t'))
48+
while (pos < input.size() && isWhitespace(input[pos]))
4549
++pos;
4650
}
4751

52+
void FormatParser::consumeAtLeastOneWhitespace() {
53+
if (pos >= input.size() || !isWhitespace(input[pos]))
54+
PrintFatalError(getLoc(), "expected whitespace");
55+
skipWhitespace();
56+
}
57+
4858
void FormatParser::consume(StringRef str, bool skipWhitespaceFirst) {
4959
auto loc = getLoc();
5060
if (!tryConsume(str, skipWhitespaceFirst))
5161
PrintFatalError(loc, "expected '" + str + "'");
5262
}
5363

5464
bool FormatParser::tryConsume(StringRef str, bool skipWhitespaceFirst) {
55-
auto savedPos = pos;
65+
assert(!str.empty() && "cannot consume empty string");
66+
assert(llvm::none_of(str, [&](char c) { return isWhitespace(c); }) &&
67+
"string to consume must not be whitespace");
5668

57-
// In case the user tries to consume whitespace.
58-
if (input.substr(pos).starts_with(str)) {
59-
pos += str.size();
60-
return true;
61-
}
69+
auto savedPos = pos;
6270

6371
if (skipWhitespaceFirst)
6472
skipWhitespace();
@@ -160,7 +168,7 @@ FormatNode *FormatParser::parseOptionalBinaryLiteral() {
160168
return nullptr;
161169

162170
// Check for invalid characters after the binary digits
163-
if (!isAtScopeEnd() && input[pos] != ' ')
171+
if (!isAtScopeEnd() && !isWhitespace(input[pos]))
164172
PrintFatalError(getLoc(), "invalid character ('" +
165173
input.substr(pos, pos + 1) +
166174
"') after binary literal");
@@ -194,10 +202,10 @@ FormatNode *FormatParser::parseOptionalSignednessSpecifier() {
194202
bool isSigned = false;
195203
StringRef specifierName;
196204

197-
if (tryConsume("signed(", false)) {
205+
if (tryConsume("signed(")) {
198206
isSigned = true;
199207
specifierName = "'signed'";
200-
} else if (tryConsume("unsigned(", false)) {
208+
} else if (tryConsume("unsigned(")) {
201209
isSigned = false;
202210
specifierName = "'unsigned'";
203211
} else {
@@ -254,10 +262,9 @@ FormatNode *FormatParser::parseNextNode() {
254262
"unexpected character '" +
255263
Twine(input[std::min(pos, input.size() - 1)]) + "'");
256264

257-
// Elements in the format are separated by a space.
265+
// Elements in the format are separated by whitespace.
258266
if (!isAtScopeEnd())
259-
consume(" ");
260-
skipWhitespace();
267+
consumeAtLeastOneWhitespace();
261268

262269
return node;
263270
}
@@ -267,7 +274,7 @@ FormatNode *FormatParser::parseOptionalIfThenElse() {
267274
if (!tryConsume("if"))
268275
return nullptr;
269276

270-
consume(" ");
277+
consumeAtLeastOneWhitespace();
271278

272279
// Parse the condition operand (must be a 1-bit immediate)
273280
FormatNode *condNode = parseOptionalOperand();
@@ -276,7 +283,7 @@ FormatNode *FormatParser::parseOptionalIfThenElse() {
276283

277284
auto *condition = cast<OperandNode>(condNode);
278285

279-
consume(" ");
286+
consumeAtLeastOneWhitespace();
280287

281288
SmallVector<FormatNode *> thenBranch;
282289
bool hasThen = tryConsume("then");
@@ -328,9 +335,10 @@ FormatNode *FormatParser::parseOptionalCustomDirective() {
328335
auto *operandNode = cast<OperandNode>(argNode);
329336
arguments.push_back(operandNode);
330337

331-
// Arguments are separated by spaces, but the last one is followed by ')'
338+
// Arguments are separated by whitespace, but the last one is followed by
339+
// ')'
332340
if (!tryConsume(")")) {
333-
consume(" ");
341+
consumeAtLeastOneWhitespace();
334342
continue;
335343
}
336344
break;

tools/circt-tblgen/RTGInstructionFormat.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ struct FormatParser {
428428
/// \return True if the string was consumed, false otherwise
429429
bool tryConsume(StringRef str, bool skipWhitespaceFirst = true);
430430

431+
/// Consume all but at least one whitespace character at the current position.
432+
void consumeAtLeastOneWhitespace();
433+
431434
/// Try to parse an unsigned integer literal at the current position.
432435
/// \return True if an unsigned integer was parsed, false otherwise
433436
bool tryParseUIntLiteral(uint64_t &value);
@@ -446,6 +449,9 @@ struct FormatParser {
446449
/// Check if the current position is at the end of a scope.
447450
bool isAtScopeEnd();
448451

452+
/// Check if a character is whitespace.
453+
bool isWhitespace(char c);
454+
449455
/// Get the current source location in the input string.
450456
/// \return The current source location for error reporting
451457
llvm::SMLoc getLoc();

0 commit comments

Comments
 (0)