Skip to content

Commit 52247de

Browse files
committed
properly handle escape sequences in string literals
1 parent 67680e4 commit 52247de

File tree

6 files changed

+1945
-1596
lines changed

6 files changed

+1945
-1596
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@ To run tests simply run `nix-shell --run 'tree-sitter test'`.
1414

1515
* use [Unicode® Standard Annex #31](https://www.unicode.org/reports/tr31/) (augmented with '-') for identifiers
1616
* add [operations](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#operations)
17-
* add [template expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#template-expressions) and express string literals using them
17+
* add [template expressions](https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#template-expressions)

grammar.js

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,24 @@ module.exports = grammar({
7676

7777
numeric_lit: $ => /[0-9]+(\.[0-9]+([eE][-+]?[0-9]+)?)?/,
7878

79-
string_lit: $ => (seq('"', token.immediate(repeat(choice(/[^\\"\n]/, /\\(.|\n)/))), '"')),
79+
string_lit: $ => seq(
80+
'"',
81+
repeat(choice(token.immediate(prec(1, /[^\\"\n\r\t]+/)), $.escape_sequence)),
82+
'"',
83+
),
84+
85+
escape_sequence: $ => token.immediate(seq(
86+
'\\',
87+
choice(
88+
'\\',
89+
'"',
90+
'n',
91+
'r',
92+
't',
93+
/u[0-9a-fA-F]{4}/,
94+
/U[0-9a-fA-F]{8}/
95+
)
96+
)),
8097

8198
bool_lit: $ => choice('true', 'false'),
8299

src/grammar.json

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -255,22 +255,26 @@
255255
"value": "\""
256256
},
257257
{
258-
"type": "IMMEDIATE_TOKEN",
258+
"type": "REPEAT",
259259
"content": {
260-
"type": "REPEAT",
261-
"content": {
262-
"type": "CHOICE",
263-
"members": [
264-
{
265-
"type": "PATTERN",
266-
"value": "[^\\\\\"\\n]"
267-
},
268-
{
269-
"type": "PATTERN",
270-
"value": "\\\\(.|\\n)"
260+
"type": "CHOICE",
261+
"members": [
262+
{
263+
"type": "IMMEDIATE_TOKEN",
264+
"content": {
265+
"type": "PREC",
266+
"value": 1,
267+
"content": {
268+
"type": "PATTERN",
269+
"value": "[^\\\\\"\\n\\r\\t]+"
270+
}
271271
}
272-
]
273-
}
272+
},
273+
{
274+
"type": "SYMBOL",
275+
"name": "escape_sequence"
276+
}
277+
]
274278
}
275279
},
276280
{
@@ -279,6 +283,51 @@
279283
}
280284
]
281285
},
286+
"escape_sequence": {
287+
"type": "IMMEDIATE_TOKEN",
288+
"content": {
289+
"type": "SEQ",
290+
"members": [
291+
{
292+
"type": "STRING",
293+
"value": "\\"
294+
},
295+
{
296+
"type": "CHOICE",
297+
"members": [
298+
{
299+
"type": "STRING",
300+
"value": "\\"
301+
},
302+
{
303+
"type": "STRING",
304+
"value": "\""
305+
},
306+
{
307+
"type": "STRING",
308+
"value": "n"
309+
},
310+
{
311+
"type": "STRING",
312+
"value": "r"
313+
},
314+
{
315+
"type": "STRING",
316+
"value": "t"
317+
},
318+
{
319+
"type": "PATTERN",
320+
"value": "u[0-9a-fA-F]{4}"
321+
},
322+
{
323+
"type": "PATTERN",
324+
"value": "U[0-9a-fA-F]{8}"
325+
}
326+
]
327+
}
328+
]
329+
}
330+
},
282331
"bool_lit": {
283332
"type": "CHOICE",
284333
"members": [

src/node-types.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,17 @@
497497
{
498498
"type": "string_lit",
499499
"named": true,
500-
"fields": {}
500+
"fields": {},
501+
"children": {
502+
"multiple": true,
503+
"required": false,
504+
"types": [
505+
{
506+
"type": "escape_sequence",
507+
"named": true
508+
}
509+
]
510+
}
501511
},
502512
{
503513
"type": "tuple",
@@ -585,6 +595,10 @@
585595
"type": "ellipsis",
586596
"named": true
587597
},
598+
{
599+
"type": "escape_sequence",
600+
"named": true
601+
},
588602
{
589603
"type": "false",
590604
"named": false

0 commit comments

Comments
 (0)