-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathgrammar.js
More file actions
172 lines (149 loc) · 4.67 KB
/
grammar.js
File metadata and controls
172 lines (149 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
// Note: line-break sequences are system specific. If you ever need to match them
// explicitly, use a regex like /\r\n?|\n/ so that Windows line endings match too.
/**
 * Builds a rule matching one or more occurrences of `rule` separated by commas.
 *
 * @param rule - grammar rule for a single item
 * @returns whatever `sep1` returns for `rule` with `','` as the separator
 */
function commaSep1(rule) {
  const separator = ',';
  return sep1(rule, separator);
}
/**
 * Builds a rule matching one or more occurrences of `rule`, with `separator`
 * between consecutive occurrences (no leading or trailing separator).
 *
 * @param rule - grammar rule for a single item
 * @param separator - grammar rule (or literal) placed between items
 * @returns `rule` followed by zero or more `separator rule` pairs
 */
function sep1(rule, separator) {
  const separatedItem = seq(separator, rule);
  const remainingItems = repeat(separatedItem);
  return seq(rule, remainingItems);
}
module.exports = grammar ({
name: 'jinja2',
rules: {
source_file: $ => repeat(
choice(
$._jinja_value,
$.jinja_expression,
$._jinja_comment,
$._text
)
),
_jinja_value: $ => seq(
'{{',
$._expr,
'}}'
),
// This is awkward regex because we aren't parsing anything
// in between the expression markers like _jinja_value does
jinja_expression: $ => seq(
'{%',
new RegExp(
'(' + // capture group
'[^%]' + // any character that isn't a `%`
'|' + // or
'%[^}]' + // a `%` followed by any character that isn't `}`
')*' + // zero or more of the previous capture group
'%}' // followed by a `%` then a `}`
)
),
// comment regex is special because a comment can end
// with #} ##} #######} etc.
_jinja_comment: $ => seq(
'{#', // comments start with `{#`
new RegExp(
'(' + // capture group
'(' + // capture group
'[^#]' + // any character that isn't `#`
'|' + // or
'#[^}]' + // a `#` character followed by another character that isn't `}`
')*' + // zero or more of the previous capture group
')#+}' // followed by at least one `#` and a `}`
)
),
_expr: $ => choice(
$.fn_call,
$.list,
$.dict,
$.lit_string,
$.bool,
$.integer,
$.float,
),
fn_call: $ => seq(
field('fn_name', $.identifier),
field('argument_list', $.argument_list)
),
argument_list: $ => seq(
'(',
optional(commaSep1(
choice(
$._expr,
$.kwarg
)
)),
optional(','),
')'
),
lit_string: $ => choice(
seq(
"'", // single quote string start
/([^']|\\')*/, // either not a `'` or a `\` followed by a `'` zero or more times
"'", // single quote string start
),
seq(
'"', // double quote string start
/([^"]|\\")*/, // either not a `"` or a `\` followed by a `"` zero or more times
'"', // double quote string start
)
),
bool: $ => choice(
'True',
'False'
),
list: $ => seq(
'[',
optional(commaSep1($._expr)),
optional(','),
']'
),
dict: $ => seq(
'{',
optional(commaSep1($.pair)),
optional(','),
'}'
),
pair: $ => seq(
field('key', $.lit_string),
':',
field('value', $._expr)
),
identifier: $ => $._identifier,
// This regex is fine until we allow user-named variables and functions.
// Once we do that we may want to allow Unicode identifiers like python does: /[_\p{XID_Start}][_\p{XID_Continue}]*/
_identifier: $ => token(new RegExp(
'[a-zA-Z_]' + // starts with a lower or upper letter or an underscore
'[a-zA-Z0-9_]*' // all following characters must be a lower or upper letter, digit, or underscore.
)),
kwarg: $ => seq(
field("key", $.identifier),
'=',
field("value", $._expr),
),
// matches everything but jinja
_text: $ => new RegExp(
'(' + // capture group
'[^{]' + // match any character that is not `{`
'|' + // or
'[{][^{%#]' + // match a character that IS `{` and isn't followed by `{`, `%`, or`#`
')' + // end capture group
'+' // one or more times. using this instead of * because tree-sitter can hang when matching the empty string.
),
integer: $ => token(
seq(
optional(/[\+-]/),
repeat1(/_?[0-9]+/),
)
),
float: $ => {
const digits = repeat1(/[0-9]+_?/);
const exponent = seq(/[eE][\+-]?/, digits)
const sign = /[\+-]/
return token(
choice(
seq(optional(sign), digits, '.', optional(digits), optional(exponent)),
seq(optional(sign), optional(digits), '.', digits, optional(exponent)),
seq(digits, exponent)
)
)
},
}
});