-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrammar.js
139 lines (112 loc) · 5.04 KB
/
grammar.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Single-character classes used as building blocks by the attribute rules below.
// Matches exactly one ASCII letter.
const REG_ALPHABETIC = /[a-zA-Z]/
// Matches exactly one ASCII decimal digit.
const REG_NUMERIC = /[0-9]/
// Matches exactly one ASCII punctuation character: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
const REG_ASCII_PUNCTUATION = /[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`\{|\}~]/
// Modifiers except for visibility (in order).
// Maps a symbolic name to the keyword text so grammar rules can reference
// `MODIFIERS.<Name>` instead of repeating the string literal.
const MODIFIERS = {
  Unconstrained: 'unconstrained',
  Comptime: 'comptime',
  Mut: 'mut',
}
// Noir no longer allows arbitrarily-sized integers. Also, `U128` is a struct not a numeric type.
// NOTE(review): confirm this list against the targeted Noir version (e.g. whether
// 16-bit widths belong here and whether `i1` is a valid type).
const NUMERIC_TYPES = [
  // Unsigned (`u` prefix).
  'u1',
  'u8',
  'u32',
  'u64',
  // Signed (`i` prefix).
  'i1',
  'i8',
  'i32',
  'i64',
]
// TODO: Attributes have some further captures in the Noir lexer, e.g. `foreign` captures a name afterwards. So do that also (and for the secondary attributes).
// Functions can only have one primary attribute.
// Each name is listed exactly once so that any `choice(...)` built from this
// list does not contain redundant alternatives.
const PRIMARY_ATTRIBUTES = [
  'foreign',
  'builtin',
  'oracle',
  'test',
  'recursive',
  'fold',
  'no_predicates',
  'field',
]
// Secondary attributes: a function may carry any number of these.
const SECONDARY_ATTRIBUTES = [
  'deprecated',
  'contract_library_method',
  'abi',
  'export',
]
// TODO: Code whitespace is \t, \n, \r and the literal space character.
// TODO: Attributes containing whitespace, like #[bing bong], are valid (awkwardly). What does the canonical Noir compiler consider the attribute's name to be? `bing bong`?
// Keyword::Pub
module.exports = grammar({
name: 'noir',
extras: ($) => [/\s/],
word: ($) => $.identifier,
rules: {
// Conceptually a `program` (in Noir's parser parlance).
source_file: ($) => repeat($._statement),
// Conceptually a `module` (in Noir's parser parlance).
_statement: ($) => choice($._expression_statement, $._declaration_statement),
_expression_statement: ($) => seq($._expression, ';'),
_declaration_statement: ($) => choice($.function_definition),
_expression: ($) => 'foo',
// * * * * * * * * * * * * * * * * * * * * * * * * * DECLARATIONS
// TODO: Complete coverage.
function_definition: ($) =>
seq(
// TODO: Attributes.
optional($.visibility_modifier),
optional($.function_modifiers),
'fn',
field('name', $.identifier),
// TODO: Generics.
$.parameter_list,
// TODO: Function return type.
$.block,
),
visibility_modifier: ($) => seq('pub', optional('(crate)')),
// TODO: Make this a granular list instead of all leaf nodes currently being anonymous?
// TODO: Is comptime function-specific? I don't think it is.
// TODO: Need to enforce the order of this.
// function_modifiers: ($) => repeat1(choice('unconstrained', 'comptime')),
// OPT: I personally don't think tree-sitter should report back a syntax tree as being correct if it isn't, and there's currently no easy way to have epsilon rules (save maybe a custom scanner). Look into this later. Other major languages like Rust don't have this in their tree-sitter grammar either, so for example: `pub unsafe async fn main()` is _invalid_ Rust syntax but tree-sitter will parse that and produce a CST without an error node, the correct form is `pub async unsafe fn main()` which tree-sitter also parses (this time correctly) to a CST without an error node.
function_modifiers: ($) => repeat1(choice(MODIFIERS.Unconstrained, MODIFIERS.Comptime)),
parameter_list: ($) =>
seq(
'(',
// TODO: Parameters.
')',
),
// TODO: Does Noir support empty blocks?
block: ($) =>
seq(
'{',
// repeat($._statement),
'}',
),
attribute: ($) =>
seq(
'#',
optional('!'), // Marks an InnerAttribute.
'[',
optional("'"), // Marks an attribute Tag.
alias(repeat1(choice(' ', REG_ALPHABETIC, REG_NUMERIC, REG_ASCII_PUNCTUATION)), $.content),
']',
),
// TODO: Actual logic for this, nesting, aliases etc after all the rest of the grammar is complete.
use_tree: ($) => seq('use', /.*/, ';'),
// TODO: When mostly done see if this is generic or specific to attributes, i.e. rename to just `path` and remove all the aliases elsewhere?
// TODO: Come back to a field name for this later when what's going on with attributes is more locked down.
attribute_path: ($) => seq(repeat1(choice(' ', REG_ALPHABETIC, REG_NUMERIC, REG_ASCII_PUNCTUATION))),
// _statement: $ => choice(
// $.return_statement
// // TODO: Other statements.
// ),
// TODO: Change this to explicit or implicit return statement (with or without return keyword) as well as `;` affecting returning a value or not.
// return_statement: $ => seq(
// ),
// * * * * * * * * * * * * * * * * * * * * * * * * * EXPRESSIONS
// Currently this is the canonical identifier representation.
identifier: ($) => /[a-zA-Z_][a-zA-Z0-9_]*/,
},
})