Skip to content

Commit 06a2f6e

Browse files
authored
Merge pull request #94 from amaanq/fixes
Some scanner fixes
2 parents f1be302 + bae4cdc commit 06a2f6e

File tree

1 file changed

+31
-35
lines changed

1 file changed

+31
-35
lines changed

src/scanner.c

+31 -35
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ enum TokenType {
1616

1717
/* Pretty much all of this code is taken from the Julia tree-sitter
1818
parser.
19-
19+
2020
Julia has similar problems with multiline comments that can be nested,
2121
line comments, as well as line and multiline strings.
2222
@@ -59,9 +59,9 @@ static void free_stack(Stack *stack) {
5959
free(stack);
6060
}
6161

62-
static void push(Stack *stack, char c, bool triple) {
62+
static void push(Stack *stack, char chr, bool triple) {
6363
if (stack->len >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) exit(1);
64-
stack->arr[stack->len++] = triple ? (c + 1) : c;
64+
stack->arr[stack->len++] = (Delimiter)(triple ? (chr + 1) : chr);
6565
}
6666

6767
static Delimiter pop(Stack *stack) {
@@ -128,18 +128,17 @@ static bool scan_string_content(TSLexer *lexer, Stack *stack) {
128128
// otherwise, if this is the start, determine if it is an
129129
// interpolated identifier.
130130
// otherwise, it's just string content, so continue
131-
else {
132-
advance(lexer);
133-
if (iswalpha(lexer->lookahead) || lexer->lookahead == '{') {
134-
// this must be a string interpolation, let's
135-
// fail so we parse it as such
136-
return false;
137-
}
138-
lexer->result_symbol = STRING_CONTENT;
139-
mark_end(lexer);
140-
return true;
131+
advance(lexer);
132+
if (iswalpha(lexer->lookahead) || lexer->lookahead == '{') {
133+
// this must be a string interpolation, let's
134+
// fail so we parse it as such
135+
return false;
141136
}
142-
} else if (lexer->lookahead == '\\') {
137+
lexer->result_symbol = STRING_CONTENT;
138+
mark_end(lexer);
139+
return true;
140+
}
141+
if (lexer->lookahead == '\\') {
143142
// if we see a \, then this might possibly escape a dollar sign
144143
// in which case, we need to not defer to the interpolation
145144
has_content = true;
@@ -167,7 +166,7 @@ static bool scan_string_content(TSLexer *lexer, Stack *stack) {
167166
^
168167
where we are at the `f`, we should quit after
169168
reading `foo`, and ascribe it to STRING_CONTENT.
170-
169+
171170
Then, we restart and try to read the end.
172171
This is to prevent `foo` from being absorbed into
173172
the STRING_END token.
@@ -192,27 +191,25 @@ static bool scan_string_content(TSLexer *lexer, Stack *stack) {
192191
}
193192
pop(stack);
194193
return true;
195-
} else {
196-
if (has_content) {
197-
mark_end(lexer);
198-
lexer->result_symbol = STRING_CONTENT;
199-
return true;
200-
} else {
201-
pop(stack);
202-
advance(lexer);
203-
mark_end(lexer);
204-
lexer->result_symbol = STRING_END;
205-
return true;
206-
}
207194
}
195+
if (has_content) {
196+
mark_end(lexer);
197+
lexer->result_symbol = STRING_CONTENT;
198+
return true;
199+
}
200+
pop(stack);
201+
advance(lexer);
202+
mark_end(lexer);
203+
lexer->result_symbol = STRING_END;
204+
return true;
208205
}
209206
advance(lexer);
210207
has_content = true;
211208
}
212209
return false;
213210
}
214211

215-
bool scan_multiline_comment(TSLexer *lexer) {
212+
static bool scan_multiline_comment(TSLexer *lexer) {
216213
if (lexer->lookahead != '/') return false;
217214
advance(lexer);
218215
if (lexer->lookahead != '*') return false;
@@ -262,13 +259,12 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
262259

263260
if (lexer->lookahead == '/') {
264261
return false;
265-
} else {
266-
return true;
267262
}
263+
return true;
268264
}
269265
}
270266

271-
bool scan_for_word(TSLexer *lexer, char* word, unsigned len) {
267+
static bool scan_for_word(TSLexer *lexer, const char* word, unsigned len) {
272268
skip(lexer);
273269
for (unsigned i = 0; i < len; i++) {
274270
if (lexer->lookahead != word[i]) return false;
@@ -277,7 +273,7 @@ bool scan_for_word(TSLexer *lexer, char* word, unsigned len) {
277273
return true;
278274
}
279275

280-
bool scan_automatic_semicolon(TSLexer *lexer) {
276+
static bool scan_automatic_semicolon(TSLexer *lexer) {
281277
lexer->result_symbol = AUTOMATIC_SEMICOLON;
282278
lexer->mark_end(lexer);
283279

@@ -403,7 +399,7 @@ bool scan_automatic_semicolon(TSLexer *lexer) {
403399
}
404400
}
405401

406-
bool scan_safe_nav(TSLexer *lexer) {
402+
static bool scan_safe_nav(TSLexer *lexer) {
407403
lexer->result_symbol = SAFE_NAV;
408404
lexer->mark_end(lexer);
409405

@@ -427,7 +423,7 @@ bool scan_safe_nav(TSLexer *lexer) {
427423
return true;
428424
}
429425

430-
bool scan_line_sep(TSLexer *lexer) {
426+
static bool scan_line_sep(TSLexer *lexer) {
431427
// Line Seps: [ CR, LF, CRLF ]
432428
int state = 0;
433429
while (true) {
@@ -461,7 +457,7 @@ bool scan_line_sep(TSLexer *lexer) {
461457
}
462458
}
463459

464-
bool scan_import_list_delimiter(TSLexer *lexer) {
460+
static bool scan_import_list_delimiter(TSLexer *lexer) {
465461
// Import lists are terminated either by an empty line or a non import statement
466462
lexer->result_symbol = IMPORT_LIST_DELIMITER;
467463
lexer->mark_end(lexer);

0 commit comments

Comments
 (0)