-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlexer.c
More file actions
177 lines (155 loc) · 4.87 KB
/
lexer.c
File metadata and controls
177 lines (155 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#include "lexer.h"
#include <string.h>
#include <stdbool.h>
/*
 * Prepare `lexer` to scan `source` from its first character.
 *
 * Both the token-start and the cursor pointers are set to the beginning of
 * `source`, and the line counter starts at 1. The lexer keeps pointers into
 * `source` rather than copying it.
 */
void lisa_lexer_init(lisa_lexer *lexer, const char *source) {
    lexer->line = 1;
    lexer->current = source;
    lexer->start = source;
}
/* True when the cursor sits on the terminating NUL of the source text. */
static bool is_at_end(lisa_lexer *lexer) {
    return lexer->current[0] == '\0';
}
/*
 * Consume one character: return the character under the cursor and move the
 * cursor one position forward. No end-of-input check is performed here.
 */
static char advance(lisa_lexer *lexer) {
    char consumed = lexer->current[0];
    lexer->current += 1;
    return consumed;
}
/* Return the character under the cursor without consuming it. */
static char peek(lisa_lexer *lexer) {
    return lexer->current[0];
}
/*
 * Build a token of the given `type` covering the text between the lexer's
 * token-start pointer and its cursor, stamped with the current line number.
 * The token points into the source text; nothing is copied.
 */
static lisa_token make_token(lisa_lexer *lexer, lisa_token_type type) {
    const char *lexeme = lexer->start;
    lisa_token result;

    result.start = lexeme;
    result.length = (int)(lexer->current - lexeme);
    result.line = lexer->line;
    result.type = type;
    return result;
}
/*
 * Build a TOKEN_ERROR token whose text is `message` itself (not a span of the
 * source), stamped with the current line number. The token stores the
 * `message` pointer, so the string must outlive the token.
 */
static lisa_token error_token(lisa_lexer *lexer, const char *message) {
    lisa_token result;

    result.start = message;
    result.length = (int)strlen(message);
    result.line = lexer->line;
    result.type = TOKEN_ERROR;
    return result;
}
/*
 * Advance the cursor past everything that separates tokens: spaces, tabs,
 * carriage returns, commas, newlines (bumping the line counter) and
 * `;` line comments. Returns with the cursor on the first character that is
 * none of those (possibly the terminating NUL).
 */
static void skip_whitespace(lisa_lexer *lexer) {
    for (;;) {
        char ch = peek(lexer);

        if (ch == ' ' || ch == '\t' || ch == '\r' || ch == ',') {
            /* Commas count as whitespace, following Clojure. */
            advance(lexer);
        } else if (ch == '\n') {
            lexer->line++;
            advance(lexer);
        } else if (ch == ';') {
            /* Line comment: stop ON the newline so the next iteration
             * counts it. */
            while (!is_at_end(lexer) && peek(lexer) != '\n') {
                advance(lexer);
            }
        } else {
            return;
        }
    }
}
/* True for the ASCII decimal digits '0' through '9', false otherwise. */
static bool is_digit(char c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
/*
 * Decide whether `c` may appear inside a symbol.
 *
 * Rejected: NUL, space and every other byte at or below ' ' (control
 * characters), plus the delimiters ( ) [ ] " ; and comma.
 * Accepted: every other byte.
 *
 * The comparison is done on the value as `unsigned char`. Plain `char` may be
 * signed, in which case bytes >= 0x80 (e.g. the bytes of a UTF-8 sequence)
 * are negative and would fall under the "<= ' '" test on some platforms but
 * not on others. Going through `unsigned char` gives the same answer
 * everywhere: such bytes are symbol characters.
 */
static bool is_symbol_char(char c) {
    unsigned char byte = (unsigned char)c;

    if (byte <= ' ') {
        return false; /* NUL, control characters and space */
    }
    switch (byte) {
    case '(': case ')': case '[': case ']':
    case '"': case ';': case ',':
        return false;
    default:
        return true;
    }
}
/*
 * Test whether the current lexeme is a specific keyword.
 *
 * The caller has already matched the first `start` characters; this checks
 * that the lexeme is exactly `start + rest_len` characters long and that the
 * characters from offset `start` onward equal `rest`.
 *
 * Returns `type` on a match, TOKEN_SYMBOL otherwise.
 */
static lisa_token_type check_keyword(lisa_lexer *lexer, int start, int rest_len,
                                     const char *rest, lisa_token_type type) {
    const char *lexeme = lexer->start;
    int lexeme_len = (int)(lexer->current - lexeme);

    if (lexeme_len != start + rest_len) {
        return TOKEN_SYMBOL;
    }
    if (memcmp(lexeme + start, rest, (size_t)rest_len) != 0) {
        return TOKEN_SYMBOL;
    }
    return type;
}
/*
 * Classify the current lexeme as a keyword or a plain symbol.
 *
 * Recognised keywords: def, do, fn, false, if, let, nil, true. Anything else
 * yields TOKEN_SYMBOL. The lexeme is dispatched on its first (and, for 'd'
 * and 'f', second) character so that at most one comparison is made.
 */
static lisa_token_type identifier_type(lisa_lexer *lexer) {
    const char *lexeme = lexer->start;
    int len = (int)(lexer->current - lexeme);
    char first = lexeme[0];

    /* Keywords that are unique by their first letter. */
    if (first == 'i') {
        return check_keyword(lexer, 1, 1, "f", TOKEN_IF);
    }
    if (first == 'l') {
        return check_keyword(lexer, 1, 2, "et", TOKEN_LET);
    }
    if (first == 'n') {
        return check_keyword(lexer, 1, 2, "il", TOKEN_NIL);
    }
    if (first == 't') {
        return check_keyword(lexer, 1, 3, "rue", TOKEN_TRUE);
    }

    /* The remaining keywords need a second character to disambiguate. */
    if (len < 2) {
        return TOKEN_SYMBOL;
    }
    char second = lexeme[1];

    if (first == 'd') {
        if (second == 'e') {
            return check_keyword(lexer, 2, 1, "f", TOKEN_DEF);
        }
        if (second == 'o' && len == 2) {
            return TOKEN_DO;
        }
    } else if (first == 'f') {
        if (second == 'a') {
            return check_keyword(lexer, 2, 3, "lse", TOKEN_FALSE);
        }
        if (second == 'n' && len == 2) {
            return TOKEN_FN;
        }
    }
    return TOKEN_SYMBOL;
}
/*
 * Scan the rest of a numeric literal whose first character has already been
 * consumed. Consumes digits and at most one '.'; a second '.' ends the
 * literal without being consumed.
 *
 * Returns a TOKEN_DOUBLE token if a '.' was consumed, TOKEN_NUMBER otherwise.
 */
static lisa_token number(lisa_lexer *lexer) {
    bool seen_point = false;

    for (;;) {
        char ch = peek(lexer);

        if (ch == '.') {
            if (seen_point) {
                break;
            }
            seen_point = true;
        } else if (!is_digit(ch)) {
            /* Also covers the terminating NUL. */
            break;
        }
        advance(lexer);
    }

    lisa_token_type kind = seen_point ? TOKEN_DOUBLE : TOKEN_NUMBER;
    return make_token(lexer, kind);
}
/*
 * Scan the remainder of a string literal, up to and including the closing
 * double quote.
 *
 * A backslash makes the following character part of the string without
 * further inspection, so `\"` does not terminate the literal. Escape
 * sequences are not decoded here; the token covers the raw source text.
 *
 * Every newline consumed inside the literal increments the lexer's line
 * counter, including a newline that directly follows a backslash.
 *
 * Returns a TOKEN_STRING token, or an error token with the message
 * "Unterminated string." if the input ends before a closing quote.
 */
static lisa_token string(lisa_lexer *lexer) {
    while (!is_at_end(lexer) && peek(lexer) != '"') {
        if (peek(lexer) == '\\' && lexer->current[1] != '\0') {
            advance(lexer); /* skip backslash; escaped char is consumed below */
        }
        /* Test the character that is actually about to be consumed, so an
         * escaped newline is counted just like a bare one. */
        if (peek(lexer) == '\n') {
            lexer->line++;
        }
        advance(lexer);
    }

    if (is_at_end(lexer)) {
        return error_token(lexer, "Unterminated string.");
    }

    advance(lexer); /* closing quote */
    return make_token(lexer, TOKEN_STRING);
}
/*
 * Scan the rest of a symbol whose first character has already been consumed,
 * then classify the lexeme as a keyword or a plain symbol.
 */
static lisa_token symbol(lisa_lexer *lexer) {
    for (;;) {
        if (is_at_end(lexer)) {
            break;
        }
        if (!is_symbol_char(peek(lexer))) {
            break;
        }
        advance(lexer);
    }

    lisa_token_type kind = identifier_type(lexer);
    return make_token(lexer, kind);
}
/*
 * Produce the next token from the source.
 *
 * Leading whitespace, commas and `;` comments are skipped first. Then:
 *   - end of input            -> TOKEN_EOF (zero-length)
 *   - ( ) [ ]                 -> the matching bracket token
 *   - "                       -> string literal (or an error token if
 *                                unterminated)
 *   - digit, or '-' + digit   -> TOKEN_NUMBER / TOKEN_DOUBLE
 *   - any other symbol char   -> keyword token or TOKEN_SYMBOL
 *   - anything else           -> error token "Unexpected character."
 *
 * The offending character of an error is already consumed, so repeated calls
 * always make progress.
 */
lisa_token lisa_lexer_next(lisa_lexer *lexer) {
    skip_whitespace(lexer);
    lexer->start = lexer->current;

    if (is_at_end(lexer)) {
        return make_token(lexer, TOKEN_EOF);
    }

    char first = advance(lexer);

    if (first == '(') {
        return make_token(lexer, TOKEN_LPAREN);
    }
    if (first == ')') {
        return make_token(lexer, TOKEN_RPAREN);
    }
    if (first == '[') {
        return make_token(lexer, TOKEN_LBRACKET);
    }
    if (first == ']') {
        return make_token(lexer, TOKEN_RBRACKET);
    }
    if (first == '"') {
        return string(lexer);
    }

    /* A leading '-' only starts a number when a digit follows; otherwise it
     * is treated like any other symbol character below. */
    bool starts_number = is_digit(first);
    if (!starts_number && first == '-') {
        starts_number = !is_at_end(lexer) && is_digit(peek(lexer));
    }
    if (starts_number) {
        return number(lexer);
    }

    if (!is_symbol_char(first)) {
        return error_token(lexer, "Unexpected character.");
    }
    return symbol(lexer);
}