Skip to content

Commit 2f893d6

Browse files
committed
Refactor Lexer
The lexer needed some cleanup. I found myself doing this as part of a Unicode RFC, but I am factoring it out into its own change to make the Unicode RFC PR easier to follow. * Always use hexadecimal form for code values. * Remove use of `isNaN` for checking source over-reads. * Define `isSourceCharacter`. * Add more documentation and comments, and replace regex with lexical grammar. * Simplify error messages. * Add additional tests.
1 parent d8478dc commit 2f893d6

File tree

4 files changed

+570
-463
lines changed

4 files changed

+570
-463
lines changed

src/language/__tests__/lexer-test.ts

+74-32
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@ function expectSyntaxError(text: string) {
3030
describe('Lexer', () => {
3131
it('disallows uncommon control characters', () => {
3232
expectSyntaxError('\u0007').to.deep.equal({
33-
message: 'Syntax Error: Cannot contain the invalid character "\\u0007".',
33+
message: 'Syntax Error: Invalid character: U+0007.',
3434
locations: [{ line: 1, column: 1 }],
3535
});
3636
});
3737

38-
it('accepts BOM header', () => {
38+
it('ignores BOM header', () => {
3939
expect(lexOne('\uFEFF foo')).to.contain({
4040
kind: TokenKind.NAME,
4141
start: 2,
@@ -139,6 +139,13 @@ describe('Lexer', () => {
139139
value: 'foo',
140140
});
141141

142+
expect(lexOne('\t\tfoo\t\t')).to.contain({
143+
kind: TokenKind.NAME,
144+
start: 2,
145+
end: 5,
146+
value: 'foo',
147+
});
148+
142149
expect(
143150
lexOne(`
144151
#comment
@@ -167,7 +174,7 @@ describe('Lexer', () => {
167174
caughtError = error;
168175
}
169176
expect(String(caughtError)).to.equal(dedent`
170-
Syntax Error: Cannot parse the unexpected character "?".
177+
Syntax Error: Unexpected character: "?".
171178
172179
GraphQL request:3:5
173180
2 |
@@ -187,7 +194,7 @@ describe('Lexer', () => {
187194
caughtError = error;
188195
}
189196
expect(String(caughtError)).to.equal(dedent`
190-
Syntax Error: Cannot parse the unexpected character "?".
197+
Syntax Error: Unexpected character: "?".
191198
192199
foo.js:13:6
193200
12 |
@@ -206,7 +213,7 @@ describe('Lexer', () => {
206213
caughtError = error;
207214
}
208215
expect(String(caughtError)).to.equal(dedent`
209-
Syntax Error: Cannot parse the unexpected character "?".
216+
Syntax Error: Unexpected character: "?".
210217
211218
foo.js:1:5
212219
1 | ?
@@ -294,13 +301,13 @@ describe('Lexer', () => {
294301

295302
expectSyntaxError('"contains unescaped \u0007 control char"').to.deep.equal(
296303
{
297-
message: 'Syntax Error: Invalid character within String: "\\u0007".',
304+
message: 'Syntax Error: Invalid character within String: U+0007.',
298305
locations: [{ line: 1, column: 21 }],
299306
},
300307
);
301308

302309
expectSyntaxError('"null-byte is not \u0000 end of file"').to.deep.equal({
303-
message: 'Syntax Error: Invalid character within String: "\\u0000".',
310+
message: 'Syntax Error: Invalid character within String: U+0000.',
304311
locations: [{ line: 1, column: 19 }],
305312
});
306313

@@ -315,38 +322,38 @@ describe('Lexer', () => {
315322
});
316323

317324
expectSyntaxError('"bad \\z esc"').to.deep.equal({
318-
message: 'Syntax Error: Invalid character escape sequence: \\z.',
319-
locations: [{ line: 1, column: 7 }],
325+
message: 'Syntax Error: Invalid character escape sequence: "\\z".',
326+
locations: [{ line: 1, column: 6 }],
320327
});
321328

322329
expectSyntaxError('"bad \\x esc"').to.deep.equal({
323-
message: 'Syntax Error: Invalid character escape sequence: \\x.',
324-
locations: [{ line: 1, column: 7 }],
330+
message: 'Syntax Error: Invalid character escape sequence: "\\x".',
331+
locations: [{ line: 1, column: 6 }],
325332
});
326333

327334
expectSyntaxError('"bad \\u1 esc"').to.deep.equal({
328-
message: 'Syntax Error: Invalid character escape sequence: \\u1 es.',
329-
locations: [{ line: 1, column: 7 }],
335+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1 es".',
336+
locations: [{ line: 1, column: 6 }],
330337
});
331338

332339
expectSyntaxError('"bad \\u0XX1 esc"').to.deep.equal({
333-
message: 'Syntax Error: Invalid character escape sequence: \\u0XX1.',
334-
locations: [{ line: 1, column: 7 }],
340+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u0XX1".',
341+
locations: [{ line: 1, column: 6 }],
335342
});
336343

337344
expectSyntaxError('"bad \\uXXXX esc"').to.deep.equal({
338-
message: 'Syntax Error: Invalid character escape sequence: \\uXXXX.',
339-
locations: [{ line: 1, column: 7 }],
345+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXX".',
346+
locations: [{ line: 1, column: 6 }],
340347
});
341348

342349
expectSyntaxError('"bad \\uFXXX esc"').to.deep.equal({
343-
message: 'Syntax Error: Invalid character escape sequence: \\uFXXX.',
344-
locations: [{ line: 1, column: 7 }],
350+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uFXXX".',
351+
locations: [{ line: 1, column: 6 }],
345352
});
346353

347354
expectSyntaxError('"bad \\uXXXF esc"').to.deep.equal({
348-
message: 'Syntax Error: Invalid character escape sequence: \\uXXXF.',
349-
locations: [{ line: 1, column: 7 }],
355+
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".',
356+
locations: [{ line: 1, column: 6 }],
350357
});
351358
});
352359

@@ -482,14 +489,14 @@ describe('Lexer', () => {
482489
expectSyntaxError(
483490
'"""contains unescaped \u0007 control char"""',
484491
).to.deep.equal({
485-
message: 'Syntax Error: Invalid character within String: "\\u0007".',
492+
message: 'Syntax Error: Invalid character within String: U+0007.',
486493
locations: [{ line: 1, column: 23 }],
487494
});
488495

489496
expectSyntaxError(
490497
'"""null-byte is not \u0000 end of file"""',
491498
).to.deep.equal({
492-
message: 'Syntax Error: Invalid character within String: "\\u0000".',
499+
message: 'Syntax Error: Invalid character within String: U+0000.',
493500
locations: [{ line: 1, column: 21 }],
494501
});
495502
});
@@ -625,7 +632,7 @@ describe('Lexer', () => {
625632
});
626633

627634
expectSyntaxError('+1').to.deep.equal({
628-
message: 'Syntax Error: Cannot parse the unexpected character "+".',
635+
message: 'Syntax Error: Unexpected character: "+".',
629636
locations: [{ line: 1, column: 1 }],
630637
});
631638

@@ -650,7 +657,7 @@ describe('Lexer', () => {
650657
});
651658

652659
expectSyntaxError('.123').to.deep.equal({
653-
message: 'Syntax Error: Cannot parse the unexpected character ".".',
660+
message: 'Syntax Error: Unexpected character: ".".',
654661
locations: [{ line: 1, column: 1 }],
655662
});
656663

@@ -674,6 +681,11 @@ describe('Lexer', () => {
674681
locations: [{ line: 1, column: 5 }],
675682
});
676683

684+
expectSyntaxError('1.0e"').to.deep.equal({
685+
message: "Syntax Error: Invalid number, expected digit but got: '\"'.",
686+
locations: [{ line: 1, column: 5 }],
687+
});
688+
677689
expectSyntaxError('1.2e3e').to.deep.equal({
678690
message: 'Syntax Error: Invalid number, expected digit but got: "e".',
679691
locations: [{ line: 1, column: 6 }],
@@ -708,7 +720,7 @@ describe('Lexer', () => {
708720
locations: [{ line: 1, column: 2 }],
709721
});
710722
expectSyntaxError('1\u00DF').to.deep.equal({
711-
message: 'Syntax Error: Cannot parse the unexpected character "\\u00DF".',
723+
message: 'Syntax Error: Unexpected character: U+00DF.',
712724
locations: [{ line: 1, column: 2 }],
713725
});
714726
expectSyntaxError('1.23f').to.deep.equal({
@@ -816,22 +828,27 @@ describe('Lexer', () => {
816828

817829
it('lex reports useful unknown character error', () => {
818830
expectSyntaxError('..').to.deep.equal({
819-
message: 'Syntax Error: Cannot parse the unexpected character ".".',
831+
message: 'Syntax Error: Unexpected character: ".".',
820832
locations: [{ line: 1, column: 1 }],
821833
});
822834

823835
expectSyntaxError('?').to.deep.equal({
824-
message: 'Syntax Error: Cannot parse the unexpected character "?".',
836+
message: 'Syntax Error: Unexpected character: "?".',
825837
locations: [{ line: 1, column: 1 }],
826838
});
827839

828-
expectSyntaxError('\u203B').to.deep.equal({
829-
message: 'Syntax Error: Cannot parse the unexpected character "\\u203B".',
840+
expectSyntaxError('\u00AA').to.deep.equal({
841+
message: 'Syntax Error: Unexpected character: U+00AA.',
842+
locations: [{ line: 1, column: 1 }],
843+
});
844+
845+
expectSyntaxError('\u0AAA').to.deep.equal({
846+
message: 'Syntax Error: Unexpected character: U+0AAA.',
830847
locations: [{ line: 1, column: 1 }],
831848
});
832849

833-
expectSyntaxError('\u200b').to.deep.equal({
834-
message: 'Syntax Error: Cannot parse the unexpected character "\\u200B".',
850+
expectSyntaxError('\u203B').to.deep.equal({
851+
message: 'Syntax Error: Unexpected character: U+203B.',
835852
locations: [{ line: 1, column: 1 }],
836853
});
837854
});
@@ -894,6 +911,31 @@ describe('Lexer', () => {
894911
TokenKind.EOF,
895912
]);
896913
});
914+
915+
it('lexes comments', () => {
916+
expect(lexOne('# Comment').prev).to.contain({
917+
kind: TokenKind.COMMENT,
918+
start: 0,
919+
end: 9,
920+
value: ' Comment',
921+
});
922+
expect(lexOne('# Comment\nAnother line').prev).to.contain({
923+
kind: TokenKind.COMMENT,
924+
start: 0,
925+
end: 9,
926+
value: ' Comment',
927+
});
928+
expect(lexOne('# Comment\r\nAnother line').prev).to.contain({
929+
kind: TokenKind.COMMENT,
930+
start: 0,
931+
end: 9,
932+
value: ' Comment',
933+
});
934+
expectSyntaxError('# \u0007').to.deep.equal({
935+
message: 'Syntax Error: Invalid character: U+0007.',
936+
locations: [{ line: 1, column: 3 }],
937+
});
938+
});
897939
});
898940

899941
describe('isPunctuatorTokenKind', () => {

src/language/ast.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ export class Token {
9696
end: number,
9797
line: number,
9898
column: number,
99-
prev: Token | null,
10099
value?: string,
101100
) {
102101
this.kind = kind;
@@ -105,7 +104,7 @@ export class Token {
105104
this.line = line;
106105
this.column = column;
107106
this.value = value as string;
108-
this.prev = prev;
107+
this.prev = null;
109108
this.next = null;
110109
}
111110

0 commit comments

Comments
 (0)