Skip to content

Commit f760476

Browse files
authored
fix(lexer): Correct handling of comments (#108)
1 parent 2122883 commit f760476

19 files changed

+334
-891
lines changed

lib/lang/__samples__/groovy-01.out.json

Lines changed: 27 additions & 730 deletions
Large diffs are not rendered by default.

lib/lexer/bracket.spec.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import type { BracketOption, StatesMap } from './types';
55

66
describe('lexer/bracket', () => {
77
it('works', () => {
8-
const states: StatesMap = { $: { unknown: fallbackRule } };
8+
const states: StatesMap = {
9+
$: { unknown: { ...fallbackRule, type: 'unknown' } },
10+
};
911
const bracketOptions: BracketOption[] = [
1012
{ startsWith: '{', endsWith: '}' },
1113
{ startsWith: '[', endsWith: ']' },

lib/lexer/bracket.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import { sortStateRules } from './rules';
21
import type {
32
BracketOption,
43
StateDefinition,
@@ -41,13 +40,17 @@ export function configBrackets(
4140
const leftTokenKey = `${tokenKey}$left`;
4241
const leftTokenRule: StringRule = {
4342
t: 'string',
43+
type: leftTokenKey,
4444
match: option.startsWith,
45+
chunk: option.startsWith,
4546
};
4647

4748
const rightTokenKey = `${tokenKey}$right`;
4849
const rightTokenRule: StringRule = {
4950
t: 'string',
51+
type: rightTokenKey,
5052
match: option.endsWith,
53+
chunk: option.endsWith,
5154
};
5255

5356
bracketDefs[leftTokenKey] = leftTokenRule;
@@ -56,9 +59,9 @@ export function configBrackets(
5659

5760
return {
5861
...states,
59-
$: sortStateRules({
62+
$: {
6063
...states.$,
6164
...bracketDefs,
62-
}),
65+
},
6366
};
6467
}

lib/lexer/comment.spec.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@ import type { CommentOption, StatesMap } from './types';
66

77
describe('lexer/comment', () => {
88
const states: StatesMap = {
9-
$: {
10-
unknown: fallbackRule,
11-
},
9+
$: { unknown: { ...fallbackRule, type: 'unknown' } },
1210
};
1311

1412
const opts: CommentOption[] = [

lib/lexer/comment.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import { sortStateRules } from './rules';
21
import type {
32
CommentOption,
43
RegexRule,
@@ -22,26 +21,30 @@ export function configComments(
2221
const anyChars = '.*?';
2322
const rule: RegexRule = {
2423
t: 'regex',
24+
type: tokenName,
2525
match: new RegExp(`${start}${anyChars}$`),
26+
chunk: option.startsWith,
2627
};
2728
commentRules[tokenName] = rule;
2829
} else if (option.type === 'multiline-comment') {
2930
const anyChars = '[^]*?';
3031
const end = esc(option.endsWith);
3132
const rule: RegexRule = {
3233
t: 'regex',
34+
type: tokenName,
3335
match: new RegExp(`${start}${anyChars}${end}`),
3436
lineBreaks: true,
37+
chunk: option.startsWith,
3538
};
3639
commentRules[tokenName] = rule;
3740
}
3841
});
3942

4043
return {
4144
...states,
42-
$: sortStateRules({
45+
$: {
4346
...states.$,
4447
...commentRules,
45-
}),
48+
},
4649
};
4750
}

lib/lexer/index.ts

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,51 @@ import { configBrackets } from './bracket';
33
import { configComments } from './comment';
44
import { configNumbers } from './number';
55
import { configOperators } from './operator';
6-
import { fallbackRule } from './rules';
6+
import { createOrderedStateMap, fallbackRule } from './rules';
77
import { configStrings } from './string';
88
import { configSymbols } from './symbol';
99
import { coerceToken } from './token';
10-
import type { Lexer, LexerConfig, RegexRule, StatesMap, Token } from './types';
10+
import type {
11+
Lexer,
12+
LexerConfig,
13+
OrderedStatesMap,
14+
RegexRule,
15+
StatesMap,
16+
Token,
17+
} from './types';
1118

1219
export * from './token';
1320
export * from './types';
1421

15-
export function configureLexerRules(lexerConfig: LexerConfig): StatesMap {
22+
export function configureLexerRules(
23+
lexerConfig: LexerConfig
24+
): OrderedStatesMap {
1625
const whitespace: RegexRule = lexerConfig.joinLines
1726
? {
1827
t: 'regex',
28+
type: 'whitespace',
1929
match: new RegExp(`(?:${lexerConfig.joinLines}\\r?\\n|[ \\t\\r])+`),
2030
lineBreaks: true,
31+
chunk: null,
2132
}
22-
: { t: 'regex', match: /[ \t\r]+/ };
33+
: {
34+
t: 'regex',
35+
type: 'whitespace',
36+
match: /[ \t\r]+/,
37+
chunk: null,
38+
};
2339

2440
let result: StatesMap = {
2541
$: {
2642
whitespace,
27-
newline: { t: 'regex', match: /\r?\n/, lineBreaks: true },
28-
_: fallbackRule,
43+
newline: {
44+
t: 'regex',
45+
type: 'newline',
46+
match: /\r?\n/,
47+
chunk: null,
48+
lineBreaks: true,
49+
},
50+
_: { ...fallbackRule, type: '_' },
2951
},
3052
};
3153

@@ -37,12 +59,14 @@ export function configureLexerRules(lexerConfig: LexerConfig): StatesMap {
3759
result = configBrackets(result, brackets);
3860
result = configStrings(result, strings);
3961
result = configNumbers(result, { match: numbers });
40-
return result;
62+
63+
const orderedResult = createOrderedStateMap(result);
64+
return orderedResult;
4165
}
4266

4367
export function createLexer(options: LexerConfig): Lexer {
4468
const rules = configureLexerRules(options);
45-
const mooLexer = mooStates(rules);
69+
const mooLexer = mooStates(rules as never);
4670

4771
const result: Lexer = {
4872
reset(input?: string) {

lib/lexer/number.spec.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import type { NumberOption, StatesMap } from './types';
55

66
describe('lexer/number', () => {
77
it('works', () => {
8-
const states: StatesMap = { $: { unknown: fallbackRule } };
8+
const states: StatesMap = {
9+
$: { unknown: { ...fallbackRule, type: 'unknown' } },
10+
};
911
const symbolOption: NumberOption = { match: /[0-9]+/ };
1012
const rules = configNumbers(states, symbolOption);
1113
const input = '40+2';

lib/lexer/number.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,13 @@ export function configNumbers(
77
return {
88
...states,
99
$: {
10-
number: { t: 'regex', match },
1110
...states.$,
11+
number: {
12+
t: 'regex',
13+
type: 'number',
14+
match,
15+
chunk: null,
16+
},
1217
},
1318
};
1419
}

lib/lexer/operator.spec.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import { tokenize } from '../../test/test-utils';
55

66
describe('lexer/operator', () => {
77
it('works', () => {
8-
const states: StatesMap = { $: { unknown: fallbackRule } };
8+
const states: StatesMap = {
9+
$: { unknown: { ...fallbackRule, type: 'unknown' } },
10+
};
911
const bracketOptions: OperatorOption[] = ['+'];
1012
const rules = configOperators(states, bracketOptions);
1113
const input = '2+2';

lib/lexer/operator.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import { sortStateRules } from './rules';
21
import type {
32
OperatorOption,
43
StateDefinition,
@@ -13,15 +12,20 @@ export function configOperators(
1312
const operatorRules: StateDefinition = {};
1413
operators.map((op, idx) => {
1514
const tokenName = `op$${idx}`;
16-
const tokenRule: StringRule = { t: 'string', match: op };
15+
const tokenRule: StringRule = {
16+
t: 'string',
17+
type: tokenName,
18+
match: op,
19+
chunk: op,
20+
};
1721
operatorRules[tokenName] = tokenRule;
1822
});
1923

2024
return {
2125
...states,
22-
$: sortStateRules({
26+
$: {
2327
...states.$,
2428
...operatorRules,
25-
}),
29+
},
2630
};
2731
}

0 commit comments

Comments
 (0)