Skip to content

Commit 56bfb3e

Browse files
committed
refactor: simplify lexer and parser
1 parent b6c5bd7 commit 56bfb3e

File tree

3 files changed: +94 -102 lines changed

3 files changed: +94 -102 lines changed

src/main/java/com/github/lppedd/idea/pomsky/lang/annotator/PomskyRootAnnotator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ public void visitIdentifier(@NotNull final PomskyIdentifierPsiElement element) {
9191
return;
9292
}
9393

94-
final var message = "Unknown character class `%s`".formatted(element.getName());
94+
final var message = "Unknown character class '%s'".formatted(element.getName());
9595
holder.newAnnotation(HighlightSeverity.ERROR, message)
9696
.range(element.getTextRange())
9797
.create();

src/main/java/com/github/lppedd/idea/pomsky/lang/lexer/pomsky.flex

Lines changed: 91 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -36,10 +36,9 @@ import com.intellij.psi.TokenType;
3636
// Primitives
3737
Whitespace = \s+
3838
Number = [0-9_]+
39-
NonPrintable = [nrtaef]
4039
CodePoint = U{Whitespace}*\+{Whitespace}*[a-fA-F0-9]{1,6}
4140
Identifier = [\p{Alpha}_][\p{Alpha}\p{N}_]*
42-
GroupName = [\p{Alpha}\p{N}_-]* // This is a relaxed variant. The correct regexp is [a-zA-Z][a-zA-Z0-9]*
41+
GroupName = [\p{Alpha}\p{N}_-]+ // This is a relaxed variant. The correct regexp is [a-zA-Z][a-zA-Z0-9]*
4342

4443
// Complex tokens
4544
Comment = #.*
@@ -64,131 +63,125 @@ Keyword = let
6463

6564
%%
6665

67-
<YYINITIAL> {
68-
{Whitespace} {
69-
return TokenType.WHITE_SPACE;
70-
}
71-
72-
{Comment} {
73-
return PomskyTypes.COMMENT;
74-
}
66+
{Whitespace} {
67+
return TokenType.WHITE_SPACE;
68+
}
7569

76-
{Keyword} {
77-
return PomskyTypes.KEYWORD;
78-
}
70+
{Comment} {
71+
return PomskyTypes.COMMENT;
72+
}
7973

80-
{CodePoint} {
81-
return PomskyTypes.CODE_POINT;
82-
}
74+
{Keyword} {
75+
return PomskyTypes.KEYWORD;
76+
}
8377

84-
[\^$] | \!?% | Start | End {
85-
return PomskyTypes.BOUNDARY;
86-
}
78+
{CodePoint} {
79+
return PomskyTypes.CODE_POINT;
80+
}
8781

88-
{Identifier} {
89-
return PomskyTypes.IDENTIFIER;
90-
}
82+
[\^$] | \!?% | Start | End {
83+
return PomskyTypes.BOUNDARY;
84+
}
9185

92-
{NonPrintable} {
93-
return PomskyTypes.NON_PRINTABLE;
94-
}
86+
{Identifier} {
87+
return PomskyTypes.IDENTIFIER;
88+
}
9589

96-
' {
97-
yybegin(STRING_SINGLE);
98-
}
90+
' {
91+
yybegin(STRING_SINGLE);
92+
}
9993

100-
\" {
101-
yybegin(STRING_DOUBLE);
102-
}
94+
\" {
95+
yybegin(STRING_DOUBLE);
96+
}
10397

104-
[0-9]+ {
105-
return PomskyTypes.NUMBER;
106-
}
98+
[0-9]+ {
99+
return PomskyTypes.NUMBER;
100+
}
107101

108-
::({GroupName} | {Number})? {
109-
return PomskyTypes.GROUP_REFERENCE;
110-
}
102+
::({GroupName} | {Number})? {
103+
return PomskyTypes.GROUP_REFERENCE;
104+
}
111105

112-
, {
113-
return PomskyTypes.COMMA;
114-
}
106+
, {
107+
return PomskyTypes.COMMA;
108+
}
115109

116-
; {
117-
return PomskyTypes.SEMICOLON;
118-
}
110+
; {
111+
return PomskyTypes.SEMICOLON;
112+
}
119113

120-
: {
121-
yybegin(GROUP_EXPRESSION);
122-
return PomskyTypes.COLON;
123-
}
114+
: {
115+
yybegin(GROUP_EXPRESSION);
116+
return PomskyTypes.COLON;
117+
}
124118

125-
= {
126-
return PomskyTypes.EQ;
127-
}
119+
= {
120+
return PomskyTypes.EQ;
121+
}
128122

129-
[*+?] {
130-
return PomskyTypes.QUANTIFIER;
131-
}
123+
[*+?] {
124+
return PomskyTypes.QUANTIFIER;
125+
}
132126

133-
\| {
134-
return PomskyTypes.UNION;
135-
}
127+
\| {
128+
return PomskyTypes.UNION;
129+
}
136130

137-
\[ {
138-
return PomskyTypes.CLASS_BEGIN;
139-
}
131+
\[ {
132+
return PomskyTypes.CLASS_BEGIN;
133+
}
140134

141-
] {
142-
return PomskyTypes.CLASS_END;
143-
}
135+
] {
136+
return PomskyTypes.CLASS_END;
137+
}
144138

145-
\( {
146-
return PomskyTypes.GROUP_BEGIN;
147-
}
139+
\( {
140+
return PomskyTypes.GROUP_BEGIN;
141+
}
148142

149-
\) {
150-
return PomskyTypes.GROUP_END;
151-
}
143+
\) {
144+
return PomskyTypes.GROUP_END;
145+
}
152146

153-
\{ {
154-
return PomskyTypes.LBRACE;
155-
}
147+
\{ {
148+
return PomskyTypes.LBRACE;
149+
}
156150

157-
\} {
158-
return PomskyTypes.RBRACE;
159-
}
151+
\} {
152+
return PomskyTypes.RBRACE;
153+
}
160154

161-
>> {
162-
return PomskyTypes.LOOKAHEAD;
163-
}
155+
>> {
156+
return PomskyTypes.LOOKAHEAD;
157+
}
164158

165-
\<< {
166-
return PomskyTypes.LOOKBEHIND;
167-
}
159+
\<< {
160+
return PomskyTypes.LOOKBEHIND;
161+
}
168162

169-
\!>> {
170-
return PomskyTypes.LOOKAHEAD_NEGATED;
171-
}
163+
\!>> {
164+
return PomskyTypes.LOOKAHEAD_NEGATED;
165+
}
172166

173-
\!<< {
174-
return PomskyTypes.LOOKBEHIND_NEGATED;
175-
}
167+
\!<< {
168+
return PomskyTypes.LOOKBEHIND_NEGATED;
169+
}
176170

177-
\! {
178-
return PomskyTypes.NEGATION;
179-
}
171+
\! {
172+
return PomskyTypes.NEGATION;
173+
}
180174

181-
- {
182-
return PomskyTypes.RANGE_SEPARATOR;
183-
}
175+
- {
176+
return PomskyTypes.RANGE_SEPARATOR;
177+
}
184178

185-
\. {
186-
return PomskyTypes.DOT;
187-
}
179+
\. {
180+
return PomskyTypes.DOT;
181+
}
188182

189-
[^] {
190-
return PlainTextTokenTypes.PLAIN_TEXT;
191-
}
183+
[^] {
184+
return PlainTextTokenTypes.PLAIN_TEXT;
192185
}
193186

194187
// A literal string in the form: 'example of string' or 'example of \'string\''

src/main/java/com/github/lppedd/idea/pomsky/lang/parser/pomsky.bnf

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,6 @@
4040
BOUNDARY = "regexp:[\^$]|!?%|Start|End"
4141
STRING = "regexp:'.*'"
4242
NUMBER = "regexp:[0-9_]+"
43-
NON_PRINTABLE = "regexp:[nrtaef]"
4443
CODE_POINT = "regexp:U\+?[a-fA-F0-9]{1,6}"
4544
IDENTIFIER = "regexp:[\p{Alpha}_][\p{Alpha}\p{N}_]*"
4645
GROUP_NAME = "regexp:[a-zA-Z][a-zA-Z0-9]*"
@@ -193,7 +192,7 @@ private character_set_expression_inner ::=
193192

194193
private character_set_inner ::=
195194
character_set_range
196-
| NON_PRINTABLE
195+
| IDENTIFIER
197196
| CODE_POINT
198197
| NEGATION? STRING
199198
| NEGATION? IDENTIFIER
@@ -204,7 +203,7 @@ character_set_range ::=
204203

205204
private single_char ::=
206205
STRING
207-
| NON_PRINTABLE
206+
| IDENTIFIER
208207
| CODE_POINT
209208

210209
// *****************************************************************************

0 commit comments

Comments (0)