1
1
// There are 2 types of Token, constant ones (with no "value") and the ones with attached
2
- // value that is variable (e.g. IDENT, INT, FLOAT, STRING, COMMENT).
3
- // We'd use the upcoming unique https://tip.golang.org/doc/go1.23#new-unique-package
4
- // but we want this to run on 1.22 and earlier.
2
+ // value that is variable (e.g. IDENT, INT, FLOAT, STRING, LINECOMMENT, BLOCKCOMMENT).
3
+ // We might use the upcoming unique https://tip.golang.org/doc/go1.23#new-unique-package
4
+ // but we want this to run on 1.22 and earlier, so we rolled our own (not multi-threaded) version.
5
5
package token
6
6
7
7
import (
@@ -17,13 +17,14 @@ type noCopy struct{}
17
17
// Lock is a no-op. Together with Unlock it gives noCopy the method set of a
// lock, which is what lets go vet report accidental copies of types holding it.
func (* noCopy ) Lock () {}
18
18
// Unlock is a no-op; it only exists as the counterpart of Lock so that go vet
// treats noCopy like a lock value.
func (* noCopy ) Unlock () {}
19
19
20
- type Type uint8
20
+ type Type byte
21
21
22
22
type Token struct {
23
23
// Allows go vet to flag accidental copies of this type,
24
24
// though with an error about lock value which can be confusing
25
25
_ noCopy
26
26
tokenType Type
27
+ charCode byte // 1 byte serialization printable code.
27
28
literal string
28
29
}
29
30
@@ -42,7 +43,11 @@ func InternToken(t *Token) *Token {
42
43
}
43
44
44
45
func Intern (t Type , literal string ) * Token {
45
- return InternToken (& Token {tokenType : t , literal : literal })
46
+ code := tToCode [t ]
47
+ if code == 0 {
48
+ panic ("no code for " + t .String ())
49
+ }
50
+ return InternToken (& Token {tokenType : t , literal : literal , charCode : code })
46
51
}
47
52
48
53
func ResetInterning () {
@@ -56,9 +61,10 @@ const (
56
61
startValueTokens
57
62
58
63
// Identifiers + literals. with attached value.
59
- IDENT // add, foobar, x, y, ...
60
- INT // 1343456
61
- FLOAT // 1. 1e3
64
+ IDENT // add, foobar, x, y, ...
65
+ INT // 1343456
66
+ FLOAT // .5, 3.14159,...
67
+ STRING // "foo bar"
62
68
LINECOMMENT
63
69
BLOCKCOMMENT
64
70
@@ -112,7 +118,6 @@ const (
112
118
IF
113
119
ELSE
114
120
RETURN
115
- STRING
116
121
MACRO
117
122
// Macro magic.
118
123
QUOTE
@@ -139,56 +144,95 @@ var (
139
144
)
140
145
141
146
var (
142
- keywords map [string ]* Token
143
- cTokens map [byte ]* Token
144
- c2Tokens map [[2 ]byte ]* Token
145
- tToChar map [Type ]byte
146
- tToT map [Type ]* Token // for all token that are constant.
147
+ keywords map [string ]* Token
148
+ cTokens [256 ]* Token // for all token that are single char.
149
+ tToCode [256 ]byte // iota code to char code.
150
+ codeToT [256 ]Type // char code to Type
151
+ c2Tokens map [[2 ]byte ]* Token
152
+ tToChar map [Type ]byte
153
+ tToT map [Type ]* Token // for all token that are constant.
154
+ tokensCount int
147
155
)
148
156
149
157
func init () {
150
158
Init ()
151
159
}
152
160
161
+ func assocCodeAndToken (t Type , code byte ) {
162
+ if tToCode [t ] != 0 {
163
+ panic ("duplicate code for " + t .String () + " " + string (tToCode [t ]) + " vs " + string (code ))
164
+ }
165
+ tToCode [t ] = code
166
+ if codeToT [code ] != 0 {
167
+ panic ("duplicate token for " + string (code ) + " " + codeToT [code ].String () + " vs " + t .String ())
168
+ }
169
+ codeToT [code ] = t
170
+ tokensCount ++
171
+ }
172
+
153
173
// assoc registers the single character token of type t whose literal and
// 1 byte code are both c. The token is stored in cTokens and tToT, the
// character in tToChar, and the type/code pair goes through assocCodeAndToken.
func assoc(t Type, c byte) {
	single := &Token{tokenType: t, literal: string(c), charCode: c}
	tToChar[t] = c
	cTokens[c] = single
	tToT[t] = single
	assocCodeAndToken(t, c)
}
159
180
160
- func assocS (t Type , s string ) * Token {
161
- tok := & Token {tokenType : t , literal : s }
181
+ func assocS (t Type , s string , code byte ) * Token {
182
+ tok := & Token {tokenType : t , literal : s , charCode : code }
162
183
old := InternToken (tok )
163
184
if old != tok {
164
185
panic ("duplicate token for " + s )
165
186
}
166
187
tToT [t ] = tok
188
+ assocCodeAndToken (t , code )
167
189
return tok
168
190
}
169
191
170
- func assocC2 (t Type , str string ) {
192
+ func assocC2 (t Type , str string , code byte ) {
171
193
if len (str ) != 2 {
172
194
panic ("assocC2: expected 2 char string" )
173
195
}
174
- tok := & Token {tokenType : t , literal : str }
196
+ tok := & Token {tokenType : t , literal : str , charCode : code }
175
197
old := InternToken (tok )
176
198
if old != tok {
177
199
panic ("duplicate token for " + str )
178
200
}
179
201
tToT [t ] = tok
180
202
c2Tokens [[2 ]byte {str [0 ], str [1 ]}] = tok
203
+ assocCodeAndToken (t , code )
181
204
}
182
205
183
- func Init () {
206
+ func Init () { //nolint:funlen // we need all this.
184
207
ResetInterning ()
208
+ tokensCount = 0
185
209
keywords = make (map [string ]* Token )
186
- cTokens = make (map [byte ]* Token )
210
+ clear (cTokens [:])
211
+ clear (tToCode [:])
212
+ clear (codeToT [:])
187
213
c2Tokens = make (map [[2 ]byte ]* Token )
188
214
tToChar = make (map [Type ]byte )
189
215
tToT = make (map [Type ]* Token )
190
216
for i := startIdentityTokens + 1 ; i < endIdentityTokens ; i ++ {
191
- t := assocS (i , strings .ToLower (i .String ()))
217
+ str := i .String ()
218
+ code := str [0 ] // will be dup for some, we're fixing this below.
219
+ switch i { //nolint:exhaustive // we're only fixing the ones conflicting.
220
+ case TRUE :
221
+ code = 't'
222
+ case FALSE :
223
+ code = 'f'
224
+ case FIRST :
225
+ code = '1'
226
+ case REST :
227
+ code = '2'
228
+ case PRINTLN :
229
+ code = 'N'
230
+ case ERROR :
231
+ code = 'e'
232
+ case LOG :
233
+ code = 'p'
234
+ }
235
+ t := assocS (i , strings .ToLower (str ), code )
192
236
keywords [t .literal ] = t
193
237
}
194
238
TRUET = tToT [TRUE ]
@@ -218,8 +262,8 @@ func Init() {
218
262
if ! ok {
219
263
panic ("missing single character token char lookup for " + i .String ())
220
264
}
221
- v , ok := cTokens [b ]
222
- if ! ok {
265
+ v := cTokens [b ]
266
+ if v == nil {
223
267
panic ("missing single character token for " + i .String ())
224
268
}
225
269
if v .tokenType != i {
@@ -238,14 +282,25 @@ func Init() {
238
282
}
239
283
}
240
284
// Multi character non identity tokens.
241
- assocC2 (LTEQ , "<=" )
242
- assocC2 (GTEQ , ">=" )
243
- assocC2 (EQ , "==" )
244
- assocC2 (NOTEQ , "!=" )
245
- assocC2 (INCR , "++" )
246
- assocC2 (DECR , "--" )
285
+ assocC2 (LTEQ , "<=" , 'l' )
286
+ assocC2 (GTEQ , ">=" , 'g' )
287
+ assocC2 (EQ , "==" , '_' )
288
+ assocC2 (NOTEQ , "!=" , 'n' )
289
+ assocC2 (INCR , "++" , 'i' )
290
+ assocC2 (DECR , "--" , 'd' )
247
291
// Special alias for := to be same as ASSIGN.
248
292
c2Tokens [[2 ]byte {':' , '=' }] = cTokens ['=' ]
293
+ // Valued tokens:
294
+ assocCodeAndToken (IDENT , ' ' )
295
+ assocCodeAndToken (INT , '0' )
296
+ assocCodeAndToken (FLOAT , '.' )
297
+ assocCodeAndToken (STRING , 's' )
298
+ assocCodeAndToken (LINECOMMENT , 'c' )
299
+ assocCodeAndToken (BLOCKCOMMENT , 'b' )
300
+
301
+ assocCodeAndToken (ILLEGAL , 1 ) // doesn't need to be printable.
302
+ assocCodeAndToken (EOL , 10 ) // nl but not printed anyway
303
+ assocCodeAndToken (EOF , 'z' ) // also not used/visible
249
304
}
250
305
251
306
//go:generate stringer -type=Type
@@ -256,14 +311,14 @@ func LookupIdent(ident string) *Token {
256
311
if t , ok := keywords [ident ]; ok {
257
312
return t
258
313
}
259
- return InternToken (& Token {tokenType : IDENT , literal : ident })
314
+ return InternToken (& Token {tokenType : IDENT , literal : ident , charCode : ' ' })
260
315
}
261
316
262
317
// ByType is the cheapest lookup for all the tokens whose type
263
318
// only have one possible instance/value
264
319
// (ie all the tokens except for the first 4 value tokens).
265
320
// TODO: codegen all the token constants to avoid needing this function.
266
- // (even though that's better than string comparaisons ).
321
+ // (even though that's better than string comparisons ).
267
322
func ByType (t Type ) * Token {
268
323
return tToT [t ]
269
324
}
@@ -276,6 +331,10 @@ func (t *Token) Type() Type {
276
331
return t .tokenType
277
332
}
278
333
334
+ func (t * Token ) Code () byte {
335
+ return t .charCode
336
+ }
337
+
279
338
// ConstantTokenChar returns the single character token stored in cTokens for
// the byte literal, or nil when that slot is empty.
func ConstantTokenChar(literal byte) *Token {
	return cTokens[literal]
}
@@ -287,3 +346,7 @@ func ConstantTokenChar2(c1, c2 byte) *Token {
287
346
// DebugString renders the token as its type name, a colon, and its literal
// quoted with strconv.Quote.
func (t *Token) DebugString() string {
	kind := t.Type().String()
	quoted := strconv.Quote(t.Literal())
	return kind + ":" + quoted
}
349
+
350
+ func NumTokens () int {
351
+ return tokensCount
352
+ }
0 commit comments