Skip to content

Commit 89b7d26

Browse files
committed
Add unique single printable character to Token for serialization
1 parent f5798dc commit 89b7d26

File tree

3 files changed

+147
-74
lines changed

3 files changed

+147
-74
lines changed

token/token.go

+95-32
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// There are 2 types of Token, constant ones (with no "value") and the ones with attached
2-
// value that is variable (e.g. IDENT, INT, FLOAT, STRING, COMMENT).
3-
// We'd use the upcoming unique https://tip.golang.org/doc/go1.23#new-unique-package
4-
// but we want this to run on 1.22 and earlier.
2+
// value that is variable (e.g. IDENT, INT, FLOAT, STRING, *COMMENT).
3+
// We might use the upcoming unique https://tip.golang.org/doc/go1.23#new-unique-package
4+
// but we want this to run on 1.22 and earlier, so we rolled our own (single-threaded) interning.
55
package token
66

77
import (
@@ -17,13 +17,14 @@ type noCopy struct{}
1717
func (*noCopy) Lock() {}
1818
func (*noCopy) Unlock() {}
1919

20-
type Type uint8
20+
type Type byte
2121

2222
// Token is a single lexical token: its Type, a one-byte serialization code,
// and its literal text.
type Token struct {
2323
// The blank noCopy field (noCopy has Lock/Unlock methods) allows go vet to
// flag accidental copies of this type,
2424
// though the resulting error mentions a lock value, which can be confusing.
2525
_ noCopy
2626
tokenType Type
27+
charCode byte // One-byte serialization code (printable for most tokens).
2728
literal string
2829
}
2930

@@ -42,7 +43,11 @@ func InternToken(t *Token) *Token {
4243
}
4344

4445
func Intern(t Type, literal string) *Token {
45-
return InternToken(&Token{tokenType: t, literal: literal})
46+
code := tToCode[t]
47+
if code == 0 {
48+
panic("no code for " + t.String())
49+
}
50+
return InternToken(&Token{tokenType: t, literal: literal, charCode: code})
4651
}
4752

4853
func ResetInterning() {
@@ -56,9 +61,10 @@ const (
5661
startValueTokens
5762

5863
// Identifiers + literals. with attached value.
59-
IDENT // add, foobar, x, y, ...
60-
INT // 1343456
61-
FLOAT // 1. 1e3
64+
IDENT // add, foobar, x, y, ...
65+
INT // 1343456
66+
FLOAT // .5, 3.14159,...
67+
STRING // "foo bar"
6268
LINECOMMENT
6369
BLOCKCOMMENT
6470

@@ -112,7 +118,6 @@ const (
112118
IF
113119
ELSE
114120
RETURN
115-
STRING
116121
MACRO
117122
// Macro magic.
118123
QUOTE
@@ -139,56 +144,95 @@ var (
139144
)
140145

141146
var (
142-
keywords map[string]*Token
143-
cTokens map[byte]*Token
144-
c2Tokens map[[2]byte]*Token
145-
tToChar map[Type]byte
146-
tToT map[Type]*Token // for all token that are constant.
147+
keywords map[string]*Token
148+
cTokens [256]*Token // for all token that are single char.
149+
tToCode [256]byte // iota code to char code.
150+
codeToT [256]Type // char code to Type
151+
c2Tokens map[[2]byte]*Token
152+
tToChar map[Type]byte
153+
tToT map[Type]*Token // for all token that are constant.
154+
tokensCount int
147155
)
148156

149157
func init() {
150158
Init()
151159
}
152160

161+
// assocCodeAndToken records the two-way mapping between a token Type and its
// one-byte serialization code (tToCode and codeToT) and increments tokensCount.
// It panics if the type already has a code, or if the code already maps to a
// type; a zero entry in either table is treated as "not yet assigned".
func assocCodeAndToken(t Type, code byte) {
162+
if tToCode[t] != 0 {
163+
panic("duplicate code for " + t.String() + " " + string(tToCode[t]) + " vs " + string(code))
164+
}
165+
tToCode[t] = code
166+
// NOTE(review): a Type whose numeric value is 0 stored in codeToT is
// indistinguishable from an unassigned slot, so a later reuse of its code
// would not be detected by this check — confirm this is acceptable.
if codeToT[code] != 0 {
167+
panic("duplicate token for " + string(code) + " " + codeToT[code].String() + " vs " + t.String())
168+
}
169+
codeToT[code] = t
170+
tokensCount++
171+
}
172+
153173
func assoc(t Type, c byte) {
154174
tToChar[t] = c
155-
tok := &Token{tokenType: t, literal: string(c)}
175+
tok := &Token{tokenType: t, literal: string(c), charCode: c}
156176
cTokens[c] = tok
157177
tToT[t] = tok
178+
assocCodeAndToken(t, c)
158179
}
159180

160-
func assocS(t Type, s string) *Token {
161-
tok := &Token{tokenType: t, literal: s}
181+
func assocS(t Type, s string, code byte) *Token {
182+
tok := &Token{tokenType: t, literal: s, charCode: code}
162183
old := InternToken(tok)
163184
if old != tok {
164185
panic("duplicate token for " + s)
165186
}
166187
tToT[t] = tok
188+
assocCodeAndToken(t, code)
167189
return tok
168190
}
169191

170-
func assocC2(t Type, str string) {
192+
func assocC2(t Type, str string, code byte) {
171193
if len(str) != 2 {
172194
panic("assocC2: expected 2 char string")
173195
}
174-
tok := &Token{tokenType: t, literal: str}
196+
tok := &Token{tokenType: t, literal: str, charCode: code}
175197
old := InternToken(tok)
176198
if old != tok {
177199
panic("duplicate token for " + str)
178200
}
179201
tToT[t] = tok
180202
c2Tokens[[2]byte{str[0], str[1]}] = tok
203+
assocCodeAndToken(t, code)
181204
}
182205

183-
func Init() {
206+
func Init() { //nolint:funlen // we need all this.
184207
ResetInterning()
208+
tokensCount = 0
185209
keywords = make(map[string]*Token)
186-
cTokens = make(map[byte]*Token)
210+
clear(cTokens[:])
211+
clear(tToCode[:])
212+
clear(codeToT[:])
187213
c2Tokens = make(map[[2]byte]*Token)
188214
tToChar = make(map[Type]byte)
189215
tToT = make(map[Type]*Token)
190216
for i := startIdentityTokens + 1; i < endIdentityTokens; i++ {
191-
t := assocS(i, strings.ToLower(i.String()))
217+
str := i.String()
218+
code := str[0] // will be dup for some, we're fixing this below.
219+
switch i { //nolint:exhaustive // we're only fixing the ones conflicting.
220+
case TRUE:
221+
code = 't'
222+
case FALSE:
223+
code = 'f'
224+
case FIRST:
225+
code = '1'
226+
case REST:
227+
code = '2'
228+
case PRINTLN:
229+
code = 'N'
230+
case ERROR:
231+
code = 'e'
232+
case LOG:
233+
code = 'p'
234+
}
235+
t := assocS(i, strings.ToLower(str), code)
192236
keywords[t.literal] = t
193237
}
194238
TRUET = tToT[TRUE]
@@ -218,8 +262,8 @@ func Init() {
218262
if !ok {
219263
panic("missing single character token char lookup for " + i.String())
220264
}
221-
v, ok := cTokens[b]
222-
if !ok {
265+
v := cTokens[b]
266+
if v == nil {
223267
panic("missing single character token for " + i.String())
224268
}
225269
if v.tokenType != i {
@@ -238,14 +282,25 @@ func Init() {
238282
}
239283
}
240284
// Multi character non identity tokens.
241-
assocC2(LTEQ, "<=")
242-
assocC2(GTEQ, ">=")
243-
assocC2(EQ, "==")
244-
assocC2(NOTEQ, "!=")
245-
assocC2(INCR, "++")
246-
assocC2(DECR, "--")
285+
assocC2(LTEQ, "<=", 'l')
286+
assocC2(GTEQ, ">=", 'g')
287+
assocC2(EQ, "==", '_')
288+
assocC2(NOTEQ, "!=", 'n')
289+
assocC2(INCR, "++", 'i')
290+
assocC2(DECR, "--", 'd')
247291
// Special alias for := to be same as ASSIGN.
248292
c2Tokens[[2]byte{':', '='}] = cTokens['=']
293+
// Valued tokens:
294+
assocCodeAndToken(IDENT, ' ')
295+
assocCodeAndToken(INT, '0')
296+
assocCodeAndToken(FLOAT, '.')
297+
assocCodeAndToken(STRING, 's')
298+
assocCodeAndToken(LINECOMMENT, 'c')
299+
assocCodeAndToken(BLOCKCOMMENT, 'b')
300+
301+
assocCodeAndToken(ILLEGAL, 1) // doesn't need to be printable.
302+
assocCodeAndToken(EOL, 10) // nl but not printed anyway
303+
assocCodeAndToken(EOF, 'z') // also not used/visible
249304
}
250305

251306
//go:generate stringer -type=Type
@@ -256,14 +311,14 @@ func LookupIdent(ident string) *Token {
256311
if t, ok := keywords[ident]; ok {
257312
return t
258313
}
259-
return InternToken(&Token{tokenType: IDENT, literal: ident})
314+
return InternToken(&Token{tokenType: IDENT, literal: ident, charCode: ' '})
260315
}
261316

262317
// ByType is the cheapest lookup for all the tokens whose type
263318
// only has one possible instance/value
264319
// (i.e. all the tokens except the value tokens such as IDENT, INT, FLOAT, STRING and the comments).
265320
// TODO: codegen all the token constants to avoid needing this function.
266-
// (even though that's better than string comparaisons).
321+
// (even though that's better than string comparisons).
267322
func ByType(t Type) *Token {
268323
return tToT[t]
269324
}
@@ -276,6 +331,10 @@ func (t *Token) Type() Type {
276331
return t.tokenType
277332
}
278333

334+
// Code returns the token's one-byte serialization code (its charCode field).
func (t *Token) Code() byte {
335+
return t.charCode
336+
}
337+
279338
func ConstantTokenChar(literal byte) *Token {
280339
return cTokens[literal]
281340
}
@@ -287,3 +346,7 @@ func ConstantTokenChar2(c1, c2 byte) *Token {
287346
func (t *Token) DebugString() string {
288347
return t.Type().String() + ":" + strconv.Quote(t.Literal())
289348
}
349+
350+
// NumTokens returns tokensCount, the package-level counter of registered
// token type/code associations.
func NumTokens() int {
351+
return tokensCount
352+
}

token/token_test.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func TestMultiCharTokens(t *testing.T) {
7979
{"--", DECR},
8080
}
8181
for _, tt := range tests {
82-
tok := &Token{tokenType: tt.expected, literal: tt.input}
82+
tok := &Token{tokenType: tt.expected, literal: tt.input, charCode: tToCode[tt.expected]}
8383
tok2 := InternToken(tok)
8484
if tok == tok2 {
8585
t.Errorf("Intern[%s] was unexpectedly created", tt.input)
@@ -138,3 +138,13 @@ func TestColonEqualAlias(t *testing.T) {
138138
t.Errorf("ConstantTokenStr[:=] returned %v, expected '='", tok.Literal())
139139
}
140140
}
141+
142+
// TestNumTokens re-runs Init and checks that NumTokens matches the count
// derived from the EOF constant: int(EOF) minus the 8 marker constants, plus
// one because numbering starts at ILLEGAL==0.
func TestNumTokens(t *testing.T) {
143+
Init()
144+
n := NumTokens()
145+
// 49 tokens in total so far
146+
expected := int(EOF) - 8 + 1 // 8 markers to subtract + 1 for ILLEGAL==0.
147+
if n != expected {
148+
t.Errorf("NumTokens() returned %d, expected %d", n, expected)
149+
}
150+
}

token/type_string.go

+41-41
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)