-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkk_token.py
More file actions
142 lines (109 loc) · 3.74 KB
/
kk_token.py
File metadata and controls
142 lines (109 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""KitKat Token stuff."""
import error
__author__ = 'dan.barella@gmail.com (Dan Barella)'

# Character that puts the DFA into its escaped state.
ESCAPE = "'"

# The four directional characters.
UP, RIGHT, DOWN, LEFT = '^', '>', 'v', '<'

# Maps each special source character to the name of its token kind. The
# Ellipsis key is the catch-all kind used for every character that has no
# entry of its own.
TOKENS = {
    UP: 'T_UP',
    RIGHT: 'T_RIGHT',
    DOWN: 'T_DOWN',
    LEFT: 'T_LEFT',
    "'": 'T_SINGLE_QUOTE',
    '.': 'T_PERIOD',
    ',': 'T_COMMA',
    '\n': 'T_NEWLINE',
    Ellipsis: 'T_CHAR',
}
class Token(object):
    """A single KitKat token: a kind name plus the text it stands for."""

    # Kinds for which is_printable() answers True.
    PRINTABLE_TOKEN_KINDS = {
        'T_CHAR',
        'T_COMMA',
        'T_NEWLINE',
        'T_SINGLE_QUOTE',
    }

    def __init__(self, kind, character):
        """Create a Token.

        Only the kind is validated. The character is stored exactly as
        given, so a caller building a Token for an escape sequence has to
        pass the already-translated text (for example an actual newline for
        the escape sequence quote-n); nothing here checks that the character
        makes sense for the kind.

        Args:
          kind (string): The T_ID of this token; must be one of the values
            of the TOKENS table.
          character (char): The character(s) this Token represents.

        Raises:
          error.TokenException: If kind is not a value of the TOKENS table.
        """
        known_kinds = TOKENS.values()
        if kind not in known_kinds:
            raise error.TokenException(
                '{} is not a valid Token kind'.format(kind))
        self.kind = kind
        self.character = character

    def is_directional(self):
        """Tell whether this token's kind is one of the four direction kinds."""
        direction_kinds = tuple(
            TOKENS[arrow] for arrow in (UP, RIGHT, DOWN, LEFT))
        return self.kind in direction_kinds

    def is_kind(self, kind):
        """Tell whether this token's kind equals the given kind."""
        same_kind = self.kind == kind
        return same_kind

    def is_printable(self):
        """Tell whether this token's kind is listed as printable."""
        printable_kinds = self.PRINTABLE_TOKEN_KINDS
        return self.kind in printable_kinds

    def __repr__(self):
        template = '<Token {kind}: {text} >'
        return template.format(kind=self.kind, text=self.character)

    def __str__(self):
        text = self.character
        return text
class DFA(object):
    """Maintains state in one place so that no one else has to suffer unduly.

    The only state is the `escape` flag: True while the previous character
    was ESCAPE and the character completing the sequence is still pending.
    """

    # Escape sequences are formed by ESCAPE followed by one of the keys
    # below. The key is the lookahead character (e.g. the n in quote-n) and
    # the value is the text the resulting token carries (e.g. a newline).
    TRANSLATE_ESCAPE = {
        UP: UP,
        RIGHT: RIGHT,
        DOWN: DOWN,
        LEFT: LEFT,
        '\'': '\'',
        '.': '.',
        ',': ',',
        'n': '\n',
    }

    # Output text for certain unescaped special characters. This differs
    # from escape translation: an unescaped comma is a T_COMMA token whose
    # text is a space, while an escaped comma is a plain comma character.
    TRANSLATE_SPECIAL_CHAR = {
        ',': ' ',  # Comma translates to space by default
    }

    def __init__(self):
        """Start in the not-escaped state."""
        self.escape = False

    def step(self, ch):
        """Steps the DFA based on the incoming character ch.

        Args:
          ch (char): The incoming character.

        Returns (Token): A Token if one can be emitted, None otherwise. None
          is returned only for ESCAPE seen in the not-escaped state, since
          the sequence needs one more character.

        Raises:
          error.DFAException: If ch follows ESCAPE but does not complete a
            known escape sequence. The DFA is back in the not-escaped state
            when this is raised.
        """
        if self.escape:
            # The pending sequence ends with this character whether or not it
            # is valid. Clearing the flag before validating keeps the DFA
            # usable if the caller catches the exception and carries on;
            # otherwise the next character would also be read as escaped.
            self.escape = False
            if ch not in self.TRANSLATE_ESCAPE:
                raise error.DFAException(
                    "{0}{1} is an invalid escape sequence".format(ESCAPE, ch))
            # Every valid escape sequence is emitted as a plain character.
            return Token(TOKENS[Ellipsis], self.TRANSLATE_ESCAPE[ch])

        if ch == ESCAPE:
            # Possible escape sequence; wait for the next character.
            self.escape = True
            return None

        if ch not in TOKENS:
            # Just a regular character.
            return Token(TOKENS[Ellipsis], ch)

        # Special character: keep its own kind, swapping in the translated
        # output text when one is defined.
        return Token(TOKENS[ch], self.TRANSLATE_SPECIAL_CHAR.get(ch, ch))