Skip to content

Commit 04890fb

Browse files
authored
Update TokenStream.java
" "
1 parent 3b619f4 commit 04890fb

File tree

1 file changed

+22
-51
lines changed

1 file changed

+22
-51
lines changed

src/main/java/com/scanner/project/TokenStream.java

Lines changed: 22 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package com.scanner.project;
22

3-
// TokenStream.java
4-
5-
// Implementation of the Scanner for KAY
3+
// Scanner for the KAY language
64

75
import java.io.BufferedReader;
86
import java.io.FileNotFoundException;
@@ -11,18 +9,14 @@
119

1210
public class TokenStream {
1311

14-
// Instance variables
15-
private boolean isEof = false; // is end of file
16-
private char nextChar = ' '; // next character in input stream
12+
private boolean isEof = false;
13+
private char nextChar = ' ';
1714
private BufferedReader input;
1815

19-
// This function was added to make the demo file work
2016
public boolean isEoFile() {
2117
return isEof;
2218
}
2319

24-
// Constructor
25-
// Pass a filename for the program text as a source for the TokenStream.
2620
public TokenStream(String fileName) {
2721
try {
2822
input = new BufferedReader(new FileReader(fileName));
@@ -32,77 +26,68 @@ public TokenStream(String fileName) {
3226
}
3327
}
3428

35-
public Token nextToken() { // Main function of the scanner
36-
// Return next token type and value.
29+
// main scanning function - returns the next token from input
30+
public Token nextToken() {
3731
Token t = new Token();
38-
t.setType("Other"); // For now it is Other
32+
t.setType("Other");
3933
t.setValue("");
4034

41-
// First check for whitespaces and bypass them
4235
skipWhiteSpace();
4336

44-
// Then check for a comment, and bypass it
45-
// but remember that / may also be a division operator.
37+
// skip over comments
4638
while (nextChar == '/') {
4739
nextChar = readChar();
48-
if (nextChar == '/') { // If / is followed by another /
49-
// skip rest of line - it's a comment.
50-
// look for <cr>, <lf>, <ff>
40+
if (nextChar == '/') {
41+
// rest of line is a comment
5142
while (!isEof && !isEndOfLine(nextChar)) {
5243
nextChar = readChar();
5344
}
5445
skipWhiteSpace();
5546
} else {
56-
// A slash followed by anything else must be an operator.
47+
// just a division operator
5748
t.setValue("/");
5849
t.setType("Operator");
5950
return t;
6051
}
6152
}
6253

63-
// Then check for an operator; this part of the code should recover 2-character
64-
// operators as well as 1-character ones.
54+
// check for two-char and one-char operators
6555
if (isOperator(nextChar)) {
6656
t.setType("Operator");
6757
t.setValue(t.getValue() + nextChar);
6858
switch (nextChar) {
6959
case '<':
70-
// <= or <>
7160
nextChar = readChar();
7261
if (nextChar == '=' || nextChar == '>') {
7362
t.setValue(t.getValue() + nextChar);
7463
nextChar = readChar();
7564
}
7665
return t;
7766
case '>':
78-
// >=
7967
nextChar = readChar();
8068
if (nextChar == '=') {
8169
t.setValue(t.getValue() + nextChar);
8270
nextChar = readChar();
8371
}
8472
return t;
8573
case '=':
86-
// == or single = (which is Other/invalid in KAY)
8774
nextChar = readChar();
8875
if (nextChar == '=') {
8976
t.setValue(t.getValue() + nextChar);
9077
nextChar = readChar();
9178
} else {
92-
// Single '=' is not a valid operator in KAY
79+
// single = is not valid in KAY
9380
t.setType("Other");
9481
}
9582
return t;
9683
case '!':
97-
// != or just !
9884
nextChar = readChar();
9985
if (nextChar == '=') {
10086
t.setValue(t.getValue() + nextChar);
10187
nextChar = readChar();
10288
}
10389
return t;
10490
case ':':
105-
// := (assignment operator in KAY)
10691
nextChar = readChar();
10792
if (nextChar == '=') {
10893
t.setValue(t.getValue() + nextChar);
@@ -113,7 +98,6 @@ public Token nextToken() { // Main function of the scanner
11398
return t;
11499
}
115100
case '|':
116-
// Look for ||
117101
nextChar = readChar();
118102
if (nextChar == '|') {
119103
t.setValue(t.getValue() + nextChar);
@@ -123,9 +107,7 @@ public Token nextToken() { // Main function of the scanner
123107
t.setType("Other");
124108
}
125109
return t;
126-
127110
case '&':
128-
// Look for &&
129111
nextChar = readChar();
130112
if (nextChar == '&') {
131113
t.setValue(t.getValue() + nextChar);
@@ -135,50 +117,46 @@ public Token nextToken() { // Main function of the scanner
135117
t.setType("Other");
136118
}
137119
return t;
138-
139-
default: // all other operators: +, -, *, /
120+
default:
140121
nextChar = readChar();
141122
return t;
142123
}
143124
}
144125

145-
// Then check for a separator (including comma for multi-variable declarations)
126+
// separators like { } ( ) ; ,
146127
if (isSeparator(nextChar)) {
147128
t.setType("Separator");
148129
t.setValue(t.getValue() + nextChar);
149130
nextChar = readChar();
150131
return t;
151132
}
152133

153-
// Then check for an identifier, keyword, or literal.
134+
// identifiers and keywords start with a letter
154135
if (isLetter(nextChar)) {
155-
// Set to an identifier
156136
t.setType("Identifier");
157137
while ((isLetter(nextChar) || isDigit(nextChar))) {
158138
t.setValue(t.getValue() + nextChar);
159139
nextChar = readChar();
160140
}
161-
// now see if this is a keyword
141+
// check if it's actually a keyword
162142
if (isKeyword(t.getValue())) {
163143
t.setType("Keyword");
164144
} else if (t.getValue().equals("True") || t.getValue().equals("False")) {
165-
// In KAY, True and False are uppercased literals
166145
t.setType("Literal");
167146
}
168-
if (isEndOfToken(nextChar)) { // If token is valid, returns.
147+
if (isEndOfToken(nextChar)) {
169148
return t;
170149
}
171150
}
172151

173-
if (isDigit(nextChar)) { // check for integer literals
152+
// integer literals (runs of digits)
153+
if (isDigit(nextChar)) {
174154
t.setType("Literal");
175155
while (isDigit(nextChar)) {
176156
t.setValue(t.getValue() + nextChar);
177157
nextChar = readChar();
178158
}
179-
// An Integer-Literal is to be only followed by a space,
180-
// an operator, or a separator.
181-
if (isEndOfToken(nextChar)) {// If token is valid, returns.
159+
if (isEndOfToken(nextChar)) {
182160
return t;
183161
}
184162
}
@@ -189,13 +167,12 @@ public Token nextToken() { // Main function of the scanner
189167
return t;
190168
}
191169

192-
// Makes sure that the whole unknown token (Type: Other) is printed.
170+
// grab the rest of an unknown token
193171
while (!isEndOfToken(nextChar)) {
194172
t.setValue(t.getValue() + nextChar);
195173
nextChar = readChar();
196174
}
197175

198-
// Finally check for whitespaces and bypass them
199176
skipWhiteSpace();
200177

201178
return t;
@@ -219,8 +196,6 @@ private char readChar() {
219196
}
220197

221198
private boolean isKeyword(String s) {
222-
// Keywords in KAY: main, if, else, while, integer, bool
223-
// Note: void is NOT a keyword in KAY
224199
return s.equals("main") || s.equals("if") || s.equals("else") ||
225200
s.equals("while") || s.equals("integer") || s.equals("bool");
226201
}
@@ -233,25 +208,21 @@ private boolean isEndOfLine(char c) {
233208
return (c == '\r' || c == '\n' || c == '\f');
234209
}
235210

236-
private boolean isEndOfToken(char c) { // Is the value a separate token?
211+
private boolean isEndOfToken(char c) {
237212
return (isWhiteSpace(nextChar) || isOperator(nextChar) || isSeparator(nextChar) || isEof);
238213
}
239214

240215
private void skipWhiteSpace() {
241-
// check for whitespaces, and bypass them
242216
while (!isEof && isWhiteSpace(nextChar)) {
243217
nextChar = readChar();
244218
}
245219
}
246220

247221
private boolean isSeparator(char c) {
248-
// Separators in KAY: { } ; ( ) ,
249222
return (c == '{' || c == '}' || c == ';' || c == '(' || c == ')' || c == ',');
250223
}
251224

252225
private boolean isOperator(char c) {
253-
// Checks for characters that start operators
254-
// Operators in KAY: + - * / < <= > >= == != && || ! :=
255226
return (c == '+' || c == '-' || c == '*' || c == '/' ||
256227
c == '<' || c == '>' || c == '=' || c == '!' ||
257228
c == '&' || c == '|' || c == ':');

0 commit comments

Comments
 (0)