Skip to content

Commit 90ef62f

Browse files
authored
Update TokenStream.java
1 parent a8e8e5b commit 90ef62f

File tree

1 file changed

+145
-236
lines changed

1 file changed

+145
-236
lines changed
Lines changed: 145 additions & 236 deletions
Original file line numberDiff line numberDiff line change
@@ -1,242 +1,151 @@
11
package com.scanner.project;
22

3-
// scanner for KAY language
3+
import java.io.*;
4+
import java.util.*;
45

5-
import java.io.BufferedReader;
6-
import java.io.FileNotFoundException;
7-
import java.io.FileReader;
8-
import java.io.IOException;
6+
// KAY language scanner
97

108
public class TokenStream {
11-
12-
// True once the input is exhausted or the source file could not be opened.
private boolean isEof = false;
13-
// One-character lookahead. It starts as a space, so the first skipWhiteSpace() call triggers a read.
private char nextChar = ' ';
14-
// Character source assigned by the constructor; stays null when the file was not found.
private BufferedReader input;
15-
16-
public boolean isEoFile() {
17-
return isEof;
18-
}
19-
20-
/**
 * Opens the named source file for scanning.
 *
 * If the file cannot be found, a message is printed to standard output and
 * the stream is marked as already at end of file; no exception escapes.
 *
 * @param fileName path of the file to scan
 */
public TokenStream(String fileName) {
    BufferedReader opened = null;
    try {
        opened = new BufferedReader(new FileReader(fileName));
    } catch (FileNotFoundException missing) {
        System.out.println("File not found: " + fileName);
        isEof = true;
    }
    input = opened;
}
28-
29-
// main scanning function - returns the next token from input
30-
public Token nextToken() {
31-
Token t = new Token();
32-
t.setType("Other");
33-
t.setValue("");
34-
35-
skipWhiteSpace();
36-
37-
// skip over comments
38-
while (nextChar == '/') {
39-
nextChar = readChar();
40-
if (nextChar == '/') {
41-
// rest of line is a comment
42-
while (!isEof && !isEndOfLine(nextChar)) {
43-
nextChar = readChar();
44-
}
45-
skipWhiteSpace();
46-
} else {
47-
// just a division operator
48-
t.setValue("/");
49-
t.setType("Operator");
50-
return t;
51-
}
52-
}
53-
54-
// check for two-char and one-char operators
55-
if (isOperator(nextChar)) {
56-
t.setType("Operator");
57-
t.setValue(t.getValue() + nextChar);
58-
switch (nextChar) {
59-
case '<':
60-
nextChar = readChar();
61-
if (nextChar == '=' || nextChar == '>') {
62-
t.setValue(t.getValue() + nextChar);
63-
nextChar = readChar();
64-
}
65-
return t;
66-
case '>':
67-
nextChar = readChar();
68-
if (nextChar == '=') {
69-
t.setValue(t.getValue() + nextChar);
70-
nextChar = readChar();
71-
}
72-
return t;
73-
case '=':
74-
nextChar = readChar();
75-
if (nextChar == '=') {
76-
t.setValue(t.getValue() + nextChar);
77-
nextChar = readChar();
78-
} else {
79-
// single = is not valid in KAY
80-
t.setType("Other");
81-
}
82-
return t;
83-
case '!':
84-
nextChar = readChar();
85-
if (nextChar == '=') {
86-
t.setValue(t.getValue() + nextChar);
87-
nextChar = readChar();
88-
}
89-
return t;
90-
case ':':
91-
nextChar = readChar();
92-
if (nextChar == '=') {
93-
t.setValue(t.getValue() + nextChar);
94-
nextChar = readChar();
95-
return t;
96-
} else {
97-
t.setType("Other");
98-
return t;
99-
}
100-
case '|':
101-
nextChar = readChar();
102-
if (nextChar == '|') {
103-
t.setValue(t.getValue() + nextChar);
104-
nextChar = readChar();
105-
return t;
106-
} else {
107-
t.setType("Other");
108-
}
109-
return t;
110-
case '&':
111-
nextChar = readChar();
112-
if (nextChar == '&') {
113-
t.setValue(t.getValue() + nextChar);
114-
nextChar = readChar();
115-
return t;
116-
} else {
117-
t.setType("Other");
118-
}
119-
return t;
120-
default:
121-
nextChar = readChar();
122-
return t;
123-
}
124-
}
125-
126-
// separators like { } ( ) ; ,
127-
if (isSeparator(nextChar)) {
128-
t.setType("Separator");
129-
t.setValue(t.getValue() + nextChar);
130-
nextChar = readChar();
131-
return t;
132-
}
133-
134-
// identifiers and keywords start with a letter
135-
if (isLetter(nextChar)) {
136-
t.setType("Identifier");
137-
while ((isLetter(nextChar) || isDigit(nextChar))) {
138-
t.setValue(t.getValue() + nextChar);
139-
nextChar = readChar();
140-
}
141-
// check if its actually a keyword
142-
if (isKeyword(t.getValue())) {
143-
t.setType("Keyword");
144-
} else if (t.getValue().equals("True") || t.getValue().equals("False")) {
145-
t.setType("Literal");
146-
}
147-
if (isEndOfToken(nextChar)) {
148-
return t;
149-
}
150-
}
151-
152-
// numbers
153-
if (isDigit(nextChar)) {
154-
t.setType("Literal");
155-
while (isDigit(nextChar)) {
156-
t.setValue(t.getValue() + nextChar);
157-
nextChar = readChar();
158-
}
159-
if (isEndOfToken(nextChar)) {
160-
return t;
161-
}
162-
}
163-
164-
t.setType("Other");
165-
166-
if (isEof) {
167-
return t;
168-
}
169-
170-
// grab the rest of an unknown token
171-
while (!isEndOfToken(nextChar)) {
172-
t.setValue(t.getValue() + nextChar);
173-
nextChar = readChar();
174-
}
175-
176-
skipWhiteSpace();
177-
178-
return t;
179-
}
180-
181-
private char readChar() {
182-
int i = 0;
183-
if (isEof)
184-
return (char) 0;
185-
System.out.flush();
186-
try {
187-
i = input.read();
188-
} catch (IOException e) {
189-
System.exit(-1);
190-
}
191-
if (i == -1) {
192-
isEof = true;
193-
return (char) 0;
194-
}
195-
return (char) i;
196-
}
197-
198-
private boolean isKeyword(String s) {
199-
return s.equals("main") || s.equals("if") || s.equals("else") ||
200-
s.equals("while") || s.equals("integer") || s.equals("bool");
201-
}
202-
203-
private boolean isWhiteSpace(char c) {
204-
return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
205-
}
206-
207-
private boolean isEndOfLine(char c) {
208-
return (c == '\r' || c == '\n' || c == '\f');
209-
}
210-
211-
private boolean isEndOfToken(char c) {
212-
return (isWhiteSpace(nextChar) || isOperator(nextChar) || isSeparator(nextChar) || isEof);
213-
}
214-
215-
private void skipWhiteSpace() {
216-
while (!isEof && isWhiteSpace(nextChar)) {
217-
nextChar = readChar();
218-
}
219-
}
220-
221-
private boolean isSeparator(char c) {
222-
return (c == '{' || c == '}' || c == ';' || c == '(' || c == ')' || c == ',');
223-
}
224-
225-
private boolean isOperator(char c) {
226-
return (c == '+' || c == '-' || c == '*' || c == '/' ||
227-
c == '<' || c == '>' || c == '=' || c == '!' ||
228-
c == '&' || c == '|' || c == ':');
229-
}
230-
231-
private boolean isLetter(char c) {
232-
return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z');
233-
}
234-
235-
private boolean isDigit(char c) {
236-
return (c >= '0' && c <= '9');
237-
}
238-
239-
public boolean isEndofFile() {
240-
return isEof;
241-
}
9+
// Character source assigned by the constructor.
private BufferedReader reader;
10+
// Most recently read character; -1 once the reader reports end of input or a read fails.
private int currentChar;
11+
12+
// Words that nextToken() reports with type "Keyword". Note that True and False are included.
private static final Set<String> keywords = new HashSet<>(Arrays.asList(
13+
"main", "integer", "bool", "if", "else", "while", "True", "False"
14+
));
15+
16+
// Valid operator spellings; nextToken() checks its one- and two-character candidates against this set.
private static final Set<String> operators = new HashSet<>(Arrays.asList(
17+
":=", "+", "-", "*", "/", "<", ">", "<=", ">=", "==", "!=", "&&", "||", "!"
18+
));
19+
20+
// Single characters that nextToken() reports with type "Separator".
private static final Set<Character> separators = new HashSet<>(Arrays.asList(
21+
'(', ')', '{', '}', ';', ','
22+
));
23+
24+
/**
 * Opens the named source file and primes the one-character lookahead.
 *
 * If the file cannot be opened or the first read fails, a message is
 * printed to standard output and the stream is left at end of input
 * ({@code currentChar == -1}). Without that, {@code currentChar} would keep
 * its default of 0 and the first scan would dereference a null reader.
 *
 * @param filename path of the file to scan
 */
public TokenStream(String filename) {
    try {
        reader = new BufferedReader(new FileReader(filename));
        currentChar = reader.read();
    } catch (FileNotFoundException e) {
        System.out.println("File not found: " + filename);
        currentChar = -1;
    } catch (IOException e) {
        // The file exists but could not be read; do not mislabel this as "not found".
        System.out.println("Error reading file: " + filename);
        currentChar = -1;
    }
}
32+
33+
private void readNextChar() {
34+
try {
35+
currentChar = reader.read();
36+
} catch (IOException e) {
37+
currentChar = -1;
38+
}
39+
}
40+
41+
private boolean isLetter(int c) {
42+
return Character.isLetter((char) c);
43+
}
44+
45+
private boolean isDigit(int c) {
46+
return Character.isDigit((char) c);
47+
}
48+
49+
private void skipWhitespaceAndComments() {
50+
while (currentChar != -1) {
51+
if (Character.isWhitespace(currentChar)) {
52+
readNextChar();
53+
} else if (currentChar == '/') {
54+
try {
55+
reader.mark(2);
56+
readNextChar();
57+
if (currentChar == '/') {
58+
while (currentChar != -1 && currentChar != '\n') {
59+
readNextChar();
60+
}
61+
} else {
62+
reader.reset();
63+
currentChar = '/';
64+
break;
65+
}
66+
} catch (IOException e) {
67+
break;
68+
}
69+
} else {
70+
break;
71+
}
72+
}
73+
}
74+
75+
public Token nextToken() {
76+
skipWhitespaceAndComments();
77+
78+
if (currentChar == -1) {
79+
return new Token("EOF", "");
80+
}
81+
82+
// Identifiers or keywords
83+
if (isLetter(currentChar)) {
84+
StringBuilder sb = new StringBuilder();
85+
while (isLetter(currentChar) || isDigit(currentChar)) {
86+
sb.append((char) currentChar);
87+
readNextChar();
88+
}
89+
String word = sb.toString();
90+
if (keywords.contains(word))
91+
return new Token("Keyword", word);
92+
else
93+
return new Token("Identifier", word);
94+
}
95+
96+
// Numbers
97+
if (isDigit(currentChar)) {
98+
StringBuilder sb = new StringBuilder();
99+
while (isDigit(currentChar)) {
100+
sb.append((char) currentChar);
101+
readNextChar();
102+
}
103+
return new Token("Literal", sb.toString());
104+
}
105+
106+
// Separators
107+
if (separators.contains((char) currentChar)) {
108+
char c = (char) currentChar;
109+
readNextChar();
110+
return new Token("Separator", String.valueOf(c));
111+
}
112+
113+
// Operators
114+
if (currentChar == '+' || currentChar == '-' || currentChar == '*' || currentChar == '/') {
115+
char c = (char) currentChar;
116+
readNextChar();
117+
return new Token("Operator", String.valueOf(c));
118+
}
119+
120+
// Two-char operators and special cases
121+
if (currentChar == ':' || currentChar == '<' || currentChar == '>' ||
122+
currentChar == '=' || currentChar == '!' ||
123+
currentChar == '&' || currentChar == '|') {
124+
125+
StringBuilder sb = new StringBuilder();
126+
char firstChar = (char) currentChar;
127+
sb.append(firstChar);
128+
readNextChar();
129+
130+
if (currentChar == '=') {
131+
sb.append((char) currentChar);
132+
readNextChar();
133+
} else if ((firstChar == '&' && currentChar == '&') ||
134+
(firstChar == '|' && currentChar == '|')) {
135+
sb.append((char) currentChar);
136+
readNextChar();
137+
}
138+
139+
String op = sb.toString();
140+
if (operators.contains(op))
141+
return new Token("Operator", op);
142+
else
143+
return new Token("Other", op);
144+
}
145+
146+
// Everything else
147+
char bad = (char) currentChar;
148+
readNextChar();
149+
return new Token("Other", String.valueOf(bad));
150+
}
242151
}

0 commit comments

Comments
 (0)