11package com .scanner .project ;
22
3- // TokenStream.java
4-
5- // Implementation of the Scanner for KAY
3+ // Scanner for the KAY language
64
75import java .io .BufferedReader ;
86import java .io .FileNotFoundException ;
119
1210public class TokenStream {
1311
14- // Instance variables
15- private boolean isEof = false ; // is end of file
16- private char nextChar = ' ' ; // next character in input stream
12+ private boolean isEof = false ;
13+ private char nextChar = ' ' ;
1714 private BufferedReader input ;
1815
19- // This function was added to make the demo file work
2016 public boolean isEoFile () {
2117 return isEof ;
2218 }
2319
24- // Constructor
25- // Pass a filename for the program text as a source for the TokenStream.
2620 public TokenStream (String fileName ) {
2721 try {
2822 input = new BufferedReader (new FileReader (fileName ));
@@ -32,77 +26,68 @@ public TokenStream(String fileName) {
3226 }
3327 }
3428
35- public Token nextToken () { // Main function of the scanner
36- // Return next token type and value.
29+ // main scanning function - returns the next token from input
30+ public Token nextToken () {
3731 Token t = new Token ();
38- t .setType ("Other" ); // For now it is Other
32+ t .setType ("Other" );
3933 t .setValue ("" );
4034
41- // First check for whitespaces and bypass them
4235 skipWhiteSpace ();
4336
44- // Then check for a comment, and bypass it
45- // but remember that / may also be a division operator.
37+ // skip over comments
4638 while (nextChar == '/' ) {
4739 nextChar = readChar ();
48- if (nextChar == '/' ) { // If / is followed by another /
49- // skip rest of line - it's a comment.
50- // look for <cr>, <lf>, <ff>
40+ if (nextChar == '/' ) {
41+ // rest of line is a comment
5142 while (!isEof && !isEndOfLine (nextChar )) {
5243 nextChar = readChar ();
5344 }
5445 skipWhiteSpace ();
5546 } else {
56- // A slash followed by anything else must be an operator.
47+ // just a division operator
5748 t .setValue ("/" );
5849 t .setType ("Operator" );
5950 return t ;
6051 }
6152 }
6253
63- // Then check for an operator; this part of the code should recover 2-character
64- // operators as well as 1-character ones.
54+ // check for two-char and one-char operators
6555 if (isOperator (nextChar )) {
6656 t .setType ("Operator" );
6757 t .setValue (t .getValue () + nextChar );
6858 switch (nextChar ) {
6959 case '<' :
70- // <= or <>
7160 nextChar = readChar ();
7261 if (nextChar == '=' || nextChar == '>' ) {
7362 t .setValue (t .getValue () + nextChar );
7463 nextChar = readChar ();
7564 }
7665 return t ;
7766 case '>' :
78- // >=
7967 nextChar = readChar ();
8068 if (nextChar == '=' ) {
8169 t .setValue (t .getValue () + nextChar );
8270 nextChar = readChar ();
8371 }
8472 return t ;
8573 case '=' :
86- // == or single = (which is Other/invalid in KAY)
8774 nextChar = readChar ();
8875 if (nextChar == '=' ) {
8976 t .setValue (t .getValue () + nextChar );
9077 nextChar = readChar ();
9178 } else {
92- // Single '=' is not a valid operator in KAY
79+ // single = is not valid in KAY
9380 t .setType ("Other" );
9481 }
9582 return t ;
9683 case '!' :
97- // != or just !
9884 nextChar = readChar ();
9985 if (nextChar == '=' ) {
10086 t .setValue (t .getValue () + nextChar );
10187 nextChar = readChar ();
10288 }
10389 return t ;
10490 case ':' :
105- // := (assignment operator in KAY)
10691 nextChar = readChar ();
10792 if (nextChar == '=' ) {
10893 t .setValue (t .getValue () + nextChar );
@@ -113,7 +98,6 @@ public Token nextToken() { // Main function of the scanner
11398 return t ;
11499 }
115100 case '|' :
116- // Look for ||
117101 nextChar = readChar ();
118102 if (nextChar == '|' ) {
119103 t .setValue (t .getValue () + nextChar );
@@ -123,9 +107,7 @@ public Token nextToken() { // Main function of the scanner
123107 t .setType ("Other" );
124108 }
125109 return t ;
126-
127110 case '&' :
128- // Look for &&
129111 nextChar = readChar ();
130112 if (nextChar == '&' ) {
131113 t .setValue (t .getValue () + nextChar );
@@ -135,50 +117,46 @@ public Token nextToken() { // Main function of the scanner
135117 t .setType ("Other" );
136118 }
137119 return t ;
138-
139- default : // all other operators: +, -, *, /
120+ default :
140121 nextChar = readChar ();
141122 return t ;
142123 }
143124 }
144125
145- // Then check for a separator (including comma for multi-variable declarations)
126+ // separators like { } ( ) ; ,
146127 if (isSeparator (nextChar )) {
147128 t .setType ("Separator" );
148129 t .setValue (t .getValue () + nextChar );
149130 nextChar = readChar ();
150131 return t ;
151132 }
152133
153- // Then check for an identifier, keyword, or literal.
134+ // identifiers and keywords start with a letter
154135 if (isLetter (nextChar )) {
155- // Set to an identifier
156136 t .setType ("Identifier" );
157137 while ((isLetter (nextChar ) || isDigit (nextChar ))) {
158138 t .setValue (t .getValue () + nextChar );
159139 nextChar = readChar ();
160140 }
161- // now see if this is a keyword
141+ // check if it's actually a keyword
162142 if (isKeyword (t .getValue ())) {
163143 t .setType ("Keyword" );
164144 } else if (t .getValue ().equals ("True" ) || t .getValue ().equals ("False" )) {
165- // In KAY, True and False are uppercased literals
166145 t .setType ("Literal" );
167146 }
168- if (isEndOfToken (nextChar )) { // If token is valid, returns.
147+ if (isEndOfToken (nextChar )) {
169148 return t ;
170149 }
171150 }
172151
173- if (isDigit (nextChar )) { // check for integer literals
152+ // numbers
153+ if (isDigit (nextChar )) {
174154 t .setType ("Literal" );
175155 while (isDigit (nextChar )) {
176156 t .setValue (t .getValue () + nextChar );
177157 nextChar = readChar ();
178158 }
179- // An Integer-Literal is to be only followed by a space,
180- // an operator, or a separator.
181- if (isEndOfToken (nextChar )) {// If token is valid, returns.
159+ if (isEndOfToken (nextChar )) {
182160 return t ;
183161 }
184162 }
@@ -189,13 +167,12 @@ public Token nextToken() { // Main function of the scanner
189167 return t ;
190168 }
191169
192- // Makes sure that the whole unknown token (Type: Other) is printed.
170+ // grab the rest of an unknown token
193171 while (!isEndOfToken (nextChar )) {
194172 t .setValue (t .getValue () + nextChar );
195173 nextChar = readChar ();
196174 }
197175
198- // Finally check for whitespaces and bypass them
199176 skipWhiteSpace ();
200177
201178 return t ;
@@ -219,8 +196,6 @@ private char readChar() {
219196 }
220197
221198 private boolean isKeyword (String s ) {
222- // Keywords in KAY: main, if, else, while, integer, bool
223- // Note: void is NOT a keyword in KAY
224199 return s .equals ("main" ) || s .equals ("if" ) || s .equals ("else" ) ||
225200 s .equals ("while" ) || s .equals ("integer" ) || s .equals ("bool" );
226201 }
@@ -233,25 +208,21 @@ private boolean isEndOfLine(char c) {
233208 return (c == '\r' || c == '\n' || c == '\f' );
234209 }
235210
236- private boolean isEndOfToken (char c ) { // Is the value a separate token?
211+ private boolean isEndOfToken (char c ) {
237212 return (isWhiteSpace (nextChar ) || isOperator (nextChar ) || isSeparator (nextChar ) || isEof );
238213 }
239214
240215 private void skipWhiteSpace () {
241- // check for whitespaces, and bypass them
242216 while (!isEof && isWhiteSpace (nextChar )) {
243217 nextChar = readChar ();
244218 }
245219 }
246220
247221 private boolean isSeparator (char c ) {
248- // Separators in KAY: { } ; ( ) ,
249222 return (c == '{' || c == '}' || c == ';' || c == '(' || c == ')' || c == ',' );
250223 }
251224
252225 private boolean isOperator (char c ) {
253- // Checks for characters that start operators
254- // Operators in KAY: + - * / < <= > >= == != && || ! :=
255226 return (c == '+' || c == '-' || c == '*' || c == '/' ||
256227 c == '<' || c == '>' || c == '=' || c == '!' ||
257228 c == '&' || c == '|' || c == ':' );
0 commit comments