Skip to content
This repository has been archived by the owner on Jul 3, 2022. It is now read-only.

Commit

Permalink
Add identifiers, Completed chapter 4, the Scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
RoelAdriaans committed Aug 1, 2020
1 parent fb29075 commit f55c50b
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 7 deletions.
10 changes: 4 additions & 6 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]


## [0.0.1] - 2020-08-01

### Added

- Completing chapter 4
- Completed including chapter 4.6.2
- Created CHANGELOG.md, README.md, STATUS.md
- Created first structure of the project, including tox, flake8, mypy, black and
other utilities
- Implemented `run`, `run_file` and `run_prompt` methods
- Added `Token`, `Scanner`, `TokenType`
- Support strings and numbers

## [0.0.1] - 2020-00-00

Todo, this is just a placeholder
- Support `strings`, `numbers` and `identifiers`

[Unreleased]: https://github.com/RoelAdriaans/yaplox/compare/v0.0.1...HEAD
[0.0.1]: https://github.com/RoelAdriaans/yaplox/releases/tag/v0.0.1
34 changes: 33 additions & 1 deletion src/yaplox/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@ class Scanner:
start: int = 0
current: int = 0
line: int = 1
# Reserved words, keyed by their exact source text, mapped to the TokenType
# that is emitted for them. _identifier() looks scanned text up in this table
# and falls back to TokenType.IDENTIFIER for anything that is not listed.
keywords = {
"and": TokenType.AND,
"class": TokenType.CLASS,
"else": TokenType.ELSE,
"false": TokenType.FALSE,
"for": TokenType.FOR,
"fun": TokenType.FUN,
"if": TokenType.IF,
"nil": TokenType.NIL,
"or": TokenType.OR,
"print": TokenType.PRINT,
"return": TokenType.RETURN,
"super": TokenType.SUPER,
"this": TokenType.THIS,
"true": TokenType.TRUE,
"var": TokenType.VAR,
"while": TokenType.WHILE,
}

def __init__(self, source: str, on_error=None):
"""
Expand Down Expand Up @@ -76,6 +94,16 @@ def _number(self):
number_value = self.source[self.start : self.current]
self._add_token(TokenType.NUMBER, float(number_value))

def _identifier(self):
    """
    Consume the rest of an identifier and add the matching token.

    Keeps advancing while the next character is alphanumeric or an
    underscore. The text between ``start`` and ``current`` is then looked up
    in ``keywords``: a hit yields that reserved word's token type, anything
    else yields ``TokenType.IDENTIFIER``.
    """
    while True:
        upcoming = self._peek()
        if not upcoming.isalnum() and upcoming != "_":
            break
        self._advance()

    # Reserved words take precedence over plain identifiers
    lexeme = self.source[self.start : self.current]
    kind = self.keywords.get(lexeme, TokenType.IDENTIFIER)
    self._add_token(token_type=kind)

def _scan_token(self):
""" Scan tokens"""
c = self._advance()
Expand Down Expand Up @@ -126,8 +154,12 @@ def _scan_token(self):
if c.isdigit():
# A digit was encountered; consume the number
self._number()
# If we have an on_error callback, run this, otherwise raise the error again
elif c.isalpha() or c == "_":
# A letter or underscore was encountered; consume the identifier
self._identifier()
elif self.on_error:
# If we have an on_error callback, run this, otherwise raise the
# error again
self.on_error(self.line, f"Unexpected character: {c}")
else:
raise
Expand Down
56 changes: 56 additions & 0 deletions tests/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,59 @@ def test_scanner_with_number(self, mocker):
assert tokens[5].literal == 13.0

assert not on_error_mock.called

def test_scanner_identifier(self, mocker):
    """Reserved words get their own token type; other words are identifiers."""
    error_callback = mocker.MagicMock()
    tokens = Scanner(
        "appelflap or nil if while _foo_bar_1_2", on_error=error_callback
    ).scan_tokens()

    # A plain word that is not reserved
    leading = tokens[0]
    assert leading.token_type == TokenType.IDENTIFIER
    assert leading.lexeme == "appelflap"

    # The four reserved words in the middle, in source order
    reserved = (TokenType.OR, TokenType.NIL, TokenType.IF, TokenType.WHILE)
    for position, expected_type in enumerate(reserved, start=1):
        assert tokens[position].token_type == expected_type

    # Underscores and digits are allowed inside an identifier
    trailing = tokens[5]
    assert trailing.token_type == TokenType.IDENTIFIER
    assert trailing.lexeme == "_foo_bar_1_2"

    assert not error_callback.called

def test_scanner_invalid_identifier(self, mocker):
    """
    Scan source that makes no sense and check how it is split up.

    The identifiers and numbers below run into each other. They still
    result in valid tokens, just not the tokens you would expect. This is
    not a problem of the scanner, it just does as it's told.
    """
    error_callback = mocker.MagicMock()
    scanner = Scanner(
        "123foo_bar bar-stool spam_egg_1.3_chickens", on_error=error_callback
    )
    tokens = scanner.scan_tokens()

    # "123foo_bar" splits into a number followed by an identifier
    assert tokens[0].literal == 123.0
    after_number = tokens[1]
    assert after_number.lexeme == "foo_bar"
    assert after_number.token_type == TokenType.IDENTIFIER

    # "bar-stool" splits on the minus sign
    before_minus = tokens[2]
    assert before_minus.lexeme == "bar"
    assert before_minus.token_type == TokenType.IDENTIFIER
    assert tokens[3].token_type == TokenType.MINUS
    assert tokens[4].lexeme == "stool"

    # The 1 is still part of the identifier. The dot ends the identifier,
    # and the 3 after the dot starts a new number.
    assert tokens[5].lexeme == "spam_egg_1"
    assert tokens[6].token_type == TokenType.DOT
    fraction = tokens[7]
    assert fraction.token_type == TokenType.NUMBER
    assert fraction.literal == 3.0

    assert tokens[8].lexeme == "_chickens"

    assert not error_callback.called

0 comments on commit f55c50b

Please sign in to comment.