-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlexer.lisp
More file actions
121 lines (104 loc) · 5.68 KB
/
lexer.lisp
File metadata and controls
121 lines (104 loc) · 5.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
(defpackage :lexer
(:use :common-lisp))
(in-package #:lexer)
(defstruct token
type
lexeme
literal
line)
(defun create-token (type lexeme literal line)
(make-token :type type :lexeme lexeme :literal literal :line line))
(defun create-eof ()
(create-token 'eof "" nil 0))
(defmacro create-helpers (symbols)
`(progn
,@(loop for sym in symbols
collect (let ((func-name (intern (concatenate 'string (symbol-name :create-) (symbol-name sym)))))
`(defun ,func-name (lexeme)
(create-token ',sym lexeme nil 0))))))
(create-helpers (and class else false for if nil or print return super this true
var while number string left-paren right-paren left-brace right-brace
comma dot minus plus semi-colon star bang-equal bang equal-equal equal
less-equal less greater-equal greater slash identifier fun))
(defun parse-number (str)
(with-input-from-string (stream str) (read stream)))
(defun valid-identifier (c)
(or (alphanumericp c) (eq c #\_)))
(defparameter *keywords* (make-hash-table :test #'equal))
(setf (gethash "and" *keywords*) 'and)
(setf (gethash "class" *keywords*) 'class)
(setf (gethash "else" *keywords*) 'else)
(setf (gethash "false" *keywords*) 'false)
(setf (gethash "for" *keywords*) 'for)
(setf (gethash "fun" *keywords*) 'fun)
(setf (gethash "if" *keywords*) 'if)
(setf (gethash "null" *keywords*) 'null)
(setf (gethash "or" *keywords*) 'or)
(setf (gethash "print" *keywords*) 'print)
(setf (gethash "return" *keywords*) 'return)
(setf (gethash "super" *keywords*) 'super)
(setf (gethash "this" *keywords*) 'this)
(setf (gethash "true" *keywords*) 'true)
(setf (gethash "var" *keywords*) 'var)
(setf (gethash "while" *keywords*) 'while)
(defun scan-tokens (chars &optional tokens)
(if (null chars)
(if (or (null tokens) (not (eq (token-type (car tokens)) 'eof)))
(reverse (push (create-eof) tokens))
(reverse tokens))
(multiple-value-bind (new-chars token) (scan-token chars)
(scan-tokens new-chars (push token tokens)))))
(defun scan-token (chars &optional current-lexeme lexeme-type)
(let ((c (car chars)))
(cond
((and (null c) (eq lexeme-type 'string)) (error "Unterminated string."))
((null c) (values nil (create-eof)))
((and (eq c #\NewLine ) (eq lexeme-type 'comment)) (scan-token (cdr chars) nil nil))
((eq lexeme-type 'comment) (scan-token (cdr chars) nil 'comment))
((and (eq lexeme-type 'string) (eq c #\")) (values (cdr chars) (create-token 'string current-lexeme nil 0)))
((eq lexeme-type 'string) (scan-token (cdr chars) (concatenate 'string current-lexeme (string c)) 'string))
((or (alpha-char-p c) (and (valid-identifier c) (eq lexeme-type 'identifier)))
(let ((updated-lexeme (concatenate 'string current-lexeme (string c))))
(if (valid-identifier (cadr chars))
(scan-token (cdr chars) updated-lexeme 'identifier)
(let ((keyword (gethash updated-lexeme *keywords*)))
(values (cdr chars) (create-token (if keyword keyword 'identifier) updated-lexeme nil 0))))))
; TODO: Fix this when the last character is a number.
((digit-char-p c)
(let ((updated-lexeme (concatenate 'string current-lexeme (string c))))
(if (or (digit-char-p (cadr chars)) (and (eq (cadr chars) #\.) (eq lexeme-type 'integer)))
; This if statement insures that if we are in 'float mode we won't overwrite it with 'integer
(scan-token (cdr chars) updated-lexeme (if (not lexeme-type) 'integer lexeme-type))
; There might be a problem with text like 123abc. How should this be handled?
(values (cdr chars) (create-number (parse-number updated-lexeme))))))
((and (eq c #\.) (eq lexeme-type 'integer)) (scan-token (cdr chars) (concatenate 'string current-lexeme (string c)) 'float))
((or (eq c #\NewLine ) (eq c #\Space)) (scan-token (cdr chars)))
((eq c #\" ) (scan-token (cdr chars) "" 'string))
((eq c #\( ) (values (cdr chars) (create-left-paren (string c))))
((eq c #\) ) (values (cdr chars) (create-right-paren (string c))))
((eq c #\{ ) (values (cdr chars) (create-left-brace (string c))))
((eq c #\} ) (values (cdr chars) (create-right-brace (string c))))
((eq c #\, ) (values (cdr chars) (create-comma (string c))))
((eq c #\. ) (values (cdr chars) (create-dot (string c))))
((eq c #\- ) (values (cdr chars) (create-minus (string c))))
((eq c #\+ ) (values (cdr chars) (create-plus (string c))))
((eq c #\; ) (values (cdr chars) (create-semi-colon (string c))))
((eq c #\* ) (values (cdr chars) (create-star (string c))))
((eq c #\! ) (if (eq (cadr chars) #\=)
(values (cddr chars) (create-bang-equal "!="))
(values (cdr chars) (create-bang (string c)))))
((eq c #\= ) (if (eq (cadr chars) #\=)
(values (cddr chars) (create-equal-equal "=="))
(values (cdr chars) (create-equal (string c)))))
((eq c #\< ) (if (eq (cadr chars) #\=)
(values (cddr chars) (create-less-equal "<="))
(values (cdr chars) (create-less (string c)))))
((eq c #\> ) (if (eq (cadr chars) #\=)
(values (cddr chars) (create-greater-equal ">="))
(values (cdr chars) (create-greater (string c)))))
((eq c #\/ ) (if (eq (cadr chars) #\/)
(scan-token (cddr chars) nil 'comment)
(values (cdr chars) (create-slash (string c)))))
(t (error "Unexpected character ~C" c)))))
(let ((pack (find-package :lexer)))
(do-all-symbols (sym pack) (when (eql (symbol-package sym) pack) (export sym))))