mathpy/mathpy_lex.py at master · sharifnasser/mathpy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#--------------------------------------
# Programming Language: mathpy.py
# Orientation: numerical calculations
# Author: Sharif Nasser Kadamani
# ID: A00820367
# This is the lexer, built with PLY's lex module (ply.lex)
#--------------------------------------
import ply.lex as lex

# Reserved words of the language, mapped to their token names.
# Every token name is simply the keyword in upper case.
reserved = {
	keyword: keyword.upper()
	for keyword in (
		'program', 'end', 'integer', 'real', 'subroutine',
		'if', 'then', 'else', 'do', 'exit',
		'read', 'print', 'or', 'and',
	)
}

# List of tokens: literals and identifiers, arithmetic operators,
# assignment and parentheses, comparison operators, punctuation.
tokens = [
	'ID', 'STRING', 'FLOAT', 'INT',
	'PLUS', 'MINUS', 'TIMES', 'DIVIDE',
	'ASSIGN', 'LPAREN', 'RPAREN',
	'EQ', 'NEQ', 'LT', 'LTE', 'GT', 'GTE',
	'DOT', 'COMMA', 'COLON', 'SEMICOLON', 'COMMENT',
]

# The reserved-word token names are appended after the basic tokens.
tokens.extend(reserved.values())

# Regular expressions for simple tokens
# Each t_<NAME> string below is the regex for the token <NAME> listed in
# `tokens`. Characters with a special regex meaning (+ * ( ) .) are escaped.
# NOTE(review): several patterns share a prefix (`=`/`==`, `/`/`/=`,
# `<`/`<=`, `>`/`>=`). PLY documents that string-defined rules are tried in
# order of decreasing regex length, which would let the two-character
# operators win over the one-character ones -- confirm for the PLY version
# in use.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_ASSIGN = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_EQ = r'=='
t_NEQ = r'/='
t_LT = r'<'
t_LTE = r'<='
t_GT = r'>'
t_GTE = r'>='
t_DOT = r'\.'
t_COMMA = r','
t_COLON = r':'
t_SEMICOLON = r';'

# Ignored characters (spaces and tabs)
# Newlines are not listed here; they are handled by the t_newline rule.
t_ignore = ' \t'

# Regular expression rules definition
def t_COMMENT(t):
	r'\!.*'
	# The raw string above is this rule's regular expression, not a
	# description: a '!' followed by whatever `.*` matches after it.
	# Returning None means the matched comment text yields no token.
	return None

def t_ID(t):
	r'[a-zA-Z][a-zA-Z0-9_]*'
	# The raw string above is this rule's regular expression: a letter
	# followed by letters, digits or underscores.
	# A lexeme found in the `reserved` table takes that keyword's token type;
	# anything else is a plain identifier.
	lexeme = t.value
	if lexeme in reserved:
		t.type = reserved[lexeme]
	else:
		t.type = 'ID'
	return t

def t_STRING(t):
	r'"[^"\n]*"'
	# The raw string above is this rule's regular expression, not a
	# description. It matches ONE double-quoted literal: an opening quote, any
	# characters other than a quote or a newline, and the closing quote.
	# The earlier greedy pattern `".*"` ran on to the LAST quote of the line,
	# so input such as  "a" + "b"  was lexed as a single STRING token.
	# The token value is returned unchanged, i.e. it still includes the
	# surrounding quotes.
	return t

def t_FLOAT(t):
	r'\d+\.\d+'
	# The raw string above is this rule's regular expression: digits, a dot,
	# then digits.
	# The matched text is replaced by its float value on the same token.
	literal = t.value
	t.value = float(literal)
	return t

def t_INT(t):
	r'\d+'
	# The raw string above is this rule's regular expression: one or more
	# digits.
	# The matched text is replaced by its int value on the same token.
	digits = t.value
	t.value = int(digits)
	return t

# Compute a token's column from the input text string and the token instance
def find_column(input, token):
	"""Return the 1-based column of ``token`` within its line of ``input``.

	``input`` is the text being lexed and ``token.lexpos`` is the token's
	offset into it. The column is the distance from the last newline found
	before that offset; with no earlier newline, ``rfind`` yields -1 and the
	column is simply ``lexpos + 1``.
	"""
	offset = token.lexpos
	previous_newline = input.rfind('\n', 0, offset)
	return offset - previous_newline

# Rule to track line numbers
def t_newline(t):
	r'\n+'
	# The raw string above is this rule's regular expression: a run of one or
	# more newlines.
	# The lexer's line counter advances by the number of newlines matched.
	# Nothing is returned, so no token is produced.
	lexer_state = t.lexer
	lexer_state.lineno = lexer_state.lineno + len(t.value)

# Error handling rule
def t_error(t):
	# t.value starts at the character that could not be matched; report that
	# single character, then skip past it through the token's lexer.
	offending = t.value[0]
	print("Illegal character '%s'" % offending)
	t.lexer.skip(1)

# Build the lexer
# lex.lex() runs at import time, so importing this module creates the
# module-level `lexer` object.
# NOTE(review): PLY is expected to collect `tokens` and the t_* rules from
# this module when lex.lex() is called without arguments -- confirm against
# the PLY documentation.
lexer = lex.lex()

# The triple-quoted string below is a bare expression statement, not a
# docstring: it keeps the interactive test loop disabled without deleting it.
"""
# Test the lexer
while True:
	# Give lexer some input
	lexer.input(input(''))
	# Tokenize
	while True:
		tok = lexer.token()
		if not tok:
			break	# No more input
		print(tok)
"""