Skip to content

Commit 40e452b

Browse files
Implemented basic parser.
Now you can compile EXSES on linux-x86_64.
1 parent dacc860 commit 40e452b

File tree

6 files changed

+359
-17
lines changed

6 files changed

+359
-17
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ add_executable(
1919
src/Token.cpp
2020
src/Value.cpp
2121
src/Position.cpp
22+
src/Parser.cpp
2223
)
2324

2425
target_include_directories(exsi PUBLIC src)

src/Lexer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <string>
99
#include <vector>
1010

11+
#include <Parser.hpp>
1112
#include <Procedure.hpp>
1213
#include <Token.hpp>
1314
#include <Value.hpp>
@@ -253,6 +254,8 @@ void Lexer::run()
253254
}
254255
#endif
255256

257+
Parser parser(this->target, this->fileName, this->program);
258+
256259
// if (this->target == EXSI) {
257260
// this->intrepret();
258261
// } else {

src/Lexer.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
#include <Token.hpp>
1111

1212
// Output backends selectable for a program.
// Enumerator order is significant: the values are implicit (EXSI == 0, ...).
enum Target {
    EXSI, // Interpreter target
    NASM_LINUX_X86_64, // NASM assembly for Linux x86-64
    NASM_WIN32, // NOTE: Not supported
#ifdef SUPPORT_LLVM
    LLVM, // NOTE: Not supported
#endif
};
1520

1621
class Lexer {

src/Parser.cpp

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
#include <Parser.hpp>
2+
3+
#include <Lexer.hpp>
4+
5+
#include <format>
6+
#include <fstream>
7+
#include <iostream>
8+
#include <regex>
9+
10+
// Echoes a shell command to stdout, runs it through std::system, and
// terminates the process with exit code 1 if the command does not report
// success.
//
// `command` is evaluated exactly once (it is bound to a local first), so it is
// safe to pass an expression with side effects or a costly temporary such as a
// std::format(...) call. The echo is flushed before the child process starts so
// that its output cannot appear ahead of the "COMMAND:" line.
#define CMD(command)                                                          \
    do {                                                                      \
        const std::string exsesCmdText_ = (command);                          \
        std::cout << "COMMAND: " << exsesCmdText_ << '\n' << std::flush;      \
        if (std::system(exsesCmdText_.c_str()) != 0) {                        \
            std::cout << "ERROR: Command failed: " << exsesCmdText_ << '\n';  \
            std::exit(1);                                                     \
        }                                                                     \
    } while (0)
15+
16+
void Parser::parse()
17+
{
18+
switch (this->target) {
19+
case EXSI:
20+
std::cout << "ERROR: Intrepreting is not supported!\n";
21+
std::exit(1);
22+
break;
23+
case NASM_LINUX_X86_64:
24+
this->compileToNasmLinux86_64();
25+
break;
26+
case NASM_WIN32:
27+
std::cout << "ERROR: Compiling to nasm for Win32 is not supported!\n";
28+
std::exit(1);
29+
break;
30+
#ifdef SUPPORT_LLVM
31+
case LLVM:
32+
std::cout << "ERROR: Compiling to LLVM IR is not supported!\n";
33+
std::exit(1);
34+
break;
35+
#endif
36+
}
37+
}
38+
39+
// Translates `program` into NASM assembly for Linux x86-64, writes it to disk,
// then assembles it with `nasm` and links it against libc with `ld`.
//
// File names are derived from `inputFileName`:
//   foo.xes  ->  foo.asm, foo.o, executable `foo`
//   foo      ->  foo.asm, foo.o, executable `foo.out`
// The second form exists so that an input without the `.xes` suffix is never
// overwritten by the generated assembly or by the linked executable.
//
// Every token becomes a label `addr_<ip>` followed by the instructions for that
// operation; operands live on the machine stack. Exits the process on
// unsupported tokens (STRING, STRING_PLUS), on tokens that must never reach
// code generation (UNDEFINED, TRUE, FALSE), and when the output file cannot be
// written.
void Parser::compileToNasmLinux86_64()
{
    // Strip a trailing ".xes" (only when something precedes it) to get the stem.
    const std::string sourceExtension(".xes");
    const bool hasSourceExtension = inputFileName.size() > sourceExtension.size()
        && inputFileName.compare(inputFileName.size() - sourceExtension.size(), sourceExtension.size(), sourceExtension) == 0;
    const std::string stem = hasSourceExtension
        ? inputFileName.substr(0, inputFileName.size() - sourceExtension.size())
        : inputFileName;

    const std::string outputFilepath = stem + ".asm";
    // `nasm -felf64 <stem>.asm` writes `<stem>.o` by default.
    const std::string objectFilepath = stem + ".o";
    const std::string executableFilepath = hasSourceExtension ? stem : stem + ".out";

    std::string output("BITS 64\n"
                       "extern printf\n"
                       "extern exit\n"
                       "global _start\n"
                       "section .text\n"
                       "_start:\n");

    std::cout << "INFO: Generating " << outputFilepath << '\n';

    // Emits the "addr_<ip>: ;; <position>: <NAME>" line that starts every operation.
    const auto emitLabel = [&](const Token& tok, const char* opName) {
        output.append(std::format("addr_{}: ;; {}: {}\n", ip, tok.pos.toString(), opName));
    };

    // Emits "pop rax; pop rbx; <mnemonic> rbx, rax; push rbx", i.e. the result
    // is `second <op> top`.
    const auto emitBinary = [&](const char* mnemonic) {
        output.append(" pop rax\n");
        output.append(" pop rbx\n");
        output.append(std::format(" {} rbx, rax\n", mnemonic));
        output.append(" push rbx\n");
    };

    // Emits a comparison that pushes 1 when the condition selected by `cmov`
    // holds for `cmp rax, rbx` (rax = top of stack, rbx = second) and 0 otherwise.
    // NOTE(review): the flags therefore describe `top <cond> second`, whereas
    // MINUS computes `second - top` — confirm this operand order is intended.
    const auto emitCompare = [&](const char* cmov) {
        output.append(" mov rcx, 0\n");
        output.append(" mov rdx, 1\n");
        output.append(" pop rax\n");
        output.append(" pop rbx\n");
        output.append(" cmp rax, rbx\n");
        output.append(std::format(" {} rcx, rdx\n", cmov));
        output.append(" push rcx\n");
    };

    // `ip` is a member; start from the first token explicitly instead of
    // relying on whatever value it held before.
    for (ip = 0; ip < this->program.size(); ip++) {
        const Token& token = this->program[ip];

        switch (token.type) {
        case PUSH:
            output.append(std::format("addr_{}: ;; {}: PUSH {}\n", ip, token.pos.toString(), std::stoi(token.value.text)));
            output.append(std::format(" push {}\n", token.value.text));
            break;
        case STRING:
        case STRING_PLUS:
            std::cout << "Strings is not implemented in nasm-linux-x86_64!\n";
            std::exit(1);
        case DUP:
            emitLabel(token, "DUP");
            output.append(" pop rax\n");
            output.append(" push rax\n");
            output.append(" push rax\n");
            break;
        case OVER:
            emitLabel(token, "OVER");
            output.append(" pop rax\n");
            output.append(" pop rbx\n");
            output.append(" push rbx\n");
            output.append(" push rax\n");
            output.append(" push rbx\n");
            break;
        case DROP:
            emitLabel(token, "DROP");
            output.append(" pop rax\n");
            break;
        case SWAP:
            emitLabel(token, "SWAP");
            output.append(" pop rax\n");
            output.append(" pop rbx\n");
            output.append(" push rax\n");
            output.append(" push rbx\n");
            break;
        case PLUS:
            emitLabel(token, "PLUS");
            emitBinary("add");
            break;
        case MINUS:
            emitLabel(token, "MINUS");
            emitBinary("sub");
            break;
        case MULT:
            emitLabel(token, "MULT");
            output.append(" pop rax\n");
            output.append(" pop rbx\n");
            // One-operand `mul` multiplies rax by rbx; the low 64 bits land in rax.
            output.append(" mul rbx\n");
            output.append(" push rax\n");
            break;
        case DUMP:
            emitLabel(token, "DUMP");
            output.append(" pop rsi\n");
            output.append(" mov rdi, numPrintfFmt\n");
            output.append(" xor eax, eax\n");
            // The number of values on the stack is arbitrary here, so rsp may
            // be 8 bytes off the 16-byte alignment the System V ABI requires
            // at a call. Park rsp in callee-saved rbx and align for the call.
            output.append(" mov rbx, rsp\n");
            output.append(" and rsp, -16\n");
            output.append(" call printf\n");
            output.append(" mov rsp, rbx\n");
            break;
        // TODO: the operations below only emit their label; no instructions
        // are generated for them yet.
        case DIV:
            emitLabel(token, "DIV");
            break;
        case INPUT:
            emitLabel(token, "INPUT");
            break;
        case BIND:
            emitLabel(token, "BIND");
            break;
        case SAVE:
            emitLabel(token, "SAVE");
            break;
        case LOAD:
            emitLabel(token, "LOAD");
            break;
        case TERNARY:
            emitLabel(token, "TERNARY");
            break;
        case MAKEPROC:
            emitLabel(token, "MAKEPROC");
            break;
        case ENDPROC:
            emitLabel(token, "ENDPROC");
            break;
        case INVOKEPROC:
            emitLabel(token, "INVOKEPROC");
            break;
        case IF:
            emitLabel(token, "IF");
            break;
        case ENDIF:
            emitLabel(token, "ENDIF");
            break;
        case EQUAL:
            emitLabel(token, "EQUAL");
            emitCompare("cmove");
            break;
        case NOTEQUAL:
            emitLabel(token, "NOTEQUAL");
            emitCompare("cmovne");
            break;
        case LESS:
            emitLabel(token, "LESS");
            emitCompare("cmovl");
            break;
        case LESSEQUAL:
            emitLabel(token, "LESSEQUAL");
            emitCompare("cmovle");
            break;
        case GREATER:
            emitLabel(token, "GREATER");
            emitCompare("cmovg");
            break;
        case GREATEREQUAL:
            emitLabel(token, "GREATEREQUAL");
            emitCompare("cmovge");
            break;
        case LOR:
            emitLabel(token, "LOR");
            emitBinary("or");
            break;
        case LAND:
            emitLabel(token, "LAND");
            emitBinary("and");
            break;
        case LNOT:
            emitLabel(token, "LNOT");
            output.append(" pop rax\n");
            // NOTE(review): `not` flips every bit, so the 0/1 values pushed by
            // the comparisons become -1/-2 — confirm a bitwise NOT is intended.
            output.append(" not rax\n");
            output.append(" push rax\n");
            break;
        case UNDEFINED:
        case TRUE:
        case FALSE:
            // These token types have no code generation here; treat reaching
            // this point as an internal error and say where it happened.
            std::cout << std::format("ERROR: {}: Internal error: unexpected token reached code generation\n", token.pos.toString());
            std::exit(69);
        } // switch (token.type)
    } // for

    // Leave through libc's exit() rather than the raw exit syscall so that
    // output buffered by printf is flushed (it would be lost when stdout is
    // not a terminal). Align the stack first; exit() never returns.
    output.append(" xor edi, edi\n");
    output.append(" and rsp, -16\n");
    output.append(" call exit\n");

    output.append("section .data\n");
    {
        // TODO: Populate the .data
        // NOTE(review): '%u' prints a 32-bit unsigned value although rsi holds
        // a 64-bit stack cell — confirm the intended width and signedness.
        output.append(" numPrintfFmt: db '%u',0xA,0\n");
    }
    output.append("section .bss\n");
    {
        // TODO: Populate the .bss
    }

    {
        std::ofstream outputStream(outputFilepath, std::ios::out | std::ios::binary);
        if (!outputStream.is_open()) {
            std::cout << "No stream!\n";
            std::exit(2);
        }
        outputStream << output;
        // Close before invoking nasm so the file is complete on disk.
        outputStream.close();
        if (outputStream.fail()) {
            std::cout << "ERROR: Could not write " << outputFilepath << '\n';
            std::exit(2);
        }
    }

    // NOTE(review): paths are interpolated into the shell command unquoted, so
    // names containing spaces or shell metacharacters will not work.
    const std::string assembleCommand = std::format("nasm -felf64 {}", outputFilepath);
    const std::string linkCommand = std::format("ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o {} {} -lc", executableFilepath, objectFilepath);
    CMD(assembleCommand);
    CMD(linkCommand);
}

src/Parser.hpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#ifndef EXSES_PARSER_H
2+
#define EXSES_PARSER_H
3+
4+
#include <vector>
5+
6+
#include <Token.hpp>
7+
#include <Lexer.hpp>
8+
9+
class Parser {
10+
public:
11+
Parser(Target target, std::string inputFileName, std::vector<Token> program)
12+
: target(target)
13+
, inputFileName(std::move(inputFileName))
14+
, program(std::move(program))
15+
{
16+
this->parse();
17+
}
18+
Parser(const Parser&) = default;
19+
Parser(Parser&&) = default;
20+
Parser& operator=(const Parser&) = default;
21+
Parser& operator=(Parser&&) = default;
22+
Target target;
23+
24+
void parse();
25+
26+
private:
27+
std::string inputFileName;
28+
unsigned int ip;
29+
std::vector<Token> program;
30+
31+
void compileToNasmLinux86_64();
32+
};
33+
34+
#endif /* EXSES_PARSER_H */

0 commit comments

Comments
 (0)