From 6fc14d8c4415be9f739912490d24f16b943efbec Mon Sep 17 00:00:00 2001 From: LunaStev Date: Wed, 24 Dec 2025 21:23:55 +0900 Subject: [PATCH] refactor: reorganize expression parser with proper precedence hierarchy Restructure expression parsing to follow standard C operator precedence with dedicated functions for each precedence level. Changes: - Implement complete operator precedence hierarchy: 1. Assignment (=, +=, -=, *=, /=) 2. Logical OR (||) 3. Logical AND (&&) 4. Bitwise OR (|) 5. Bitwise XOR (^) 6. Bitwise AND (&) 7. Equality (==, !=) 8. Relational (<, <=, >, >=) 9. Shift (<<, >>) 10. Additive (+, -) 11. Multiplicative (*, /, %) 12. Unary (!, ~, &, deref) 13. Primary (literals, identifiers, function calls) - Add binary literal support in lexer: - Parse 0b prefix for binary numbers (0b1010, 0b0101) - Convert binary strings to i64 using from_str_radix - Format lexeme as "0b{binary_digits}" - Move shift operators (<<, >>) to character-level parsing: - Parse << and >> directly in lexer char matching - Remove from identifier-based keyword matching - Check for << before <= in '<' handler - Check for >> before >= in '>' handler - Refactor unary expression parsing: - Move all prefix operators to parse_unary_expression - Handle !, ~, &, deref in single dedicated function - Parse unary operators recursively (e.g., !!x, ~!x) - Fix logical operator code generation: - Add to_bool() helper for boolean coercion - Convert integer values to i1 before logical AND/OR - Handle i1 types without unnecessary conversions - Improve bitwise operator codegen: - Add missing BitwiseAnd, BitwiseOr, BitwiseXor implementations - Generate and, or, xor LLVM instructions - Properly handle operator in binary expression match - Fix shift operation type casting: - Cast shift amount to match shifted value type - Prevent type mismatch errors in build_left_shift/build_right_shift - Use build_int_cast for explicit type conversion - Enhance unary NOT operators: - LogicalNot (!): Compare with zero for 
multi-bit integers - BitwiseNot (~): Use LLVM's build_not instruction - Handle i1 boolean types specially in logical NOT - Add Operator::Not variant to AST for consistency - Add Expression::Grouped for parenthesized expressions - Simplify parser function signatures with std::iter::Peekable Benefits: - Correct operator precedence matching C/C++ standards - Clear separation of concerns in parsing logic - Easier to maintain and extend with new operators - Proper type handling in all binary/unary operations Example precedence: a + b * c // * before + a << 2 + 1 // + before << a & b == c // == before & !a || b && c // ! > && > || Signed-off-by: LunaStev --- front/lexer/src/lexer/lexer.rs | 43 +- front/parser/src/parser/ast.rs | 1 + front/parser/src/parser/format.rs | 402 ++++++++++-------- .../src/llvm_temporary/expression.rs | 103 +++-- 4 files changed, 331 insertions(+), 218 deletions(-) diff --git a/front/lexer/src/lexer/lexer.rs b/front/lexer/src/lexer/lexer.rs index 16846280..c201809e 100644 --- a/front/lexer/src/lexer/lexer.rs +++ b/front/lexer/src/lexer/lexer.rs @@ -306,7 +306,13 @@ impl<'a> Lexer<'a> { line: self.line, }, '<' => { - if self.match_next('=') { + if self.match_next('<') { + Token { + token_type: TokenType::Rol, + lexeme: "<<".to_string(), + line: self.line, + } + } else if self.match_next('=') { Token { token_type: TokenType::LchevrEq, lexeme: "<=".to_string(), @@ -321,7 +327,13 @@ impl<'a> Lexer<'a> { } } '>' => { - if self.match_next('=') { + if self.match_next('>') { + Token { + token_type: TokenType::Ror, + lexeme: ">>".to_string(), + line: self.line, + } + } else if self.match_next('=') { Token { token_type: TokenType::RchevrEq, lexeme: ">=".to_string(), @@ -586,16 +598,6 @@ impl<'a> Lexer<'a> { lexeme: "asm".to_string(), line: self.line, }, - "<<" => Token { - token_type: TokenType::Rol, - lexeme: "<<".to_string(), - line: self.line, - }, - ">>" => Token { - token_type: TokenType::Ror, - lexeme: ">>".to_string(), - line: self.line, - }, "xnand" 
=> Token { token_type: TokenType::Xnand, lexeme: "xnand".to_string(), @@ -784,6 +786,23 @@ impl<'a> Lexer<'a> { } } '0'..='9' => { + if c == '0' && (self.peek() == 'b' || self.peek() == 'B') { + self.advance(); // consume 'b' or 'B' + + let mut bin_str = String::new(); + while self.peek() == '0' || self.peek() == '1' { + bin_str.push(self.advance()); + } + + let value = i64::from_str_radix(&bin_str, 2).unwrap_or(0); + + return Token { + token_type: TokenType::Number(value), + lexeme: format!("0b{}", bin_str), + line: self.line, + } + } + if c == '0' && (self.peek() == 'x' || self.peek() == 'X') { self.advance(); // consume 'x' or 'X' diff --git a/front/parser/src/parser/ast.rs b/front/parser/src/parser/ast.rs index 71b34a30..fc687a80 100644 --- a/front/parser/src/parser/ast.rs +++ b/front/parser/src/parser/ast.rs @@ -161,6 +161,7 @@ pub enum Operator { BitwiseXor, LogicalNot, BitwiseNot, + Not, } #[derive(Debug, Clone)] diff --git a/front/parser/src/parser/format.rs b/front/parser/src/parser/format.rs index 8fcc665d..36891217 100644 --- a/front/parser/src/parser/format.rs +++ b/front/parser/src/parser/format.rs @@ -33,64 +33,18 @@ pub fn parse_format_string(s: &str) -> Vec { parts } -pub fn parse_expression<'a, T>(tokens: &mut Peekable) -> Option +pub fn parse_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option where T: Iterator, { - if let Some(Token { - token_type: TokenType::Not, - .. - }) = tokens.peek() - { - tokens.next(); - let inner = parse_expression(tokens)?; - return Some(Expression::Unary { - operator: Operator::LogicalNot, - expr: Box::new(inner), - }); - } - - if let Some(Token { - token_type: TokenType::BitwiseNot, - .. - }) = tokens.peek() - { - tokens.next(); - let inner = parse_expression(tokens)?; - return Some(Expression::Unary { - operator: Operator::BitwiseNot, - expr: Box::new(inner), - }); - } - - if let Some(Token { - token_type: TokenType::AddressOf, - .. 
- }) = tokens.peek() - { - tokens.next(); // consume '&' - let inner = parse_expression(tokens)?; - return Some(Expression::AddressOf(Box::new(inner))); - } - - if let Some(Token { - token_type: TokenType::Deref, - .. - }) = tokens.peek() - { - tokens.next(); // consume 'deref' - let inner = parse_expression(tokens)?; - return Some(Expression::Deref(Box::new(inner))); - } - let expr = parse_assignment_expression(tokens)?; - Some(expr) + parse_assignment_expression(tokens) } -pub fn parse_assignment_expression<'a, T>(tokens: &mut Peekable) -> Option +pub fn parse_assignment_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option where T: Iterator, { - let left = parse_logical_expression(tokens)?; + let left = parse_logical_or_expression(tokens)?; if let Some(token) = tokens.peek() { let op = match token.token_type { @@ -103,9 +57,9 @@ where _ => return Some(left), }; - tokens.next(); // consume +=, -= + tokens.next(); // consume op - let right = parse_logical_expression(tokens)?; + let right = parse_assignment_expression(tokens)?; return Some(Expression::AssignOperation { target: Box::new(left), operator: op, @@ -116,130 +70,262 @@ where Some(left) } -pub fn parse_logical_expression<'a, T>(tokens: &mut Peekable) -> Option +fn parse_logical_or_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + let mut left = parse_logical_and_expression(tokens)?; + + while matches!(tokens.peek().map(|t| &t.token_type), Some(TokenType::LogicalOr)) { + tokens.next(); + let right = parse_logical_and_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: Operator::LogicalOr, + right: Box::new(right), + }; + } + + Some(left) +} + +fn parse_logical_and_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + let mut left = parse_bitwise_or_expression(tokens)?; + + while matches!(tokens.peek().map(|t| &t.token_type), Some(TokenType::LogicalAnd)) { + tokens.next(); + let 
right = parse_bitwise_or_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: Operator::LogicalAnd, + right: Box::new(right), + }; + } + + Some(left) +} + +fn parse_bitwise_or_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + let mut left = parse_bitwise_xor_expression(tokens)?; + + while matches!(tokens.peek().map(|t| &t.token_type), Some(TokenType::BitwiseOr)) { + tokens.next(); + let right = parse_bitwise_xor_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: Operator::BitwiseOr, + right: Box::new(right), + }; + } + + Some(left) +} + +fn parse_bitwise_xor_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + let mut left = parse_bitwise_and_expression(tokens)?; + + while matches!(tokens.peek().map(|t| &t.token_type), Some(TokenType::Xor)) { + tokens.next(); + let right = parse_bitwise_and_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: Operator::BitwiseXor, + right: Box::new(right), + }; + } + + Some(left) +} + +fn parse_bitwise_and_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option where T: Iterator, { - let mut left = parse_bitwise_expression(tokens)?; + let mut left = parse_equality_expression(tokens)?; + + while matches!(tokens.peek().map(|t| &t.token_type), Some(TokenType::AddressOf)) { + tokens.next(); + let right = parse_equality_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: Operator::BitwiseAnd, + right: Box::new(right), + }; + } + + Some(left) +} + +fn parse_equality_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + let mut left = parse_relational_expression(tokens)?; while let Some(token) = tokens.peek() { - match token.token_type { - TokenType::LogicalAnd | TokenType::LogicalOr => { - let op = match token.token_type { - TokenType::LogicalAnd 
=> Operator::LogicalAnd, - TokenType::LogicalOr => Operator::LogicalOr, - _ => unreachable!(), - }; - tokens.next(); + let op = match token.token_type { + TokenType::EqualTwo => Operator::Equal, + TokenType::NotEqual => Operator::NotEqual, + _ => break, + }; + tokens.next(); + let right = parse_relational_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: op, + right: Box::new(right), + }; + } - let right = parse_relational_expression(tokens)?; - left = Expression::BinaryExpression { - left: Box::new(left), - operator: op, - right: Box::new(right), - }; - } + Some(left) +} + +pub fn parse_relational_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + let mut left = parse_shift_expression(tokens)?; + + while let Some(token) = tokens.peek() { + let op = match token.token_type { + TokenType::Rchevr => Operator::Greater, + TokenType::RchevrEq => Operator::GreaterEqual, + TokenType::Lchevr => Operator::Less, + TokenType::LchevrEq => Operator::LessEqual, _ => break, - } + }; + tokens.next(); + let right = parse_shift_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: op, + right: Box::new(right), + }; } + Some(left) } -pub fn parse_relational_expression<'a, T>(tokens: &mut Peekable) -> Option +pub fn parse_shift_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option where T: Iterator, { let mut left = parse_additive_expression(tokens)?; while let Some(token) = tokens.peek() { - match token.token_type { - TokenType::EqualTwo - | TokenType::NotEqual - | TokenType::Rchevr - | TokenType::RchevrEq - | TokenType::Lchevr - | TokenType::LchevrEq => { - let op = match token.token_type { - TokenType::EqualTwo => Operator::Equal, - TokenType::NotEqual => Operator::NotEqual, - TokenType::Rchevr => Operator::Greater, - TokenType::RchevrEq => Operator::GreaterEqual, - TokenType::Lchevr => Operator::Less, - TokenType::LchevrEq => Operator::LessEqual, - 
_ => unreachable!(), - }; - tokens.next(); - - let right = parse_additive_expression(tokens)?; - left = Expression::BinaryExpression { - left: Box::new(left), - operator: op, - right: Box::new(right), - }; - } + let op = match token.token_type { + TokenType::Rol => Operator::ShiftLeft, + TokenType::Ror => Operator::ShiftRight, _ => break, - } + }; + + tokens.next(); + let right = parse_additive_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: op, + right: Box::new(right), + }; } + Some(left) } -pub fn parse_additive_expression<'a, T>(tokens: &mut Peekable) -> Option +pub fn parse_additive_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option where T: Iterator, { let mut left = parse_multiplicative_expression(tokens)?; while let Some(token) = tokens.peek() { - match token.token_type { - TokenType::Plus | TokenType::Minus => { - let op = match token.token_type { - TokenType::Plus => Operator::Add, - TokenType::Minus => Operator::Subtract, - _ => unreachable!(), - }; - tokens.next(); - - let right = parse_multiplicative_expression(tokens)?; - left = Expression::BinaryExpression { - left: Box::new(left), - operator: op, - right: Box::new(right), - }; - } + let op = match token.token_type { + TokenType::Plus => Operator::Add, + TokenType::Minus => Operator::Subtract, _ => break, - } + }; + tokens.next(); + let right = parse_multiplicative_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: op, + right: Box::new(right), + }; } + Some(left) } -pub fn parse_multiplicative_expression<'a, T>(tokens: &mut Peekable) -> Option +pub fn parse_multiplicative_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option where T: Iterator, { - let mut left = parse_primary_expression(tokens)?; + let mut left = parse_unary_expression(tokens)?; while let Some(token) = tokens.peek() { + let op = match token.token_type { + TokenType::Star => Operator::Multiply, + TokenType::Div => 
Operator::Divide, + TokenType::Remainder => Operator::Remainder, + _ => break, + }; + tokens.next(); + let right = parse_unary_expression(tokens)?; + left = Expression::BinaryExpression { + left: Box::new(left), + operator: op, + right: Box::new(right), + }; + } + + Some(left) +} + +fn parse_unary_expression<'a, T>(tokens: &mut std::iter::Peekable) -> Option +where + T: Iterator, +{ + if let Some(token) = tokens.peek() { match token.token_type { - TokenType::Star | TokenType::Div | TokenType::Remainder => { - let op = match token.token_type { - TokenType::Star => Operator::Multiply, - TokenType::Div => Operator::Divide, - TokenType::Remainder => Operator::Remainder, - _ => unreachable!(), - }; + TokenType::Not => { tokens.next(); - - let right = parse_primary_expression(tokens)?; - left = Expression::BinaryExpression { - left: Box::new(left), - operator: op, - right: Box::new(right), - }; + let inner = parse_unary_expression(tokens)?; + return Some(Expression::Unary { + operator: Operator::Not, + expr: Box::new(inner), + }); } - _ => break, + TokenType::BitwiseNot => { + tokens.next(); + let inner = parse_unary_expression(tokens)?; + return Some(Expression::Unary { + operator: Operator::BitwiseNot, + expr: Box::new(inner), + }); + } + TokenType::AddressOf => { + tokens.next(); + let inner = parse_unary_expression(tokens)?; + return Some(Expression::AddressOf(Box::new(inner))); + } + TokenType::Deref => { + tokens.next(); + let inner = parse_unary_expression(tokens)?; + return Some(Expression::Deref(Box::new(inner))); + } + _ => {} } } - Some(left) + + parse_primary_expression(tokens) } pub fn parse_primary_expression<'a, T>(tokens: &mut Peekable) -> Option @@ -756,52 +842,4 @@ where None } } -} - -pub fn parse_shift_expression<'a, T>(tokens: &mut Peekable) -> Option -where - T: Iterator, -{ - let mut left = parse_relational_expression(tokens)?; - - while let Some(token) = tokens.peek() { - let op = match token.token_type { - TokenType::Rol => Operator::ShiftLeft, - 
TokenType::Ror => Operator::ShiftRight, - _ => break, - }; - - tokens.next(); - let right = parse_relational_expression(tokens)?; - left = Expression::BinaryExpression { - left: Box::new(left), - operator: op, - right: Box::new(right), - }; - } - Some(left) -} - -pub fn parse_bitwise_expression<'a, T>(tokens: &mut Peekable) -> Option -where - T: Iterator, -{ - let mut left = parse_shift_expression(tokens)?; - - while let Some(token) = tokens.peek() { - let op = match token.token_type { - TokenType::BitwiseOr => Operator::BitwiseOr, - TokenType::Xor => Operator::BitwiseXor, - _ => break, - }; - - tokens.next(); - let right = parse_shift_expression(tokens)?; - left = Expression::BinaryExpression { - left: Box::new(left), - operator: op, - right: Box::new(right), - }; - } - Some(left) -} +} \ No newline at end of file diff --git a/llvm_temporary/src/llvm_temporary/expression.rs b/llvm_temporary/src/llvm_temporary/expression.rs index ab13a665..182145cf 100644 --- a/llvm_temporary/src/llvm_temporary/expression.rs +++ b/llvm_temporary/src/llvm_temporary/expression.rs @@ -1,10 +1,11 @@ use crate::llvm_temporary::llvm_codegen::{generate_address_ir, VariableInfo}; use inkwell::context::Context; use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, StructType}; -use inkwell::values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum}; +use inkwell::values::{BasicMetadataValueEnum, BasicValue, BasicValueEnum, IntValue}; use inkwell::{FloatPredicate, IntPredicate}; use parser::ast::{ASTNode, AssignOperator, Expression, Literal, Operator, WaveType}; use std::collections::HashMap; +use inkwell::builder::Builder; pub struct ProtoInfo<'ctx> { pub vtable_ty: StructType<'ctx>, @@ -12,6 +13,18 @@ pub struct ProtoInfo<'ctx> { pub methods: Vec, } +fn to_bool<'ctx>(builder: &Builder<'ctx>, v: IntValue<'ctx>) -> IntValue<'ctx> { + if v.get_type().get_bit_width() == 1 { + return v; + } + + let zero = v.get_type().const_zero(); + builder + .build_int_compare(IntPredicate::NE, v, 
zero, "tobool") + .unwrap() +} + + pub fn generate_expression_ir<'ctx>( context: &'ctx Context, builder: &'ctx inkwell::builder::Builder<'ctx>, @@ -556,16 +569,28 @@ pub fn generate_expression_ir<'ctx>( let l_type = l.get_type(); let r_type = r.get_type(); - let (l_casted, r_casted) = if l_type != r_type { - if l_type.get_bit_width() < r_type.get_bit_width() { - let new_l = builder.build_int_z_extend(l, r_type, "zext_l").unwrap(); - (new_l, r) - } else { - let new_r = builder.build_int_z_extend(r, l_type, "zext_r").unwrap(); - (l, new_r) + let (l_casted, r_casted) = match operator { + Operator::ShiftLeft | Operator::ShiftRight => { + let r2 = if r_type != l_type { + builder.build_int_cast(r, l_type, "shamt").unwrap() + } else { + r + }; + (l, r2) + } + _ => { + if l_type != r_type { + if l_type.get_bit_width() < r_type.get_bit_width() { + let new_l = builder.build_int_z_extend(l, r_type, "zext_l").unwrap(); + (new_l, r) + } else { + let new_r = builder.build_int_z_extend(r, l_type, "zext_r").unwrap(); + (l, new_r) + } + } else { + (l, r) + } } - } else { - (l, r) }; let mut result = match operator { @@ -582,6 +607,9 @@ pub fn generate_expression_ir<'ctx>( Operator::ShiftRight => { builder.build_right_shift(l_casted, r_casted, true, "shr") } + Operator::BitwiseAnd => builder.build_and(l_casted, r_casted, "andtmp"), + Operator::BitwiseOr => builder.build_or(l_casted, r_casted, "ortmp"), + Operator::BitwiseXor => builder.build_xor(l_casted, r_casted, "xortmp"), Operator::Greater => builder.build_int_compare( IntPredicate::SGT, l_casted, @@ -618,17 +646,24 @@ pub fn generate_expression_ir<'ctx>( r_casted, "cmptmp", ), + Operator::LogicalAnd => { + let lb = to_bool(builder, l_casted); + let rb = to_bool(builder, r_casted); + builder.build_and(lb, rb, "land") + } + Operator::LogicalOr => { + let lb = to_bool(builder, l_casted); + let rb = to_bool(builder, r_casted); + builder.build_or(lb, rb, "lor") + } _ => panic!("Unsupported binary operator"), } .unwrap(); if let 
Some(BasicTypeEnum::IntType(target_ty)) = expected_type { let result_ty = result.get_type(); - if result_ty != target_ty { - result = builder - .build_int_cast(result, target_ty, "cast_result") - .unwrap(); + result = builder.build_int_cast(result, target_ty, "cast_result").unwrap(); } } @@ -1042,18 +1077,24 @@ pub fn generate_expression_ir<'ctx>( match (operator, val) { // ! (logical not) - (Operator::LogicalNot, BasicValueEnum::IntValue(v)) => { - let one = v.get_type().const_int(1, false); - builder - .build_xor(v, one, "logical_not") - .unwrap() - .as_basic_value_enum() + (Operator::LogicalNot, BasicValueEnum::IntValue(iv)) + | (Operator::Not, BasicValueEnum::IntValue(iv)) => { + let bw = iv.get_type().get_bit_width(); + if bw == 1 { + builder.build_not(iv, "lnot").unwrap().as_basic_value_enum() + } else { + let zero = iv.get_type().const_zero(); + builder + .build_int_compare(IntPredicate::EQ, iv, zero, "lnot") + .unwrap() + .as_basic_value_enum() + } } - (Operator::BitwiseNot, BasicValueEnum::IntValue(v)) => builder - .build_not(v, "bitwise_not") - .unwrap() - .as_basic_value_enum(), + // ~ (bitwise not) + (Operator::BitwiseNot, BasicValueEnum::IntValue(iv)) => { + builder.build_not(iv, "bnot").unwrap().as_basic_value_enum() + } _ => panic!( "Unsupported unary operator {:?} for value {:?}", @@ -1062,6 +1103,20 @@ pub fn generate_expression_ir<'ctx>( } } + Expression::Grouped(inner) => { + generate_expression_ir( + context, + builder, + inner, + variables, + module, + expected_type, + global_consts, + struct_types, + struct_field_indices, + ) + } + _ => unimplemented!("Unsupported expression type"), } }