From 685d4969fac4150499584d87420667999591b805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20K=C3=A4chele?= Date: Tue, 14 Oct 2025 22:26:55 +0200 Subject: [PATCH 1/2] Use hb_string_T in lexer peek helper --- src/include/lexer_peek_helpers.h | 28 ++++++++-------- src/lexer_peek_helpers.c | 57 ++++++++++++++++---------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/include/lexer_peek_helpers.h b/src/include/lexer_peek_helpers.h index c89ca2e08..9cc7e94c9 100644 --- a/src/include/lexer_peek_helpers.h +++ b/src/include/lexer_peek_helpers.h @@ -20,25 +20,25 @@ typedef struct { lexer_state_T state; } lexer_state_snapshot_T; -char lexer_peek(const lexer_T* lexer, int offset); -bool lexer_peek_for_doctype(const lexer_T* lexer, int offset); -bool lexer_peek_for_xml_declaration(const lexer_T* lexer, int offset); -bool lexer_peek_for_cdata_start(const lexer_T* lexer, int offset); -bool lexer_peek_for_cdata_end(const lexer_T* lexer, int offset); +char lexer_peek(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_for_doctype(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_for_xml_declaration(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_for_cdata_start(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_for_cdata_end(const lexer_T* lexer, uint32_t offset); -bool lexer_peek_for_html_comment_start(const lexer_T* lexer, int offset); -bool lexer_peek_for_html_comment_end(const lexer_T* lexer, int offset); +bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset); -bool lexer_peek_erb_close_tag(const lexer_T* lexer, int offset); -bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, int offset); -bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, int offset); -bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, int offset); -bool lexer_peek_erb_end(const lexer_T* lexer, int offset); +bool 
lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset); +bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset); -char lexer_backtrack(const lexer_T* lexer, int offset); +char lexer_backtrack(const lexer_T* lexer, uint32_t offset); bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type); -bool lexer_peek_for_close_tag_start(const lexer_T* lexer, int offset); +bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset); lexer_state_snapshot_T lexer_save_state(lexer_T* lexer); void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot); diff --git a/src/lexer_peek_helpers.c b/src/lexer_peek_helpers.c index 828d2f5fd..45ec78e99 100644 --- a/src/lexer_peek_helpers.c +++ b/src/lexer_peek_helpers.c @@ -3,73 +3,74 @@ #include "include/lexer_struct.h" #include "include/macros.h" #include "include/token.h" +#include "include/util/hb_string.h" #include #include -char lexer_backtrack(const lexer_T* lexer, const int offset) { +char lexer_backtrack(const lexer_T* lexer, uint32_t offset) { - return lexer->source.data[MAX(lexer->current_position - offset, 0)]; + return lexer->source.data[offset > lexer->current_position ? 0 : lexer->current_position - offset]; /* clamp before subtracting: an unsigned difference wraps, so MAX(..., 0) never applies */ } -char lexer_peek(const lexer_T* lexer, const int offset) { +char lexer_peek(const lexer_T* lexer, uint32_t offset) { return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)]; } -bool lexer_peek_for(const lexer_T* lexer, const int offset, const char* pattern, const bool case_insensitive) { - for (int index = 0; pattern[index]; index++) { +bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) { + for (uint32_t index = 0; index < pattern.length; index++) { const char character = lexer_peek(lexer, offset + index); if (case_insensitive) { 
- if (tolower(character) != tolower(pattern[index])) { return false; } + if (tolower(character) != tolower(pattern.data[index])) { return false; } } else { - if (character != pattern[index]) { return false; } + if (character != pattern.data[index]) { return false; } } } return true; } -bool lexer_peek_for_doctype(const lexer_T* lexer, const int offset) { - return lexer_peek_for(lexer, offset, "", false); +bool lexer_peek_for_cdata_end(const lexer_T* lexer, uint32_t offset) { + return lexer_peek_for(lexer, offset, hb_string_from_c_string("]]>"), false); } -bool lexer_peek_for_html_comment_start(const lexer_T* lexer, const int offset) { - return lexer_peek_for(lexer, offset, "", false); +bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset) { + return lexer_peek_for(lexer, offset, hb_string_from_c_string("-->"), false); } -bool lexer_peek_erb_close_tag(const lexer_T* lexer, const int offset) { - return lexer_peek_for(lexer, offset, "%>", false); +bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) { + return lexer_peek_for(lexer, offset, hb_string_from_c_string("%>"), false); } -bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, const int offset) { - return lexer_peek_for(lexer, offset, "-%>", false); +bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset) { + return lexer_peek_for(lexer, offset, hb_string_from_c_string("-%>"), false); } -bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, const int offset) { - return lexer_peek_for(lexer, offset, "%%>", false); +bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset) { + return lexer_peek_for(lexer, offset, hb_string_from_c_string("%%>"), false); } -bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, const int offset) { - return lexer_peek_for(lexer, offset, "=%>", false); +bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset) { + return lexer_peek_for(lexer, offset, 
hb_string_from_c_string("=%>"), false); } -bool lexer_peek_erb_end(const lexer_T* lexer, const int offset) { +bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) { return ( lexer_peek_erb_close_tag(lexer, offset) || lexer_peek_erb_dash_close_tag(lexer, offset) || lexer_peek_erb_percent_close_tag(lexer, offset) || lexer_peek_erb_equals_close_tag(lexer, offset) @@ -103,7 +104,7 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok return result; } -bool lexer_peek_for_close_tag_start(const lexer_T* lexer, const int offset) { +bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) { if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; } int pos = offset + 2; From 004212f769c3189ec91e4d583cc3ad72c905f7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20K=C3=A4chele?= Date: Fri, 17 Oct 2025 11:57:02 +0200 Subject: [PATCH 2/2] Refactor peek for method --- src/lexer_peek_helpers.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/lexer_peek_helpers.c b/src/lexer_peek_helpers.c index 45ec78e99..e5decf1d9 100644 --- a/src/lexer_peek_helpers.c +++ b/src/lexer_peek_helpers.c @@ -17,17 +17,14 @@ char lexer_peek(const lexer_T* lexer, uint32_t offset) { } bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) { - for (uint32_t index = 0; index < pattern.length; index++) { - const char character = lexer_peek(lexer, offset + index); - - if (case_insensitive) { - if (tolower(character) != tolower(pattern.data[index])) { return false; } - } else { - if (character != pattern.data[index]) { return false; } - } - } + hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position + offset); + remaining_source.length = MIN(pattern.length, remaining_source.length); - return true; + if (case_insensitive) { + return hb_string_equals_case_insensitive(remaining_source, pattern); + 
} else { + return hb_string_equals(remaining_source, pattern); + } } bool lexer_peek_for_doctype(const lexer_T* lexer, uint32_t offset) {