Skip to content

Commit be2b356

Browse files
authored
C: Inline hot-path lexer peek helpers (#1319)
This pull request moves frequently called lexer peek functions to `static inline` functions in the header. The cold-path functions remain in the `lexer_peek_helpers.c` file.
1 parent feba674 commit be2b356

File tree

2 files changed

+97
-95
lines changed

2 files changed

+97
-95
lines changed

src/include/lexer_peek_helpers.h

Lines changed: 87 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
#define HERB_LEXER_PEEK_HELPERS_H
33

44
#include "lexer_struct.h"
5+
#include "macros.h"
56
#include "token_struct.h"
7+
#include "util/hb_string.h"
68

9+
#include <ctype.h>
710
#include <stdbool.h>
811
#include <stdint.h>
912
#include <stdio.h>
@@ -20,29 +23,98 @@ typedef struct {
2023
lexer_state_T state;
2124
} lexer_state_snapshot_T;
2225

23-
char lexer_peek(const lexer_T* lexer, uint32_t offset);
2426
bool lexer_peek_for_doctype(const lexer_T* lexer, uint32_t offset);
2527
bool lexer_peek_for_xml_declaration(const lexer_T* lexer, uint32_t offset);
2628
bool lexer_peek_for_cdata_start(const lexer_T* lexer, uint32_t offset);
2729
bool lexer_peek_for_cdata_end(const lexer_T* lexer, uint32_t offset);
28-
2930
bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset);
30-
bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset);
31-
bool lexer_peek_for_html_comment_invalid_end(const lexer_T* lexer, uint32_t offset);
31+
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type);
32+
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset);
3233

33-
bool lexer_peek_erb_start(const lexer_T* lexer, uint32_t offset);
34-
bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset);
35-
bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset);
36-
bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset);
37-
bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset);
38-
bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset);
34+
static inline char lexer_peek(const lexer_T* lexer, uint32_t offset) {
35+
return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
36+
}
3937

40-
char lexer_backtrack(const lexer_T* lexer, uint32_t offset);
38+
static inline char lexer_backtrack(const lexer_T* lexer, uint32_t offset) {
39+
return lexer->source.data[MAX(lexer->current_position - offset, 0)];
40+
}
4141

42-
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type);
43-
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset);
42+
static inline bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset) {
43+
uint32_t position = lexer->current_position + offset;
44+
45+
return position + 2 < lexer->source.length && lexer->source.data[position] == '-'
46+
&& lexer->source.data[position + 1] == '-' && lexer->source.data[position + 2] == '>';
47+
}
48+
49+
static inline bool lexer_peek_for_html_comment_invalid_end(const lexer_T* lexer, uint32_t offset) {
50+
uint32_t position = lexer->current_position + offset;
51+
52+
return position + 3 < lexer->source.length && lexer->source.data[position] == '-'
53+
&& lexer->source.data[position + 1] == '-' && lexer->source.data[position + 2] == '!'
54+
&& lexer->source.data[position + 3] == '>';
55+
}
56+
57+
static inline bool lexer_peek_erb_start(const lexer_T* lexer, uint32_t offset) {
58+
uint32_t position = lexer->current_position + offset;
59+
60+
return position + 1 < lexer->source.length && lexer->source.data[position] == '<'
61+
&& lexer->source.data[position + 1] == '%';
62+
}
63+
64+
static inline bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
65+
uint32_t position = lexer->current_position + offset;
66+
67+
return position + 1 < lexer->source.length && lexer->source.data[position] == '%'
68+
&& lexer->source.data[position + 1] == '>';
69+
}
70+
71+
static inline bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset) {
72+
uint32_t position = lexer->current_position + offset;
73+
74+
return position + 2 < lexer->source.length && lexer->source.data[position] == '-'
75+
&& lexer->source.data[position + 1] == '%' && lexer->source.data[position + 2] == '>';
76+
}
77+
78+
static inline bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset) {
79+
uint32_t position = lexer->current_position + offset;
80+
81+
return position + 2 < lexer->source.length && lexer->source.data[position] == '%'
82+
&& lexer->source.data[position + 1] == '%' && lexer->source.data[position + 2] == '>';
83+
}
84+
85+
static inline bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset) {
86+
uint32_t position = lexer->current_position + offset;
87+
88+
return position + 2 < lexer->source.length && lexer->source.data[position] == '='
89+
&& lexer->source.data[position + 1] == '%' && lexer->source.data[position + 2] == '>';
90+
}
91+
92+
// Reports whether any of the ERB closing sequences recognised by the
// close-tag helpers ("%>", "-%>", "%%>", "=%>") begins `offset` bytes past the
// lexer's current position. The helpers are tried in that order and the first
// match ends the search.
static inline bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) {
  if (lexer_peek_erb_close_tag(lexer, offset)) { return true; }
  if (lexer_peek_erb_dash_close_tag(lexer, offset)) { return true; }
  if (lexer_peek_erb_percent_close_tag(lexer, offset)) { return true; }

  return lexer_peek_erb_equals_close_tag(lexer, offset);
}
96+
97+
// Captures the lexer's current and previous position/line/column, its current
// character, and its state into a snapshot value returned to the caller.
// The lexer itself is not modified.
static inline lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
  return (lexer_state_snapshot_T) {
    .position = lexer->current_position,
    .line = lexer->current_line,
    .column = lexer->current_column,
    .previous_position = lexer->previous_position,
    .previous_line = lexer->previous_line,
    .previous_column = lexer->previous_column,
    .current_character = lexer->current_character,
    .state = lexer->state,
  };
}
44108

45-
lexer_state_snapshot_T lexer_save_state(lexer_T* lexer);
46-
void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot);
109+
// Writes every field held in `snapshot` back into the lexer: the current and
// previous position/line/column, the current character, and the state.
static inline void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
  // Scanner state and the character under the cursor.
  lexer->state = snapshot.state;
  lexer->current_character = snapshot.current_character;

  // Location of the previous token boundary.
  lexer->previous_column = snapshot.previous_column;
  lexer->previous_line = snapshot.previous_line;
  lexer->previous_position = snapshot.previous_position;

  // Location of the cursor itself.
  lexer->current_column = snapshot.column;
  lexer->current_line = snapshot.line;
  lexer->current_position = snapshot.position;
}
47119

48120
#endif

src/lexer_peek_helpers.c

Lines changed: 10 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,10 @@
11
#include "include/lexer_peek_helpers.h"
22
#include "include/lexer.h"
3-
#include "include/lexer_struct.h"
4-
#include "include/macros.h"
53
#include "include/token.h"
6-
#include "include/util/hb_string.h"
74

85
#include <ctype.h>
9-
#include <stdbool.h>
106

11-
char lexer_backtrack(const lexer_T* lexer, uint32_t offset) {
12-
return lexer->source.data[MAX(lexer->current_position - offset, 0)];
13-
}
14-
15-
char lexer_peek(const lexer_T* lexer, uint32_t offset) {
16-
return lexer->source.data[MIN(lexer->current_position + offset, lexer->source.length)];
17-
}
18-
19-
bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, const bool case_insensitive) {
7+
static bool lexer_peek_for(const lexer_T* lexer, uint32_t offset, hb_string_T pattern, bool case_insensitive) {
208
hb_string_T remaining_source = hb_string_slice(lexer->source, lexer->current_position + offset);
219
remaining_source.length = MIN(pattern.length, remaining_source.length);
2210

@@ -47,39 +35,19 @@ bool lexer_peek_for_html_comment_start(const lexer_T* lexer, uint32_t offset) {
4735
return lexer_peek_for(lexer, offset, hb_string("<!--"), false);
4836
}
4937

50-
bool lexer_peek_for_html_comment_end(const lexer_T* lexer, uint32_t offset) {
51-
return lexer_peek_for(lexer, offset, hb_string("-->"), false);
52-
}
53-
54-
bool lexer_peek_for_html_comment_invalid_end(const lexer_T* lexer, uint32_t offset) {
55-
return lexer_peek_for(lexer, offset, hb_string("--!>"), false);
56-
}
57-
58-
bool lexer_peek_erb_close_tag(const lexer_T* lexer, uint32_t offset) {
59-
return lexer_peek_for(lexer, offset, hb_string("%>"), false);
60-
}
61-
62-
bool lexer_peek_erb_dash_close_tag(const lexer_T* lexer, uint32_t offset) {
63-
return lexer_peek_for(lexer, offset, hb_string("-%>"), false);
64-
}
38+
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
39+
if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
6540

66-
bool lexer_peek_erb_percent_close_tag(const lexer_T* lexer, uint32_t offset) {
67-
return lexer_peek_for(lexer, offset, hb_string("%%>"), false);
68-
}
41+
uint32_t position = offset + 2;
6942

70-
bool lexer_peek_erb_equals_close_tag(const lexer_T* lexer, uint32_t offset) {
71-
return lexer_peek_for(lexer, offset, hb_string("=%>"), false);
72-
}
43+
while (lexer_peek(lexer, position) == ' ' || lexer_peek(lexer, position) == '\t'
44+
|| lexer_peek(lexer, position) == '\n' || lexer_peek(lexer, position) == '\r') {
45+
position++;
46+
}
7347

74-
bool lexer_peek_erb_end(const lexer_T* lexer, uint32_t offset) {
75-
return (
76-
lexer_peek_erb_close_tag(lexer, offset) || lexer_peek_erb_dash_close_tag(lexer, offset)
77-
|| lexer_peek_erb_percent_close_tag(lexer, offset) || lexer_peek_erb_equals_close_tag(lexer, offset)
78-
);
79-
}
48+
char character = lexer_peek(lexer, position);
8049

81-
bool lexer_peek_erb_start(const lexer_T* lexer, uint32_t offset) {
82-
return lexer_peek_for(lexer, offset, hb_string("<%"), false);
50+
return isalpha(character) || character == '_';
8351
}
8452

8553
bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T token_type) {
@@ -108,41 +76,3 @@ bool lexer_peek_for_token_type_after_whitespace(lexer_T* lexer, token_type_T tok
10876

10977
return result;
11078
}
111-
112-
bool lexer_peek_for_close_tag_start(const lexer_T* lexer, uint32_t offset) {
113-
if (lexer_peek(lexer, offset) != '<' || lexer_peek(lexer, offset + 1) != '/') { return false; }
114-
115-
uint32_t pos = offset + 2;
116-
117-
while (lexer_peek(lexer, pos) == ' ' || lexer_peek(lexer, pos) == '\t' || lexer_peek(lexer, pos) == '\n'
118-
|| lexer_peek(lexer, pos) == '\r') {
119-
pos++;
120-
}
121-
122-
char c = lexer_peek(lexer, pos);
123-
124-
return isalpha(c) || c == '_';
125-
}
126-
127-
lexer_state_snapshot_T lexer_save_state(lexer_T* lexer) {
128-
lexer_state_snapshot_T snapshot = { .position = lexer->current_position,
129-
.line = lexer->current_line,
130-
.column = lexer->current_column,
131-
.previous_position = lexer->previous_position,
132-
.previous_line = lexer->previous_line,
133-
.previous_column = lexer->previous_column,
134-
.current_character = lexer->current_character,
135-
.state = lexer->state };
136-
return snapshot;
137-
}
138-
139-
void lexer_restore_state(lexer_T* lexer, lexer_state_snapshot_T snapshot) {
140-
lexer->current_position = snapshot.position;
141-
lexer->current_line = snapshot.line;
142-
lexer->current_column = snapshot.column;
143-
lexer->previous_position = snapshot.previous_position;
144-
lexer->previous_line = snapshot.previous_line;
145-
lexer->previous_column = snapshot.previous_column;
146-
lexer->current_character = snapshot.current_character;
147-
lexer->state = snapshot.state;
148-
}

0 commit comments

Comments
 (0)