Skip to content

Commit 7d7845a

Browse files
authored
Fixed lexing escaped regions (#72)
1 parent bd04ced commit 7d7845a

File tree

2 files changed

+128
-88
lines changed

2 files changed

+128
-88
lines changed

src/compiling/lexing/lexer.rs

Lines changed: 70 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,6 @@ impl Lexer {
104104
lex_state.word = String::new()
105105
}
106106

107-
/// Checks whether this is a nontokenizable region
108-
#[inline]
109-
fn is_tokenized_region(&self, reaction: &RegionReaction, lex_state: &mut LexState) -> bool {
110-
if let Some(region) = lex_state.region_handler.get_region() {
111-
region.tokenize && *reaction == RegionReaction::Pass
112-
} else {
113-
false
114-
}
115-
}
116-
117107
/// Pattern code for adding a symbol
118108
/// **[*]**
119109
#[inline]
@@ -191,9 +181,14 @@ impl Lexer {
191181

192182
// Reaction stores the reaction of the region handler
193183
// Have we just opened or closed some region?
194-
let reaction = lex_state
195-
.region_handler
196-
.handle_region(&lex_state.reader, lex_state.is_escaped);
184+
let reaction = if lex_state.is_escaped {
185+
RegionReaction::Pass
186+
} else {
187+
lex_state.region_handler.handle_region(&lex_state.reader)
188+
};
189+
190+
lex_state.is_escaped = !lex_state.is_escaped && letter == self.escape_symbol;
191+
197192
match reaction {
198193
// If the region has been opened
199194
// Finish the part that we have been parsing
@@ -236,7 +231,9 @@ impl Lexer {
236231
}
237232
}
238233
RegionReaction::Pass => {
239-
let is_tokenized_region = self.is_tokenized_region(&reaction, &mut lex_state);
234+
let region = lex_state.region_handler.get_region().unwrap();
235+
let is_tokenized_region = region.tokenize;
236+
240237
match lex_state.compound_handler.handle_compound(
241238
letter,
242239
&lex_state.reader,
@@ -247,15 +244,9 @@ impl Lexer {
247244
CompoundReaction::End => self.pattern_end(&mut lex_state, letter),
248245
CompoundReaction::Pass => {
249246
// Handle region scope
250-
if !self.is_tokenized_region(&reaction, &mut lex_state) {
251-
let region = lex_state.region_handler.get_region().unwrap();
252-
// Flip escaped key
253-
lex_state.is_escaped = (!lex_state.is_escaped
254-
&& letter == self.escape_symbol)
255-
.then(|| !lex_state.is_escaped)
256-
.unwrap_or(false);
247+
if !is_tokenized_region {
257248
// Handle singleline attribute
258-
if letter == '\n' && region.singleline {
249+
if region.singleline && letter == '\n' {
259250
let pos = lex_state.reader.get_position();
260251
return Err((
261252
LexerErrorType::Singleline,
@@ -509,4 +500,61 @@ mod test {
509500
}
510501
assert_eq!(expected, result);
511502
}
503+
504+
// Test whether a comment region is recognized inside a string (it should not be)
505+
#[test]
506+
fn test_lexer_tokenized_regions() {
507+
let symbols = vec!['/', '"', '{', '}'];
508+
let regions = reg![
509+
reg!(string as "String" => {
510+
begin: "\"",
511+
end: "\"",
512+
tokenize: true
513+
} => [
514+
reg!(str_interp as "string interpolation" => {
515+
begin: "{",
516+
end: "}",
517+
tokenize: true
518+
} ref global)
519+
]),
520+
reg!(comment as "Comment" => {
521+
begin: "//",
522+
end: "\n",
523+
allow_unclosed_region: true
524+
})
525+
];
526+
let compounds = vec![('/', '/')];
527+
let rules = Rules::new(symbols, compounds, regions);
528+
let lexer = super::Lexer::new(rules);
529+
530+
let text = r#""\{should not be interpolated // should not be a comment}""#;
531+
532+
let res = lexer.tokenize(&vec![text].join("\n"));
533+
assert!(res.is_ok());
534+
535+
let mut result = vec![];
536+
for lex in res.unwrap() {
537+
result.push((lex.word, lex.pos.0, lex.pos.1));
538+
}
539+
540+
let expected = vec![
541+
("\"", 1, 1),
542+
("\\", 1, 2),
543+
("{", 1, 3),
544+
("should", 1, 4),
545+
("not", 1, 11),
546+
("be", 1, 15),
547+
("interpolated", 1, 18),
548+
("//", 1, 31),
549+
("should", 1, 34),
550+
("not", 1, 41),
551+
("be", 1, 45),
552+
("a", 1, 48),
553+
("comment", 1, 50),
554+
("}", 1, 57),
555+
("\"", 1, 58),
556+
].iter().map(|(word, row, col)| (word.to_string(), *row, *col)).collect::<Vec<_>>();
557+
558+
assert_eq!(result, expected);
559+
}
512560
}

src/compiling/lexing/region_handler.rs

Lines changed: 58 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use crate::compiling_rules::{Region, Rules, RegionMap};
2-
use super::reader::Reader;
31
use super::reader::ReadMode;
2+
use super::reader::Reader;
3+
use crate::compiling_rules::{Region, RegionMap, Rules};
44

55
#[cfg(feature = "serde")]
66
use serde::{Serialize, Deserialize};
@@ -10,20 +10,20 @@ use serde::{Serialize, Deserialize};
1010
pub enum RegionReaction {
1111
Begin(bool),
1212
End(bool),
13-
Pass
13+
Pass,
1414
}
1515

1616
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1717
pub struct RegionHandler {
1818
region_stack: Vec<Region>,
19-
region_map: RegionMap
19+
region_map: RegionMap,
2020
}
2121

2222
impl RegionHandler {
2323
pub fn new(rules: &Rules) -> Self {
2424
RegionHandler {
2525
region_stack: vec![rules.region_tree.clone()],
26-
region_map: rules.region_tree.clone().generate_region_map()
26+
region_map: rules.region_tree.clone().generate_region_map(),
2727
}
2828
}
2929

@@ -35,7 +35,7 @@ impl RegionHandler {
3535
// Error if after code lexing
3636
// some region was left unclosed
3737
#[inline]
38-
pub fn is_region_closed(&self, reader: &Reader) -> Result<(),((usize, usize), Region)> {
38+
pub fn is_region_closed(&self, reader: &Reader) -> Result<(), ((usize, usize), Region)> {
3939
if let Some(region) = self.region_stack.last() {
4040
if !region.allow_unclosed_region {
4141
let pos = reader.get_position();
@@ -46,12 +46,12 @@ impl RegionHandler {
4646
}
4747

4848
// Check where we are in code and open / close some region if matched
49-
pub fn handle_region(&mut self, reader: &Reader, is_escaped: bool) -> RegionReaction {
49+
pub fn handle_region(&mut self, reader: &Reader) -> RegionReaction {
5050
// If we are not in the global scope
5151
if let Some(region) = self.get_region() {
5252
for interp_region in region.interp.iter() {
5353
// The region that got matched based on current code lexing state
54-
if let Some(mut begin_region) = self.match_region_by_begin(reader, is_escaped) {
54+
if let Some(mut begin_region) = self.match_region_by_begin(reader) {
5555
if begin_region.name == *interp_region.name {
5656
// Save the tokenize state here to preserve borrow rules
5757
let tokenize = begin_region.tokenize;
@@ -64,7 +64,7 @@ impl RegionHandler {
6464
// If success, then we want to do the replace
6565
Some(target_region) => {
6666
begin_region.interp = target_region.interp.clone();
67-
},
67+
}
6868
// If the lookup fails, the reference name is invalid
6969
None => {
7070
panic!("Could not find region with id '{}'", reference_name);
@@ -77,12 +77,12 @@ impl RegionHandler {
7777
}
7878
}
7979
// Let's check if we can close current region
80-
if let Some(end_region) = self.match_region_by_end(reader, is_escaped) {
80+
if let Some(end_region) = self.match_region_by_end(reader) {
8181
if end_region.name == region.name {
8282
// Save the tokenize state here to preserve borrow rules
8383
let tokenize = end_region.tokenize;
8484
self.region_stack.pop();
85-
return RegionReaction::End(tokenize)
85+
return RegionReaction::End(tokenize);
8686
}
8787
}
8888
}
@@ -97,48 +97,49 @@ impl RegionHandler {
9797
cb: impl Fn(&Region) -> &String,
9898
candidates: &[Region],
9999
read_mode: ReadMode,
100-
is_escaped: bool
101100
) -> Option<Region> {
102101
// Closure that checks whether a given Region matches the reader's current history (or future) text
103-
let predicate = |candidate: &Region| match reader.get_history_or_future(cb(candidate).len(), &read_mode) {
104-
Some(code_chunk) => !is_escaped && &code_chunk == cb(candidate),
105-
None => false
102+
let predicate = |candidate: &Region| match reader
103+
.get_history_or_future(cb(candidate).len(), &read_mode)
104+
{
105+
Some(code_chunk) => &code_chunk == cb(candidate),
106+
None => false,
106107
};
107108
self.get_region_by(predicate, candidates)
108109
}
109110

110111
#[inline]
111-
fn match_region_by_begin(&self, reader: &Reader, is_escaped: bool) -> Option<Region> {
112+
fn match_region_by_begin(&self, reader: &Reader) -> Option<Region> {
112113
let region = self.get_region().unwrap();
113114
self.match_region_by(
114115
reader,
115116
|candidate: &Region| &candidate.begin,
116117
&region.interp,
117118
ReadMode::Future,
118-
is_escaped
119119
)
120120
}
121121

122122
#[inline]
123-
fn match_region_by_end(&self, reader: &Reader, is_escaped: bool) -> Option<Region> {
123+
fn match_region_by_end(&self, reader: &Reader) -> Option<Region> {
124124
let region = self.get_region().unwrap();
125125
if !region.global {
126126
self.match_region_by(
127127
reader,
128128
|candidate: &Region| &candidate.end,
129129
&[region.clone()],
130130
ReadMode::History,
131-
is_escaped
132131
)
133-
} else { None }
132+
} else {
133+
None
134+
}
134135
}
135136

136137
// Target region is a region on which we want to search the interpolations
137138
#[inline]
138139
fn get_region_by(&self, cb: impl Fn(&Region) -> bool, candidates: &[Region]) -> Option<Region> {
139140
for region in candidates.iter() {
140141
if cb(region) {
141-
return Some(region.clone())
142+
return Some(region.clone());
142143
}
143144
}
144145
None
@@ -147,30 +148,21 @@ impl RegionHandler {
147148

148149
#[cfg(test)]
149150
mod test {
150-
use crate::reg;
151-
use crate::compiling_rules::Region;
152-
use super::{ RegionHandler, RegionReaction };
153151
use super::Reader;
152+
use super::{RegionHandler, RegionReaction};
153+
use crate::compiling_rules::Region;
154+
use crate::reg;
154155

155156
#[test]
156157
fn match_region() {
157-
let lines = vec![
158-
"begin",
159-
"\\begin",
160-
"end"
161-
];
162-
let expected = vec![
163-
(0, String::from("begin")),
164-
(15, String::from("end"))
165-
];
158+
let lines = vec!["begin", "\\begin", "end"];
159+
let expected = vec![(0, String::from("begin")), (15, String::from("end"))];
166160
let code = lines.join(" ");
167161
let mut reader = Reader::new(&code);
168-
let region = reg![
169-
reg!(module as "Module literal" => {
170-
begin: "begin",
171-
end: "end"
172-
})
173-
];
162+
let region = reg![reg!(module as "Module literal" => {
163+
begin: "begin",
164+
end: "end"
165+
})];
174166
let mut rh = RegionHandler {
175167
region_stack: vec![region.clone()],
176168
region_map: region.generate_region_map(),
@@ -180,58 +172,58 @@ mod test {
180172
let mut is_escaped = false;
181173
// Simulate matching regions
182174
while let Some(letter) = reader.next() {
183-
if let Some(begin) = rh.match_region_by_begin(&reader, is_escaped) {
175+
if is_escaped {
176+
is_escaped = !is_escaped && letter == '\\';
177+
continue;
178+
}
179+
180+
if let Some(begin) = rh.match_region_by_begin(&reader) {
184181
rh.region_stack.push(begin.clone());
185182
result.push((reader.get_index(), begin.begin));
186183
}
187-
if let Some(end) = rh.match_region_by_end(&reader, is_escaped) {
184+
if let Some(end) = rh.match_region_by_end(&reader) {
188185
result.push((reader.get_index(), end.end));
189186
}
190187
// Handle the escape key
191-
is_escaped = (!is_escaped && letter == '\\')
192-
.then(|| !is_escaped)
193-
.unwrap_or(false);
188+
is_escaped = !is_escaped && letter == '\\';
194189
}
195190
assert_eq!(expected, result);
196191
}
197192

198193
#[test]
199194
fn handle_region() {
200-
let lines = vec![
201-
"'My name is \\\\\\'{name}.\\\\'"
202-
];
203-
let expected = vec![
204-
0, 16, 21, 25
205-
];
195+
let lines = vec!["'My name is \\\\\\'{name}.\\\\'"];
196+
let expected = vec![0, 16, 21, 25];
206197
let code = lines.join("\n");
207-
let region = reg![
208-
reg!(string as "String literal" => {
209-
begin: "'",
210-
end: "'"
211-
} => [
212-
reg!(interp as "Interpolation" => {
213-
begin: "{",
214-
end: "}"
215-
})
216-
])
217-
];
198+
let region = reg![reg!(string as "String literal" => {
199+
begin: "'",
200+
end: "'"
201+
} => [
202+
reg!(interp as "Interpolation" => {
203+
begin: "{",
204+
end: "}"
205+
})
206+
])];
218207
let mut reader = Reader::new(&code);
219208
let mut rh = RegionHandler {
220209
region_stack: vec![region.clone()],
221-
region_map: region.generate_region_map()
210+
region_map: region.generate_region_map(),
222211
};
223212
let mut result = vec![];
224213
let mut is_escaped = false;
225214
// Simulate matching regions
226215
while let Some(letter) = reader.next() {
227-
let region_mutated = rh.handle_region(&reader, is_escaped);
216+
if is_escaped {
217+
is_escaped = !is_escaped && letter == '\\';
218+
continue;
219+
}
220+
221+
let region_mutated = rh.handle_region(&reader);
228222
if let RegionReaction::Begin(_) | RegionReaction::End(_) = region_mutated {
229223
result.push(reader.get_index());
230224
}
231225
// Handle the escape key
232-
is_escaped = (!is_escaped && letter == '\\')
233-
.then(|| !is_escaped)
234-
.unwrap_or(false);
226+
is_escaped = !is_escaped && letter == '\\';
235227
}
236228
assert_eq!(expected, result);
237229
}

0 commit comments

Comments
 (0)