Skip to content

Commit 97bec66

Browse files
committed
Moved to a bool-array event lookup instead of bitwise checks
1 parent 88254c0 commit 97bec66

File tree

4 files changed

+88
-51
lines changed

4 files changed

+88
-51
lines changed

lib/sax-wasm.wasm

43 Bytes
Binary file not shown.

src/sax/grapheme_iterator.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ impl GraphemeClusters<'_> {
132132
/// assert!(gc.take_until_ascii(&[b'!']).is_none());
133133
///
134134
/// // Handle a truncated (incomplete) multi-byte UTF-8 sequence at the end
135-
/// let bytes_with_surrogate = "🐉 hello, world!\xF0".as_bytes();
136-
/// let mut gc_with_surrogate = GraphemeClusters::new(bytes_with_surrogate);
135+
/// let bytes = "hello, world!🐉🐉🐉".as_bytes();
136+
/// let mut gc_with_surrogate = GraphemeClusters::new(&bytes[..14]);
137137
/// if let Some(result) = gc_with_surrogate.take_until_ascii(&[b'!']) {
138138
/// assert_eq!(result.0, "hello, world");
139139
/// assert_eq!(result.1, 0);

src/sax/parser.rs

+76-47
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use std::mem;
2+
use std::ops::Index;
3+
use std::ops::IndexMut;
24
use std::str;
35

46
use super::grapheme_iterator::GraphemeClusters;
@@ -51,7 +53,7 @@ pub trait EventHandler {
5153
/// * `end_pos` - The end position of the current parse.
5254
pub struct SAXParser<'a> {
5355
// Configuration and State
54-
pub events: u32,
56+
pub events: [bool; 10],
5557
state: State,
5658
brace_ct: u32,
5759
quote: u8,
@@ -129,7 +131,7 @@ impl<'a> SAXParser<'a> {
129131
pub fn new(event_handler: &'a dyn EventHandler) -> SAXParser<'a> {
130132
SAXParser {
131133
// Configuration and State
132-
events: 0,
134+
events: [false; 10],
133135
state: State::Begin,
134136
brace_ct: 0,
135137
quote: 0,
@@ -399,8 +401,8 @@ impl<'a> SAXParser<'a> {
399401
return;
400402
}
401403

402-
if self.events & Event::OpenTagStart as u32 != 0 {
403-
self.event_handler.handle_event(Event::OpenTagStart, Entity::Tag(&mut self.tag));
404+
if self.events[Event::OpenTagStart] {
405+
self.event_handler.handle_event(Event::OpenTagStart, Entity::Tag(&self.tag));
404406
}
405407
match current.0 {
406408
">" => self.process_open_tag(false, current),
@@ -439,11 +441,11 @@ impl<'a> SAXParser<'a> {
439441

440442
let mut text = mem::replace(&mut self.text, Text::new([line, character]));
441443
text.end = [line, character - 1];
442-
if self.events & Event::Text as u32 != 0 {
443-
self.event_handler.handle_event(Event::Text, Entity::Text(&mut text));
444+
if self.events[Event::Text] {
445+
self.event_handler.handle_event(Event::Text, Entity::Text(&text));
444446
}
445447
// Store these only if we're interested in CloseTag events
446-
if len != 0 && self.events & Event::CloseTag as u32 != 0 {
448+
if len != 0 && self.events[Event::CloseTag] {
447449
self.tags[len - 1].text_nodes.push(text);
448450
}
449451
}
@@ -481,10 +483,10 @@ impl<'a> SAXParser<'a> {
481483

482484
if current.0 == ">" {
483485
let mut sgml_decl = mem::replace(&mut self.sgml_decl, Text::new([0, 0]));
484-
if self.events & Event::SGMLDeclaration as u32 != 0 {
486+
if self.events[Event::SGMLDeclaration] {
485487
sgml_decl.value.extend_from_slice(current.0.as_bytes());
486488
sgml_decl.end = [current.1, current.2 - 1];
487-
self.event_handler.handle_event(Event::SGMLDeclaration, Entity::Text(&mut sgml_decl));
489+
self.event_handler.handle_event(Event::SGMLDeclaration, Entity::Text(&sgml_decl));
488490
}
489491

490492
self.new_text(current);
@@ -514,10 +516,10 @@ impl<'a> SAXParser<'a> {
514516
fn doctype(&mut self, current: &GraphemeResult) {
515517
if current.0 == ">" {
516518
self.new_text(current);
517-
if self.events & Event::Doctype as u32 != 0 {
519+
if self.events[Event::Doctype] {
518520
let mut doctype = mem::replace(&mut self.doctype, Text::new([0, 0]));
519521
doctype.end = [current.1, current.2 - 1];
520-
self.event_handler.handle_event(Event::Doctype, Entity::Text(&mut doctype));
522+
self.event_handler.handle_event(Event::Doctype, Entity::Text(&doctype));
521523
}
522524
return;
523525
}
@@ -568,7 +570,7 @@ impl<'a> SAXParser<'a> {
568570
if let Some(comment) = comment_result {
569571
comment_str = comment.0;
570572
}
571-
if self.events & Event::Comment as u32 != 0 {
573+
if self.events[Event::Comment] {
572574
self.comment.value.extend_from_slice(current.0.as_bytes());
573575
self.comment.value.extend_from_slice(comment_str.as_bytes());
574576
}
@@ -579,7 +581,7 @@ impl<'a> SAXParser<'a> {
579581
self.state = State::CommentEnded;
580582
return;
581583
}
582-
if self.events & Event::Comment as u32 != 0 {
584+
if self.events[Event::Comment] {
583585
self.comment.value.push(b'-');
584586
self.comment.value.extend_from_slice(current.0.as_bytes());
585587
}
@@ -588,15 +590,15 @@ impl<'a> SAXParser<'a> {
588590

589591
fn comment_ended(&mut self, current: &GraphemeResult) {
590592
if current.0 == ">" {
591-
if self.events & Event::Comment as u32 != 0 {
593+
if self.events[Event::Comment] {
592594
let mut comment = mem::replace(&mut self.comment, Text::new([0, 0]));
593595
comment.end = [current.1, current.2 - 1];
594-
self.event_handler.handle_event(Event::Comment, Entity::Text(&mut comment));
596+
self.event_handler.handle_event(Event::Comment, Entity::Text(&comment));
595597
}
596598
self.state = State::BeginWhitespace;
597599
return;
598600
}
599-
if self.events & Event::Comment as u32 != 0 {
601+
if self.events[Event::Comment] {
600602
self.comment.value.extend_from_slice("--".as_bytes());
601603
self.comment.value.extend_from_slice(current.0.as_bytes());
602604
}
@@ -627,10 +629,10 @@ impl<'a> SAXParser<'a> {
627629
fn cdata_ending_2(&mut self, current: &GraphemeResult) {
628630
if current.0 == ">" {
629631
self.new_text(current);
630-
if self.events & Event::Cdata as u32 != 0 {
632+
if self.events[Event::Cdata] {
631633
let mut cdata = mem::replace(&mut self.cdata, Text::new([0, 0]));
632634
cdata.end = [current.1, current.2 - 1];
633-
self.event_handler.handle_event(Event::Cdata, Entity::Text(&mut cdata));
635+
self.event_handler.handle_event(Event::Cdata, Entity::Text(&cdata));
634636
}
635637
return;
636638
} else if current.0 == "]" {
@@ -683,9 +685,9 @@ impl<'a> SAXParser<'a> {
683685
if current.0 == ">" {
684686
self.new_text(current);
685687
let mut proc_inst = mem::replace(&mut self.proc_inst, ProcInst::new());
686-
if self.events & Event::ProcessingInstruction as u32 != 0 {
688+
if self.events[Event::ProcessingInstruction] {
687689
proc_inst.end = [current.1, current.2];
688-
self.event_handler.handle_event(Event::ProcessingInstruction,Entity::ProcInst(&mut proc_inst));
690+
self.event_handler.handle_event(Event::ProcessingInstruction,Entity::ProcInst(&proc_inst));
689691
}
690692
return;
691693
}
@@ -852,13 +854,13 @@ impl<'a> SAXParser<'a> {
852854
}
853855

854856
fn process_attribute(&mut self) {
855-
let mut attr = mem::replace(&mut self.attribute, Attribute::new());
856-
let attribute_event = self.events & Event::Attribute as u32 != 0;
857+
let attr = mem::replace(&mut self.attribute, Attribute::new());
858+
let attribute_event = self.events[Event::Attribute];
857859
if attribute_event {
858-
self.event_handler.handle_event(Event::Attribute, Entity::Attribute(&mut attr));
860+
self.event_handler.handle_event(Event::Attribute, Entity::Attribute(&attr));
859861
}
860862
// Store them only if we're interested in Open and Close tag events
861-
if attribute_event || self.events & Event::CloseTag as u32 != 0 {
863+
if attribute_event || self.events[Event::CloseTag] {
862864
self.tag.attributes.push(attr);
863865
}
864866
}
@@ -868,8 +870,8 @@ impl<'a> SAXParser<'a> {
868870
tag.self_closing = self_closing;
869871
tag.open_end = [current.1, current.2];
870872

871-
if self.events & Event::OpenTag as u32 != 0 {
872-
self.event_handler.handle_event(Event::OpenTag, Entity::Tag(&mut tag));
873+
if self.events[Event::OpenTag] {
874+
self.event_handler.handle_event(Event::OpenTag, Entity::Tag(&tag));
873875
}
874876
if !self_closing {
875877
self.new_text(current);
@@ -881,7 +883,7 @@ impl<'a> SAXParser<'a> {
881883
self.new_text(current);
882884
let mut tags_len = self.tags.len();
883885

884-
let close_tag_name = if self.close_tag_name.is_empty() && self.tag.self_closing {
886+
let close_tag_name = if self.tag.self_closing && self.close_tag_name.is_empty() {
885887
&self.tag.name
886888
} else {
887889
&mem::take(&mut self.close_tag_name)
@@ -909,7 +911,7 @@ impl<'a> SAXParser<'a> {
909911
return;
910912
}
911913

912-
if self.events & Event::CloseTag as u32 == 0 {
914+
if !self.events[Event::CloseTag] {
913915
if tag_index > 1 {
914916
self.tags.truncate(tag_index);
915917
return;
@@ -925,7 +927,7 @@ impl<'a> SAXParser<'a> {
925927
let mut tag = self.tags.remove(tags_len);
926928
tag.close_end = [current.1, current.2];
927929

928-
self.event_handler.handle_event(Event::CloseTag, Entity::Tag(&mut tag));
930+
self.event_handler.handle_event(Event::CloseTag, Entity::Tag(&tag));
929931
self.tag = tag;
930932
}
931933
}
@@ -956,14 +958,14 @@ impl<'a> SAXParser<'a> {
956958
}
957959

958960
fn new_text(&mut self, current: &GraphemeResult) {
959-
if self.events & Event::Text as u32 != 0 || self.events & Event::CloseTag as u32 != 0 {
961+
if self.events[Event::Text] || self.events[Event::CloseTag] {
960962
self.text = Text::new([current.1, current.2]);
961963
}
962964
self.state = State::Text;
963965
}
964966

965967
fn write_text(&mut self, grapheme: &[u8]) {
966-
if self.events & Event::Text as u32 == 0 && self.events & Event::CloseTag as u32 == 0 {
968+
if !self.events[Event::Text] && !self.events[Event::CloseTag] {
967969
return;
968970
}
969971
self.text.value.extend_from_slice(grapheme);
@@ -973,25 +975,40 @@ impl<'a> SAXParser<'a> {
973975
#[derive(PartialEq, Clone, Copy)]
974976
pub enum Event {
975977
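// 1 (bit-flag value used at the wasm boundary, i.e. 1 << discriminant; the numeric comments on the variants below follow the same scheme)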
976-
Text = 0b0000000001,
978+
Text = 0,
977979
// 2
978-
ProcessingInstruction = 0b0000000010,
980+
ProcessingInstruction = 1,
979981
// 4
980-
SGMLDeclaration = 0b0000000100,
982+
SGMLDeclaration = 2,
981983
// 8
982-
Doctype = 0b0000001000,
984+
Doctype = 3,
983985
// 16
984-
Comment = 0b0000010000,
986+
Comment = 4,
985987
// 32
986-
OpenTagStart = 0b0000100000,
988+
OpenTagStart = 5,
987989
// 64
988-
Attribute = 0b0001000000,
990+
Attribute = 6,
989991
// 128
990-
OpenTag = 0b0010000000,
992+
OpenTag = 7,
991993
// 256
992-
CloseTag = 0b0100000000,
994+
CloseTag = 8,
993995
// 512
994-
Cdata = 0b1000000000,
996+
Cdata = 9,
997+
}
998+
999+
impl Index<Event> for [bool; 10] {
1000+
type Output = bool;
1001+
#[inline(always)]
1002+
fn index(&self, event: Event) -> &Self::Output {
1003+
unsafe { &self.get_unchecked(event as usize) }
1004+
}
1005+
}
1006+
1007+
impl IndexMut<Event> for [bool; 10] {
1008+
#[inline(always)]
1009+
fn index_mut(&mut self, event: Event) -> &mut Self::Output {
1010+
unsafe { self.get_unchecked_mut(event as usize) }
1011+
}
9951012
}
9961013

9971014
#[derive(PartialEq)]
@@ -1104,7 +1121,9 @@ mod tests {
11041121
fn stream_very_large_xml() -> Result<()> {
11051122
let event_handler = TextEventHandler::new();
11061123
let mut sax = SAXParser::new(&event_handler);
1107-
sax.events = Event::Text as u32;
1124+
let mut events = [false;10];
1125+
events[Event::Text] = true;
1126+
sax.events = events;
11081127
let f = File::open("src/js/__test__/xml.xml")?;
11091128
let mut reader = BufReader::new(f);
11101129
const BUFFER_LEN: usize = 32 * 1024;
@@ -1124,7 +1143,9 @@ mod tests {
11241143
fn test_comment() -> Result<()> {
11251144
let event_handler = TextEventHandler::new();
11261145
let mut sax = SAXParser::new(&event_handler);
1127-
sax.events = Event::Comment as u32;
1146+
let mut events = [false;10];
1147+
events[Event::Comment] = true;
1148+
sax.events = events;
11281149
let str = "<!--name='test 3 attr' some comment--> <-- name='test 3 attr' some comment -->";
11291150

11301151
sax.write(str.as_bytes());
@@ -1142,7 +1163,9 @@ mod tests {
11421163
fn test_4_bytes() -> Result<()> {
11431164
let event_handler = TextEventHandler::new();
11441165
let mut sax = SAXParser::new(&event_handler);
1145-
sax.events = Event::Text as u32;
1166+
let mut events = [false;10];
1167+
events[Event::Text] = true;
1168+
sax.events = events;
11461169
let str = "🏴📚📚🏴📚📚🏴📚📚🏴📚📚🏴📚📚🏴📚📚🏴📚📚🏴📚📚🏴📚📚🏴📚📚";
11471170
let bytes = str.as_bytes();
11481171
sax.write(&bytes[0..14]);
@@ -1161,7 +1184,9 @@ mod tests {
11611184
fn count_grapheme_length() -> Result<()> {
11621185
let event_handler = TextEventHandler::new();
11631186
let mut sax = SAXParser::new(&event_handler);
1164-
sax.events = Event::Text as u32;
1187+
let mut events = [false;10];
1188+
events[Event::Text] = true;
1189+
sax.events = events;
11651190
let str = "🏴📚📚<div href=\"./123/123\">hey there</div>";
11661191

11671192
sax.write(str.as_bytes());
@@ -1179,7 +1204,9 @@ mod tests {
11791204
fn parse_jsx_expression() -> Result<()> {
11801205
let event_handler = TextEventHandler::new();
11811206
let mut sax = SAXParser::new(&event_handler);
1182-
sax.events = Event::Text as u32;
1207+
let mut events = [false;10];
1208+
events[Event::Text] = true;
1209+
sax.events = events;
11831210
let str = "<foo>{bar < baz ? <div></div> : <></>}</foo>";
11841211

11851212
sax.write(str.as_bytes());
@@ -1202,7 +1229,9 @@ mod tests {
12021229
fn parse_empty_cdata() -> Result<()> {
12031230
let event_handler = TextEventHandler::new();
12041231
let mut sax = SAXParser::new(&event_handler);
1205-
sax.events = Event::Cdata as u32;
1232+
let mut events = [false;10];
1233+
events[Event::Cdata] = true;
1234+
sax.events = events;
12061235
let str = "<div>
12071236
<div>
12081237
<![CDATA[]]>

src/sax_wasm.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,26 @@ impl EventHandler for SaxEventHandler {
1717
#[inline(always)]
1818
fn handle_event(&self, event: Event, data: Entity) {
1919
let encoded_data = data.encode();
20-
unsafe { event_listener(event as u32, encoded_data.as_ptr(), encoded_data.len()) };
20+
unsafe { event_listener(1 << event as u32, encoded_data.as_ptr(), encoded_data.len()) };
2121
}
2222
}
2323

24+
fn generate_event_lookup(events: u32) -> [bool; 10] {
25+
let mut event_lookup = [false; 10];
26+
for i in 0..10 {
27+
event_lookup[i] = events & (1 << i) != 0;
28+
}
29+
event_lookup
30+
}
31+
2432
#[no_mangle]
2533
pub unsafe extern "C" fn parser(events: u32) {
2634
if SAX == 0 as *mut SAXParser {
2735
let event_handler = Box::leak(Box::new(SaxEventHandler::new()));
2836
let sax_parse = SAXParser::new(event_handler);
2937
SAX = mem::transmute(Box::new(sax_parse));
3038
}
31-
(*SAX).events = events;
39+
(*SAX).events = generate_event_lookup(events);
3240
}
3341

3442
#[no_mangle]

0 commit comments

Comments
 (0)