Skip to content

Commit 5d1c5b6

Browse files
committed
Used box to control availability of memory for dispatched events
1 parent 2aa87ce commit 5d1c5b6

14 files changed

+74
-49
lines changed

Diff for: lib/cjs/saxWasm.d.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ export declare class SAXParser {
420420
* })();
421421
* ```
422422
*/
423-
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]>;
423+
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Reader<Detail>]>;
424424
/**
425425
* Writes a chunk of data to the parser.
426426
*

Diff for: lib/cjs/saxWasm.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: lib/cjs/saxWasm.js.map

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: lib/esm/saxWasm.d.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ export declare class SAXParser {
420420
* })();
421421
* ```
422422
*/
423-
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]>;
423+
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Reader<Detail>]>;
424424
/**
425425
* Writes a chunk of data to the parser.
426426
*

Diff for: lib/esm/saxWasm.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: lib/esm/saxWasm.js.map

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: lib/sax-wasm.wasm

3.28 KB
Binary file not shown.

Diff for: lib/saxWasm.d.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ export declare class SAXParser {
420420
* })();
421421
* ```
422422
*/
423-
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]>;
423+
parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Reader<Detail>]>;
424424
/**
425425
* Writes a chunk of data to the parser.
426426
*

Diff for: src/js/__test__/jsx.spec.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ describe('When parsing JSX, the SaxWasm', () => {
3737
</Component>`));
3838

3939
deepStrictEqual(_event,SaxEventType.CloseTag);
40-
deepStrictEqual(_data[0].name,'User');
41-
deepStrictEqual(_data[1].name,'SignIn');
40+
deepStrictEqual(_data[0].name,'SignIn');
41+
deepStrictEqual(_data[1].name,'User');
4242
deepStrictEqual(_data[2].name,'Component');
4343
});
4444

Diff for: src/js/__test__/largeXML.spec.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ describe('When parsing XML, the SaxWasm', () => {
5858
const webReadable = Readable.toWeb(readable);
5959
const eventsFromGenerator: [SaxEventType, Detail][] = [];
6060
for await (const [event, detail] of parser.parse(webReadable.getReader())) {
61-
eventsFromGenerator.push([event, detail]);
61+
eventsFromGenerator.push([event, detail.toJSON()]);
6262
}
6363

6464
const eventsFromEventHandler: [SaxEventType, Detail][] = [];

Diff for: src/js/__test__/xml.xml

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
<test-tag id="test-id">testing this tag</test-tag>
1515
<div disabled="">
1616
testing
17+
this
18+
xml file
1719
</div>
1820
<?process-div instruction?>
1921
<army id="test_id" class="container emphasize">1682443131</army>

Diff for: src/js/saxWasm.ts

+3-3
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,10 @@ export class SAXParser {
579579
* })();
580580
* ```
581581
*/
582-
public async *parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Detail]> {
583-
let eventAggregator: [SaxEventType, Detail][] | null = [];
582+
public async *parse(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<[SaxEventType, Reader<Detail>]> {
583+
let eventAggregator: [SaxEventType, Reader<Detail>][] | null = [];
584584
this.eventHandler = function (event, detail) {
585-
eventAggregator.push([event, detail.toJSON()]);
585+
eventAggregator.push([event, detail]);
586586
};
587587

588588
while (true) {

Diff for: src/sax/parser.rs

+52-36
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ pub struct SAXParser<'a> {
7474

7575
// Event Handling
7676
event_handler: &'a dyn EventHandler,
77+
// Used to make sure dispatched objects
78+
// stick around until the next write
79+
dispatched: Vec<Dispatched>,
7780

7881
// Parsing Buffers
7982
tags: Vec<Tag>,
@@ -154,6 +157,7 @@ impl<'a> SAXParser<'a> {
154157

155158
// Event Handling
156159
event_handler,
160+
dispatched: Vec::new(),
157161

158162
// Parsing Buffers
159163
text: None,
@@ -223,6 +227,7 @@ impl<'a> SAXParser<'a> {
223227
///
224228
/// ```
225229
pub fn write(&mut self, source: &[u8]) {
230+
self.dispatched.clear();
226231
let mut bytes = source;
227232

228233
let frag_len = self.fragment.len();
@@ -494,8 +499,11 @@ impl<'a> SAXParser<'a> {
494499
}
495500

496501
if self.events[Event::OpenTagStart] {
497-
self.tag.hydrate(self.source_ptr);
498-
self.event_handler.handle_event(Event::OpenTagStart, Entity::Tag(&self.tag));
502+
let mut tag = Box::new(self.tag.clone());
503+
tag.hydrate(self.source_ptr);
504+
505+
self.event_handler.handle_event(Event::OpenTagStart, Entity::Tag(&*tag));
506+
self.dispatched.push(Dispatched::Tag(tag));
499507
}
500508

501509
match byte {
@@ -557,25 +565,29 @@ impl<'a> SAXParser<'a> {
557565
}
558566

559567
fn flush_text(&mut self, line: u32, character: u32, offset: usize) {
560-
if let Some(mut text) = self.text.take() {
561-
text.end = [line, character];
562-
text.header.1 = offset;
568+
if self.text.is_none() {
569+
return;
570+
}
571+
let mut text = Box::new(unsafe { self.text.take().unwrap_unchecked() });
572+
text.end = [line, character];
573+
text.header.1 = offset;
563574

564-
// Empty
565-
if text.header.0 == text.header.1 && text.value.is_empty() {
566-
return;
567-
}
575+
// Empty
576+
if text.header.0 == text.header.1 && text.value.is_empty() {
577+
return;
578+
}
568579

569-
if self.events[Event::Text] && text.hydrate(self.source_ptr) {
570-
self.event_handler.handle_event(Event::Text, Entity::Text(&text));
571-
}
580+
let len = self.tags.len();
581+
// Store these only if we're interested in CloseTag events
582+
if len != 0 && self.events[Event::CloseTag] {
583+
self.tags[len - 1].text_nodes.push(*text.clone());
584+
}
572585

573-
let len = self.tags.len();
574-
// Store these only if we're interested in CloseTag events
575-
if len != 0 && self.events[Event::CloseTag] {
576-
self.tags[len - 1].text_nodes.push(text);
577-
}
586+
if self.events[Event::Text] && text.hydrate(self.source_ptr) {
587+
self.event_handler.handle_event(Event::Text, Entity::Text(&text));
588+
self.dispatched.push(Dispatched::Text(text));
578589
}
590+
579591
}
580592

581593
fn markup_decl(&mut self, gc: &mut GraphemeClusters, current: &[u8]) {
@@ -652,8 +664,10 @@ impl<'a> SAXParser<'a> {
652664
if len > 2 && &markup_slice[(len - 3)..] == b"-->" {
653665
markup_decl.end = [gc.line, gc.character];
654666
if self.events[Event::Comment] && markup_decl.hydrate(self.source_ptr) {
667+
let mut markup_decl = Box::new(self.markup_decl.take().unwrap());
655668
markup_decl.value.truncate(markup_decl.value.len() - 3); // remove '-->'
656669
self.event_handler.handle_event(Event::Comment, Entity::Text(&markup_decl));
670+
self.dispatched.push(Dispatched::Text(markup_decl));
657671
}
658672
self.markup_decl = None;
659673
self.state = State::BeginWhitespace;
@@ -674,10 +688,11 @@ impl<'a> SAXParser<'a> {
674688
if len > 2 && &markup_slice[(len - 3)..] == b"]]>" {
675689
markup_decl.end = [gc.line, gc.character];
676690
if self.events[Event::Cdata] && markup_decl.hydrate(self.source_ptr) {
691+
let mut markup_decl = Box::new(self.markup_decl.take().unwrap());
677692
markup_decl.value.truncate(markup_decl.value.len() - 3); // remove ]]>
678693
self.event_handler.handle_event(Event::Cdata, Entity::Text(&markup_decl));
694+
self.dispatched.push(Dispatched::Text(markup_decl));
679695
}
680-
self.markup_decl = None;
681696
self.state = State::BeginWhitespace;
682697
}
683698
}
@@ -697,13 +712,13 @@ impl<'a> SAXParser<'a> {
697712
fn doctype(&mut self, gc: &mut GraphemeClusters, current: &[u8]) {
698713
let mut byte = current[0];
699714

700-
let markup_decl = self.markup_decl.as_mut().unwrap();
701715
// determine where to stop taking bytes for
702716
// the doctype value. e.g. '<!DOCTYPE movie ' <----- take 'movie' but not 'movie '
703717
if self.state != State::DoctypeEntity && !DOCTYPE_VALUE_END.contains(&byte) {
704718
if let Some((span, _)) = gc.take_until_one_found(DOCTYPE_VALUE_END, true) {
705719
byte = span[span.len() - 1];
706720
}
721+
let markup_decl = self.markup_decl.as_mut().unwrap();
707722
markup_decl.header.1 = gc.cursor;
708723
}
709724

@@ -724,13 +739,14 @@ impl<'a> SAXParser<'a> {
724739
}
725740

726741
if byte == b'>' {
742+
let mut markup_decl = Box::new(self.markup_decl.take().unwrap());
727743
markup_decl.end = [gc.line, gc.character];
728744
if self.events[Event::Doctype] && markup_decl.hydrate(self.source_ptr) {
729745
markup_decl.value.truncate(markup_decl.value.len() - 1); // remove '>' or '['
730746

731747
self.event_handler.handle_event(Event::Cdata, Entity::Text(&markup_decl));
748+
self.dispatched.push(Dispatched::Text(markup_decl));
732749
}
733-
self.markup_decl = None;
734750
self.state = State::BeginWhitespace;
735751
}
736752
}
@@ -745,14 +761,14 @@ impl<'a> SAXParser<'a> {
745761
}
746762

747763
if byte == b'>' {
748-
let markup_entity = self.markup_entity.as_mut().unwrap();
764+
let mut markup_entity = Box::new(self.markup_entity.take().unwrap());
749765
markup_entity.header.1 = gc.cursor - 1;
750766
markup_entity.end = [gc.line, gc.character.saturating_sub(1)];
751767

752768
if self.events[Event::Declaration] && markup_entity.hydrate(self.source_ptr) {
753769
self.event_handler.handle_event(Event::Cdata, Entity::Text(&markup_entity));
770+
self.dispatched.push(Dispatched::Text(markup_entity));
754771
}
755-
self.markup_entity = None;
756772
// if we have a markup_decl, we previously
757773
// were processing a doctype and encountered
758774
// entities and now need to complete the doctype
@@ -818,7 +834,7 @@ impl<'a> SAXParser<'a> {
818834

819835
fn process_proc_inst(&mut self, gc: &mut GraphemeClusters) {
820836
self.state = State::BeginWhitespace;
821-
let proc_inst = &mut self.proc_inst.take().unwrap();
837+
let mut proc_inst = Box::new(self.proc_inst.take().unwrap());
822838

823839
if self.events[Event::ProcessingInstruction] && proc_inst.hydrate(self.source_ptr) {
824840
proc_inst.end = [gc.line, gc.character];
@@ -827,6 +843,7 @@ impl<'a> SAXParser<'a> {
827843
proc_inst.target.value.drain(..2); // remove '<?'
828844
proc_inst.content.value.truncate(proc_inst.content.value.len().saturating_sub(2)); // remove '?>'
829845
self.event_handler.handle_event(Event::ProcessingInstruction, Entity::ProcInst(&proc_inst));
846+
self.dispatched.push(Dispatched::ProcInst(proc_inst));
830847
}
831848
}
832849

@@ -970,7 +987,9 @@ impl<'a> SAXParser<'a> {
970987
fn process_attribute(&mut self) {
971988
let mut attr = mem::replace(&mut self.attribute, Attribute::new());
972989
if self.events[Event::Attribute] && attr.hydrate(self.source_ptr) {
973-
self.event_handler.handle_event(Event::Attribute, Entity::Attribute(&attr));
990+
let attr_box = Box::new(attr.clone());
991+
self.event_handler.handle_event(Event::Attribute, Entity::Attribute(&attr_box));
992+
self.dispatched.push(Dispatched::Attribute(attr_box));
974993
}
975994
// Store them only if we're interested in Open and Close tag events
976995
if self.events[Event::OpenTag] || self.events[Event::CloseTag] {
@@ -985,17 +1004,11 @@ impl<'a> SAXParser<'a> {
9851004

9861005
if self.events[Event::OpenTag] {
9871006
tag.hydrate(self.source_ptr);
988-
self.event_handler.handle_event(Event::OpenTag, Entity::Tag(&tag));
989-
}
990-
991-
if self_closing && self.events[Event::CloseTag] {
992-
tag.hydrate(self.source_ptr);
993-
self.event_handler.handle_event(Event::CloseTag, Entity::Tag(&tag));
994-
}
995-
996-
if !self_closing {
997-
self.tags.push(tag);
1007+
let tag_box = Box::new(tag.clone());
1008+
self.event_handler.handle_event(Event::OpenTag, Entity::Tag(&tag_box));
1009+
self.dispatched.push(Dispatched::Tag(tag_box));
9981010
}
1011+
self.tags.push(tag);
9991012

10001013
self.state = State::BeginWhitespace;
10011014
}
@@ -1037,10 +1050,13 @@ impl<'a> SAXParser<'a> {
10371050
return;
10381051
}
10391052

1040-
for tag in self.tags.drain(tag_index..).rev() {
1041-
let mut tag = tag; // Create a mutable binding
1053+
let mut i = self.tags.len();
1054+
while i > tag_index {
1055+
let mut tag = Box::new(unsafe { self.tags.pop().unwrap_unchecked()});
10421056
tag.hydrate(self.source_ptr);
10431057
self.event_handler.handle_event(Event::CloseTag, Entity::Tag(&tag));
1058+
self.dispatched.push(Dispatched::Tag(tag));
1059+
i -= 1;
10441060
}
10451061
}
10461062

Diff for: src/sax/tag.rs

+7
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ pub enum Entity<'a> {
180180
Text(&'a Text),
181181
}
182182

183+
pub enum Dispatched {
184+
Attribute(Box<Attribute>),
185+
ProcInst(Box<ProcInst>),
186+
Tag(Box<Tag>),
187+
Text(Box<Text>),
188+
}
189+
183190
#[derive(Clone, Copy)]
184191
pub enum AttrType {
185192
Normal = 0x00,

0 commit comments

Comments
 (0)