Skip to content

Commit 242e376

Browse files
authored
fix(#105): Regression in attribute handling (#107)
1 parent e9f7350 commit 242e376

File tree

4 files changed

+130
-8
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ All parsers are tested using a large XML document (3 MB) containing a variety of
108108

109109
| Parser with Advanced Features | time/ms (lower is better)| JS | Runs in browser |
110110
|--------------------------------------------------------------------------------------------|-------------------------:|:------:|:---------------:|
111-
| [sax-wasm](https://github.com/justinwilaby/sax-wasm) | 18.54 |||
111+
| [sax-wasm](https://github.com/justinwilaby/sax-wasm) | 13.27 |||
112112
| [saxes](https://github.com/lddubeau/saxes) | 41.01 |||
113113
| [ltx(using Saxes as the parser)](https://github.com/xmppjs/ltx) | 44.56 |||
114114
| [sax-js](https://github.com/isaacs/sax-js) | 116.98 ||* |

lib/sax-wasm.wasm

322 Bytes
Binary file not shown.

src/js/__test__/attribute.spec.ts

+31
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,35 @@ describe('SaxWasm', () => {
159159
deepStrictEqual(_data[4].name.value, 'attribute2');
160160
deepStrictEqual(_data[4].value.value, 'value2');
161161
});
162+
163+
it('should correctly parse attribute with single character as name (no value)', () => {
164+
parser.write(Buffer.from(`<element attribute1='value1'a attribute3='value3'></element>`));
165+
deepStrictEqual(_event, SaxEventType.Attribute);
166+
deepStrictEqual(_data[0].name.value, 'attribute1');
167+
deepStrictEqual(_data[0].value.value, 'value1');
168+
deepStrictEqual(_data[1].name.value, 'a');
169+
deepStrictEqual(_data[1].value.value, '');
170+
deepStrictEqual(_data[2].name.value, 'attribute3');
171+
deepStrictEqual(_data[2].value.value, 'value3');
172+
});
173+
174+
it('should correctly parse attribute with single character as name (with value)', () => {
175+
parser.write(Buffer.from(`<element attribute1='value1'a="value2" attribute3='value3'></element>`));
176+
deepStrictEqual(_event, SaxEventType.Attribute);
177+
deepStrictEqual(_data[0].name.value, 'attribute1');
178+
deepStrictEqual(_data[0].value.value, 'value1');
179+
deepStrictEqual(_data[1].name.value, 'a');
180+
deepStrictEqual(_data[1].value.value, 'value2');
181+
deepStrictEqual(_data[2].name.value, 'attribute3');
182+
deepStrictEqual(_data[2].value.value, 'value3');
183+
});
184+
185+
it('should correctly parse unquoted attribute with following attribute', () => {
186+
parser.write(Buffer.from(`<element attribute1=value1 attribute2='value2'></element>`));
187+
deepStrictEqual(_event, SaxEventType.Attribute);
188+
deepStrictEqual(_data[0].name.value, 'attribute1');
189+
deepStrictEqual(_data[0].value.value, 'value1');
190+
deepStrictEqual(_data[1].name.value, 'attribute2');
191+
deepStrictEqual(_data[1].value.value, 'value2');
192+
});
162193
});

src/sax/parser.rs

+98-7
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ static TEXT_END: &[u8] = &[ b'<' , b'\n'];
1919

2020
/// Characters that indicate the end of
2121
/// an attribute name
22-
static ATTRIBUTE_NAME_END: &[u8] = &[b'=' , b'>' , b' ', b'\t'];
22+
static ATTRIBUTE_NAME_END: &[u8] = &[b'=' , b'>' , b' ', b'\t', b'\n'];
2323

2424
static ATTRIBUTE_VALUE_END: &[u8] = &[b' ', b'\t', b'>' , b'/'];
2525

@@ -493,10 +493,12 @@ impl<'a> SAXParser<'a> {
493493
let mut byte = current[0];
494494

495495
if !TAG_NAME_END.contains(&byte) {
496-
if let Some((span, _)) = gc.take_until_one_found(TAG_NAME_END, true) {
496+
if let Some((span, found)) = gc.take_until_one_found(TAG_NAME_END, true) {
497497
byte = span[span.len() - 1];
498+
self.tag.header.1 = if found { gc.last_cursor_pos } else { gc.cursor };
499+
} else {
500+
self.tag.header.1 = gc.last_cursor_pos;
498501
}
499-
self.tag.header.1 = gc.last_cursor_pos;
500502
}
501503

502504
if self.events[Event::OpenTagStart] {
@@ -870,6 +872,7 @@ impl<'a> SAXParser<'a> {
870872
self.attribute.name.start = [gc.line, gc.character.saturating_sub(1)];
871873
self.attribute.name.header.0 = gc.last_cursor_pos;
872874
self.state = State::AttribName;
875+
self.attribute_name(gc, current);
873876
}
874877
}
875878
}
@@ -878,6 +881,7 @@ impl<'a> SAXParser<'a> {
878881
match current[0] {
879882
b'=' => {
880883
self.attribute.name.end = [gc.line, gc.character.saturating_sub(1)];
884+
self.attribute.name.header.1 = gc.cursor.saturating_sub(1);
881885
self.state = State::AttribValue;
882886
}
883887
b'>' => {
@@ -938,8 +942,8 @@ impl<'a> SAXParser<'a> {
938942
self.attribute.attr_type = AttrType::JSX;
939943
self.brace_ct += 1;
940944
} else {
945+
self.attribute.value.header.0 = gc.last_cursor_pos;
941946
self.state = State::AttribValueUnquoted;
942-
gc.take_until_one_found(ATTRIBUTE_VALUE_END, false);
943947
}
944948
}
945949

@@ -966,22 +970,30 @@ impl<'a> SAXParser<'a> {
966970
} else {
967971
self.attribute.name.header.0 = gc.last_cursor_pos;
968972
self.state = State::AttribName;
973+
self.attribute_name(gc, current);
969974
}
970975
}
971976

972977
#[cold]
973978
fn attribute_value_unquoted(&mut self, gc: &mut GraphemeClusters, current: &[u8]) {
974-
let byte = current[0];
975-
if byte != b'>' || byte < 33 {
979+
let mut byte = current[0];
980+
if byte < 33 {
976981
return;
977982
}
983+
if let Some((span, found)) = gc.take_until_one_found(ATTRIBUTE_VALUE_END, true) {
984+
byte = span[span.len() - 1];
985+
self.attribute.value.header.1 = if found { gc.last_cursor_pos } else { gc.cursor };
986+
} else {
987+
self.attribute.value.header.1 = gc.last_cursor_pos;
988+
}
978989
self.attribute.value.end = [gc.line, gc.character.saturating_sub(1)];
979-
self.attribute.value.header.1 = gc.last_cursor_pos;
990+
980991
self.process_attribute();
981992
if byte == b'>' {
982993
self.process_open_tag(false, gc);
983994
} else {
984995
self.state = State::Attrib;
996+
self.attribute(gc, &[byte]);
985997
}
986998
}
987999

@@ -1248,6 +1260,85 @@ mod tests {
12481260
Ok(())
12491261
}
12501262
#[test]
1263+
fn test_attribute_single_character_boolean() -> Result<()> {
1264+
let event_handler = TextEventHandler::new();
1265+
let mut sax = SAXParser::new(&event_handler);
1266+
let mut events = [false; 10];
1267+
events[Event::Attribute] = true;
1268+
events[Event::CloseTag] = true;
1269+
events[Event::Text] = true;
1270+
sax.events = events;
1271+
let str = r#"<element attribute1='value1'a attribute3='value3'></element>"#;
1272+
1273+
sax.write(str.as_bytes());
1274+
sax.identity();
1275+
1276+
let attrs = event_handler.attributes.borrow();
1277+
let texts = event_handler.texts.borrow();
1278+
assert_eq!(attrs.len(), 3);
1279+
assert_eq!(attrs[0].name.value, b"attribute1");
1280+
assert_eq!(attrs[0].value.value, b"value1");
1281+
assert_eq!(attrs[1].name.value, b"a");
1282+
assert_eq!(attrs[1].value.value, b"");
1283+
assert_eq!(attrs[2].name.value, b"attribute3");
1284+
assert_eq!(attrs[2].value.value, b"value3");
1285+
assert_eq!(texts.len(), 0);
1286+
1287+
Ok(())
1288+
}
1289+
#[test]
1290+
fn test_attribute_unquoted() -> Result<()> {
1291+
let event_handler = TextEventHandler::new();
1292+
let mut sax = SAXParser::new(&event_handler);
1293+
let mut events = [false; 10];
1294+
events[Event::Attribute] = true;
1295+
events[Event::CloseTag] = true;
1296+
events[Event::Text] = true;
1297+
sax.events = events;
1298+
let str = r#"<element attribute1=value1 attribute2='value2'></element>"#;
1299+
1300+
sax.write(str.as_bytes());
1301+
sax.identity();
1302+
1303+
let attrs = event_handler.attributes.borrow();
1304+
let texts = event_handler.texts.borrow();
1305+
assert_eq!(attrs.len(), 2);
1306+
assert_eq!(attrs[0].name.value, b"attribute1");
1307+
assert_eq!(attrs[0].value.value, b"value1");
1308+
assert_eq!(attrs[1].name.value, b"attribute2");
1309+
assert_eq!(attrs[1].value.value, b"value2");
1310+
assert_eq!(texts.len(), 0);
1311+
1312+
Ok(())
1313+
}
1314+
#[test]
1315+
fn test_attribute_single_character() -> Result<()> {
1316+
let event_handler = TextEventHandler::new();
1317+
let mut sax = SAXParser::new(&event_handler);
1318+
let mut events = [false; 10];
1319+
events[Event::Attribute] = true;
1320+
events[Event::CloseTag] = true;
1321+
events[Event::Text] = true;
1322+
sax.events = events;
1323+
let str = r#"<element attribute1='value1'a="value2" attribute3='value3'></element>"#;
1324+
1325+
sax.write(str.as_bytes());
1326+
sax.identity();
1327+
1328+
let attrs = event_handler.attributes.borrow();
1329+
let texts = event_handler.texts.borrow();
1330+
assert_eq!(attrs.len(), 3);
1331+
assert_eq!(attrs[0].name.value, b"attribute1");
1332+
assert_eq!(attrs[0].value.value, b"value1");
1333+
assert_eq!(attrs[1].name.value, b"a");
1334+
assert_eq!(attrs[1].value.value, b"value2");
1335+
assert_eq!(attrs[2].name.value, b"attribute3");
1336+
assert_eq!(attrs[2].value.value, b"value3");
1337+
assert_eq!(texts.len(), 0);
1338+
1339+
Ok(())
1340+
}
1341+
#[test]
12511342
fn test_empty_tag() -> Result<()> {
12521343
let event_handler = TextEventHandler::new();
12531344
let mut sax = SAXParser::new(&event_handler);

0 commit comments

Comments
 (0)