Skip to content

Commit 959eb55

Browse files
authored
Merge pull request #780 from Mingun/end-attributes
Allow attributes in the `Event::End` and fix `.error_position()`
2 parents 0960333 + 6a48a28 commit 959eb55

18 files changed

+415
-449
lines changed

Changelog.md

+6
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,15 @@
1919

2020
- [#781]: Fix conditions to start CDATA section. Only uppercase `<![CDATA[` can start it.
2121
Previously any case was allowed.
22+
- [#780]: Fixed incorrect `.error_position()` when encountering a syntax error in an open or self-closed tag.
2223

2324
### Misc Changes
2425

26+
- [#780]: `reader::Parser`, `reader::ElementParser` and `reader::PiParser` moved to the new module `parser`.
27+
- [#776]: Allow attributes in the end tag for compatibility with the Adobe Flash XML parser.
28+
29+
[#776]: https://github.com/tafia/quick-xml/issues/776
30+
[#780]: https://github.com/tafia/quick-xml/pull/780
2531
[#781]: https://github.com/tafia/quick-xml/pull/781
2632

2733

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ loop {
4040
// when the input is a &str or a &[u8], we don't actually need to use another
4141
// buffer, we could directly call `reader.read_event()`
4242
match reader.read_event_into(&mut buf) {
43-
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
43+
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
4444
// exits the loop when reaching end of file
4545
Ok(Event::Eof) => break,
4646

@@ -98,7 +98,7 @@ loop {
9898
Ok(Event::Eof) => break,
9999
// we can either move or borrow the event to write, depending on your use-case
100100
Ok(e) => assert!(writer.write_event(e).is_ok()),
101-
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
101+
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
102102
}
103103
}
104104

examples/custom_entities.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
6868
);
6969
}
7070
Ok(Event::Eof) => break,
71-
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
71+
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
7272
_ => (),
7373
}
7474
}

examples/read_buffered.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ fn main() -> Result<(), quick_xml::Error> {
2323
count += 1;
2424
}
2525
Ok(Event::Eof) => break, // exits the loop when reaching end of file
26-
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
26+
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
2727
_ => (), // There are several other `Event`s we do not consider here
2828
}
2929
}

examples/read_texts.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ fn main() {
1818
println!("{:?}", txt);
1919
}
2020
Ok(Event::Eof) => break, // exits the loop when reaching end of file
21-
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
21+
Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
2222
_ => (), // There are several other `Event`s we do not consider here
2323
}
2424
}

src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ pub mod errors;
5959
pub mod escape;
6060
pub mod events;
6161
pub mod name;
62+
pub mod parser;
6263
pub mod reader;
6364
#[cfg(feature = "serialize")]
6465
pub mod se;

src/reader/element.rs renamed to src/parser/element.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Contains a parser for an XML element.
22
33
use crate::errors::SyntaxError;
4-
use crate::reader::Parser;
4+
use crate::parser::Parser;
55

66
/// A parser that searches for a `>` symbol in the slice outside of quoted regions.
77
///
@@ -25,7 +25,7 @@ use crate::reader::Parser;
2525
///
2626
/// ```
2727
/// # use pretty_assertions::assert_eq;
28-
/// use quick_xml::reader::{ElementParser, Parser};
28+
/// use quick_xml::parser::{ElementParser, Parser};
2929
///
3030
/// let mut parser = ElementParser::default();
3131
///

src/parser/mod.rs

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//! Contains low-level parsers of different XML pieces.
2+
3+
use crate::errors::SyntaxError;
4+
5+
mod element;
6+
mod pi;
7+
8+
pub use element::ElementParser;
9+
pub use pi::PiParser;
10+
11+
/// Used to decouple reading of data from data source and parsing XML structure from it.
12+
/// This is a state preserved between getting chunks of bytes from the reader.
13+
///
14+
/// This trait is implemented for every parser that processes a piece of XML grammar.
15+
pub trait Parser {
16+
/// Process new data and try to determine end of the parsed thing.
17+
///
18+
/// Returns position of the end of thing in `bytes` in case of successful search
19+
/// and `None` otherwise.
20+
///
21+
/// # Parameters
22+
/// - `bytes`: a slice to find the end of a thing.
23+
/// Should contain text in ASCII-compatible encoding
24+
fn feed(&mut self, bytes: &[u8]) -> Option<usize>;
25+
26+
/// Returns parse error produced by this parser in case of reaching end of
27+
/// input without finding the end of a parsed thing.
28+
fn eof_error() -> SyntaxError;
29+
}

src/reader/pi.rs renamed to src/parser/pi.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Contains a parser for an XML processing instruction.
22
33
use crate::errors::SyntaxError;
4-
use crate::reader::Parser;
4+
use crate::parser::Parser;
55

66
/// A parser that searches for a `?>` sequence in the slice.
77
///
@@ -19,7 +19,7 @@ use crate::reader::Parser;
1919
///
2020
/// ```
2121
/// # use pretty_assertions::assert_eq;
22-
/// use quick_xml::reader::{Parser, PiParser};
22+
/// use quick_xml::parser::{Parser, PiParser};
2323
///
2424
/// let mut parser = PiParser::default();
2525
///

src/reader/async_tokio.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@ use tokio::io::{self, AsyncBufRead, AsyncBufReadExt};
77
use crate::errors::{Error, Result, SyntaxError};
88
use crate::events::Event;
99
use crate::name::{QName, ResolveResult};
10+
use crate::parser::{ElementParser, Parser, PiParser};
1011
use crate::reader::buffered_reader::impl_buffered_source;
11-
use crate::reader::{
12-
BangType, ElementParser, NsReader, ParseState, Parser, PiParser, ReadTextResult, Reader, Span,
13-
};
12+
use crate::reader::{BangType, NsReader, ParseState, ReadTextResult, Reader, Span};
1413
use crate::utils::is_whitespace;
1514

1615
/// A struct for reading XML asynchronously from an [`AsyncBufRead`].
@@ -59,7 +58,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
5958
/// match reader.read_event_into_async(&mut buf).await {
6059
/// Ok(Event::Start(_)) => count += 1,
6160
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
62-
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
61+
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
6362
/// Ok(Event::Eof) => break,
6463
/// _ => (),
6564
/// }

src/reader/buffered_reader.rs

+3-50
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ use std::path::Path;
88
use crate::errors::{Error, Result};
99
use crate::events::Event;
1010
use crate::name::QName;
11-
use crate::reader::{BangType, Parser, ReadTextResult, Reader, Span, XmlSource};
11+
use crate::parser::Parser;
12+
use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
1213
use crate::utils::is_whitespace;
1314

1415
macro_rules! impl_buffered_source {
@@ -100,54 +101,6 @@ macro_rules! impl_buffered_source {
100101
ReadTextResult::UpToEof(&buf[start..])
101102
}
102103

103-
#[inline]
104-
$($async)? fn read_bytes_until $(<$lf>)? (
105-
&mut self,
106-
byte: u8,
107-
buf: &'b mut Vec<u8>,
108-
position: &mut u64,
109-
) -> io::Result<(&'b [u8], bool)> {
110-
// search byte must be within the ascii range
111-
debug_assert!(byte.is_ascii());
112-
113-
let mut read = 0;
114-
let start = buf.len();
115-
loop {
116-
let available = match self $(.$reader)? .fill_buf() $(.$await)? {
117-
Ok(n) if n.is_empty() => break,
118-
Ok(n) => n,
119-
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
120-
Err(e) => {
121-
*position += read;
122-
return Err(e);
123-
}
124-
};
125-
126-
match memchr::memchr(byte, available) {
127-
Some(i) => {
128-
buf.extend_from_slice(&available[..i]);
129-
130-
let used = i + 1;
131-
self $(.$reader)? .consume(used);
132-
read += used as u64;
133-
134-
*position += read;
135-
return Ok((&buf[start..], true));
136-
}
137-
None => {
138-
buf.extend_from_slice(available);
139-
140-
let used = available.len();
141-
self $(.$reader)? .consume(used);
142-
read += used as u64;
143-
}
144-
}
145-
}
146-
147-
*position += read;
148-
Ok((&buf[start..], false))
149-
}
150-
151104
#[inline]
152105
$($async)? fn read_with<$($lf,)? P: Parser>(
153106
&mut self,
@@ -327,7 +280,7 @@ impl<R: BufRead> Reader<R> {
327280
/// match reader.read_event_into(&mut buf) {
328281
/// Ok(Event::Start(_)) => count += 1,
329282
/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
330-
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
283+
/// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
331284
/// Ok(Event::Eof) => break,
332285
/// _ => (),
333286
/// }

0 commit comments

Comments
 (0)