@@ -345,18 +345,26 @@ macro_rules! read_until_close {
345
345
}
346
346
} ,
347
347
// `</` - closing tag
348
+ // #776: We parse using ElementParser which allows us to have attributes
349
+ // in close tags. While such tags are not allowed by the specification,
350
+ // we allow parsing them anyway because:
351
+ // - we do not check constraints during parsing. This is performed by the
352
+ // optional validation step, which the user should call manually
353
+ // - if we just look for `>` we will parse `</tag attr=">" >` as end tag
354
+ // `</tag attr=">` and text `" >`, which probably no existing parser
355
+ // does. This is malformed XML, however it is tolerated by some parsers
356
+ // (e.g. the one used by Adobe Flash) and such documents do exist in the wild.
348
357
Ok ( Some ( b'/' ) ) => match $reader
349
- . read_bytes_until ( b'>' , $buf, & mut $self. state. offset)
358
+ . read_with ( ElementParser :: Outside , $buf, & mut $self. state. offset)
350
359
$( . $await) ?
351
360
{
352
- Ok ( ( bytes, true ) ) => $self. state. emit_end( bytes) ,
353
- Ok ( ( _ , false ) ) => {
361
+ Ok ( bytes) => $self. state. emit_end( bytes) ,
362
+ Err ( e ) => {
354
363
// We want to report error at `<`, but offset was increased,
355
364
// so return it back (-1 for `<`)
356
365
$self. state. last_error_offset = start - 1 ;
357
- Err ( Error :: Syntax ( SyntaxError :: UnclosedTag ) )
366
+ Err ( e )
358
367
}
359
- Err ( e) => Err ( Error :: Io ( e. into( ) ) ) ,
360
368
} ,
361
369
// `<?` - processing instruction
362
370
Ok ( Some ( b'?' ) ) => match $reader
@@ -824,39 +832,6 @@ trait XmlSource<'r, B> {
824
832
/// [events]: crate::events::Event
825
833
fn read_text ( & mut self , buf : B , position : & mut u64 ) -> ReadTextResult < ' r , B > ;
826
834
827
- /// Read input until `byte` is found or end of input is reached.
828
- ///
829
- /// Returns a slice of data read up to `byte` (exclusive),
830
- /// and a flag noting whether `byte` was found in the input or not.
831
- ///
832
- /// # Example
833
- ///
834
- /// ```ignore
835
- /// let mut position = 0;
836
- /// let mut input = b"abc*def".as_ref();
837
- /// // ^= 4
838
- ///
839
- /// assert_eq!(
840
- /// input.read_bytes_until(b'*', (), &mut position).unwrap(),
841
- /// (b"abc".as_ref(), true)
842
- /// );
843
- /// assert_eq!(position, 4); // position after the symbol matched
844
- /// ```
845
- ///
846
- /// # Parameters
847
- /// - `byte`: Byte for search
848
- /// - `buf`: Buffer that could be filled from an input (`Self`) and
849
- /// from which [events] could borrow their data
850
- /// - `position`: Will be increased by amount of bytes consumed
851
- ///
852
- /// [events]: crate::events::Event
853
- fn read_bytes_until (
854
- & mut self ,
855
- byte : u8 ,
856
- buf : B ,
857
- position : & mut u64 ,
858
- ) -> io:: Result < ( & ' r [ u8 ] , bool ) > ;
859
-
860
835
/// Read input until processing instruction is finished.
861
836
///
862
837
/// This method expects that the start sequence of a parser has already been read.
@@ -1022,115 +997,6 @@ mod test {
1022
997
$buf: expr
1023
998
$( , $async: ident, $await: ident) ?
1024
999
) => {
1025
- mod read_bytes_until {
1026
- use super :: * ;
1027
- // Use Bytes for printing bytes as strings for ASCII range
1028
- use crate :: utils:: Bytes ;
1029
- use pretty_assertions:: assert_eq;
1030
-
1031
- /// Checks that search in the empty buffer returns `None`
1032
- #[ $test]
1033
- $( $async) ? fn empty( ) {
1034
- let buf = $buf;
1035
- let mut position = 0 ;
1036
- let mut input = b"" . as_ref( ) ;
1037
- // ^= 0
1038
-
1039
- let ( bytes, found) = $source( & mut input)
1040
- . read_bytes_until( b'*' , buf, & mut position)
1041
- $( . $await) ?
1042
- . unwrap( ) ;
1043
- assert_eq!(
1044
- ( Bytes ( bytes) , found) ,
1045
- ( Bytes ( b"" ) , false )
1046
- ) ;
1047
- assert_eq!( position, 0 ) ;
1048
- }
1049
-
1050
- /// Checks that search in the buffer non-existent value returns entire buffer
1051
- /// as a result and set `position` to `len()`
1052
- #[ $test]
1053
- $( $async) ? fn non_existent( ) {
1054
- let buf = $buf;
1055
- let mut position = 0 ;
1056
- let mut input = b"abcdef" . as_ref( ) ;
1057
- // ^= 6
1058
-
1059
- let ( bytes, found) = $source( & mut input)
1060
- . read_bytes_until( b'*' , buf, & mut position)
1061
- $( . $await) ?
1062
- . unwrap( ) ;
1063
- assert_eq!(
1064
- ( Bytes ( bytes) , found) ,
1065
- ( Bytes ( b"abcdef" ) , false )
1066
- ) ;
1067
- assert_eq!( position, 6 ) ;
1068
- }
1069
-
1070
- /// Checks that search in the buffer an element that is located in the front of
1071
- /// buffer returns empty slice as a result and set `position` to one symbol
1072
- /// after match (`1`)
1073
- #[ $test]
1074
- $( $async) ? fn at_the_start( ) {
1075
- let buf = $buf;
1076
- let mut position = 0 ;
1077
- let mut input = b"*abcdef" . as_ref( ) ;
1078
- // ^= 1
1079
-
1080
- let ( bytes, found) = $source( & mut input)
1081
- . read_bytes_until( b'*' , buf, & mut position)
1082
- $( . $await) ?
1083
- . unwrap( ) ;
1084
- assert_eq!(
1085
- ( Bytes ( bytes) , found) ,
1086
- ( Bytes ( b"" ) , true )
1087
- ) ;
1088
- assert_eq!( position, 1 ) ; // position after the symbol matched
1089
- }
1090
-
1091
- /// Checks that search in the buffer an element that is located in the middle of
1092
- /// buffer returns slice before that symbol as a result and set `position` to one
1093
- /// symbol after match
1094
- #[ $test]
1095
- $( $async) ? fn inside( ) {
1096
- let buf = $buf;
1097
- let mut position = 0 ;
1098
- let mut input = b"abc*def" . as_ref( ) ;
1099
- // ^= 4
1100
-
1101
- let ( bytes, found) = $source( & mut input)
1102
- . read_bytes_until( b'*' , buf, & mut position)
1103
- $( . $await) ?
1104
- . unwrap( ) ;
1105
- assert_eq!(
1106
- ( Bytes ( bytes) , found) ,
1107
- ( Bytes ( b"abc" ) , true )
1108
- ) ;
1109
- assert_eq!( position, 4 ) ; // position after the symbol matched
1110
- }
1111
-
1112
- /// Checks that search in the buffer an element that is located in the end of
1113
- /// buffer returns slice before that symbol as a result and set `position` to one
1114
- /// symbol after match (`len()`)
1115
- #[ $test]
1116
- $( $async) ? fn in_the_end( ) {
1117
- let buf = $buf;
1118
- let mut position = 0 ;
1119
- let mut input = b"abcdef*" . as_ref( ) ;
1120
- // ^= 7
1121
-
1122
- let ( bytes, found) = $source( & mut input)
1123
- . read_bytes_until( b'*' , buf, & mut position)
1124
- $( . $await) ?
1125
- . unwrap( ) ;
1126
- assert_eq!(
1127
- ( Bytes ( bytes) , found) ,
1128
- ( Bytes ( b"abcdef" ) , true )
1129
- ) ;
1130
- assert_eq!( position, 7 ) ; // position after the symbol matched
1131
- }
1132
- }
1133
-
1134
1000
mod read_bang_element {
1135
1001
use super :: * ;
1136
1002
use crate :: errors:: { Error , SyntaxError } ;
@@ -1693,6 +1559,81 @@ mod test {
1693
1559
assert_eq!( position, 42 ) ;
1694
1560
}
1695
1561
}
1562
+
1563
+ mod close {
1564
+ use super :: * ;
1565
+ use pretty_assertions:: assert_eq;
1566
+
1567
+ #[ $test]
1568
+ $( $async) ? fn empty_tag( ) {
1569
+ let buf = $buf;
1570
+ let mut position = 1 ;
1571
+ let mut input = b"/ >" . as_ref( ) ;
1572
+ // ^= 4
1573
+
1574
+ assert_eq!(
1575
+ Bytes ( $source( & mut input) . read_with( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1576
+ Bytes ( b"/ " )
1577
+ ) ;
1578
+ assert_eq!( position, 4 ) ;
1579
+ }
1580
+
1581
+ #[ $test]
1582
+ $( $async) ? fn normal( ) {
1583
+ let buf = $buf;
1584
+ let mut position = 1 ;
1585
+ let mut input = b"/tag>" . as_ref( ) ;
1586
+ // ^= 6
1587
+
1588
+ assert_eq!(
1589
+ Bytes ( $source( & mut input) . read_with( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1590
+ Bytes ( b"/tag" )
1591
+ ) ;
1592
+ assert_eq!( position, 6 ) ;
1593
+ }
1594
+
1595
+ #[ $test]
1596
+ $( $async) ? fn empty_ns_empty_tag( ) {
1597
+ let buf = $buf;
1598
+ let mut position = 1 ;
1599
+ let mut input = b"/:>" . as_ref( ) ;
1600
+ // ^= 4
1601
+
1602
+ assert_eq!(
1603
+ Bytes ( $source( & mut input) . read_with( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1604
+ Bytes ( b"/:" )
1605
+ ) ;
1606
+ assert_eq!( position, 4 ) ;
1607
+ }
1608
+
1609
+ #[ $test]
1610
+ $( $async) ? fn empty_ns( ) {
1611
+ let buf = $buf;
1612
+ let mut position = 1 ;
1613
+ let mut input = b"/:tag>" . as_ref( ) ;
1614
+ // ^= 7
1615
+
1616
+ assert_eq!(
1617
+ Bytes ( $source( & mut input) . read_with( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1618
+ Bytes ( b"/:tag" )
1619
+ ) ;
1620
+ assert_eq!( position, 7 ) ;
1621
+ }
1622
+
1623
+ #[ $test]
1624
+ $( $async) ? fn with_attributes( ) {
1625
+ let buf = $buf;
1626
+ let mut position = 1 ;
1627
+ let mut input = br#"/tag attr-1=">" attr2 = '>' 3attr>"# . as_ref( ) ;
1628
+ // ^= 40
1629
+
1630
+ assert_eq!(
1631
+ Bytes ( $source( & mut input) . read_with( ElementParser :: default ( ) , buf, & mut position) $( . $await) ? . unwrap( ) ) ,
1632
+ Bytes ( br#"/tag attr-1=">" attr2 = '>' 3attr"# )
1633
+ ) ;
1634
+ assert_eq!( position, 40 ) ;
1635
+ }
1636
+ }
1696
1637
}
1697
1638
1698
1639
/// Ensures, that no empty `Text` events are generated
0 commit comments