@@ -9,47 +9,55 @@ class NfseXmlParser
99{
/**
 * Parse an NFS-e XML payload into an NfseData object.
 *
 * Steps: normalise the bytes to UTF-8, parse them with SimpleXML, turn the
 * tree into a nested array through a JSON round trip, then replace empty
 * arrays with null via sanitizeArray().
 *
 * @param string $xml Raw XML document.
 *
 * @return NfseData
 *
 * @throws Exception When the XML cannot be parsed or cannot be converted
 *                   to an array.
 */
public function parse(string $xml): NfseData
{
    // Whitespace in front of the XML declaration is a fatal libxml error.
    $xml = trim($xml);

    // 1. Fix encoding: input that is not valid UTF-8 is treated as ISO-8859-1.
    if (!mb_check_encoding($xml, 'UTF-8')) {
        $xml = mb_convert_encoding($xml, 'UTF-8', 'ISO-8859-1');

        // The bytes are UTF-8 now, so the declaration has to say so too;
        // otherwise libxml decodes them a second time using the old
        // encoding name and accented characters come out garbled.
        $redeclared = preg_replace(
            '/\A(<\?xml\b[^>]*?\bencoding\s*=\s*)(["\'])[^"\']*\2/i',
            '${1}${2}UTF-8${2}',
            $xml,
            1
        );
        if (is_string($redeclared)) {
            $xml = $redeclared;
        }
    }

    // Remove invalid characters. iconv() returns false on failure, in which
    // case the input is kept instead of being replaced by false.
    $scrubbed = iconv('UTF-8', 'UTF-8//IGNORE', $xml);
    if ($scrubbed !== false) {
        $xml = $scrubbed;
    }

    // 2. Parse XML, collecting libxml errors instead of emitting warnings.
    $useInternal = libxml_use_internal_errors(true);
    try {
        $simpleXml = simplexml_load_string(
            $xml,
            'SimpleXMLElement',
            LIBXML_NOCDATA | LIBXML_NOBLANKS
        );

        if ($simpleXml === false) {
            $errors = libxml_get_errors();
            // libxml messages end with a newline; strip it for the exception.
            $errorMsg = isset($errors[0]) ? trim($errors[0]->message) : '';

            throw new Exception($errorMsg !== '' ? $errorMsg : 'Failed to parse XML');
        }
    } finally {
        // Always empty the error buffer and restore the caller's setting,
        // on success as well as on failure.
        libxml_clear_errors();
        libxml_use_internal_errors($useInternal);
    }

    // 3. Convert to Array via JSON (mimic vendor behavior)
    $json = json_encode($simpleXml, JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        throw new Exception('Failed to convert XML to array: ' . json_last_error_msg());
    }
    $parsedDoc = json_decode($json, true);

    // 4. Sanitize Array (Fix [] -> null)
    // NOTE(review): a document whose root decodes to an empty array reaches
    // NfseData as null - confirm the constructor accepts that.
    $parsedDoc = $this->sanitizeArray($parsedDoc);

    return new NfseData($parsedDoc);
}
4246
/**
 * Recursively replace every empty array in a decoded structure with null.
 *
 * Non-array values are returned untouched. Keys of non-empty arrays are
 * preserved at every nesting level.
 *
 * @param mixed $data Value to clean.
 *
 * @return mixed The same value with each empty array swapped for null.
 */
private function sanitizeArray($data)
{
    // Scalars and null pass straight through.
    if (!is_array($data)) {
        return $data;
    }

    // An empty array becomes null.
    if (count($data) === 0) {
        return null;
    }

    // array_map() with a single input array keeps the original keys.
    return array_map([$this, 'sanitizeArray'], $data);
}
5563}
0 commit comments