Skip to content

Commit 59b6f03

Browse files
committed
improve NfseXmlParser
1 parent eaa7a81 commit 59b6f03

1 file changed

Lines changed: 32 additions & 24 deletions

File tree

src/Xml/NfseXmlParser.php

Lines changed: 32 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -9,47 +9,55 @@ class NfseXmlParser
99
{
1010
public function parse(string $xml): NfseData
1111
{
12-
// Clean up the XML string
13-
$xml = trim($xml);
14-
15-
// Try to detect if the XML has double UTF-8 encoding
16-
// This happens when the SEFIN API returns XML that was already UTF-8 encoded
17-
// and then got encoded again during transmission
18-
$hasDoubleEncoding = $this->detectDoubleUtf8Encoding($xml);
19-
20-
if ($hasDoubleEncoding) {
21-
// Decode once to fix the double encoding
22-
$xml = mb_convert_encoding($xml, 'ISO-8859-1', 'UTF-8');
12+
// 1. Fix Encoding
13+
if (! mb_check_encoding($xml, 'UTF-8')) {
14+
$xml = mb_convert_encoding($xml, 'UTF-8', 'ISO-8859-1');
2315
}
2416

25-
// Load with proper encoding options
17+
// Remove invalid characters
18+
$xml = iconv('UTF-8', 'UTF-8//IGNORE', $xml);
19+
20+
// 2. Parse XML
21+
$useInternal = libxml_use_internal_errors(true);
2622
$simpleXml = simplexml_load_string(
2723
$xml,
2824
'SimpleXMLElement',
2925
LIBXML_NOCDATA | LIBXML_NOBLANKS
3026
);
3127

3228
if ($simpleXml === false) {
33-
throw new Exception('Failed to parse XML');
29+
$errors = libxml_get_errors();
30+
$errorMsg = $errors[0]->message ?? 'Failed to parse XML';
31+
libxml_clear_errors();
32+
libxml_use_internal_errors($useInternal);
33+
throw new Exception($errorMsg);
3434
}
35+
libxml_use_internal_errors($useInternal);
3536

36-
// Use JSON_UNESCAPED_UNICODE to preserve characters correctly
37+
// 3. Convert to Array via JSON (mimic vendor behavior)
3738
$json = json_encode($simpleXml, JSON_UNESCAPED_UNICODE);
3839
$parsedDoc = json_decode($json, true);
3940

41+
// 4. Sanitize Array (Fix [] -> null)
42+
$parsedDoc = $this->sanitizeArray($parsedDoc);
43+
4044
return new NfseData($parsedDoc);
4145
}
4246

43-
/**
44-
* Detect if the XML has double UTF-8 encoding
45-
*
46-
* This checks for the pattern where UTF-8 multi-byte characters are double-encoded
47-
* For example: "ç" (0xC3 0xA7) becomes "Ã§" (0xC3 0x83 0xC2 0xA7)
48-
*/
49-
private function detectDoubleUtf8Encoding(string $xml): bool
47+
private function sanitizeArray($data)
5048
{
51-
// Look for the double-encoding pattern: 0xC3 0x83 or 0xC3 0x82
52-
// This is a strong indicator of double UTF-8 encoding
53-
return preg_match('/\xC3[\x82\x83]/', $xml) === 1;
49+
if (! is_array($data)) {
50+
return $data;
51+
}
52+
53+
if (empty($data)) {
54+
return null;
55+
}
56+
57+
foreach ($data as $key => $value) {
58+
$data[$key] = $this->sanitizeArray($value);
59+
}
60+
61+
return $data;
5462
}
5563
}

0 commit comments

Comments
 (0)