1414class RowIterator implements IteratorInterface
1515{
1616 /**
17- * If no value is given to stream_get_line (), it defaults to 8192 (which may be too low).
17+ * If no value is given to fgetcsv (), it defaults to 8192 (which may be too low).
1818 * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
1919 */
2020 const MAX_READ_BYTES_PER_LINE = 32768 ;
@@ -128,16 +128,12 @@ public function next()
128128 }
129129
130130 do {
131- $ lineData = false ;
132- $ utf8EncodedLineData = $ this ->getNextUTF8EncodedLine ();
133- if ($ utf8EncodedLineData !== false ) {
134- $ lineData = $ this ->globalFunctionsHelper ->str_getcsv ($ utf8EncodedLineData , $ this ->fieldDelimiter , $ this ->fieldEnclosure );
135- }
131+ $ rowData = $ this ->getNextUTF8EncodedRow ();
136132 $ hasNowReachedEndOfFile = $ this ->globalFunctionsHelper ->feof ($ this ->filePointer );
137- } while (($ lineData === false && !$ hasNowReachedEndOfFile ) || $ this ->isEmptyLine ($ lineData ));
133+ } while (($ rowData === false && !$ hasNowReachedEndOfFile ) || $ this ->isEmptyLine ($ rowData ));
138134
139- if ($ lineData !== false ) {
140- $ this ->rowDataBuffer = $ lineData ;
135+ if ($ rowData !== false ) {
136+ $ this ->rowDataBuffer = $ rowData ;
141137 $ this ->numReadRows ++;
142138 } else {
143139 // If we reach this point, it means end of file was reached.
@@ -147,24 +143,39 @@ public function next()
147143 }
148144
149145 /**
150- * Returns the next line, converted if necessary to UTF-8.
151- * Neither fgets nor fgetcsv don't work with non UTF-8 data... so we need to do some things manually.
146+ * Returns the next row, converted if necessary to UTF-8.
147+ * As fgetcsv() does not correctly handle the encoding of non UTF-8 data,
148+ * we manually remove whitespace with ltrim or rtrim (depending on the byte order).
152149 *
153- * @return string |false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
150+ * @return array |false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
154151 * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
155152 */
156- protected function getNextUTF8EncodedLine ()
153+ protected function getNextUTF8EncodedRow ()
157154 {
158- // Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
159- $ encodedEOLDelimiter = $ this ->getEncodedEOLDelimiter ();
160- $ encodedLineData = $ this ->globalFunctionsHelper ->stream_get_line ($ this ->filePointer , self ::MAX_READ_BYTES_PER_LINE , $ encodedEOLDelimiter );
155+ $ encodedRowData = fgetcsv ($ this ->filePointer , self ::MAX_READ_BYTES_PER_LINE , $ this ->fieldDelimiter , $ this ->fieldEnclosure );
156+ if (false === $ encodedRowData ) {
157+ return false ;
158+ }
161159
162- // If the line could have been read, it can be converted to UTF-8
163- $ utf8EncodedLineData = ($ encodedLineData !== false ) ?
164- $ this ->encodingHelper ->attemptConversionToUTF8 ($ encodedLineData , $ this ->encoding ) :
165- false ;
160+ foreach ($ encodedRowData as $ cellIndex => $ cellValue ) {
161+ switch ($ this ->encoding ) {
162+ case EncodingHelper::ENCODING_UTF16_LE :
163+ case EncodingHelper::ENCODING_UTF32_LE :
164+ // Remove whitespace from the beginning of the string, as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
165+ $ cellValue = ltrim ($ cellValue );
166+ break ;
167+
168+ case EncodingHelper::ENCODING_UTF16_BE :
169+ case EncodingHelper::ENCODING_UTF32_BE :
170+ // Remove whitespace from the end of the string, as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
171+ $ cellValue = rtrim ($ cellValue );
172+ break ;
173+ }
174+
175+ $ encodedRowData [$ cellIndex ] = $ this ->encodingHelper ->attemptConversionToUTF8 ($ cellValue , $ this ->encoding );
176+ }
166177
167- return $ utf8EncodedLineData ;
178+ return $ encodedRowData ;
168179 }
169180
170181 /**
0 commit comments