Skip to content
This repository was archived by the owner on May 26, 2022. It is now read-only.

Commit b69e280

Browse files
committed
Merge pull request #187 from skeleton/issue-183
Fix line breaks on CSV reader
2 parents e321f30 + d6e8fe4 commit b69e280

File tree

5 files changed

+51
-50
lines changed

5 files changed

+51
-50
lines changed

src/Spout/Common/Helper/GlobalFunctionsHelper.php

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -291,35 +291,6 @@ public function stream_get_wrappers()
291291
return stream_get_wrappers();
292292
}
293293

294-
/**
295-
* Wrapper around global function stream_get_line()
296-
* @see stream_get_line()
297-
*
298-
* @param resource $handle
299-
* @param int $length
300-
* @param string|void $ending
301-
* @return string|bool
302-
*/
303-
public function stream_get_line($handle, $length, $ending = null)
304-
{
305-
return stream_get_line($handle, $length, $ending);
306-
}
307-
308-
/**
309-
* Wrapper around global function str_getcsv()
310-
* @see str_getcsv()
311-
*
312-
* @param string $input
313-
* @param string|void $delimiter
314-
* @param string|void $enclosure
315-
* @param string|void $escape
316-
* @return array
317-
*/
318-
public function str_getcsv($input, $delimiter = null, $enclosure = null, $escape = null)
319-
{
320-
return str_getcsv($input, $delimiter, $enclosure, $escape);
321-
}
322-
323294
/**
324295
* Wrapper around global function function_exists()
325296
* @see function_exists()

src/Spout/Reader/CSV/Reader.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ class Reader extends AbstractReader
3232
/** @var string Defines the End of line */
3333
protected $endOfLineCharacter = "\n";
3434

35+
/** @var string */
36+
protected $autoDetectLineEndings;
37+
3538
/**
3639
* Sets the field delimiter for the CSV.
3740
* Needs to be called before opening the reader.
@@ -104,6 +107,9 @@ protected function doesSupportStreamWrapper()
104107
*/
105108
protected function openReader($filePath)
106109
{
110+
$this->autoDetectLineEndings = ini_get('auto_detect_line_endings');
111+
ini_set('auto_detect_line_endings', '1');
112+
107113
$this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
108114
if (!$this->filePointer) {
109115
throw new IOException("Could not open file $filePath for reading.");
@@ -140,5 +146,7 @@ protected function closeReader()
140146
if ($this->filePointer) {
141147
$this->globalFunctionsHelper->fclose($this->filePointer);
142148
}
149+
150+
ini_set('auto_detect_line_endings', $this->autoDetectLineEndings);
143151
}
144152
}

src/Spout/Reader/CSV/RowIterator.php

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
class RowIterator implements IteratorInterface
1515
{
1616
/**
17-
* If no value is given to stream_get_line(), it defaults to 8192 (which may be too low).
17+
* If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
1818
* Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
1919
*/
2020
const MAX_READ_BYTES_PER_LINE = 32768;
@@ -128,16 +128,12 @@ public function next()
128128
}
129129

130130
do {
131-
$lineData = false;
132-
$utf8EncodedLineData = $this->getNextUTF8EncodedLine();
133-
if ($utf8EncodedLineData !== false) {
134-
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
135-
}
131+
$rowData = $this->getNextUTF8EncodedRow();
136132
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
137-
} while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));
133+
} while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));
138134

139-
if ($lineData !== false) {
140-
$this->rowDataBuffer = $lineData;
135+
if ($rowData !== false) {
136+
$this->rowDataBuffer = $rowData;
141137
$this->numReadRows++;
142138
} else {
143139
// If we reach this point, it means end of file was reached.
@@ -147,24 +143,39 @@ public function next()
147143
}
148144

149145
/**
150-
* Returns the next line, converted if necessary to UTF-8.
151-
* Neither fgets nor fgetcsv works with non UTF-8 data... so we need to do some things manually.
146+
* Returns the next row, converted if necessary to UTF-8.
147+
* As fgetcsv() does not correctly handle non UTF-8 encoded data,
148+
* we manually remove whitespace with ltrim or rtrim (depending on the byte order)
152149
*
153-
* @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
150+
* @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
154151
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
155152
*/
156-
protected function getNextUTF8EncodedLine()
153+
protected function getNextUTF8EncodedRow()
157154
{
158-
// Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
159-
$encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
160-
$encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $encodedEOLDelimiter);
155+
$encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
156+
if (false === $encodedRowData) {
157+
return false;
158+
}
161159

162-
// If the line could have been read, it can be converted to UTF-8
163-
$utf8EncodedLineData = ($encodedLineData !== false) ?
164-
$this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) :
165-
false;
160+
foreach ($encodedRowData as $cellIndex => $cellValue) {
161+
switch($this->encoding) {
162+
case EncodingHelper::ENCODING_UTF16_LE:
163+
case EncodingHelper::ENCODING_UTF32_LE:
164+
// remove whitespace from the beginning of the string, as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
165+
$cellValue = ltrim($cellValue);
166+
break;
167+
168+
case EncodingHelper::ENCODING_UTF16_BE:
169+
case EncodingHelper::ENCODING_UTF32_BE:
170+
// remove whitespace from the end of the string, as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
171+
$cellValue = rtrim($cellValue);
172+
break;
173+
}
174+
175+
$encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
176+
}
166177

167-
return $utf8EncodedLineData;
178+
return $encodedRowData;
168179
}
169180

170181
/**

tests/Spout/Reader/CSV/ReaderTest.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,15 @@ public function testReadShouldSupportCustomFieldEnclosure()
204204
$this->assertEquals('This is, a comma', $allRows[0][0]);
205205
}
206206

207+
/**
208+
* @return void
209+
*/
210+
public function testReadShouldNotTruncateLineBreak()
211+
{
212+
$allRows = $this->getAllRowsForFile('csv_with_line_breaks.csv', ',');
213+
$this->assertEquals("This is,\na comma", $allRows[0][0]);
214+
}
215+
207216
/**
208217
* @return array
209218
*/
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"This is,
2+
a comma",csv--12

0 commit comments

Comments
 (0)