Skip to content

Commit 3928c8a

Browse files
authored
Merge pull request grokability#16376 from uberbrady/improve_safety_csv_charset_detection
Add some safeties around the charset-detection and transliteration
2 parents 23ce54e + 646e3e8 commit 3928c8a

File tree

3 files changed

+50
-13
lines changed

3 files changed

+50
-13
lines changed

app/Http/Controllers/Api/ImportController.php

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -66,25 +66,41 @@ public function store() : JsonResponse
6666
if (! ini_get('auto_detect_line_endings')) {
6767
ini_set('auto_detect_line_endings', '1');
6868
}
69-
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
70-
$encoding = $detector->getEncoding($file_contents);
71-
$reader = null;
72-
if (strcasecmp($encoding, 'UTF-8') != 0) {
73-
$transliterated = iconv($encoding, 'UTF-8', $file_contents);
74-
if ($transliterated !== false) {
75-
$tmpname = tempnam(sys_get_temp_dir(), '');
76-
$tmpresults = file_put_contents($tmpname, $transliterated);
77-
if ($tmpresults !== false) {
69+
if (function_exists('iconv')) {
70+
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
71+
$encoding = $detector->getEncoding($file_contents);
72+
\Log::warning("Discovered encoding: $encoding in uploaded CSV");
73+
$reader = null;
74+
if (strcasecmp($encoding, 'UTF-8') != 0) {
75+
$transliterated = false;
76+
try {
77+
$transliterated = iconv(strtoupper($encoding), 'UTF-8', $file_contents);
78+
} catch (\Exception $e) {
79+
$transliterated = false; //blank out the partially-decoded string
80+
return response()->json(
81+
Helper::formatStandardApiResponse(
82+
'error',
83+
null,
84+
trans('admin/hardware/message.import.transliterate_failure', ["encoding" => $encoding])
85+
),
86+
422
87+
);
88+
}
89+
if ($transliterated !== false) {
90+
$tmpname = tempnam(sys_get_temp_dir(), '');
91+
$tmpresults = file_put_contents($tmpname, $transliterated);
7892
$transliterated = null; //save on memory?
79-
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
80-
if ($newfile->isValid()) {
81-
$file = $newfile;
93+
if ($tmpresults !== false) {
94+
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
95+
if ($newfile->isValid()) {
96+
$file = $newfile;
97+
}
8298
}
8399
}
84100
}
101+
$file_contents = null; //try to save on memory, I guess?
85102
}
86103
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
87-
$file_contents = null; //try to save on memory, I guess?
88104
89105
try {
90106
$import->header_row = $reader->fetchOne(0);

resources/lang/en-US/admin/hardware/message.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
'file_already_deleted' => 'The file selected was already deleted',
6767
'header_row_has_malformed_characters' => 'One or more attributes in the header row contain malformed UTF-8 characters',
6868
'content_row_has_malformed_characters' => 'One or more attributes in the first row of content contain malformed UTF-8 characters',
69+
'transliterate_failure' => 'Transliteration from :encoding to UTF-8 failed due to invalid characters in input'
6970
],
7071

7172

tests/Feature/Importing/Ui/ImportTest.php

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,24 @@ public function testStoreInternationalAsset(): void
4545
]);
4646
$this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]);
4747
}
48+
49+
public function testStoreInternationalAssetMisparse(): void
50+
{
51+
$evil_maker = function ($arr) {
52+
$results = '';
53+
foreach ($arr as $thing) {
54+
$results .= chr($thing);
55+
}
56+
return $results;
57+
};
58+
59+
// 0xC0 makes it 'not unicode', and 0xFF makes it 'likely WINDOWS-1251', and 0x98 at the end makes it 'not-valid-Windows-1251'
60+
$evil_content = $evil_maker([0xC0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x98]);
61+
62+
$this->actingAsForApi(User::factory()->superuser()->create());
63+
$results = $this->post(route('api.imports.store'), ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $evil_content)]])
64+
->assertStatus(422)
65+
->assertStatusMessageIs('error')
66+
->assertMessagesAre(trans('admin/hardware/message.import.transliterate_failure', ["encoding" => "windows-1251"]));
67+
}
4868
}

0 commit comments

Comments
 (0)