Skip to content

Commit 446c7fb

Browse files
authored
Merge pull request #16096 from uberbrady/detect_csv_encodings_v2
2 parents 327491c + bbb2af7 commit 446c7fb

File tree

5 files changed

+131
-4
lines changed

5 files changed

+131
-4
lines changed

app/Http/Controllers/Api/ImportController.php

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
use App\Models\Asset;
1010
use App\Models\Company;
1111
use App\Models\Import;
12+
use Illuminate\Http\UploadedFile;
1213
use Illuminate\Support\Facades\Artisan;
1314
use Illuminate\Database\Eloquent\JsonEncodingException;
1415
use Illuminate\Support\Facades\Request;
1516
use Illuminate\Support\Facades\Session;
1617
use Illuminate\Support\Facades\Storage;
1718
use League\Csv\Reader;
19+
use Onnov\DetectEncoding\EncodingDetector;
1820
use Symfony\Component\HttpFoundation\File\Exception\FileException;
1921
use Illuminate\Support\Facades\Log;
2022
use Illuminate\Http\JsonResponse;
@@ -45,6 +47,8 @@ public function store() : JsonResponse
4547
$path = config('app.private_uploads').'/imports';
4648
$results = [];
4749
$import = new Import;
50+
$detector = new EncodingDetector();
51+
4852
foreach ($files as $file) {
4953
if (! in_array($file->getMimeType(), [
5054
'application/vnd.ms-excel',
@@ -55,15 +59,32 @@ public function store() : JsonResponse
5559
'text/comma-separated-values',
5660
'text/tsv', ])) {
5761
$results['error'] = 'File type must be CSV. Uploaded file is '.$file->getMimeType();
58-
5962
return response()->json(Helper::formatStandardApiResponse('error', null, $results['error']), 422);
6063
}
6164

6265
//TODO: is there a lighter way to do this?
6366
if (! ini_get('auto_detect_line_endings')) {
6467
ini_set('auto_detect_line_endings', '1');
6568
}
69+
$file_contents = $file->getContent(); //TODO - this *does* load the whole file in RAM, but we need that to be able to 'iconv' it?
70+
$encoding = $detector->getEncoding($file_contents);
71+
$reader = null;
72+
if (strcasecmp($encoding, 'UTF-8') != 0) {
73+
$transliterated = iconv($encoding, 'UTF-8', $file_contents);
74+
if ($transliterated !== false) {
75+
$tmpname = tempnam(sys_get_temp_dir(), '');
76+
$tmpresults = file_put_contents($tmpname, $transliterated);
77+
if ($tmpresults !== false) {
78+
$transliterated = null; //save on memory?
79+
$newfile = new UploadedFile($tmpname, $file->getClientOriginalName(), null, null, true); //WARNING: this is enabling 'test mode' - which is gross, but otherwise the file won't be treated as 'uploaded'
80+
if ($newfile->isValid()) {
81+
$file = $newfile;
82+
}
83+
}
84+
}
85+
}
6686
$reader = Reader::createFromFileObject($file->openFile('r')); //file pointer leak?
87+
$file_contents = null; //try to save on memory, I guess?
6788
6889
try {
6990
$import->header_row = $reader->fetchOne(0);

composer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"php": "^8.1",
2121
"ext-curl": "*",
2222
"ext-fileinfo": "*",
23+
"ext-iconv": "*",
2324
"ext-json": "*",
2425
"ext-mbstring": "*",
2526
"ext-pdo": "*",
@@ -55,6 +56,7 @@
5556
"nunomaduro/collision": "^7.0",
5657
"okvpn/clock-lts": "^1.0",
5758
"onelogin/php-saml": "^3.4",
59+
"onnov/detect-encoding": "^2.0",
5860
"osa-eg/laravel-teams-notification": "^2.1",
5961
"paragonie/constant_time_encoding": "^2.3",
6062
"paragonie/sodium_compat": "^1.19",

composer.lock

Lines changed: 66 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/Feature/Importing/Ui/ImportTest.php

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
namespace Tests\Feature\Importing\Ui;
44

55
use App\Models\User;
6+
use Illuminate\Http\UploadedFile;
7+
use PHPUnit\Framework\Attributes\Test;
68
use Tests\TestCase;
79

810
class ImportTest extends TestCase
@@ -13,4 +15,35 @@ public function testPageRenders()
1315
->get(route('imports.index'))
1416
->assertOk();
1517
}
18+
19+
public function testStoreInternationalAsset(): void
20+
{
21+
$evil_string = 'це'; //cyrillic - windows-1251 (ONE)
22+
$csv_contents = "a,b,c\n$evil_string,$evil_string,$evil_string\n";
23+
24+
// now, deliberately transform our UTF-8 into Windows-1251 so we can test out the character-set detection
25+
$transliterated_contents = iconv('UTF-8', 'WINDOWS-1251', $csv_contents);
26+
//\Log::error("RAW TRANSLITERATED CONTENTS: $transliterated_contents"); // should show 'unicode missing glyph' symbol in logs.
27+
28+
$this->actingAsForApi(User::factory()->superuser()->create());
29+
$results = $this->post(route('api.imports.store'), ['files' => [UploadedFile::fake()->createWithContent("myname.csv", $transliterated_contents)]])
30+
->assertOk()
31+
->assertJsonStructure([
32+
"files" => [
33+
[
34+
"created_at",
35+
"field_map",
36+
"file_path",
37+
"filesize",
38+
"first_row",
39+
"header_row",
40+
"id",
41+
"import_type",
42+
"name",
43+
]
44+
]
45+
]);
46+
\Log::error(print_r($results, true));
47+
$this->assertEquals($evil_string, $results->json()['files'][0]['first_row'][0]);
48+
}
1649
}

tests/Support/Importing/FileBuilder.php

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,17 +206,23 @@ public function toCsv(): array
206206
*
207207
* @return string The filename.
208208
*/
209-
public function saveToImportsDirectory(?string $filename = null): string
209+
public function saveToImportsDirectory(?string $filename = null, ?string $locale = null): string
210210
{
211211
$filename ??= Str::random(40) . '.csv';
212212

213213
try {
214214
$stream = fopen(config('app.private_uploads') . "/imports/{$filename}", 'w');
215215

216216
foreach ($this->toCsv() as $row) {
217+
if ($locale) {
218+
$newrow = [];
219+
foreach ($row as $index => $cell) {
220+
$newrow[$index] = iconv('utf-8', $locale, (string) $cell);
221+
}
222+
$row = $newrow;
223+
}
217224
fputcsv($stream, $row);
218225
}
219-
220226
return $filename;
221227
} finally {
222228
if (is_resource($stream)) {

0 commit comments

Comments
 (0)