Skip to content

Commit 683b5bd

Browse files
authored
Ensure special characters in manifest filepaths are urlencoded (#35)
* Ensure filepaths are encoded/decoded according to the spec * Add allow-plugins config * Switch host OS for PHP 7.3 * Finalize checks for bag filepaths * Don't test for newlines in fetch files or manifest.
1 parent 978b538 commit 683b5bd

22 files changed

+335
-57
lines changed

.github/workflows/build.yml

+9-4
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,21 @@ on:
1010

1111
jobs:
1212
build:
13-
runs-on: ubuntu-latest
13+
runs-on: ${{ matrix.host-os }}
1414
continue-on-error: ${{ matrix.experimental }}
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
php-versions: ["7.3", "7.4"]
18+
php-versions: ["7.4", "8.0"]
1919
experimental: [false]
20+
host-os: ["ubuntu-latest"]
2021
include:
21-
- php-versions: "8.0"
22+
- php-versions: "7.3"
23+
experimental: false
24+
host-os: "ubuntu-18.04"
25+
- php-versions: "8.1"
2226
experimental: true
27+
host-os: "ubuntu-latest"
2328

2429
name: PHP ${{ matrix.php-versions }}
2530
steps:
@@ -30,7 +35,7 @@ jobs:
3035
uses: shivammathur/setup-php@v2
3136
with:
3237
php-version: ${{ matrix.php-versions }}
33-
tools: phpunit
38+
coverage: none
3439

3540
- name: Get composer cache directory
3641
id: composercache

composer.json

+5
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,10 @@
4646
"@check",
4747
"phpdbg -qrr ./vendor/bin/phpunit"
4848
]
49+
},
50+
"config": {
51+
"allow-plugins": {
52+
"symfony/flex": false
53+
}
4954
}
5055
}

src/AbstractManifest.php

+26-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
namespace whikloj\BagItTools;
44

5-
use whikloj\BagItTools\Exceptions\BagItException;
65
use whikloj\BagItTools\Exceptions\FilesystemException;
76

87
/**
@@ -247,9 +246,7 @@ protected function loadFile()
247246
if (preg_match("~^(\w+)\s+\*?(.*)$~", $line, $matches)) {
248247
$hash = $matches[1];
249248
$originalPath = $matches[2];
250-
if (substr($originalPath, 0, 2) == "./") {
251-
$this->addLoadWarning("Line {$lineCount} : Paths SHOULD not be relative");
252-
}
249+
$this->checkIncomingFilePath($originalPath, $lineCount);
253250
$path = $this->cleanUpRelPath($originalPath);
254251
// Normalized path in lowercase (for matching)
255252
$lowerNormalized = $this->normalizePath($path);
@@ -263,6 +260,8 @@ protected function loadFile()
263260
$this->hashes[$path] = $hash;
264261
$this->addToNormalizedList($lowerNormalized);
265262
}
263+
} else {
264+
$this->addLoadError("Line $lineCount : Line is not of the form 'checksum path'");
266265
}
267266
}
268267
fclose($fp);
@@ -286,13 +285,35 @@ protected function writeToDisk()
286285
throw new FilesystemException("Unable to write {$fullPath}");
287286
}
288287
foreach ($this->hashes as $path => $hash) {
288+
$path = BagUtils::encodeFilepath($path);
289289
$line = "{$hash} {$path}" . PHP_EOL;
290290
$line = $this->bag->encodeText($line);
291291
BagUtils::checkedFwrite($fp, $line);
292292
}
293293
fclose($fp);
294294
}
295295

296+
/**
297+
* Does validation on incoming file paths.
298+
*
299+
* @param string $filepath
300+
* The file path to be checked.
301+
* @param int $lineCount
302+
* The line of the manifest we are currently checking.
303+
*/
304+
private function checkIncomingFilePath(string $filepath, int $lineCount): void
305+
{
306+
if (substr($filepath, 0, 2) == "./") {
307+
$this->addLoadWarning("Line $lineCount : Paths SHOULD not be relative");
308+
}
309+
if (BagUtils::checkUnencodedFilepath($filepath)) {
310+
$this->addLoadError(
311+
"Line $lineCount: File paths containing Line Feed (LF), Carriage Return (CR) or a percent sign (%) " .
312+
"MUST be encoded, and only those characters can be encoded."
313+
);
314+
}
315+
}
316+
296317
/**
297318
* Calculate the hash of the file.
298319
*
@@ -423,6 +444,7 @@ private function cleanUpRelPath($filepath) : string
423444
{
424445
$filepath = $this->bag->makeAbsolute($filepath);
425446
$filepath = $this->cleanUpAbsPath($filepath);
447+
$filepath = BagUtils::decodeFilepath($filepath);
426448
return $this->bag->makeRelative($filepath);
427449
}
428450

src/Bag.php

+1-9
Original file line numberDiff line numberDiff line change
@@ -866,18 +866,10 @@ public function setAlgorithm($algorithm)
866866
*/
867867
public function addFetchFile($url, $destination, $size = null)
868868
{
869-
$fetchData = [
870-
'uri' => $url,
871-
'destination' => $destination,
872-
];
873-
if (!is_null($size) && is_int($size)) {
874-
$fetchData['size'] = $size;
875-
}
876869
if (!isset($this->fetchFile)) {
877870
$this->fetchFile = new Fetch($this, false);
878871
}
879-
// Download the file now to help with manifest handling, deleted when you package() or finalize().
880-
$this->fetchFile->download($fetchData);
872+
$this->fetchFile->addFile($url, $destination, $size);
881873
$this->changed = true;
882874
}
883875

src/BagUtils.php

+61
Original file line numberDiff line numberDiff line change
@@ -353,4 +353,65 @@ public static function checkedFwrite($fp, $content)
353353
throw new FilesystemException("Error writing to file");
354354
}
355355
}
356+
357+
/**
358+
* Decode a file path according to the special rules of the spec.
359+
*
360+
* RFC 8943 - sections 2.1.3 & 2.2.3
361+
* If _filename_ includes an LF, a CR, a CRLF, or a percent sign (%), those characters (and only those) MUST be
362+
* percent-encoded as described in [RFC3986].
363+
*
364+
* @param string $line
365+
* The original filepath from the manifest file.
366+
* @return string
367+
* The filepath with the special characters decoded.
368+
*/
369+
public static function decodeFilepath(string $line): string
370+
{
371+
// Strip newlines from the right.
372+
$decoded = rtrim($line, "\r\n");
373+
return str_replace(
374+
["%0A", "%0D", "%25"],
375+
["\n", "\r", "%"],
376+
$decoded
377+
);
378+
}
379+
380+
/**
381+
* Encode a file path according to the special rules of the spec.
382+
*
383+
* RFC 8943 - sections 2.1.3 & 2.2.3
384+
* If _filename_ includes an LF, a CR, a CRLF, or a percent sign (%), those characters (and only those) MUST be
385+
* percent-encoded as described in [RFC3986].
386+
*
387+
* @param string $line
388+
* The original file path.
389+
* @return string
390+
* The file path with the special manifest characters encoded.
391+
*/
392+
public static function encodeFilepath(string $line): string
393+
{
394+
// Strip newlines from the right.
395+
$encoded = rtrim($line, "\r\n");
396+
return str_replace(
397+
["%", "\n", "\r"],
398+
["%25", "%0A", "%0D"],
399+
$encoded
400+
);
401+
}
402+
403+
/**
404+
* Check for unencoded newlines, carriage returns or % symbols in a file path.
405+
*
406+
* @param string $filepath
407+
* The file path to check
408+
* @return bool
409+
* True if there are un-encoded characters
410+
* @see \whikloj\BagItTools\BagUtils::encodeFilepath()
411+
* @see \whikloj\BagItTools\BagUtils::decodeFilepath()
412+
*/
413+
public static function checkUnencodedFilepath(string $filepath): bool
414+
{
415+
return preg_match_all("/%(?!(25|0A|0D))/", $filepath);
416+
}
356417
}

src/Fetch.php

+38-30
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ public function getData() : array
112112
*/
113113
public function downloadAll()
114114
{
115+
if (count($this->getErrors()) > 0) {
116+
throw new BagItException("The fetch.txt file has errors, unable to download files.");
117+
}
115118
$this->resetErrors();
116119
$this->downloadQueue = [];
117120
foreach ($this->files as $file) {
@@ -146,15 +149,36 @@ private function validateData(array $fetchData)
146149
if (!$this->internalValidateUrl($uri)) {
147150
throw new BagItException("This library only supports http/https URLs");
148151
}
149-
if (!$this->validatePath($dest)) {
150-
// Skip destinations with %xx other than %0A, %0D and %25
151-
throw new BagItException("Destination paths can't have any percent encoded characters except CR, LF, & %");
152-
}
153152
if (!$this->bag->pathInBagData($dest)) {
154153
throw new BagItException("Path {$dest} resolves outside the bag.");
155154
}
156155
}
157156

157+
/**
158+
* Add a file to this fetch file.
159+
*
160+
* @param string $url
161+
* The remote URL for the file.
162+
* @param string $destination
163+
* The bag destination path for the file.
164+
* @param int|null $size
165+
* The expected size of the file, or null for unknown.
166+
* @throws \whikloj\BagItTools\Exceptions\BagItException
167+
* Errors with adding this file to your fetch file.
168+
*/
169+
public function addFile(string $url, string $destination, int $size = null): void
170+
{
171+
$fetchData = [
172+
'uri' => $url,
173+
'destination' => $destination,
174+
];
175+
if (is_int($size)) {
176+
$fetchData['size'] = $size;
177+
}
178+
// Download the file now to help with manifest handling, deleted when you package() or finalize().
179+
$this->download($fetchData);
180+
}
181+
158182
/**
159183
* Download a single file as it is added to the fetch file so we can generate checksums.
160184
*
@@ -327,13 +351,20 @@ private function loadFiles()
327351
$filesize = (int)$filesize;
328352
}
329353
$destination = BagUtils::baseInData($matches[3]);
354+
if (BagUtils::checkUnencodedFilepath($destination)) {
355+
$this->addError(
356+
"Line $lineCount: Filepaths containing Line Feed (LF), Carriage Return (CR) or a " .
357+
"percent sign (%) MUST be encoded, and only those characters can be encoded."
358+
);
359+
}
360+
$destination = BagUtils::decodeFilepath($destination);
330361
$this->files[] = [
331362
'uri' => $uri,
332363
'size' => $filesize,
333364
'destination' => $destination,
334365
];
335366
} else {
336-
$this->addError("Line {$lineCount} : This line is not valid.");
367+
$this->addError("Line $lineCount : This line is not valid.");
337368
}
338369
}
339370
}
@@ -513,7 +544,8 @@ private function writeToDisk()
513544
throw new FilesystemException("Unable to write {$this->filename}");
514545
}
515546
foreach ($this->files as $fileData) {
516-
$line = "{$fileData['uri']} {$fileData['size']} {$fileData['destination']}" . PHP_EOL;
547+
$destination = BagUtils::encodeFilepath($fileData['destination']);
548+
$line = "{$fileData['uri']} {$fileData['size']} $destination" . PHP_EOL;
517549
$line = $this->bag->encodeText($line);
518550
BagUtils::checkedFwrite($fp, $line);
519551
}
@@ -555,30 +587,6 @@ private function internalValidateUrl($url) : bool
555587
return true;
556588
}
557589

558-
/**
559-
* Validate the path for fetch files.
560-
*
561-
* @param string $dest
562-
* The destination file path.
563-
* @return bool
564-
* True if it is valid.
565-
*/
566-
private function validatePath($dest) : bool
567-
{
568-
// You can't have any encoded characters in the destination string except LF, CR, CRLF and % itself.
569-
if (strpos($dest, '%') !== false) {
570-
$parts = explode('%', $dest);
571-
foreach ($parts as $part) {
572-
$char = substr($part, 0, 2);
573-
$char = strtolower($char);
574-
if (!($char == '0a' || $char == '0d' || $char == '25')) {
575-
return false;
576-
}
577-
}
578-
}
579-
return true;
580-
}
581-
582590
/**
583591
* Check if the url is already in the file.
584592
*

tests/BagInternalTest.php

+31
Original file line numberDiff line numberDiff line change
@@ -267,4 +267,35 @@ public function testArrayKeyExistsNoCase()
267267
['BOO', 'name', $test_array]
268268
));
269269
}
270+
271+
/**
272+
* @covers \whikloj\BagItTools\AbstractManifest::checkIncomingFilePath
273+
*/
274+
public function testCheckFilePathEncoding(): void
275+
{
276+
$bag = Bag::create($this->tmpdir);
277+
$payload = $bag->getPayloadManifests()['sha512'];
278+
$class = new \ReflectionClass('whikloj\BagItTools\PayloadManifest');
279+
$methodCall = $class->getMethod('checkIncomingFilePath');
280+
$methodCall->setAccessible(true);
281+
$loadIssues = $class->getProperty('loadIssues');
282+
$loadIssues->setAccessible(true);
283+
// Initially no errors
284+
$this->assertCount(0, $loadIssues->getValue($payload)['error']);
285+
// unencoded % symbol
286+
$methodCall->invokeArgs($payload, ["fail-for-%-filename.txt", 1]);
287+
$this->assertCount(1, $loadIssues->getValue($payload)['error']);
288+
// Urlencoded character that is not %25, %0A or %0D
289+
$methodCall->invokeArgs($payload, ["fail-for-%2F-filename.txt", 1]);
290+
$this->assertCount(2, $loadIssues->getValue($payload)['error']);
291+
// No issue with encoded %
292+
$methodCall->invokeArgs($payload, ["succeed-for-%25-filename.txt", 1]);
293+
$this->assertCount(2, $loadIssues->getValue($payload)['error']);
294+
// No issue for encoded line feed
295+
$methodCall->invokeArgs($payload, ["succeed-for-%0A-filename.txt", 1]);
296+
$this->assertCount(2, $loadIssues->getValue($payload)['error']);
297+
// No issue for encoded carriage return
298+
$methodCall->invokeArgs($payload, ["succeed-for-%0D-filename.txt", 1]);
299+
$this->assertCount(2, $loadIssues->getValue($payload)['error']);
300+
}
270301
}

tests/BagUtilsTest.php

+16-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public function testGetAllFiles()
110110
$this->assertCount(2, $files);
111111

112112
$files = BagUtils::getAllFiles(self::TEST_RESOURCES . DIRECTORY_SEPARATOR . 'fetchFiles');
113-
$this->assertCount(4, $files);
113+
$this->assertCount(5, $files);
114114

115115
$files = BagUtils::getAllFiles(self::TEST_EXTENDED_BAG_DIR);
116116
$this->assertCount(7, $files);
@@ -211,4 +211,19 @@ public function testCheckedFwrite()
211211
// Write to the file.
212212
BagUtils::checkedFwrite($fp, "Some example text");
213213
}
214+
215+
/**
216+
* @covers ::checkUnencodedFilepath
217+
*/
218+
public function testCheckUnencodedFilepath(): void
219+
{
220+
$this->assertTrue(BagUtils::checkUnencodedFilepath("some/path/with%2ffake/slashes"));
221+
$this->assertTrue(BagUtils::checkUnencodedFilepath("some/path/with%2Ffake/slashes"));
222+
$this->assertFalse(BagUtils::checkUnencodedFilepath("some/path/with%252ffake/slashes"));
223+
$this->assertFalse(BagUtils::checkUnencodedFilepath("some/path/with%252Ffake/slashes"));
224+
$this->assertFalse(BagUtils::checkUnencodedFilepath("some/path/with%0Aencoded/newlines"));
225+
$this->assertFalse(BagUtils::checkUnencodedFilepath("some/path/with%0Dencoded/carriage/returns"));
226+
$this->assertTrue(BagUtils::checkUnencodedFilepath("some/path/with%22encoded/quotes"));
227+
$this->assertFalse(BagUtils::checkUnencodedFilepath("some/path/with/nothing"));
228+
}
214229
}

0 commit comments

Comments
 (0)