Skip to content

Commit 57eb89e

Browse files
authored
Split BagIt spec files to allow for CR, LF or CRLF line endings (#37)
1 parent 683b5bd commit 57eb89e

13 files changed

+180
-21
lines changed

src/AbstractManifest.php

+4-5
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,14 @@ protected function loadFile()
230230
$this->resetLoadIssues();
231231
$fullPath = $this->bag->makeAbsolute($this->filename);
232232
if (file_exists($fullPath)) {
233-
$fp = fopen($fullPath, "rb");
234-
if ($fp === false) {
233+
$file_contents = file_get_contents($fullPath);
234+
if ($file_contents === false) {
235235
throw new FilesystemException("Unable to read file {$fullPath}");
236236
}
237237
$lineCount = 0;
238-
while (!feof($fp)) {
238+
$lines = BagUtils::splitFileDataOnLineEndings($file_contents);
239+
foreach ($lines as $line) {
239240
$lineCount += 1;
240-
$line = fgets($fp);
241241
$line = $this->bag->decodeText($line);
242242
$line = trim($line);
243243
if (empty($line)) {
@@ -264,7 +264,6 @@ protected function loadFile()
264264
$this->addLoadError("Line $lineCount : Line is not of the form 'checksum path'");
265265
}
266266
}
267-
fclose($fp);
268267
}
269268
}
270269

src/Bag.php

+9-10
Original file line numberDiff line numberDiff line change
@@ -1214,19 +1214,19 @@ private function loadBagInfo()
12141214
$info_file = 'bag-info.txt';
12151215
$fullPath = $this->makeAbsolute($info_file);
12161216
if (file_exists($fullPath)) {
1217-
$fp = fopen($fullPath, 'rb');
1218-
if ($fp === false) {
1217+
$file_contents = file_get_contents($fullPath);
1218+
if ($file_contents === false) {
12191219
throw new FilesystemException("Unable to access {$info_file}");
12201220
}
12211221
$bagData = [];
12221222
$lineCount = 0;
1223-
while (!feof($fp)) {
1224-
$line = fgets($fp);
1223+
$lines = BagUtils::splitFileDataOnLineEndings($file_contents);
1224+
foreach ($lines as $line) {
12251225
$lineCount += 1;
12261226
if (trim($line) == "") {
12271227
continue;
12281228
}
1229-
$line = $this->decodeText($line);
1229+
$line = $this->decodeText($line) . PHP_EOL;
12301230
if (!empty($line)) {
12311231
$lineLength = strlen($line);
12321232
if (substr($line, 0, 2) == " " || $line[0] == "\t") {
@@ -1243,7 +1243,7 @@ private function loadBagInfo()
12431243
}
12441244
$bagData[count($bagData)-1]['value'] = $previousValue;
12451245
}
1246-
} elseif (preg_match("~^(\s+)?([^:]+?)(\s+)?:(.*)([\r\n]+)~", $line, $matches)) {
1246+
} elseif (preg_match("~^(\s+)?([^:]+?)(\s+)?:(.*)~", $line, $matches)) {
12471247
// First line
12481248
$current_tag = $matches[2];
12491249
if ($this->mustNotRepeatBagInfoExists($current_tag)) {
@@ -1268,19 +1268,18 @@ private function loadBagInfo()
12681268
$value = $matches[4];
12691269
} else {
12701270
// Shorter line, save the newline.
1271-
$value = $matches[4] . $matches[5];
1271+
$value = $matches[4] . PHP_EOL;
12721272
}
12731273
$bagData[] = [
12741274
'tag' => $current_tag,
12751275
// Newline is removed by preg_match, re-add it here.
12761276
'value' => Bag::trimSpacesOnly($value),
12771277
];
1278-
} elseif (!empty($line)) {
1278+
} else {
12791279
$this->addBagError($info_file, "Invalid tag.");
12801280
}
12811281
}
12821282
}
1283-
fclose($fp);
12841283
// We left newlines on the end of each tag value, those can be stripped.
12851284
array_walk($bagData, function (&$item) {
12861285
$item['value'] = rtrim($item['value']);
@@ -1765,7 +1764,7 @@ private function loadBagIt()
17651764
if ($contents === false) {
17661765
throw new FilesystemException("Unable to read {$fullPath}");
17671766
}
1768-
$lines = explode(PHP_EOL, $contents);
1767+
$lines = BagUtils::splitFileDataOnLineEndings($contents);
17691768
// remove blank lines.
17701769
$lines = array_filter($lines);
17711770
array_walk(

src/BagUtils.php

+13
Original file line numberDiff line numberDiff line change
@@ -414,4 +414,17 @@ public static function checkUnencodedFilepath(string $filepath): bool
414414
{
415415
return preg_match_all("/%(?!(25|0A|0D))/", $filepath);
416416
}
417+
418+
/**
419+
* Split the file data on any of the allowed line endings.
420+
*
421+
* @param string $data
422+
* The file data as a single string.
423+
* @return array
424+
* Array split on \r\n, \r, and \n
425+
*/
426+
public static function splitFileDataOnLineEndings(string $data): array
427+
{
428+
return preg_split("/(\r\n|\r|\n)/", $data);
429+
}
417430
}

src/Fetch.php

+5-5
Original file line numberDiff line numberDiff line change
@@ -330,14 +330,14 @@ private function loadFiles()
330330
{
331331
$this->resetErrors();
332332
if (file_exists($this->filename)) {
333-
$fp = fopen($this->filename, "rb");
334-
if ($fp === false) {
333+
$file_contents = file_get_contents($this->filename);
334+
if ($file_contents === false) {
335335
throw new FilesystemException("Unable to read file {$this->filename}");
336336
}
337337
$lineCount = 0;
338-
while (!feof($fp)) {
338+
$lines = BagUtils::splitFileDataOnLineEndings($file_contents);
339+
foreach ($lines as $line) {
339340
$lineCount += 1;
340-
$line = fgets($fp);
341341
$line = $this->bag->decodeText($line);
342342
$line = trim($line);
343343
if (empty($line)) {
@@ -364,7 +364,7 @@ private function loadFiles()
364364
'destination' => $destination,
365365
];
366366
} else {
367-
$this->addError("Line $lineCount : This line is not valid.");
367+
$this->addError("Line $lineCount: This line is not valid.");
368368
}
369369
}
370370
}

tests/BagUtilsTest.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public function testGetAllFiles()
110110
$this->assertCount(2, $files);
111111

112112
$files = BagUtils::getAllFiles(self::TEST_RESOURCES . DIRECTORY_SEPARATOR . 'fetchFiles');
113-
$this->assertCount(5, $files);
113+
$this->assertCount(8, $files);
114114

115115
$files = BagUtils::getAllFiles(self::TEST_EXTENDED_BAG_DIR);
116116
$this->assertCount(7, $files);

tests/ExtendedBagTest.php

+85
Original file line numberDiff line numberDiff line change
@@ -533,4 +533,89 @@ public function testLoadWrappedLines()
533533
$this->assertEquals("This is some crazy information about a new way of searching for : the stuff. " .
534534
"Why do this? Because we can.", $testbag2->getBagInfoByTag('External-Description')[0]);
535535
}
536+
537+
/**
538+
* Repeat the reading of a bag but with CR instead of LF endings.
539+
*
540+
* @covers \whikloj\BagItTools\AbstractManifest::loadFile
541+
*
542+
* @see \whikloj\BagItTools\Test\ExtendedBagTest::testLoadExtendedBag()
543+
*/
544+
public function testLoadExtendedCRLineEndings(): void
545+
{
546+
$this->tmpdir = $this->prepareExtendedTestBag();
547+
$this->switchLineEndingsTo("\r");
548+
549+
$bag = Bag::load($this->tmpdir);
550+
$this->assertTrue($bag->isExtended());
551+
$payloads = $bag->getPayloadManifests();
552+
$tags = $bag->getTagManifests();
553+
$this->assertCount(1, $payloads);
554+
$this->assertCount(1, $tags);
555+
$this->assertArrayHasKey('sha1', $payloads);
556+
$this->assertArrayHasKey('sha1', $tags);
557+
$this->assertCount(2, $payloads['sha1']->getHashes());
558+
$this->assertCount(4, $tags['sha1']->getHashes());
559+
$this->assertArrayHasKey('bagit.txt', $tags['sha1']->getHashes());
560+
$this->assertArrayHasKey('bag-info.txt', $tags['sha1']->getHashes());
561+
$this->assertArrayHasKey('manifest-sha1.txt', $tags['sha1']->getHashes());
562+
$this->assertArrayHasKey('alt_tags/random_tags.txt', $tags['sha1']->getHashes());
563+
$this->assertTrue($bag->hasBagInfoTag('contact-phone'));
564+
$this->assertFalse($bag->hasBagInfoTag('payload-oxum'));
565+
$this->assertFalse($bag->hasBagInfoTag('bag-size'));
566+
}
567+
568+
/**
569+
* Repeat the reading of a bag but with CRLF instead of just LF endings.
570+
*
571+
* @covers \whikloj\BagItTools\AbstractManifest::loadFile
572+
*
573+
* @see \whikloj\BagItTools\Test\ExtendedBagTest::testLoadExtendedBag()
574+
*/
575+
public function testLoadExtendedCRLFLineEndings(): void
576+
{
577+
$this->tmpdir = $this->prepareExtendedTestBag();
578+
$this->switchLineEndingsTo("\r\n");
579+
580+
$bag = Bag::load($this->tmpdir);
581+
$this->assertTrue($bag->isExtended());
582+
$payloads = $bag->getPayloadManifests();
583+
$tags = $bag->getTagManifests();
584+
$this->assertCount(1, $payloads);
585+
$this->assertCount(1, $tags);
586+
$this->assertArrayHasKey('sha1', $payloads);
587+
$this->assertArrayHasKey('sha1', $tags);
588+
$this->assertCount(2, $payloads['sha1']->getHashes());
589+
$this->assertCount(4, $tags['sha1']->getHashes());
590+
$this->assertArrayHasKey('bagit.txt', $tags['sha1']->getHashes());
591+
$this->assertArrayHasKey('bag-info.txt', $tags['sha1']->getHashes());
592+
$this->assertArrayHasKey('manifest-sha1.txt', $tags['sha1']->getHashes());
593+
$this->assertArrayHasKey('alt_tags/random_tags.txt', $tags['sha1']->getHashes());
594+
$this->assertTrue($bag->hasBagInfoTag('contact-phone'));
595+
$this->assertFalse($bag->hasBagInfoTag('payload-oxum'));
596+
$this->assertFalse($bag->hasBagInfoTag('bag-size'));
597+
}
598+
599+
/**
600+
* Switch the line endings of the test extended bag from \r to
601+
*
602+
* @param string $newEnding
603+
* What to switch the line endings to.
604+
*/
605+
private function switchLineEndingsTo(string $newEnding): void
606+
{
607+
$files = [
608+
"bagit.txt",
609+
"bag-info.txt",
610+
"manifest-sha1.txt",
611+
"tagmanifest-sha1.txt",
612+
];
613+
foreach ($files as $file) {
614+
$path = $this->tmpdir . DIRECTORY_SEPARATOR . $file;
615+
file_put_contents(
616+
$path,
617+
str_replace("\n", $newEnding, file_get_contents($path))
618+
);
619+
}
620+
}
536621
}

tests/FetchTest.php

+30
Original file line numberDiff line numberDiff line change
@@ -713,4 +713,34 @@ public function testCreateFetchWithEncodedCharacters(): void
713713
$manifest = $bag->getPayloadManifests()['sha512'];
714714
$this->assertArrayEquals($expected_in_memory, array_keys($manifest->getHashes()));
715715
}
716+
717+
public function testReadCRLineEndings(): void
718+
{
719+
$fetch = $this->setupBag('fetch-CR.txt');
720+
$this->assertCount(0, $fetch->getErrors());
721+
$cr_data = $fetch->getData();
722+
$this->assertCount(2, $cr_data);
723+
$this->assertTrue(in_array('http://example.org/some/file/1', array_column($cr_data, 'uri')));
724+
$this->assertTrue(in_array('http://example.org/some/file/2', array_column($cr_data, 'uri')));
725+
}
726+
727+
public function testReadLFLineEndings(): void
728+
{
729+
$fetch = $this->setupBag('fetch-LF.txt');
730+
$this->assertCount(0, $fetch->getErrors());
731+
$cr_data = $fetch->getData();
732+
$this->assertCount(2, $cr_data);
733+
$this->assertTrue(in_array('http://example.org/some/file/1', array_column($cr_data, 'uri')));
734+
$this->assertTrue(in_array('http://example.org/some/file/2', array_column($cr_data, 'uri')));
735+
}
736+
737+
public function testReadCRLFLineEndings(): void
738+
{
739+
$fetch = $this->setupBag('fetch-CRLF.txt');
740+
$this->assertCount(0, $fetch->getErrors());
741+
$cr_data = $fetch->getData();
742+
$this->assertCount(2, $cr_data);
743+
$this->assertTrue(in_array('http://example.org/some/file/1', array_column($cr_data, 'uri')));
744+
$this->assertTrue(in_array('http://example.org/some/file/2', array_column($cr_data, 'uri')));
745+
}
716746
}

tests/ManifestTest.php

+24
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,30 @@ public function testLoadBagWithUnencodedFilepaths(): void
224224
$this->assertCount(2, $payload->getHashes());
225225
}
226226

227+
/**
228+
* Ensure we properly load manifests with CR line endings only.
229+
*/
230+
public function testLoadBagWithCRLineEndings(): void
231+
{
232+
$this->prepareManifest("TestBag-manifest-CR-sha256.txt");
233+
$bag = Bag::load($this->tmpdir);
234+
$this->assertTrue($bag->validate());
235+
$payload = $bag->getPayloadManifests()['sha256'];
236+
$this->assertCount(3, $payload->getHashes());
237+
}
238+
239+
/**
240+
* Ensure we properly load manifests with CRLF line endings only.
241+
*/
242+
public function testLoadBagWithCRLFLineEndings(): void
243+
{
244+
$this->prepareManifest("TestBag-manifest-CRLF-sha256.txt");
245+
$bag = Bag::load($this->tmpdir);
246+
$this->assertTrue($bag->validate());
247+
$payload = $bag->getPayloadManifests()['sha256'];
248+
$this->assertCount(3, $payload->getHashes());
249+
}
250+
227251
/**
228252
* Utility to set a bag with a specific manifest file.
229253
* @param string $manifest_filename

tests/resources/fetchFiles/fetch-CR.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
http://example.org/some/file/1 - data/some-file.txthttp://example.org/some/file/2 - data/some-other-file.txt
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
http://example.org/some/file/1 - data/some-file.txt
2+
http://example.org/some/file/2 - data/some-other-file.txt
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
http://example.org/some/file/1 - data/some-file.txt
2+
http://example.org/some/file/2 - data/some-other-file.txt

tests/resources/manifests/TestBag-manifest-CR-sha256.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3663a4f1d58f56248bf3708a27c1f143a9ec96ea5dbb78d627a4330b73f8b6db data/jekyll_and_hyde.txt0fd56c020e20bf86fd89b3357b30203c684411afca5c7aa98023f32f7536e936 data/pictures/another_picture.txt2c9d71a5db0a38b99b897f2397f7e252f451a7d219d044a47f22ffd3637e64e3 data/pictures/some_more_data.txt
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
3663a4f1d58f56248bf3708a27c1f143a9ec96ea5dbb78d627a4330b73f8b6db data/jekyll_and_hyde.txt
2+
0fd56c020e20bf86fd89b3357b30203c684411afca5c7aa98023f32f7536e936 data/pictures/another_picture.txt
3+
2c9d71a5db0a38b99b897f2397f7e252f451a7d219d044a47f22ffd3637e64e3 data/pictures/some_more_data.txt

0 commit comments

Comments
 (0)