Skip to content

Commit 784e813

Browse files
Change Windows newline detection to occur on all platforms, not just Unix. (#320)
1 parent 5d564b6 commit 784e813

8 files changed

Lines changed: 125 additions & 131 deletions

File tree

common/src/tsv_utils/common/utils.d

Lines changed: 73 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ $(LIST
4141
* [getTsvFieldValue] - A convenience function when only a single value is needed
4242
from an input line.
4343
44-
* [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds to detecting
45-
Windows newlines in input.
44+
* [throwIfWindowsNewline] - A utility for detecting Windows newlines in input.
4645
)
4746
4847
Copyright (c) 2015-2020, eBay Inc.
@@ -1426,103 +1425,98 @@ if (isSomeChar!C)
14261425
assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
14271426
}
14281427

1429-
/** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix.
1430-
* A Yes value indicates the Unix newline was already removed, as might be done via
1431-
* std.File.byLine or similar mechanism.
1432-
*/
1428+
/**
1429+
Yes|No.newlineWasRemoved is a template parameter to throwIfWindowsNewline. A Yes
1430+
value indicates the Unix newline was already removed, as might be done via
1431+
std.stdio.File.byLine or similar mechanism.
1432+
*/
14331433
alias NewlineWasRemoved = Flag!"newlineWasRemoved";
14341434

14351435
/**
1436-
throwIfWindowsLineNewlineOnUnix is used to throw an exception if a Windows/DOS
1437-
line ending is found on a build compiled for a Unix platform. This is used by
1438-
the TSV Utilities to detect Window/DOS line endings and terminate processing
1439-
with an error message to the user.
1440-
*/
1441-
void throwIfWindowsNewlineOnUnix
1436+
throwIfWindowsNewline throws an exception if the 'line' argument ends with a
1437+
Windows/DOS line ending. This is used by TSV Utilities tools to detect Windows/DOS
1438+
line endings and terminate processing with an error message to the user.
1439+
1440+
The 'nlWasRemoved' template parameter can be used if a Unix newline character was
1441+
already removed. In this case the CR character from a Windows CRLF remains and can be
1442+
detected. This is useful when reading files in binary mode, stripping Unix newlines.
1443+
*/
1444+
void throwIfWindowsNewline
14421445
(NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
14431446
(const char[] line, const char[] filename, size_t lineNum)
14441447
{
1445-
version(Posix)
1448+
static if (nlWasRemoved)
14461449
{
1447-
static if (nlWasRemoved)
1448-
{
1449-
immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r';
1450-
}
1451-
else
1452-
{
1453-
immutable bool hasWindowsLineEnding =
1454-
line.length > 1 &&
1455-
line[$ - 2] == '\r' &&
1456-
line[$ - 1] == '\n';
1457-
}
1450+
immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r';
1451+
}
1452+
else
1453+
{
1454+
immutable bool hasWindowsLineEnding =
1455+
line.length > 1 &&
1456+
line[$ - 2] == '\r' &&
1457+
line[$ - 1] == '\n';
1458+
}
14581459

1459-
if (hasWindowsLineEnding)
1460-
{
1461-
import std.format;
1462-
throw new Exception(
1463-
format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s",
1464-
(filename == "-") ? "Standard Input" : filename, lineNum));
1465-
}
1460+
if (hasWindowsLineEnding)
1461+
{
1462+
import std.format;
1463+
throw new Exception(
1464+
format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s",
1465+
(filename == "-") ? "Standard Input" : filename, lineNum));
14661466
}
14671467
}
14681468

1469-
// throwIfWindowsNewlineOnUnix
1469+
// throwIfWindowsNewline
14701470
@safe unittest
14711471
{
1472-
/* Note: Currently only building on Posix. Need to add non-Posix test cases
1473-
* if Windows builds are ever done.
1474-
*/
1475-
version(Posix)
1476-
{
1477-
import std.exception;
1472+
import std.exception;
14781473

1479-
assertNotThrown(throwIfWindowsNewlineOnUnix("", "afile.tsv", 1));
1480-
assertNotThrown(throwIfWindowsNewlineOnUnix("a", "afile.tsv", 2));
1481-
assertNotThrown(throwIfWindowsNewlineOnUnix("ab", "afile.tsv", 3));
1482-
assertNotThrown(throwIfWindowsNewlineOnUnix("abc", "afile.tsv", 4));
1474+
assertNotThrown(throwIfWindowsNewline("", "afile.tsv", 1));
1475+
assertNotThrown(throwIfWindowsNewline("a", "afile.tsv", 2));
1476+
assertNotThrown(throwIfWindowsNewline("ab", "afile.tsv", 3));
1477+
assertNotThrown(throwIfWindowsNewline("abc", "afile.tsv", 4));
14831478

1484-
assertThrown(throwIfWindowsNewlineOnUnix("\r", "afile.tsv", 1));
1485-
assertThrown(throwIfWindowsNewlineOnUnix("a\r", "afile.tsv", 2));
1486-
assertThrown(throwIfWindowsNewlineOnUnix("ab\r", "afile.tsv", 3));
1487-
assertThrown(throwIfWindowsNewlineOnUnix("abc\r", "afile.tsv", 4));
1479+
assertThrown(throwIfWindowsNewline("\r", "afile.tsv", 1));
1480+
assertThrown(throwIfWindowsNewline("a\r", "afile.tsv", 2));
1481+
assertThrown(throwIfWindowsNewline("ab\r", "afile.tsv", 3));
1482+
assertThrown(throwIfWindowsNewline("abc\r", "afile.tsv", 4));
14881483

1489-
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\n", "afile.tsv", 1));
1490-
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\n", "afile.tsv", 2));
1491-
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3));
1492-
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4));
1484+
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("\n", "afile.tsv", 1));
1485+
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("a\n", "afile.tsv", 2));
1486+
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3));
1487+
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4));
14931488

1494-
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5));
1495-
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6));
1496-
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7));
1497-
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8));
1489+
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5));
1490+
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6));
1491+
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7));
1492+
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8));
14981493

1499-
/* Standard Input formatting. */
1500-
import std.algorithm : endsWith;
1501-
bool exceptionCaught = false;
1494+
/* Standard Input formatting. */
1495+
import std.algorithm : endsWith;
1496+
bool exceptionCaught = false;
15021497

1503-
try (throwIfWindowsNewlineOnUnix("\r", "-", 99));
1504-
catch (Exception e)
1505-
{
1506-
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
1507-
exceptionCaught = true;
1508-
}
1509-
finally
1510-
{
1511-
assert(exceptionCaught);
1512-
exceptionCaught = false;
1513-
}
1498+
try (throwIfWindowsNewline("\r", "-", 99));
1499+
catch (Exception e)
1500+
{
1501+
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
1502+
exceptionCaught = true;
1503+
}
1504+
finally
1505+
{
1506+
assert(exceptionCaught);
1507+
exceptionCaught = false;
1508+
}
15141509

1515-
try (throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99));
1516-
catch (Exception e)
1517-
{
1518-
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
1519-
exceptionCaught = true;
1520-
}
1521-
finally
1522-
{
1523-
assert(exceptionCaught);
1524-
exceptionCaught = false;
1525-
}
1510+
try (throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "-", 99));
1511+
catch (Exception e)
1512+
{
1513+
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
1514+
exceptionCaught = true;
1515+
}
1516+
finally
1517+
{
1518+
assert(exceptionCaught);
1519+
exceptionCaught = false;
15261520
}
15271521
}
15281522

tsv-filter/src/tsv_utils/tsv-filter.d

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ struct TsvFilterOptions
753753
import std.getopt;
754754
import std.path : baseName, stripExtension;
755755
import tsv_utils.common.getopt_inorder;
756-
import tsv_utils.common.utils : throwIfWindowsNewlineOnUnix;
756+
import tsv_utils.common.utils : throwIfWindowsNewline;
757757

758758
bool helpVerbose = false; // --help-verbose
759759
bool helpOptions = false; // --help-options
@@ -974,7 +974,7 @@ struct TsvFilterOptions
974974

975975
if (hasHeader)
976976
{
977-
throwIfWindowsNewlineOnUnix(inputSources.front.header, inputSources.front.name, 1);
977+
throwIfWindowsNewline(inputSources.front.header, inputSources.front.name, 1);
978978
headerFields = inputSources.front.header.split(delim).to!(string[]);
979979
fieldListArgProcessing();
980980
}
@@ -995,7 +995,7 @@ void tsvFilter(ref TsvFilterOptions cmdopt)
995995
import std.algorithm : all, any, splitter;
996996
import std.range;
997997
import tsv_utils.common.utils : BufferedOutputRange, bufferedByLine, InputSourceRange,
998-
throwIfWindowsNewlineOnUnix;
998+
throwIfWindowsNewline;
999999

10001000
/* inputSources must be an InputSourceRange and include at least stdin. */
10011001
assert(!cmdopt.inputSources.empty);
@@ -1028,11 +1028,11 @@ void tsvFilter(ref TsvFilterOptions cmdopt)
10281028

10291029
foreach (inputStream; cmdopt.inputSources)
10301030
{
1031-
if (cmdopt.hasHeader) throwIfWindowsNewlineOnUnix(inputStream.header, inputStream.name, 1);
1031+
if (cmdopt.hasHeader) throwIfWindowsNewline(inputStream.header, inputStream.name, 1);
10321032

10331033
foreach (lineNum, line; inputStream.file.bufferedByLine.enumerate(fileBodyStartLine))
10341034
{
1035-
if (lineNum == 1) throwIfWindowsNewlineOnUnix(line, inputStream.name, lineNum);
1035+
if (lineNum == 1) throwIfWindowsNewline(line, inputStream.name, lineNum);
10361036

10371037
/* Copy the needed number of fields to the fields array. */
10381038
int fieldIndex = -1;

tsv-join/src/tsv_utils/tsv-join.d

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ struct TsvJoinOptions
121121
import std.path : baseName, stripExtension;
122122
import std.typecons : Yes, No;
123123
import tsv_utils.common.fieldlist;
124-
import tsv_utils.common.utils : throwIfWindowsNewlineOnUnix;
124+
import tsv_utils.common.utils : throwIfWindowsNewline;
125125

126126
bool helpVerbose = false; // --help-verbose
127127
bool helpFields = false; // --help-fields
@@ -356,10 +356,10 @@ struct TsvJoinOptions
356356
{
357357
if (!filterSource.front.byLine.empty)
358358
{
359-
throwIfWindowsNewlineOnUnix(filterSource.front.byLine.front, filterSource.front.name, 1);
359+
throwIfWindowsNewline(filterSource.front.byLine.front, filterSource.front.name, 1);
360360
filterFileHeaderFields = filterSource.front.byLine.front.split(delim).to!(string[]);
361361
}
362-
throwIfWindowsNewlineOnUnix(inputSources.front.header, inputSources.front.name, 1);
362+
throwIfWindowsNewline(inputSources.front.header, inputSources.front.name, 1);
363363
inputSourceHeaderFields = inputSources.front.header.split(delim).to!(string[]);
364364
fieldListArgProcessing();
365365
}
@@ -403,7 +403,7 @@ int main(string[] cmdArgs)
403403
void tsvJoin(ref TsvJoinOptions cmdopt)
404404
{
405405
import tsv_utils.common.utils : ByLineSourceRange, bufferedByLine, BufferedOutputRange,
406-
isFlushableOutputRange, InputFieldReordering, InputSourceRange, throwIfWindowsNewlineOnUnix;
406+
isFlushableOutputRange, InputFieldReordering, InputSourceRange, throwIfWindowsNewline;
407407
import std.algorithm : splitter;
408408
import std.array : join;
409409
import std.range;
@@ -511,7 +511,7 @@ void tsvJoin(ref TsvJoinOptions cmdopt)
511511

512512
debug writeln(" --> [key]:[append] => [", key, "]:[", appendValues, "]");
513513

514-
if (lineNum == 1) throwIfWindowsNewlineOnUnix(line, filterStream.name, lineNum);
514+
if (lineNum == 1) throwIfWindowsNewline(line, filterStream.name, lineNum);
515515

516516
if (lineNum == 1 && cmdopt.hasHeader)
517517
{
@@ -576,13 +576,13 @@ void tsvJoin(ref TsvJoinOptions cmdopt)
576576

577577
foreach (inputStream; cmdopt.inputSources)
578578
{
579-
if (cmdopt.hasHeader) throwIfWindowsNewlineOnUnix(inputStream.header, inputStream.name, 1);
579+
if (cmdopt.hasHeader) throwIfWindowsNewline(inputStream.header, inputStream.name, 1);
580580

581581
foreach (lineNum, line; inputStream.file.bufferedByLine.enumerate(fileBodyStartLine))
582582
{
583583
debug writeln("[input line] |", line, "|");
584584

585-
if (lineNum == 1) throwIfWindowsNewlineOnUnix(line, inputStream.name, lineNum);
585+
if (lineNum == 1) throwIfWindowsNewline(line, inputStream.name, lineNum);
586586

587587
/*
588588
* Next block checks if the input line matches a hash entry. Two cases:

0 commit comments

Comments
 (0)