Skip to content

Commit a6b0168

Browse files
tsv-summarize: Fix edge cases in number formatting. (#28)
1 parent 2bdb87a commit a6b0168

1 file changed

Lines changed: 235 additions & 59 deletions

File tree

common/src/tsvutil.d

Lines changed: 235 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -464,8 +464,7 @@ follows:
464464
- If the specified floatPrecision is between 0 and readablePrecisionMax, then floatPrecision
465465
is used to set the significant digits following the decimal point. Otherwise, it is used
466466
to set total significant digits. This does not apply to really large numbers, for doubles,
467-
those larger than 2^53. The effect is an alternate way to choose between '%e' and '%f'
468-
behavior than done by default by the '%g' format specifier.
467+
those larger than 2^53. Trailing zeros are chopped in all cases.
469468
*/
470469
import std.traits : isFloatingPoint, isIntegral, Unqual;
471470
auto formatNumber(T, size_t readablePrecisionMax = 6)(T num, const size_t floatPrecision = 12)
@@ -493,59 +492,76 @@ auto formatNumber(T, size_t readablePrecisionMax = 6)(T num, const size_t floatP
493492
{
494493
static assert(is(UT == float) || is(UT == double));
495494

496-
import std.math : fabs;
497-
import core.stdc.math : modf, modff;
498-
499-
static if (is(UT == float)) alias modfUT = modff;
500-
else static if (is(UT == double)) alias modfUT = modf;
501-
else static assert(0);
502-
503-
enum UT maxConsecutiveUTInteger = 2.0^^UT.mant_dig;
504-
enum bool maxUTIntFitsInLong = (maxConsecutiveUTInteger <= long.max);
505-
506-
immutable UT absNum = num.fabs;
507-
UT integerPart;
508-
509-
if (absNum < 1.0 || absNum > maxConsecutiveUTInteger)
510-
{
511-
/* No adjustments for very large, fractional only, or zero valued floats.
512-
* Note that zero prints like a decimal using %g, which is the intent.
513-
*/
514-
return format("%.*g", floatPrecision, num);
515-
}
516-
else if (maxUTIntFitsInLong && modfUT(num, &integerPart) == 0.0)
517-
{
518-
/* Integral value in the range of consecutive integers supported by the
519-
* floating point type. Print like a decimal number.
520-
* (Note: modfUT (modf/modff) returns the fractional part.)
521-
*/
522-
return format("%d", num.to!long);
523-
}
524-
else if (floatPrecision <= readablePrecisionMax && floatPrecision != 0)
495+
if (floatPrecision <= readablePrecisionMax)
525496
{
526-
/* Number with a fractional part, precision in the range normally used
527-
* for human readable numbers. Number may be large, but not huge. Adjust
528-
* the precision to be used as significant digits past decimal.
497+
/* Print with a fixed precision beyond the decimal point (%.*f), but
498+
* remove trailing zeros. Notes:
499+
* - This handles integer values stored in floating point types.
500+
* - Values like NaN and infinity are also handled.
529501
*/
502+
auto str = format("%.*f", floatPrecision, num);
503+
size_t trimToLength = str.length;
530504

531-
size_t adjustedFloatPrecision = floatPrecision;
532-
UT x = absNum;
533-
while (x >= 1.0e+4)
505+
if (floatPrecision != 0 && str.length > floatPrecision + 1)
534506
{
535-
x /= 1.0e+4;
536-
adjustedFloatPrecision += 4;
507+
import std.ascii : isDigit;
508+
assert(str.length - floatPrecision - 1 > 0);
509+
size_t decimalIndex = str.length - floatPrecision - 1;
510+
511+
if (str[decimalIndex] == '.' && str[decimalIndex - 1].isDigit)
512+
{
513+
size_t lastNonZeroDigit = str.length - 1;
514+
assert(decimalIndex < lastNonZeroDigit);
515+
while (str[lastNonZeroDigit] == '0') lastNonZeroDigit--;
516+
trimToLength = (decimalIndex < lastNonZeroDigit)
517+
? lastNonZeroDigit + 1
518+
: decimalIndex;
519+
}
537520
}
538-
539-
if (x < 1.0e+1) adjustedFloatPrecision += 1;
540-
else if (x < 1.0e+2) adjustedFloatPrecision += 2;
541-
else if (x < 1.0e+3) adjustedFloatPrecision += 3;
542-
else adjustedFloatPrecision += 4;
543521

544-
return format("%.*g", adjustedFloatPrecision, num);
522+
return str[0 .. trimToLength];
545523
}
546524
else
547525
{
548-
return format("%.*g", floatPrecision, num);
526+
/* Determine if the number is subject to special integer value printing.
527+
* Goal is to avoid exponential notation for integer values that '%.*g'
528+
* generates. Numbers within the significant digit range of floatPrecision
529+
* will print as desired with '%.*g', whether there is a fractional part
530+
* or not. The '%.*g' format, with exponential notation, is also used for
531+
* really large numbers. "Really large" being numbers outside the range
532+
* of integers exactly representable by the floating point type.
533+
*/
534+
535+
enum UT maxConsecutiveUTInteger = 2.0^^UT.mant_dig;
536+
enum bool maxUTIntFitsInLong = (maxConsecutiveUTInteger <= long.max);
537+
538+
import std.math : fabs;
539+
immutable UT absNum = num.fabs;
540+
541+
if (!maxUTIntFitsInLong ||
542+
absNum < 10.0^^floatPrecision ||
543+
absNum > maxConsecutiveUTInteger)
544+
{
545+
/* Within significant digits range or very large. */
546+
return format("%.*g", floatPrecision, num);
547+
}
548+
else
549+
{
550+
/* Check for integral values needing to be printed in decimal format.
551+
* modf/modff are used to determine if the value has a non-zero
552+
* fractional component.
553+
*/
554+
import core.stdc.math : modf, modff;
555+
556+
static if (is(UT == float)) alias modfUT = modff;
557+
else static if (is(UT == double)) alias modfUT = modf;
558+
else static assert(0);
559+
560+
UT integerPart;
561+
562+
if (modfUT(num, &integerPart) == 0.0) return format("%d", num.to!long);
563+
else return format("%.*g", floatPrecision, num);
564+
}
549565
}
550566
}
551567
}
@@ -557,7 +573,7 @@ unittest // formatNumber unit tests
557573
import std.conv;
558574
import std.format;
559575

560-
// Integer
576+
/* Integers */
561577
assert(formatNumber(0) == "0");
562578
assert(formatNumber(1) == "1");
563579
assert(formatNumber(-1) == "-1");
@@ -569,12 +585,32 @@ unittest // formatNumber unit tests
569585
const int a2 = -33234; assert(a2.formatNumber == "-33234");
570586
immutable long a3 = -12345678912345; assert(a3.formatNumber == "-12345678912345");
571587

572-
import std.stdio;
573-
574-
// Doubles
588+
// Specifying precision should never matter for integer values.
589+
assert(formatNumber(0, 0) == "0");
590+
assert(formatNumber(1, 0) == "1");
591+
assert(formatNumber(-1, 0) == "-1");
592+
assert(formatNumber(999, 0) == "999");
593+
assert(formatNumber(12345678912345, 0) == "12345678912345");
594+
assert(formatNumber(-12345678912345, 0) == "-12345678912345");
595+
596+
assert(formatNumber(0, 3) == "0");
597+
assert(formatNumber(1, 3) == "1");
598+
assert(formatNumber(-1, 3 ) == "-1");
599+
assert(formatNumber(999, 3) == "999");
600+
assert(formatNumber(12345678912345, 3) == "12345678912345");
601+
assert(formatNumber(-12345678912345, 3) == "-12345678912345");
602+
603+
assert(formatNumber(0, 9) == "0");
604+
assert(formatNumber(1, 9) == "1");
605+
assert(formatNumber(-1, 9 ) == "-1");
606+
assert(formatNumber(999, 9) == "999");
607+
assert(formatNumber(12345678912345, 9) == "12345678912345");
608+
assert(formatNumber(-12345678912345, 9) == "-12345678912345");
609+
610+
/* Doubles */
575611
assert(formatNumber(0.0) == "0");
576612
assert(formatNumber(0.2) == "0.2");
577-
assert(formatNumber(0.123412, 0) == "0.1");
613+
assert(formatNumber(0.123412, 0) == "0");
578614
assert(formatNumber(0.123412, 1) == "0.1");
579615
assert(formatNumber(0.123412, 2) == "0.12");
580616
assert(formatNumber(0.123412, 5) == "0.12341");
@@ -585,6 +621,11 @@ unittest // formatNumber unit tests
585621
assert(formatNumber(99.123412, 5) == "99.12341");
586622
assert(formatNumber(99.123412, 6) == "99.123412");
587623
assert(formatNumber(99.123412, 7) == "99.12341");
624+
assert(formatNumber(999.123412, 0) == "999");
625+
assert(formatNumber(999.123412, 1) == "999.1");
626+
assert(formatNumber(999.123412, 2) == "999.12");
627+
assert(formatNumber(999.123412, 3) == "999.123");
628+
assert(formatNumber(999.123412, 4) == "999.1234");
588629
assert(formatNumber(999.123412, 5) == "999.12341");
589630
assert(formatNumber(999.123412, 6) == "999.123412");
590631
assert(formatNumber(999.123412, 7) == "999.1234");
@@ -594,16 +635,78 @@ unittest // formatNumber unit tests
594635
assert(formatNumber(1234567891234.0, 0) == "1234567891234");
595636
assert(formatNumber(1234567891234.0, 1) == "1234567891234");
596637

638+
// Test round off cases
639+
assert(formatNumber(0.6, 0) == "1");
640+
assert(formatNumber(0.6, 1) == "0.6");
641+
assert(formatNumber(0.06, 0) == "0");
642+
assert(formatNumber(0.06, 1) == "0.1");
643+
assert(formatNumber(0.06, 2) == "0.06");
644+
assert(formatNumber(0.06, 3) == "0.06");
645+
assert(formatNumber(9.49999, 0) == "9");
646+
assert(formatNumber(9.49999, 1) == "9.5");
647+
assert(formatNumber(9.6, 0) == "10");
648+
assert(formatNumber(9.6, 1) == "9.6");
649+
assert(formatNumber(99.99, 0) == "100");
650+
assert(formatNumber(99.99, 1) == "100");
651+
assert(formatNumber(99.99, 2) == "99.99");
652+
assert(formatNumber(9999.9996, 3) == "10000");
653+
assert(formatNumber(9999.9996, 4) == "9999.9996");
654+
assert(formatNumber(99999.99996, 4) == "100000");
655+
assert(formatNumber(99999.99996, 5) == "99999.99996");
656+
assert(formatNumber(999999.999996, 5) == "1000000");
657+
assert(formatNumber(999999.999996, 6) == "999999.999996");
658+
659+
/* Turn off precision, the 'human readable' style.
660+
* Note: Remains on if both are zero (first test). If it becomes desirable to support
661+
* turning it off for the precision-equals-zero case, the simple extension is to
662+
* allow the 'human readable' precision template parameter to be negative.
663+
*/
664+
assert(formatNumber!(double, 0)(999.123412, 0) == "999");
665+
assert(formatNumber!(double, 0)(999.123412, 1) == "1e+03");
666+
assert(formatNumber!(double, 0)(999.123412, 2) == "1e+03");
667+
assert(formatNumber!(double, 0)(999.123412, 3) == "999");
668+
assert(formatNumber!(double, 0)(999.123412, 4) == "999.1");
669+
670+
// Default number printing
671+
assert(formatNumber(1.2) == "1.2");
672+
assert(formatNumber(12.3) == "12.3");
673+
assert(formatNumber(12.34) == "12.34");
674+
assert(formatNumber(123.45) == "123.45");
675+
assert(formatNumber(123.456) == "123.456");
676+
assert(formatNumber(1234.567) == "1234.567");
677+
assert(formatNumber(1234.5678) == "1234.5678");
678+
assert(formatNumber(12345.6789) == "12345.6789");
679+
assert(formatNumber(12345.67891) == "12345.67891");
680+
assert(formatNumber(123456.78912) == "123456.78912");
681+
assert(formatNumber(123456.789123) == "123456.789123");
682+
assert(formatNumber(1234567.891234) == "1234567.89123");
683+
assert(formatNumber(12345678.912345) == "12345678.9123");
684+
assert(formatNumber(123456789.12345) == "123456789.123");
685+
assert(formatNumber(1234567891.2345) == "1234567891.23");
686+
assert(formatNumber(12345678912.345) == "12345678912.3");
687+
assert(formatNumber(123456789123.45) == "123456789123");
688+
assert(formatNumber(1234567891234.5) == "1.23456789123e+12");
689+
assert(formatNumber(12345678912345.6) == "1.23456789123e+13");
690+
assert(formatNumber(123456789123456.0) == "123456789123456");
691+
assert(formatNumber(0.3) == "0.3");
692+
assert(formatNumber(0.03) == "0.03");
693+
assert(formatNumber(0.003) == "0.003");
694+
assert(formatNumber(0.0003) == "0.0003");
695+
assert(formatNumber(0.00003) == "3e-05" || formatNumber(0.00003) == "3e-5");
696+
assert(formatNumber(0.000003) == "3e-06" || formatNumber(0.000003) == "3e-6");
697+
assert(formatNumber(0.0000003) == "3e-07" || formatNumber(0.0000003) == "3e-7");
698+
699+
// Large number inside and outside the contiguous integer representation range
597700
double dlarge = 2.0^^(double.mant_dig - 2) - 10.0;
598701
double dhuge = 2.0^^(double.mant_dig + 1) + 1000.0;
599702

600703
assert(dlarge.formatNumber == format("%d", dlarge.to!long));
601-
assert(dhuge.formatNumber!(double, 12) == format("%.12g", dhuge));
704+
assert(dhuge.formatNumber!(double) == format("%.12g", dhuge));
602705

603-
// Negative values
604-
assert(formatNumber(-0.0) == "-0");
706+
// Negative values - Repeat most of above tests.
707+
assert(formatNumber(-0.0) == "-0" || formatNumber(-0.0) == "0");
605708
assert(formatNumber(-0.2) == "-0.2");
606-
assert(formatNumber(-0.123412, 0) == "-0.1");
709+
assert(formatNumber(-0.123412, 0) == "-0");
607710
assert(formatNumber(-0.123412, 1) == "-0.1");
608711
assert(formatNumber(-0.123412, 2) == "-0.12");
609712
assert(formatNumber(-0.123412, 5) == "-0.12341");
@@ -614,6 +717,11 @@ unittest // formatNumber unit tests
614717
assert(formatNumber(-99.123412, 5) == "-99.12341");
615718
assert(formatNumber(-99.123412, 6) == "-99.123412");
616719
assert(formatNumber(-99.123412, 7) == "-99.12341");
720+
assert(formatNumber(-999.123412, 0) == "-999");
721+
assert(formatNumber(-999.123412, 1) == "-999.1");
722+
assert(formatNumber(-999.123412, 2) == "-999.12");
723+
assert(formatNumber(-999.123412, 3) == "-999.123");
724+
assert(formatNumber(-999.123412, 4) == "-999.1234");
617725
assert(formatNumber(-999.123412, 5) == "-999.12341");
618726
assert(formatNumber(-999.123412, 6) == "-999.123412");
619727
assert(formatNumber(-999.123412, 7) == "-999.1234");
@@ -623,11 +731,69 @@ unittest // formatNumber unit tests
623731
assert(formatNumber(-1234567891234.0, 0) == "-1234567891234");
624732
assert(formatNumber(-1234567891234.0, 1) == "-1234567891234");
625733

734+
// Test round off cases
735+
assert(formatNumber(-0.6, 0) == "-1");
736+
assert(formatNumber(-0.6, 1) == "-0.6");
737+
assert(formatNumber(-0.06, 0) == "-0");
738+
assert(formatNumber(-0.06, 1) == "-0.1");
739+
assert(formatNumber(-0.06, 2) == "-0.06");
740+
assert(formatNumber(-0.06, 3) == "-0.06");
741+
assert(formatNumber(-9.49999, 0) == "-9");
742+
assert(formatNumber(-9.49999, 1) == "-9.5");
743+
assert(formatNumber(-9.6, 0) == "-10");
744+
assert(formatNumber(-9.6, 1) == "-9.6");
745+
assert(formatNumber(-99.99, 0) == "-100");
746+
assert(formatNumber(-99.99, 1) == "-100");
747+
assert(formatNumber(-99.99, 2) == "-99.99");
748+
assert(formatNumber(-9999.9996, 3) == "-10000");
749+
assert(formatNumber(-9999.9996, 4) == "-9999.9996");
750+
assert(formatNumber(-99999.99996, 4) == "-100000");
751+
assert(formatNumber(-99999.99996, 5) == "-99999.99996");
752+
assert(formatNumber(-999999.999996, 5) == "-1000000");
753+
assert(formatNumber(-999999.999996, 6) == "-999999.999996");
754+
755+
assert(formatNumber!(double, 0)(-999.123412, 0) == "-999");
756+
assert(formatNumber!(double, 0)(-999.123412, 1) == "-1e+03");
757+
assert(formatNumber!(double, 0)(-999.123412, 2) == "-1e+03");
758+
assert(formatNumber!(double, 0)(-999.123412, 3) == "-999");
759+
assert(formatNumber!(double, 0)(-999.123412, 4) == "-999.1");
760+
761+
// Default number printing
762+
assert(formatNumber(-1.2) == "-1.2");
763+
assert(formatNumber(-12.3) == "-12.3");
764+
assert(formatNumber(-12.34) == "-12.34");
765+
assert(formatNumber(-123.45) == "-123.45");
766+
assert(formatNumber(-123.456) == "-123.456");
767+
assert(formatNumber(-1234.567) == "-1234.567");
768+
assert(formatNumber(-1234.5678) == "-1234.5678");
769+
assert(formatNumber(-12345.6789) == "-12345.6789");
770+
assert(formatNumber(-12345.67891) == "-12345.67891");
771+
assert(formatNumber(-123456.78912) == "-123456.78912");
772+
assert(formatNumber(-123456.789123) == "-123456.789123");
773+
assert(formatNumber(-1234567.891234) == "-1234567.89123");
774+
775+
assert(formatNumber(-12345678.912345) == "-12345678.9123");
776+
assert(formatNumber(-123456789.12345) == "-123456789.123");
777+
assert(formatNumber(-1234567891.2345) == "-1234567891.23");
778+
assert(formatNumber(-12345678912.345) == "-12345678912.3");
779+
assert(formatNumber(-123456789123.45) == "-123456789123");
780+
assert(formatNumber(-1234567891234.5) == "-1.23456789123e+12");
781+
assert(formatNumber(-12345678912345.6) == "-1.23456789123e+13");
782+
assert(formatNumber(-123456789123456.0) == "-123456789123456");
783+
784+
assert(formatNumber(-0.3) == "-0.3");
785+
assert(formatNumber(-0.03) == "-0.03");
786+
assert(formatNumber(-0.003) == "-0.003");
787+
assert(formatNumber(-0.0003) == "-0.0003");
788+
assert(formatNumber(-0.00003) == "-3e-05" || formatNumber(-0.00003) == "-3e-5");
789+
assert(formatNumber(-0.000003) == "-3e-06" || formatNumber(-0.000003) == "-3e-6");
790+
assert(formatNumber(-0.0000003) == "-3e-07" || formatNumber(-0.0000003) == "-3e-7");
791+
626792
const double dlargeNeg = -2.0^^(double.mant_dig - 2) + 10.0;
627793
immutable double dhugeNeg = -2.0^^(double.mant_dig + 1) - 1000.0;
628794

629795
assert(dlargeNeg.formatNumber == format("%d", dlargeNeg.to!long));
630-
assert(dhugeNeg.formatNumber!(double, 12) == format("%.12g", dhugeNeg));
796+
assert(dhugeNeg.formatNumber!(double) == format("%.12g", dhugeNeg));
631797

632798
// Type qualifiers
633799
const double b1 = 0.0; assert(formatNumber(b1) == "0");
@@ -636,10 +802,20 @@ unittest // formatNumber unit tests
636802
immutable double b4 = 99.123412; assert(formatNumber(b4, 5) == "99.12341");
637803
immutable double b5 = 99.123412; assert(formatNumber(b5, 7) == "99.12341");
638804

805+
// Special values
806+
assert(formatNumber(double.nan) == "nan");
807+
assert(formatNumber(double.nan, 0) == "nan");
808+
assert(formatNumber(double.nan, 1) == "nan");
809+
assert(formatNumber(double.nan, 9) == "nan");
810+
assert(formatNumber(double.infinity) == "inf");
811+
assert(formatNumber(double.infinity, 0) == "inf");
812+
assert(formatNumber(double.infinity, 1) == "inf");
813+
assert(formatNumber(double.infinity, 9) == "inf");
814+
639815
// Float. Mix negative and type qualifiers in.
640816
assert(formatNumber(0.0f) == "0");
641817
assert(formatNumber(0.5f) == "0.5");
642-
assert(formatNumber(0.123412f, 0) == "0.1");
818+
assert(formatNumber(0.123412f, 0) == "0");
643819
assert(formatNumber(0.123412f, 1) == "0.1");
644820
assert(formatNumber(-0.123412f, 2) == "-0.12");
645821
assert(formatNumber(9.123412f, 5) == "9.12341");
@@ -655,8 +831,8 @@ unittest // formatNumber unit tests
655831
immutable float c5 = 12345678.0f; assert(formatNumber(c5, 0) == "12345678");
656832
immutable float c6 = 12345678.0f; assert(formatNumber(c6, 1) == "12345678");
657833

658-
double flarge = 2.0^^(float.mant_dig - 2) - 10.0;
659-
double fhuge = 2.0^^(float.mant_dig + 1) + 1000.0;
834+
float flarge = 2.0^^(float.mant_dig - 2) - 10.0;
835+
float fhuge = 2.0^^(float.mant_dig + 1) + 1000.0;
660836

661837
assert(flarge.formatNumber == format("%d", flarge.to!long));
662838
assert(fhuge.formatNumber!(float, 12) == format("%.12g", fhuge));

0 commit comments

Comments
 (0)