Skip to content

Commit fdcafe3

Browse files
committed
Fix CSV floating point serialization issue
Fix CSV serialization issue (particularly impacting multiples of 10) caused by inaccurate digit calculation
1 parent a6ce8b2 commit fdcafe3

File tree

5 files changed

+139
-40
lines changed

5 files changed

+139
-40
lines changed

CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ if (CSV_DEVELOPER)
6363
if (MSVC)
6464
target_link_options(csv PUBLIC /PROFILE)
6565
endif()
66-
66+
6767
# More error messages.
6868
if (UNIX)
6969
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
@@ -73,7 +73,7 @@ if (CSV_DEVELOPER)
7373
endif()
7474

7575
# Generate a single header library
76-
if(CMAKE_VERSION VERSION_LESS "3.12" OR ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
76+
if(CMAKE_VERSION VERSION_LESS "3.12")
7777
find_package(PythonInterp 3 QUIET)
7878
else()
7979
find_package(Python3 COMPONENTS Interpreter)

include/internal/csv_writer.hpp

+24-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,27 @@ namespace csv {
1717
namespace internals {
1818
static int DECIMAL_PLACES = 5;
1919

20+
/**
 * Count the decimal digits in the integral part of a number.
 *
 * The sign is ignored, so -120 and 120 both yield 3. Any value whose
 * magnitude is below 1 (including 0 and NaN) yields 0. Infinite values
 * also yield 0 instead of looping forever.
 *
 * @tparam T Any arithmetic type
 * @param  x Value to inspect
 * @return Number of digits to the left of the decimal point
 */
template<
    typename T,
    typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0
>
int num_digits(T x)
{
    int digits = 0;

    // The magnitude is tested directly instead of calling abs(): an
    // unqualified abs() may resolve to the C int overload (truncating
    // floating point input) and overflows on the most negative integer.
    // For unsigned T the `x < 0` test short-circuits the second comparison.
    while (x >= 1 || (x < 0 && x <= static_cast<T>(-1))) {
        const T reduced = static_cast<T>(x / 10);

        // Inside this loop only +/-infinity is left unchanged by the
        // division; stop rather than spin forever.
        if (reduced == x) {
            break;
        }

        x = reduced;
        digits++;
    }

    return digits;
}
40+
2041
/** to_string() for unsigned integers */
2142
template<typename T,
2243
csv::enable_if_t<std::is_unsigned<T>::value, int> = 0>
@@ -65,12 +86,12 @@ namespace csv {
6586
if (value < 0) result = "-";
6687

6788
if (integral_part == 0) {
89+
6890
result = "0";
6991
}
7092
else {
71-
for (int n_digits = (int)(std::log(integral_part) / std::log(10));
72-
n_digits + 1 > 0; n_digits --) {
73-
int digit = (int)(std::fmod(integral_part, pow10(n_digits + 1)) / pow10(n_digits));
93+
for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) {
94+
int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1));
7495
result += (char)('0' + digit);
7596
}
7697
}

single_include/csv.hpp

+42-12
Original file line numberDiff line numberDiff line change
@@ -5301,9 +5301,9 @@ namespace csv {
53015301
return DataType::CSV_NULL;
53025302

53035303
bool ws_allowed = true,
5304-
neg_allowed = true,
53055304
dot_allowed = true,
53065305
digit_allowed = true,
5306+
is_negative = false,
53075307
has_digit = false,
53085308
prob_float = false;
53095309

@@ -5333,7 +5333,7 @@ namespace csv {
53335333
return DataType::CSV_STRING;
53345334
}
53355335

5336-
neg_allowed = false;
5336+
is_negative = true;
53375337
break;
53385338
case '.':
53395339
if (!dot_allowed) {
@@ -5357,7 +5357,7 @@ namespace csv {
53575357

53585358
return _process_potential_exponential(
53595359
in.substr(exponent_start_idx),
5360-
neg_allowed ? integral_part + decimal_part : -(integral_part + decimal_part),
5360+
is_negative ? -(integral_part + decimal_part) : integral_part + decimal_part,
53615361
out
53625362
);
53635363
}
@@ -5391,7 +5391,7 @@ namespace csv {
53915391
if (has_digit) {
53925392
long double number = integral_part + decimal_part;
53935393
if (out) {
5394-
*out = neg_allowed ? number : -number;
5394+
*out = is_negative ? -number : number;
53955395
}
53965396

53975397
return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number);
@@ -6532,6 +6532,27 @@ namespace csv {
65326532
namespace internals {
65336533
static int DECIMAL_PLACES = 5;
65346534

6535+
/**
 * Count the decimal digits in the integral part of a number.
 *
 * The sign is ignored, so -120 and 120 both yield 3. Any value whose
 * magnitude is below 1 (including 0 and NaN) yields 0. Infinite values
 * also yield 0 instead of looping forever.
 *
 * @tparam T Any arithmetic type
 * @param  x Value to inspect
 * @return Number of digits to the left of the decimal point
 */
template<
    typename T,
    typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0
>
int num_digits(T x)
{
    int digits = 0;

    // The magnitude is tested directly instead of calling abs(): an
    // unqualified abs() may resolve to the C int overload (truncating
    // floating point input) and overflows on the most negative integer.
    // For unsigned T the `x < 0` test short-circuits the second comparison.
    while (x >= 1 || (x < 0 && x <= static_cast<T>(-1))) {
        const T reduced = static_cast<T>(x / 10);

        // Inside this loop only +/-infinity is left unchanged by the
        // division; stop rather than spin forever.
        if (reduced == x) {
            break;
        }

        x = reduced;
        digits++;
    }

    return digits;
}
6555+
65356556
/** to_string() for unsigned integers */
65366557
template<typename T,
65376558
csv::enable_if_t<std::is_unsigned<T>::value, int> = 0>
@@ -6566,6 +6587,10 @@ namespace csv {
65666587
csv::enable_if_t<std::is_floating_point<T>::value, int> = 0
65676588
>
65686589
inline std::string to_string(T value) {
6590+
#ifdef __clang__
6591+
return std::to_string(value);
6592+
#else
6593+
// TODO: Figure out why the below code doesn't work on clang
65696594
std::string result;
65706595

65716596
T integral_part;
@@ -6576,12 +6601,12 @@ namespace csv {
65766601
if (value < 0) result = "-";
65776602

65786603
if (integral_part == 0) {
6604+
65796605
result = "0";
65806606
}
65816607
else {
6582-
for (int n_digits = (int)(std::log(integral_part) / std::log(10));
6583-
n_digits + 1 > 0; n_digits --) {
6584-
int digit = (int)(std::fmod(integral_part, pow10(n_digits + 1)) / pow10(n_digits));
6608+
for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) {
6609+
int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1));
65856610
result += (char)('0' + digit);
65866611
}
65876612
}
@@ -6601,16 +6626,19 @@ namespace csv {
66016626
}
66026627

66036628
return result;
6629+
#endif
66046630
}
66056631
}
66066632

66076633
/** Sets how many places after the decimal will be written for floating point numbers
66086634
*
66096635
* @param precision Number of decimal places
66106636
*/
6637+
#ifndef __clang___
66116638
inline static void set_decimal_places(int precision) {
66126639
internals::DECIMAL_PLACES = precision;
66136640
}
6641+
#endif
66146642

66156643
/** @name CSV Writing */
66166644
///@{
@@ -7723,7 +7751,7 @@ namespace csv {
77237751
for (; start < this->sv.size() && this->sv[start] == ' '; start++);
77247752
for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++);
77257753

7726-
unsigned long long int value = 0;
7754+
unsigned long long int value_ = 0;
77277755

77287756
size_t digits = (end - start);
77297757
size_t base16_exponent = digits - 1;
@@ -7774,11 +7802,11 @@ namespace csv {
77747802
return false;
77757803
}
77767804

7777-
value += digit * pow(16, base16_exponent);
7805+
value_ += digit * pow(16, base16_exponent);
77787806
base16_exponent--;
77797807
}
77807808

7781-
parsedValue = value;
7809+
parsedValue = value_;
77827810
return true;
77837811
}
77847812

@@ -7972,7 +8000,8 @@ namespace csv {
79728000
}
79738001

79748002
// create a result string of necessary size
7975-
std::string result(s.size() + space, '\\');
8003+
size_t result_size = s.size() + space;
8004+
std::string result(result_size, '\\');
79768005
std::size_t pos = 0;
79778006

79788007
for (const auto& c : s)
@@ -8047,7 +8076,7 @@ namespace csv {
80478076
if (c >= 0x00 && c <= 0x1f)
80488077
{
80498078
// print character c as \uxxxx
8050-
sprintf(&result[pos + 1], "u%04x", int(c));
8079+
snprintf(&result[pos + 1], result_size - pos - 1, "u%04x", int(c));
80518080
pos += 6;
80528081
// overwrite trailing null character
80538082
result[pos] = '\\';
@@ -8137,6 +8166,7 @@ namespace csv {
81378166
return ret;
81388167
}
81398168
}
8169+
81408170
/** @file
81418171
* Calculates statistics from CSV files
81428172
*/

single_include_test/csv.hpp

+42-12
Original file line numberDiff line numberDiff line change
@@ -5301,9 +5301,9 @@ namespace csv {
53015301
return DataType::CSV_NULL;
53025302

53035303
bool ws_allowed = true,
5304-
neg_allowed = true,
53055304
dot_allowed = true,
53065305
digit_allowed = true,
5306+
is_negative = false,
53075307
has_digit = false,
53085308
prob_float = false;
53095309

@@ -5333,7 +5333,7 @@ namespace csv {
53335333
return DataType::CSV_STRING;
53345334
}
53355335

5336-
neg_allowed = false;
5336+
is_negative = true;
53375337
break;
53385338
case '.':
53395339
if (!dot_allowed) {
@@ -5357,7 +5357,7 @@ namespace csv {
53575357

53585358
return _process_potential_exponential(
53595359
in.substr(exponent_start_idx),
5360-
neg_allowed ? integral_part + decimal_part : -(integral_part + decimal_part),
5360+
is_negative ? -(integral_part + decimal_part) : integral_part + decimal_part,
53615361
out
53625362
);
53635363
}
@@ -5391,7 +5391,7 @@ namespace csv {
53915391
if (has_digit) {
53925392
long double number = integral_part + decimal_part;
53935393
if (out) {
5394-
*out = neg_allowed ? number : -number;
5394+
*out = is_negative ? -number : number;
53955395
}
53965396

53975397
return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number);
@@ -6532,6 +6532,27 @@ namespace csv {
65326532
namespace internals {
65336533
static int DECIMAL_PLACES = 5;
65346534

6535+
/**
 * Count the decimal digits in the integral part of a number.
 *
 * The sign is ignored, so -120 and 120 both yield 3. Any value whose
 * magnitude is below 1 (including 0 and NaN) yields 0. Infinite values
 * also yield 0 instead of looping forever.
 *
 * @tparam T Any arithmetic type
 * @param  x Value to inspect
 * @return Number of digits to the left of the decimal point
 */
template<
    typename T,
    typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0
>
int num_digits(T x)
{
    int digits = 0;

    // The magnitude is tested directly instead of calling abs(): an
    // unqualified abs() may resolve to the C int overload (truncating
    // floating point input) and overflows on the most negative integer.
    // For unsigned T the `x < 0` test short-circuits the second comparison.
    while (x >= 1 || (x < 0 && x <= static_cast<T>(-1))) {
        const T reduced = static_cast<T>(x / 10);

        // Inside this loop only +/-infinity is left unchanged by the
        // division; stop rather than spin forever.
        if (reduced == x) {
            break;
        }

        x = reduced;
        digits++;
    }

    return digits;
}
6555+
65356556
/** to_string() for unsigned integers */
65366557
template<typename T,
65376558
csv::enable_if_t<std::is_unsigned<T>::value, int> = 0>
@@ -6566,6 +6587,10 @@ namespace csv {
65666587
csv::enable_if_t<std::is_floating_point<T>::value, int> = 0
65676588
>
65686589
inline std::string to_string(T value) {
6590+
#ifdef __clang__
6591+
return std::to_string(value);
6592+
#else
6593+
// TODO: Figure out why the below code doesn't work on clang
65696594
std::string result;
65706595

65716596
T integral_part;
@@ -6576,12 +6601,12 @@ namespace csv {
65766601
if (value < 0) result = "-";
65776602

65786603
if (integral_part == 0) {
6604+
65796605
result = "0";
65806606
}
65816607
else {
6582-
for (int n_digits = (int)(std::log(integral_part) / std::log(10));
6583-
n_digits + 1 > 0; n_digits --) {
6584-
int digit = (int)(std::fmod(integral_part, pow10(n_digits + 1)) / pow10(n_digits));
6608+
for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) {
6609+
int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1));
65856610
result += (char)('0' + digit);
65866611
}
65876612
}
@@ -6601,16 +6626,19 @@ namespace csv {
66016626
}
66026627

66036628
return result;
6629+
#endif
66046630
}
66056631
}
66066632

66076633
/** Sets how many places after the decimal will be written for floating point numbers
66086634
*
66096635
* @param precision Number of decimal places
66106636
*/
6637+
#ifndef __clang___
66116638
inline static void set_decimal_places(int precision) {
66126639
internals::DECIMAL_PLACES = precision;
66136640
}
6641+
#endif
66146642

66156643
/** @name CSV Writing */
66166644
///@{
@@ -7723,7 +7751,7 @@ namespace csv {
77237751
for (; start < this->sv.size() && this->sv[start] == ' '; start++);
77247752
for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++);
77257753

7726-
unsigned long long int value = 0;
7754+
unsigned long long int value_ = 0;
77277755

77287756
size_t digits = (end - start);
77297757
size_t base16_exponent = digits - 1;
@@ -7774,11 +7802,11 @@ namespace csv {
77747802
return false;
77757803
}
77767804

7777-
value += digit * pow(16, base16_exponent);
7805+
value_ += digit * pow(16, base16_exponent);
77787806
base16_exponent--;
77797807
}
77807808

7781-
parsedValue = value;
7809+
parsedValue = value_;
77827810
return true;
77837811
}
77847812

@@ -7972,7 +8000,8 @@ namespace csv {
79728000
}
79738001

79748002
// create a result string of necessary size
7975-
std::string result(s.size() + space, '\\');
8003+
size_t result_size = s.size() + space;
8004+
std::string result(result_size, '\\');
79768005
std::size_t pos = 0;
79778006

79788007
for (const auto& c : s)
@@ -8047,7 +8076,7 @@ namespace csv {
80478076
if (c >= 0x00 && c <= 0x1f)
80488077
{
80498078
// print character c as \uxxxx
8050-
sprintf(&result[pos + 1], "u%04x", int(c));
8079+
snprintf(&result[pos + 1], result_size - pos - 1, "u%04x", int(c));
80518080
pos += 6;
80528081
// overwrite trailing null character
80538082
result[pos] = '\\';
@@ -8137,6 +8166,7 @@ namespace csv {
81378166
return ret;
81388167
}
81398168
}
8169+
81408170
/** @file
81418171
* Calculates statistics from CSV files
81428172
*/

0 commit comments

Comments
 (0)