Skip to content

Commit 4ff7bb2

Browse files
committed
feat(c/driver/postgresql): customize numeric conversion
- Introduces the statement-level option `adbc.postgresql.numeric_conversion`.
- The option tells the result reader which strategy to use when converting numeric values to Arrow data; since this cannot be done one-to-one, the reader has to convert them to another data type.
- Clients can use this option to specify the strategy.
- The value can be either `to_string` or `to_double`; when not specified, it defaults to `to_string`.
- `to_string`: numerics are converted losslessly to their string representation.
- `to_double`: numerics are converted to double (with possible loss of precision).
1 parent 51abae1 commit 4ff7bb2

File tree

5 files changed

+346
-84
lines changed

5 files changed

+346
-84
lines changed

c/driver/postgresql/copy/postgres_copy_reader_test.cc

+109-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
#include <gtest/gtest.h>
19+
#include <cmath>
1920
#include <nanoarrow/nanoarrow.hpp>
2021

2122
#include "postgres_copy_test_common.h"
@@ -25,8 +26,11 @@ namespace adbcpq {
2526

2627
class PostgresCopyStreamTester {
2728
public:
28-
ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) {
29-
NANOARROW_RETURN_NOT_OK(reader_.Init(root_type));
29+
ArrowErrorCode Init(
30+
const PostgresType& root_type,
31+
NumericConversionStrategy numeric_conversion = NumericConversionStrategy::kToString,
32+
ArrowError* error = nullptr) {
33+
NANOARROW_RETURN_NOT_OK(reader_.Init(root_type, numeric_conversion));
3034
NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error));
3135
NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error));
3236
return NANOARROW_OK;
@@ -373,6 +377,59 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric) {
373377
EXPECT_EQ(std::string(item.data, item.size_bytes), "inf");
374378
}
375379

380+
// Reads the kTestPgCopyNumeric fixture with NumericConversionStrategy::kToDouble
// and checks that the single output column has double storage, that the first
// eight rows are valid and the ninth is null, and that the decoded values
// (including NaN and both infinities) match the expected doubles.
TEST(PostgresCopyUtilsTest, PostgresCopyReadNumericToDouble) {
  ArrowBufferView data;
  data.data.as_uint8 = kTestPgCopyNumeric;
  data.size_bytes = sizeof(kTestPgCopyNumeric);

  // Record type with one numeric child named "col".
  auto col_type = PostgresType(PostgresTypeId::kNumeric);
  PostgresType input_type(PostgresTypeId::kRecord);
  input_type.AppendChild("col", col_type);

  PostgresCopyStreamTester tester;
  ASSERT_EQ(tester.Init(input_type, NumericConversionStrategy::kToDouble), NANOARROW_OK);
  // ReadAll is expected to finish with ENODATA, having advanced the view past
  // every byte of the fixture.
  ASSERT_EQ(tester.ReadAll(&data), ENODATA);
  ASSERT_EQ(data.data.as_uint8 - kTestPgCopyNumeric, sizeof(kTestPgCopyNumeric));
  ASSERT_EQ(data.size_bytes, 0);

  nanoarrow::UniqueArray array;
  ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK);
  ASSERT_EQ(array->length, 9);
  ASSERT_EQ(array->n_children, 1);

  nanoarrow::UniqueSchema schema;
  tester.GetSchema(schema.get());

  // Building the view from the inferred schema lets us check the storage type;
  // attaching the array checks that array and schema are compatible.
  nanoarrow::UniqueArrayView array_view;
  ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr),
            NANOARROW_OK);
  ASSERT_EQ(array_view->children[0]->storage_type, NANOARROW_TYPE_DOUBLE);
  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK);

  // Raw child buffers: [0] is the validity bitmap, [1] holds the double values.
  const auto* validity =
      reinterpret_cast<const uint8_t*>(array->children[0]->buffers[0]);
  const auto* data_buffer =
      reinterpret_cast<const double*>(array->children[0]->buffers[1]);
  ASSERT_NE(validity, nullptr);
  ASSERT_NE(data_buffer, nullptr);
  ASSERT_TRUE(ArrowBitGet(validity, 0));
  ASSERT_TRUE(ArrowBitGet(validity, 1));
  ASSERT_TRUE(ArrowBitGet(validity, 2));
  ASSERT_TRUE(ArrowBitGet(validity, 3));
  ASSERT_TRUE(ArrowBitGet(validity, 4));
  ASSERT_TRUE(ArrowBitGet(validity, 5));
  ASSERT_TRUE(ArrowBitGet(validity, 6));
  ASSERT_TRUE(ArrowBitGet(validity, 7));
  ASSERT_FALSE(ArrowBitGet(validity, 8));

  ASSERT_DOUBLE_EQ(data_buffer[0], 1000000);
  ASSERT_DOUBLE_EQ(data_buffer[1], 0.00001234);
  ASSERT_DOUBLE_EQ(data_buffer[2], 1.0);
  ASSERT_DOUBLE_EQ(data_buffer[3], -123.456);
  ASSERT_DOUBLE_EQ(data_buffer[4], 123.456);
  ASSERT_TRUE(std::isnan(data_buffer[5]));
  // Infinity checks use <cmath> (included by this file) instead of
  // std::numeric_limits, whose header <limits> is not included here.
  ASSERT_TRUE(std::isinf(data_buffer[6]) && std::signbit(data_buffer[6]));
  ASSERT_TRUE(std::isinf(data_buffer[7]) && !std::signbit(data_buffer[7]));
}
432+
376433
TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric16_10) {
377434
ArrowBufferView data;
378435
data.data.as_uint8 = kTestPgCopyNumeric16_10;
@@ -427,6 +484,56 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric16_10) {
427484
EXPECT_EQ(std::string(item.data, item.size_bytes), "nan");
428485
}
429486

487+
// Decodes the kTestPgCopyNumeric16_10 fixture with
// NumericConversionStrategy::kToDouble and verifies the resulting column:
// double storage, six valid rows followed by one null, and the expected values.
TEST(PostgresCopyUtilsTest, PostgresCopyReadNumeric16_10ToDouble) {
  ArrowBufferView copy_bytes;
  copy_bytes.data.as_uint8 = kTestPgCopyNumeric16_10;
  copy_bytes.size_bytes = sizeof(kTestPgCopyNumeric16_10);

  // Describe the input as a record with one numeric child named "col".
  auto numeric_type = PostgresType(PostgresTypeId::kNumeric);
  PostgresType record_type(PostgresTypeId::kRecord);
  record_type.AppendChild("col", numeric_type);

  PostgresCopyStreamTester stream_tester;
  ASSERT_EQ(stream_tester.Init(record_type, NumericConversionStrategy::kToDouble),
            NANOARROW_OK);
  // The reader should signal ENODATA and leave the view fully consumed.
  ASSERT_EQ(stream_tester.ReadAll(&copy_bytes), ENODATA);
  ASSERT_EQ(copy_bytes.data.as_uint8 - kTestPgCopyNumeric16_10,
            sizeof(kTestPgCopyNumeric16_10));
  ASSERT_EQ(copy_bytes.size_bytes, 0);

  nanoarrow::UniqueArray result;
  ASSERT_EQ(stream_tester.GetArray(result.get()), NANOARROW_OK);
  ASSERT_EQ(result->length, 7);
  ASSERT_EQ(result->n_children, 1);

  nanoarrow::UniqueSchema result_schema;
  stream_tester.GetSchema(result_schema.get());

  // Check the inferred storage type and that the array fits the schema.
  nanoarrow::UniqueArrayView view;
  ASSERT_EQ(ArrowArrayViewInitFromSchema(view.get(), result_schema.get(), nullptr),
            NANOARROW_OK);
  ASSERT_EQ(view->children[0]->storage_type, NANOARROW_TYPE_DOUBLE);
  ASSERT_EQ(ArrowArrayViewSetArray(view.get(), result.get(), nullptr), NANOARROW_OK);

  // Child buffer 0 is read as the validity bitmap, buffer 1 as the doubles.
  auto null_bitmap = reinterpret_cast<const uint8_t*>(result->children[0]->buffers[0]);
  auto values = reinterpret_cast<const double*>(result->children[0]->buffers[1]);
  ASSERT_NE(null_bitmap, nullptr);
  ASSERT_NE(values, nullptr);

  // Rows 0 through 5 carry values; row 6 is null.
  ASSERT_TRUE(ArrowBitGet(null_bitmap, 0));
  ASSERT_TRUE(ArrowBitGet(null_bitmap, 1));
  ASSERT_TRUE(ArrowBitGet(null_bitmap, 2));
  ASSERT_TRUE(ArrowBitGet(null_bitmap, 3));
  ASSERT_TRUE(ArrowBitGet(null_bitmap, 4));
  ASSERT_TRUE(ArrowBitGet(null_bitmap, 5));
  ASSERT_FALSE(ArrowBitGet(null_bitmap, 6));

  ASSERT_DOUBLE_EQ(values[0], 0.0);
  ASSERT_DOUBLE_EQ(values[1], 1.01234);
  ASSERT_DOUBLE_EQ(values[2], 1.0123456789);
  ASSERT_DOUBLE_EQ(values[3], -1.0123400000);
  ASSERT_DOUBLE_EQ(values[4], -1.0123456789);
  ASSERT_TRUE(std::isnan(values[5]));
}
536+
430537
TEST(PostgresCopyUtilsTest, PostgresCopyReadTimestamp) {
431538
ArrowBufferView data;
432539
data.data.as_uint8 = kTestPgCopyTimestamp;

0 commit comments

Comments
 (0)