Skip to content

Commit e2ac63c

Browse files
authored
Merge pull request #331 from Enmk/fix_Array_of_LowCardinality
Fix `ColumnArrayT<ColumnLowCardinalityT<ColumnString>>::Append`
2 parents b297a6e + 7d83db4 commit e2ac63c

11 files changed

+199
-59
lines changed

clickhouse/columns/array.cpp

+2-11
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,9 @@ ColumnArray::ColumnArray(ColumnArray&& other)
2525
}
2626

2727
void ColumnArray::AppendAsColumn(ColumnRef array) {
28-
if (!data_->Type()->IsEqual(array->Type())) {
29-
throw ValidationError(
30-
"can't append column of type " + array->Type()->GetName() + " "
31-
"to column type " + data_->Type()->GetName());
32-
}
33-
34-
AddOffset(array->Size());
28+
// appending data may throw (i.e. due to ype check failure), so do it first to avoid partly modified state.
3529
data_->Append(array);
30+
AddOffset(array->Size());
3631
}
3732

3833
ColumnRef ColumnArray::GetAsColumn(size_t n) const {
@@ -59,10 +54,6 @@ ColumnRef ColumnArray::CloneEmpty() const {
5954

6055
void ColumnArray::Append(ColumnRef column) {
6156
if (auto col = column->As<ColumnArray>()) {
62-
if (!col->data_->Type()->IsEqual(data_->Type())) {
63-
return;
64-
}
65-
6657
for (size_t i = 0; i < col->Size(); ++i) {
6758
AppendAsColumn(col->GetAsColumn(i));
6859
}

clickhouse/columns/lowcardinality.cpp

+13-4
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,21 @@ ColumnRef ColumnLowCardinality::GetDictionary() {
227227
}
228228

229229
void ColumnLowCardinality::Append(ColumnRef col) {
230+
// Append values from col only if it is either
231+
// - exactly same type as `this`: LowCardinality wrapping same dictionary type
232+
// - same type as dictionary column
233+
230234
auto c = col->As<ColumnLowCardinality>();
231-
if (!c || !dictionary_column_->Type()->IsEqual(c->dictionary_column_->Type()))
232-
return;
235+
// If not LowCardinality of same dictionary type
236+
if (!c || !dictionary_column_->Type()->IsEqual(c->dictionary_column_->Type())) {
237+
// If not column of the same type as dictionary type
238+
if (!dictionary_column_->Type()->IsEqual(col->GetType())) {
239+
return;
240+
}
241+
}
233242

234-
for (size_t i = 0; i < c->Size(); ++i) {
235-
AppendUnsafe(c->GetItem(i));
243+
for (size_t i = 0; i < col->Size(); ++i) {
244+
AppendUnsafe(col->GetItem(i));
236245
}
237246
}
238247

ut/Column_ut.cpp

+100-33
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#include <clickhouse/client.h>
1717

1818
#include <gtest/gtest.h>
19+
#include <initializer_list>
20+
#include <memory>
21+
#include <type_traits>
1922

2023
#include "utils.h"
2124
#include "roundtrip_column.h"
@@ -46,10 +49,12 @@ std::ostream& operator<<(std::ostream& ostr, const Type::Code& type_code) {
4649
template <typename ColumnTypeT,
4750
typename std::shared_ptr<ColumnTypeT> (*CreatorFunction)(),
4851
typename GeneratorValueType,
49-
typename std::vector<GeneratorValueType> (*GeneratorFunc)()>
52+
typename std::vector<GeneratorValueType> (*GeneratorFunction)()>
5053
struct GenericColumnTestCase
5154
{
5255
using ColumnType = ColumnTypeT;
56+
static constexpr auto Creator = CreatorFunction;
57+
static constexpr auto Generator = GeneratorFunction;
5358

5459
static auto createColumn()
5560
{
@@ -58,7 +63,7 @@ struct GenericColumnTestCase
5863

5964
static auto generateValues()
6065
{
61-
return GeneratorFunc();
66+
return GeneratorFunction();
6267
}
6368
};
6469

@@ -92,7 +97,7 @@ class GenericColumnTest : public testing::Test {
9297
return std::tuple{column, values};
9398
}
9499

95-
static std::optional<std::string> SkipTest(clickhouse::Client& client) {
100+
static std::optional<std::string> CheckIfShouldSkipTest(clickhouse::Client& client) {
96101
if constexpr (std::is_same_v<ColumnType, ColumnDate32>) {
97102
// Date32 first appeared in v21.9.2.17-stable
98103
const auto server_info = client.GetServerInfo();
@@ -113,6 +118,33 @@ class GenericColumnTest : public testing::Test {
113118
}
114119
return std::nullopt;
115120
}
121+
122+
template <typename ColumnType>
123+
static void TestColumnRoundtrip(const std::shared_ptr<ColumnType> & column, const ClientOptions & client_options)
124+
{
125+
SCOPED_TRACE(::testing::Message("Column type: ") << column->GetType().GetName());
126+
SCOPED_TRACE(::testing::Message("Client options: ") << client_options);
127+
128+
clickhouse::Client client(client_options);
129+
130+
if (auto message = CheckIfShouldSkipTest(client)) {
131+
GTEST_SKIP() << *message;
132+
}
133+
134+
auto result_typed = RoundtripColumnValues(client, column)->template AsStrict<ColumnType>();
135+
EXPECT_TRUE(CompareRecursive(*column, *result_typed));
136+
}
137+
138+
139+
template <typename ColumnType, typename CompressionMethods>
140+
static void TestColumnRoundtrip(const ColumnType & column, const ClientOptions & client_options, CompressionMethods && compression_methods)
141+
{
142+
for (auto compressionMethod : compression_methods)
143+
{
144+
ClientOptions new_options = ClientOptions(client_options).SetCompressionMethod(compressionMethod);
145+
TestColumnRoundtrip(column, new_options);
146+
}
147+
}
116148
};
117149

118150
// Luckily all (non-data copying/moving) constructors have size_t params.
@@ -184,7 +216,17 @@ using TestCases = ::testing::Types<
184216
DecimalColumnTestCase<ColumnDecimal, 12, 9>,
185217

186218
DecimalColumnTestCase<ColumnDecimal, 6, 0>,
187-
DecimalColumnTestCase<ColumnDecimal, 6, 3>
219+
DecimalColumnTestCase<ColumnDecimal, 6, 3>,
220+
221+
GenericColumnTestCase<ColumnLowCardinalityT<ColumnString>, &makeColumn<ColumnLowCardinalityT<ColumnString>>, std::string, &MakeStrings>
222+
223+
// Array(String)
224+
// GenericColumnTestCase<ColumnArrayT<ColumnString>, &makeColumn<ColumnArrayT<ColumnString>>, std::vector<std::string>, &MakeArrays<std::string, &MakeStrings>>
225+
226+
// // Array(Array(String))
227+
// GenericColumnTestCase<ColumnArrayT<ColumnArrayT<ColumnString>>, &makeColumn<ColumnArrayT<ColumnArrayT<ColumnString>>>,
228+
// std::vector<std::vector<std::string>>,
229+
// &MakeArrays<std::vector<std::string>, &MakeArrays<std::string, &MakeStrings>>>
188230
>;
189231

190232
TYPED_TEST_SUITE(GenericColumnTest, TestCases);
@@ -222,7 +264,7 @@ TYPED_TEST(GenericColumnTest, EmptyColumn) {
222264

223265
TYPED_TEST(GenericColumnTest, Append) {
224266
auto column = this->MakeColumn();
225-
const auto values = this->GenerateValues(100);
267+
const auto values = this->GenerateValues(10'000);
226268

227269
for (const auto & v : values) {
228270
EXPECT_NO_THROW(column->Append(v));
@@ -259,10 +301,17 @@ inline auto convertValueForGetItem(const ColumnType& col, ValueType&& t) {
259301
}
260302

261303
TYPED_TEST(GenericColumnTest, GetItem) {
262-
auto [column, values] = this->MakeColumnWithValues(100);
304+
auto [column, values] = this->MakeColumnWithValues(10'000);
263305

264306
ASSERT_EQ(values.size(), column->Size());
265-
ASSERT_EQ(column->GetItem(0).type, column->GetType().GetCode());
307+
const auto wrapping_types = std::set<Type::Code>{
308+
Type::Code::LowCardinality, Type::Code::Array, Type::Code::Nullable
309+
};
310+
311+
// For wrapping types, type of ItemView can be different from type of column
312+
if (wrapping_types.find(column->GetType().GetCode()) == wrapping_types.end() ) {
313+
EXPECT_EQ(column->GetItem(0).type, column->GetType().GetCode());
314+
}
266315

267316
for (size_t i = 0; i < values.size(); ++i) {
268317
const auto v = convertValueForGetItem(*column, values[i]);
@@ -274,7 +323,7 @@ TYPED_TEST(GenericColumnTest, GetItem) {
274323
}
275324

276325
TYPED_TEST(GenericColumnTest, Slice) {
277-
auto [column, values] = this->MakeColumnWithValues(100);
326+
auto [column, values] = this->MakeColumnWithValues(10'000);
278327

279328
auto untyped_slice = column->Slice(0, column->Size());
280329
auto slice = untyped_slice->template AsStrict<typename TestFixture::ColumnType>();
@@ -286,7 +335,7 @@ TYPED_TEST(GenericColumnTest, Slice) {
286335
}
287336

288337
TYPED_TEST(GenericColumnTest, CloneEmpty) {
289-
auto [column, values] = this->MakeColumnWithValues(100);
338+
auto [column, values] = this->MakeColumnWithValues(10'000);
290339
EXPECT_EQ(values.size(), column->Size());
291340

292341
auto clone_untyped = column->CloneEmpty();
@@ -298,15 +347,15 @@ TYPED_TEST(GenericColumnTest, CloneEmpty) {
298347
}
299348

300349
TYPED_TEST(GenericColumnTest, Clear) {
301-
auto [column, values] = this->MakeColumnWithValues(100);
350+
auto [column, values] = this->MakeColumnWithValues(10'000);
302351
EXPECT_EQ(values.size(), column->Size());
303352

304353
column->Clear();
305354
EXPECT_EQ(0u, column->Size());
306355
}
307356

308357
TYPED_TEST(GenericColumnTest, Swap) {
309-
auto [column_A, values] = this->MakeColumnWithValues(100);
358+
auto [column_A, values] = this->MakeColumnWithValues(10'000);
310359
auto column_B = this->MakeColumn();
311360

312361
column_A->Swap(*column_B);
@@ -318,18 +367,21 @@ TYPED_TEST(GenericColumnTest, Swap) {
318367
TYPED_TEST(GenericColumnTest, LoadAndSave) {
319368
auto [column_A, values] = this->MakeColumnWithValues(100);
320369

321-
char buffer[4096] = {'\0'};
370+
// large buffer since we have pretty big values for String column
371+
auto const BufferSize = 10*1024*1024;
372+
std::unique_ptr<char[]> buffer = std::make_unique<char[]>(BufferSize);
373+
memset(buffer.get(), 0, BufferSize);
322374
{
323-
ArrayOutput output(buffer, sizeof(buffer));
375+
ArrayOutput output(buffer.get(), BufferSize);
324376
// Save
325-
EXPECT_NO_THROW(column_A->Save(&output));
377+
ASSERT_NO_THROW(column_A->Save(&output));
326378
}
327379

328380
auto column_B = this->MakeColumn();
329381
{
330-
ArrayInput input(buffer, sizeof(buffer));
382+
ArrayInput input(buffer.get(), BufferSize);
331383
// Load
332-
EXPECT_TRUE(column_B->Load(&input, values.size()));
384+
ASSERT_TRUE(column_B->Load(&input, values.size()));
333385
}
334386

335387
EXPECT_TRUE(CompareRecursive(*column_A, *column_B));
@@ -342,25 +394,28 @@ const auto LocalHostEndpoint = ClientOptions()
342394
.SetPassword( getEnvOrDefault("CLICKHOUSE_PASSWORD", ""))
343395
.SetDefaultDatabase(getEnvOrDefault("CLICKHOUSE_DB", "default"));
344396

397+
const auto AllCompressionMethods = {
398+
clickhouse::CompressionMethod::None,
399+
clickhouse::CompressionMethod::LZ4
400+
};
401+
345402
TYPED_TEST(GenericColumnTest, RoundTrip) {
346-
auto [column, values] = this->MakeColumnWithValues(100);
403+
auto [column, values] = this->MakeColumnWithValues(10'000);
347404
EXPECT_EQ(values.size(), column->Size());
348405

349-
clickhouse::Client client(LocalHostEndpoint);
350-
351-
if (auto message = this->SkipTest(client)) {
352-
GTEST_SKIP() << *message;
353-
}
354-
355-
auto result_typed = RoundtripColumnValues(client, column)->template AsStrict<typename TestFixture::ColumnType>();
356-
EXPECT_TRUE(CompareRecursive(*column, *result_typed));
406+
this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods);
357407
}
358408

359-
TYPED_TEST(GenericColumnTest, NulableT_RoundTrip) {
409+
TYPED_TEST(GenericColumnTest, NullableT_RoundTrip) {
360410
using NullableType = ColumnNullableT<typename TestFixture::ColumnType>;
361411

362-
auto column = std::make_shared<NullableType>(this->MakeColumn());
363-
auto values = this->GenerateValues(100);
412+
auto non_nullable_column = this->MakeColumn();
413+
if (non_nullable_column->GetType().GetCode() == Type::Code::LowCardinality)
414+
// TODO (vnemkov): wrap as ColumnLowCardinalityT<ColumnNullableT<NestedColumn>> instead of ColumnNullableT<ColumnLowCardinalityT<NestedColumn>>
415+
GTEST_SKIP() << "Can't have " << non_nullable_column->GetType().GetName() << " in Nullable";
416+
417+
auto column = std::make_shared<NullableType>(std::move(non_nullable_column));
418+
auto values = this->GenerateValues(10'000);
364419

365420
FromVectorGenerator<bool> is_null({true, false});
366421
for (size_t i = 0; i < values.size(); ++i) {
@@ -371,12 +426,24 @@ TYPED_TEST(GenericColumnTest, NulableT_RoundTrip) {
371426
}
372427
}
373428

374-
clickhouse::Client client(LocalHostEndpoint);
429+
this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods);
430+
}
431+
432+
TYPED_TEST(GenericColumnTest, ArrayT_RoundTrip) {
433+
using ColumnArrayType = ColumnArrayT<typename TestFixture::ColumnType>;
375434

376-
if (auto message = this->SkipTest(client)) {
377-
GTEST_SKIP() << *message;
435+
auto [nested_column, values] = this->MakeColumnWithValues(100);
436+
437+
auto column = std::make_shared<ColumnArrayType>(nested_column->CloneEmpty()->template As<typename TestFixture::ColumnType>());
438+
for (size_t i = 0; i < values.size(); ++i)
439+
{
440+
const std::vector<std::decay_t<decltype(values[0])>> row{values.begin(), values.begin() + i};
441+
column->Append(values.begin(), values.begin() + i);
442+
443+
EXPECT_TRUE(CompareRecursive(row, (*column)[column->Size() - 1]));
378444
}
445+
EXPECT_EQ(values.size(), column->Size());
379446

380-
auto result_typed = WrapColumn<NullableType>(RoundtripColumnValues(client, column));
381-
EXPECT_TRUE(CompareRecursive(*column, *result_typed));
447+
this->TestColumnRoundtrip(column, LocalHostEndpoint, AllCompressionMethods);
382448
}
449+

ut/columns_ut.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,10 @@ TEST(ColumnsCase, FixedString_Append_LargeString) {
105105
}
106106

107107
TEST(ColumnsCase, StringInit) {
108-
auto col = std::make_shared<ColumnString>(MakeStrings());
108+
auto values = MakeStrings();
109+
auto col = std::make_shared<ColumnString>(values);
109110

110-
ASSERT_EQ(col->Size(), 4u);
111+
ASSERT_EQ(col->Size(), values.size());
111112
ASSERT_EQ(col->At(1), "ab");
112113
ASSERT_EQ(col->At(3), "abcd");
113114
}

ut/roundtrip_column.cpp

+26-5
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,49 @@
44
#include <clickhouse/block.h>
55

66
#include <gtest/gtest.h>
7+
#include <type_traits>
8+
#include "clickhouse/columns/numeric.h"
79

810
namespace {
911
using namespace clickhouse;
12+
13+
template <typename T>
14+
std::vector<T> GenerateConsecutiveNumbers(size_t count, T start = 0)
15+
{
16+
std::vector<T> result;
17+
result.reserve(count);
18+
19+
T value = start;
20+
for (size_t i = 0; i < count; ++i, ++value)
21+
{
22+
result.push_back(value);
23+
}
24+
25+
return result;
26+
}
27+
1028
}
1129

30+
1231
ColumnRef RoundtripColumnValues(Client& client, ColumnRef expected) {
13-
// Create a temporary table with a single column
14-
// insert values from `expected`
15-
// select and aggregate all values from block into `result` column
32+
// Create a temporary table with a corresponding data column
33+
// INSERT values from `expected`
34+
// SELECT and collect all values from block into `result` column
1635
auto result = expected->CloneEmpty();
1736

1837
const std::string type_name = result->GetType().GetName();
1938
client.Execute("DROP TEMPORARY TABLE IF EXISTS temporary_roundtrip_table;");
20-
client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (col " + type_name + ");");
39+
// id column is to have the same order of rows on SELECT
40+
client.Execute("CREATE TEMPORARY TABLE IF NOT EXISTS temporary_roundtrip_table (id UInt32, col " + type_name + ");");
2141
{
2242
Block block;
2343
block.AppendColumn("col", expected);
44+
block.AppendColumn("id", std::make_shared<ColumnUInt32>(GenerateConsecutiveNumbers<uint32_t>(expected->Size())));
2445
block.RefreshRowCount();
2546
client.Insert("temporary_roundtrip_table", block);
2647
}
2748

28-
client.Select("SELECT col FROM temporary_roundtrip_table", [&result](const Block& b) {
49+
client.Select("SELECT col FROM temporary_roundtrip_table ORDER BY id", [&result](const Block& b) {
2950
if (b.GetRowCount() == 0)
3051
return;
3152

ut/utils.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,10 @@ std::ostream& operator<<(std::ostream & ostr, const PrintContainer<T>& print_con
167167
for (auto i = std::begin(container); i != std::end(container); /*intentionally no ++i*/) {
168168
const auto & elem = *i;
169169

170-
if constexpr (is_container_v<std::decay_t<decltype(elem)>>) {
170+
if constexpr (is_string_v<decltype(elem)>) {
171+
ostr << '"' << elem << '"';
172+
}
173+
else if constexpr (is_container_v<std::decay_t<decltype(elem)>>) {
171174
ostr << PrintContainer{elem};
172175
} else {
173176
ostr << elem;

0 commit comments

Comments
 (0)