Skip to content

Commit f1f32f1

Browse files
authored
Merge pull request #156 from Enmk/performance_Type_IsEqual
Better performance for Type::IsEqual()
2 parents bf2ad17 + 8e3970c commit f1f32f1

File tree

3 files changed

+128
-2
lines changed

3 files changed

+128
-2
lines changed

clickhouse/types/types.cpp

+62-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
#include "types.h"
22

3+
#include <cityhash/city.h>
4+
35
#include <stdexcept>
46

57
namespace clickhouse {
68

7-
Type::Type(const Code code) : code_(code) {}
9+
Type::Type(const Code code)
10+
: code_(code)
11+
, type_unique_id_(0)  // 0 marks the unique id as "not computed yet"; GetTypeUniqueId() fills it in lazily.
12+
{}
813

914
std::string Type::GetName() const {
1015
switch (code_) {
@@ -72,6 +77,62 @@ std::string Type::GetName() const {
7277
return std::string();
7378
}
7479

80+
uint64_t Type::GetTypeUniqueId() const {
81+
// Helper method that speeds up the type equality checks done by Type::IsEqual().
82+
// Base invariant: types with the same name produce the same unique id (and hence are considered equal).
83+
// As an optimization, the full type name is built only for complex types, and only while the cached id is still 0.
84+
switch (code_) {
85+
case Void:
86+
case Int8:
87+
case Int16:
88+
case Int32:
89+
case Int64:
90+
case Int128:
91+
case UInt8:
92+
case UInt16:
93+
case UInt32:
94+
case UInt64:
95+
case UUID:
96+
case Float32:
97+
case Float64:
98+
case String:
99+
case IPv4:
100+
case IPv6:
101+
case Date:
102+
// For simple types the unique ID is just the Type::Code value; no name is built and nothing is cached.
103+
return code_;
104+
105+
case FixedString:
106+
case DateTime:
107+
case DateTime64:
108+
case Array:
109+
case Nullable:
110+
case Tuple:
111+
case Enum8:
112+
case Enum16:
113+
case Decimal:
114+
case Decimal32:
115+
case Decimal64:
116+
case Decimal128:
117+
case LowCardinality: {
118+
// For complex types, the exact unique ID depends on nested types and/or parameters,
119+
// so the easiest way is to lazily compute it from the type name (a cached value of 0 means "not computed yet").
120+
// Here we do not care if multiple threads are computing the value simultaneously, since it is both:
121+
// 1. going to be the same in every thread
122+
// 2. going to be stored atomically (type_unique_id_ is a std::atomic)
123+
124+
if (type_unique_id_ == 0) {
125+
const auto name = GetName();
126+
type_unique_id_ = CityHash64WithSeed(name.c_str(), name.size(), code_);
127+
}
128+
129+
return type_unique_id_;
130+
}
131+
}
132+
assert(false);
133+
return 0;
134+
}
135+
75136
TypeRef Type::CreateArray(TypeRef item_type) {
76137
return TypeRef(new ArrayType(item_type));  // Wraps a newly allocated ArrayType built from item_type.
77138
}

clickhouse/types/types.h

+10-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "absl/numeric/int128.h"
44

5+
#include <atomic>
56
#include <map>
67
#include <memory>
78
#include <string>
@@ -73,7 +74,12 @@ class Type {
7374
std::string GetName() const;
7475

7576
/// Is given type same as current one.
76-
bool IsEqual(const Type& other) const { return this->GetName() == other.GetName(); }
77+
bool IsEqual(const Type& other) const {
78+
return this == &other
79+
// GetTypeUniqueId() may have to build and hash the full type name, so compare the cheap type codes first and skip it for obviously different types.
80+
|| (this->GetCode() == other.GetCode() && this->GetTypeUniqueId() == other.GetTypeUniqueId());
81+
}
82+
7783
bool IsEqual(const TypeRef& other) const { return IsEqual(*other); }  // NOTE(review): other is dereferenced unconditionally - presumably must be non-null; confirm with callers.
7884

7985
public:
@@ -113,7 +119,10 @@ class Type {
113119
static TypeRef CreateLowCardinality(TypeRef item_type);
114120

115121
private:
122+
uint64_t GetTypeUniqueId() const;
123+
116124
const Code code_;
125+
mutable std::atomic<uint64_t> type_unique_id_;
117126
};
118127

119128
inline bool operator==(const Type & left, const Type & right) {

ut/types_ut.cpp

+56
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include <clickhouse/types/types.h>
2+
#include <clickhouse/columns/factory.h>
3+
24
#include <gtest/gtest.h>
35

46
using namespace clickhouse;
@@ -70,3 +72,57 @@ TEST(TypesCase, EnumTypesEmpty) {
7072
TEST(TypesCase, DecimalTypes) {
7173
// TODO: implement this test.
7274
}
75+
76+
TEST(TypesCase, IsEqual) {
77+
const std::string type_names[] = {
78+
"UInt8",
79+
"Int8",
80+
"UInt128",
81+
"String",
82+
"FixedString(0)",
83+
"FixedString(10000)",
84+
"DateTime('UTC')",
85+
"DateTime64(3, 'UTC')",
86+
"Decimal(9,3)",
87+
"Decimal(18,3)",
88+
"Enum8()",
89+
"Enum16()",
90+
"Enum8('ONE' = 1)",
91+
"Enum8('ONE' = 1, 'TWO' = 2)",
92+
"Enum16('ONE' = 1, 'TWO' = 2, 'THREE' = 3, 'FOUR' = 4)",
93+
"Nullable(FixedString(10000))",
94+
"Nullable(LowCardinality(FixedString(10000)))",
95+
"Array(Int8)",
96+
"Array(UInt8)",
97+
"Array(String)",
98+
"Array(Nullable(LowCardinality(FixedString(10000))))",
99+
"Array(Enum8('ONE' = 1, 'TWO' = 2))",
100+
"Tuple(String, Int8, Date, DateTime)",
101+
"Nullable(Tuple(String, Int8, Date, DateTime))",
102+
"Array(Nullable(Tuple(String, Int8, Date, DateTime)))",
103+
"Array(Array(Nullable(Tuple(String, Int8, Date, DateTime))))",
104+
"Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime)))))",
105+
"Array(Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime('UTC')))))))",
106+
"Array(Array(Array(Array(Nullable(Tuple(String, Int8, Date, DateTime('UTC'), Tuple(LowCardinality(String), Enum8('READ'=1, 'WRITE'=0))))))))",
107+
};
108+
109+
// Check that Type::IsEqual returns true only if the two types have:
110+
// - the same Type instance, or
111+
// - the same Type layout (matching outer type with all nested types and/or parameters)
112+
for (const auto & type_name : type_names) {
113+
SCOPED_TRACE(type_name);
114+
const auto type = clickhouse::CreateColumnByType(type_name)->Type();
115+
116+
// A type must be equal to itself, through both the TypeRef and the Type& overloads
117+
EXPECT_TRUE(type->IsEqual(type));
118+
EXPECT_TRUE(type->IsEqual(*type));
119+
120+
for (const auto & other_type_name : type_names) {
121+
const auto other_type = clickhouse::CreateColumnByType(other_type_name)->Type();
122+
123+
const auto should_be_equal = type_name == other_type_name;
124+
EXPECT_EQ(should_be_equal, type->IsEqual(other_type))
125+
<< "For types: " << type_name << " and " << other_type_name;
126+
}
127+
}
128+
}

0 commit comments

Comments
 (0)