Skip to content

Commit 4a186a8

Browse files
authored
Merge pull request #249 from 1261385937/performance_improve
ColumnString: improve performance (26%) by avoiding vector reallocation
2 parents 51c62ce + 4f463cb commit 4a186a8

File tree

2 files changed

+19
-22
lines changed

2 files changed

+19
-22
lines changed

clickhouse/columns/string.cpp

+18-22
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ void ColumnFixedString::Append(std::string_view str) {
3737
+ std::to_string(str.size()) + " bytes.");
3838
}
3939

40-
if (data_.capacity() - data_.size() < str.size())
41-
{
40+
if (data_.capacity() - data_.size() < str.size()) {
4241
// round up to the next block size
4342
const auto new_size = (((data_.size() + string_size_) / DEFAULT_BLOCK_SIZE) + 1) * DEFAULT_BLOCK_SIZE;
4443
data_.reserve(new_size);
@@ -129,13 +128,11 @@ struct ColumnString::Block
129128
data_(new CharT[capacity])
130129
{}
131130

132-
inline auto GetAvailable() const
133-
{
131+
inline auto GetAvailable() const {
134132
return capacity - size;
135133
}
136134

137-
std::string_view AppendUnsafe(std::string_view str)
138-
{
135+
std::string_view AppendUnsafe(std::string_view str) {
139136
const auto pos = &data_[size];
140137

141138
memcpy(pos, str.data(), str.size());
@@ -144,13 +141,11 @@ struct ColumnString::Block
144141
return std::string_view(pos, str.size());
145142
}
146143

147-
auto GetCurrentWritePos()
148-
{
144+
auto GetCurrentWritePos() {
149145
return &data_[size];
150146
}
151147

152-
std::string_view ConsumeTailAsStringViewUnsafe(size_t len)
153-
{
148+
std::string_view ConsumeTailAsStringViewUnsafe(size_t len) {
154149
const auto start = &data_[size];
155150
size += len;
156151
return std::string_view(start, len);
@@ -166,14 +161,21 @@ ColumnString::ColumnString()
166161
{
167162
}
168163

164+
ColumnString::ColumnString(size_t element_count)
165+
: Column(Type::CreateString())
166+
{
167+
items_.reserve(element_count);
168+
// 100 is an arbitrary number, based on the assumption that string values are about 40 bytes long.
169+
blocks_.reserve(std::max<size_t>(1, element_count / 100));
170+
}
171+
169172
ColumnString::ColumnString(const std::vector<std::string>& data)
170173
: ColumnString()
171174
{
172175
items_.reserve(data.size());
173176
blocks_.emplace_back(ComputeTotalSize(data));
174177

175-
for (const auto & s : data)
176-
{
178+
for (const auto & s : data) {
177179
AppendUnsafe(s);
178180
}
179181
};
@@ -194,21 +196,15 @@ ColumnString::~ColumnString()
194196
{}
195197

196198
void ColumnString::Append(std::string_view str) {
197-
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length())
198-
{
199+
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
199200
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
200201
}
201202

202203
items_.emplace_back(blocks_.back().AppendUnsafe(str));
203204
}
204205

205206
void ColumnString::Append(const char* str) {
206-
auto len = strlen(str);
207-
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) {
208-
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len));
209-
}
210-
211-
items_.emplace_back(blocks_.back().AppendUnsafe(str));
207+
Append(std::string_view(str, strlen(str)));
212208
}
213209

214210
void ColumnString::Append(std::string&& steal_value) {
@@ -295,10 +291,10 @@ ColumnRef ColumnString::Slice(size_t begin, size_t len) const {
295291

296292
if (begin < items_.size()) {
297293
len = std::min(len, items_.size() - begin);
294+
result->items_.reserve(len);
298295

299296
result->blocks_.emplace_back(ComputeTotalSize(items_, begin, len));
300-
for (size_t i = begin; i < begin + len; ++i)
301-
{
297+
for (size_t i = begin; i < begin + len; ++i) {
302298
result->Append(items_[i]);
303299
}
304300
}

clickhouse/columns/string.h

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ class ColumnString : public Column {
7878
ColumnString();
7979
~ColumnString();
8080

81+
explicit ColumnString(size_t element_count);
8182
explicit ColumnString(const std::vector<std::string> & data);
8283
explicit ColumnString(std::vector<std::string>&& data);
8384
ColumnString& operator=(const ColumnString&) = delete;

0 commit comments

Comments
 (0)