Skip to content

Commit 4f463cb

Browse files
authored
More conservative reserve-ing of vectors + other
- `ColumnString` constructor: assume roughly 100 rows per `ColumnString::Block`, rather than 2.
- `ColumnString::Slice`: reserve only the exact number of elements in `items_`.
- `ColumnString::Append`: less code duplication.
- Minor style fixes.
1 parent b18be40 commit 4f463cb

File tree

1 file changed

+13
-24
lines changed

1 file changed

+13
-24
lines changed

clickhouse/columns/string.cpp

+13-24
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ void ColumnFixedString::Append(std::string_view str) {
3737
+ std::to_string(str.size()) + " bytes.");
3838
}
3939

40-
if (data_.capacity() - data_.size() < str.size())
41-
{
40+
if (data_.capacity() - data_.size() < str.size()) {
4241
// round up to the next block size
4342
const auto new_size = (((data_.size() + string_size_) / DEFAULT_BLOCK_SIZE) + 1) * DEFAULT_BLOCK_SIZE;
4443
data_.reserve(new_size);
@@ -129,13 +128,11 @@ struct ColumnString::Block
129128
data_(new CharT[capacity])
130129
{}
131130

132-
inline auto GetAvailable() const
133-
{
131+
inline auto GetAvailable() const {
134132
return capacity - size;
135133
}
136134

137-
std::string_view AppendUnsafe(std::string_view str)
138-
{
135+
std::string_view AppendUnsafe(std::string_view str) {
139136
const auto pos = &data_[size];
140137

141138
memcpy(pos, str.data(), str.size());
@@ -144,13 +141,11 @@ struct ColumnString::Block
144141
return std::string_view(pos, str.size());
145142
}
146143

147-
auto GetCurrentWritePos()
148-
{
144+
auto GetCurrentWritePos() {
149145
return &data_[size];
150146
}
151147

152-
std::string_view ConsumeTailAsStringViewUnsafe(size_t len)
153-
{
148+
std::string_view ConsumeTailAsStringViewUnsafe(size_t len) {
154149
const auto start = &data_[size];
155150
size += len;
156151
return std::string_view(start, len);
@@ -170,7 +165,8 @@ ColumnString::ColumnString(size_t element_count)
170165
: Column(Type::CreateString())
171166
{
172167
items_.reserve(element_count);
173-
blocks_.reserve(element_count / 2);
168+
// 100 is an arbitrary number, based on the assumption that string values are about 40 bytes long.
169+
blocks_.reserve(std::max<size_t>(1, element_count / 100));
174170
}
175171

176172
ColumnString::ColumnString(const std::vector<std::string>& data)
@@ -179,8 +175,7 @@ ColumnString::ColumnString(const std::vector<std::string>& data)
179175
items_.reserve(data.size());
180176
blocks_.emplace_back(ComputeTotalSize(data));
181177

182-
for (const auto & s : data)
183-
{
178+
for (const auto & s : data) {
184179
AppendUnsafe(s);
185180
}
186181
};
@@ -201,21 +196,15 @@ ColumnString::~ColumnString()
201196
{}
202197

203198
void ColumnString::Append(std::string_view str) {
204-
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length())
205-
{
199+
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
206200
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
207201
}
208202

209203
items_.emplace_back(blocks_.back().AppendUnsafe(str));
210204
}
211205

212206
void ColumnString::Append(const char* str) {
213-
auto len = strlen(str);
214-
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) {
215-
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len));
216-
}
217-
218-
items_.emplace_back(blocks_.back().AppendUnsafe(str));
207+
Append(std::string_view(str, strlen(str)));
219208
}
220209

221210
void ColumnString::Append(std::string&& steal_value) {
@@ -298,14 +287,14 @@ size_t ColumnString::Size() const {
298287
}
299288

300289
ColumnRef ColumnString::Slice(size_t begin, size_t len) const {
301-
auto result = std::make_shared<ColumnString>(len);
290+
auto result = std::make_shared<ColumnString>();
302291

303292
if (begin < items_.size()) {
304293
len = std::min(len, items_.size() - begin);
294+
result->items_.reserve(len);
305295

306296
result->blocks_.emplace_back(ComputeTotalSize(items_, begin, len));
307-
for (size_t i = begin; i < begin + len; ++i)
308-
{
297+
for (size_t i = begin; i < begin + len; ++i) {
309298
result->Append(items_[i]);
310299
}
311300
}

0 commit comments

Comments
 (0)