Skip to content

Commit e6863bb

Browse files
committed
Improve: hybrid bench sort performance
1 parent 455508f commit e6863bb

File tree

1 file changed

+60
-4
lines changed

1 file changed

+60
-4
lines changed

scripts/bench_sort.cpp

+60-4
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,37 @@ static idx_t hybrid_sort_cpp(strings_t const &strings, sz_u64_t *order) {
8585
return *(uint32_t *)(i_bytes + offset_in_word) < *(uint32_t *)(j_bytes + offset_in_word);
8686
});
8787

88-
for (size_t i = 0; i != strings.size(); ++i) std::memset((char *)&order[i] + offset_in_word, 0, 4ul);
88+
const auto extract_bytes = [](sz_u64_t v) -> uint32_t {
89+
char *bytes = (char *)&v;
90+
return *(uint32_t *)(bytes + offset_in_word);
91+
};
92+
93+
if (strings.size() >= 2) {
94+
size_t prev_index = 0;
95+
uint64_t prev_bytes = extract_bytes(order[0]);
96+
97+
for (size_t i = 1; i < strings.size(); ++i) {
98+
uint32_t bytes = extract_bytes(order[i]);
99+
if (bytes != prev_bytes) {
100+
std::sort(order + prev_index, order + i, [&](sz_u64_t i, sz_u64_t j) {
101+
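// Assumes offset_in_word == 4 on a little-endian host: the 4-byte prefix sits in the high half of each entry, so the low 32 bits (masked below) are the string index.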
102+
sz_size_t i_index = i & 0xFFFF'FFFF;
103+
sz_size_t j_index = j & 0xFFFF'FFFF;
104+
return strings[i_index] < strings[j_index];
105+
});
106+
prev_index = i;
107+
prev_bytes = bytes;
108+
}
109+
}
110+
111+
std::sort(order + prev_index, order + strings.size(), [&](sz_u64_t i, sz_u64_t j) {
112+
sz_size_t i_index = i & 0xFFFF'FFFF;
113+
sz_size_t j_index = j & 0xFFFF'FFFF;
114+
return strings[i_index] < strings[j_index];
115+
});
116+
}
89117

90-
std::sort(order, order + strings.size(), [&](sz_u64_t i, sz_u64_t j) { return strings[i] < strings[j]; });
118+
for (size_t i = 0; i != strings.size(); ++i) std::memset((char *)&order[i] + offset_in_word, 0, 4ul);
91119

92120
return strings.size();
93121
}
@@ -109,9 +137,37 @@ static idx_t hybrid_stable_sort_cpp(strings_t const &strings, sz_u64_t *order) {
109137
return *(uint32_t *)(i_bytes + offset_in_word) < *(uint32_t *)(j_bytes + offset_in_word);
110138
});
111139

112-
for (size_t i = 0; i != strings.size(); ++i) std::memset((char *)&order[i] + offset_in_word, 0, 4ul);
140+
const auto extract_bytes = [](sz_u64_t v) -> uint32_t {
141+
char *bytes = (char *)&v;
142+
return *(uint32_t *)(bytes + offset_in_word);
143+
};
144+
145+
if (strings.size() >= 2) {
146+
size_t prev_index = 0;
147+
uint64_t prev_bytes = extract_bytes(order[0]);
148+
149+
for (size_t i = 1; i < strings.size(); ++i) {
150+
uint32_t bytes = extract_bytes(order[i]);
151+
if (bytes != prev_bytes) {
152+
std::stable_sort(order + prev_index, order + i, [&](sz_u64_t i, sz_u64_t j) {
153+
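// Assumes offset_in_word == 4 on a little-endian host: the 4-byte prefix sits in the high half of each entry, so the low 32 bits (masked below) are the string index.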
154+
sz_size_t i_index = i & 0xFFFF'FFFF;
155+
sz_size_t j_index = j & 0xFFFF'FFFF;
156+
return strings[i_index] < strings[j_index];
157+
});
158+
prev_index = i;
159+
prev_bytes = bytes;
160+
}
161+
}
162+
163+
std::stable_sort(order + prev_index, order + strings.size(), [&](sz_u64_t i, sz_u64_t j) {
164+
sz_size_t i_index = i & 0xFFFF'FFFF;
165+
sz_size_t j_index = j & 0xFFFF'FFFF;
166+
return strings[i_index] < strings[j_index];
167+
});
168+
}
113169

114-
std::stable_sort(order, order + strings.size(), [&](sz_u64_t i, sz_u64_t j) { return strings[i] < strings[j]; });
170+
for (size_t i = 0; i != strings.size(); ++i) std::memset((char *)&order[i] + offset_in_word, 0, 4ul);
115171

116172
return strings.size();
117173
}

0 commit comments

Comments
 (0)