-
Notifications
You must be signed in to change notification settings - Fork 155
Implement merge update for timeseries matching on the index #2781
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: vasil.pashov/feature/merge
Are you sure you want to change the base?
Changes from all commits
167cb2f
580cf14
abe52e3
59af346
97549b6
dfda400
0473e65
3490e74
b613264
769e271
83a8065
c57fdc0
27cbe29
5f94216
12e2c28
658e2dc
056b8d8
c28903b
6c9438a
809d961
6615c97
556fdce
7c861b2
fca60b9
38ac5b5
ab71903
9b80b7a
0e1921c
8691d88
3d02c50
40610a6
b1052de
25eea67
dfc8496
437fee0
0687e03
29a28e5
c88363d
967b70c
51f8854
07580e9
546ccb7
336bae9
c5f0516
ba437a8
8431069
051af2c
686d8d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| /* Copyright 2025 Man Group Operations Limited | ||
| * | ||
| * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. | ||
| * | ||
| * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software | ||
| * will be governed by the Apache License, version 2.0. | ||
| */ | ||
|
|
||
| #include <arcticdb/column_store/segment_utils.hpp> | ||
| #include <arcticdb/column_store/column.hpp> | ||
| #include <arcticdb/util/configs_map.hpp> | ||
| #include <arcticdb/column_store/column_algorithms.hpp> | ||
|
|
||
| namespace arcticdb { | ||
|
|
||
| ankerl::unordered_dense::set<entity::position_t> unique_values_for_string_column(const Column& column) { | ||
| ankerl::unordered_dense::set<entity::position_t> output_set; | ||
| // Guessing that unique values is a third of the column length | ||
| // TODO would be useful to have actual unique count here from stats | ||
| static auto map_reserve_ratio = ConfigsMap::instance()->get_int("UniqueColumns.AllocationRatio", 3); | ||
| output_set.reserve(column.row_count() / map_reserve_ratio); | ||
|
|
||
| details::visit_type(column.type().data_type(), [&](auto col_desc_tag) { | ||
| using type_info = ScalarTypeInfo<decltype(col_desc_tag)>; | ||
| if constexpr (is_sequence_type(type_info::data_type)) { | ||
| arcticdb::for_each<typename type_info::TDT>(column, [&output_set](auto value) { | ||
| output_set.emplace(value); | ||
| }); | ||
| } else { | ||
| util::raise_rte("Column {} is not a string type column"); | ||
| } | ||
| }); | ||
| return output_set; | ||
| } | ||
|
Comment on lines
+16
to
+34
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was moved from |
||
|
|
||
| std::vector<StreamDescriptor> split_descriptor(const StreamDescriptor& descriptor, const size_t cols_per_segment) { | ||
| if (descriptor.fields().size() <= cols_per_segment) { | ||
| return std::vector{descriptor}; | ||
| } | ||
| const size_t num_segments = descriptor.fields().size() / cols_per_segment; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we round up? E.g. with 3 fields and 2 cols per segment we'll end up dropping the last field? |
||
| std::vector<StreamDescriptor> res; | ||
| res.reserve(num_segments); | ||
|
|
||
| const unsigned field_count = descriptor.field_count(); | ||
| for (size_t i = 0, source_field = descriptor.index().field_count(); i < num_segments; ++i) { | ||
| StreamDescriptor partial(descriptor.id()); | ||
| if (descriptor.index().field_count() > 0) { | ||
| partial.set_index(descriptor.index()); | ||
| for (unsigned index_field = 0; index_field < descriptor.index().field_count(); ++index_field) { | ||
| partial.add_field(descriptor.field(index_field)); | ||
| } | ||
| } | ||
| for (size_t field = 0; field < cols_per_segment && source_field < field_count; ++field) { | ||
| partial.add_field(descriptor.field(source_field++)); | ||
| } | ||
| res.push_back(std::move(partial)); | ||
| } | ||
| return res; | ||
| } | ||
|
|
||
| } // namespace arcticdb | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we convert the `int8_t` to `int`, which is probably `int32_t`?