Skip to content

Commit 96dfe25

Browse files
committed
First pass to remove dependency on Python in arcticdb_core
1 parent 7c45ec4 commit 96dfe25

17 files changed

+232
-280
lines changed

cpp/arcticdb/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,6 @@ set(arcticdb_srcs
194194
column_store/column_data.hpp
195195
column_store/column_data_random_accessor.hpp
196196
column_store/column.hpp
197-
column_store/column_utils.hpp
198197
column_store/key_segment.hpp
199198
column_store/memory_segment.hpp
200199
column_store/memory_segment_impl.hpp
@@ -338,6 +337,7 @@ set(arcticdb_srcs
338337
util/buffer.hpp
339338
util/buffer_pool.hpp
340339
util/clock.hpp
340+
util/concepts.hpp
341341
util/configs_map.hpp
342342
util/constants.hpp
343343
util/constructors.hpp

cpp/arcticdb/column_store/column.hpp

Lines changed: 7 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#include <arcticdb/entity/performance_tracing.hpp>
1616
#include <arcticdb/entity/types.hpp>
1717
#include <arcticdb/util/bitset.hpp>
18+
#include <arcticdb/util/concepts.hpp>
1819
#include <arcticdb/util/cursored_buffer.hpp>
1920
#include <arcticdb/util/flatten_utils.hpp>
2021
#include <arcticdb/util/preconditions.hpp>
@@ -26,23 +27,14 @@
2627
#include <cstdio>
2728
#endif
2829
#include <folly/Function.h>
29-
#include <pybind11/pybind11.h>
30-
#include <pybind11/numpy.h>
3130

3231
#include <concepts>
3332
#include <numeric>
3433
#include <optional>
3534

36-
namespace py = pybind11;
37-
3835
namespace arcticdb {
3936

40-
// this is needed to make templates of templates work
41-
// since py::array_t has more than one template parameter
42-
// (the rest are defaulted)
43-
template< class T>
44-
using py_array_t = py::array_t<T>;
45-
37+
using util::arithmetic_tensor;
4638
using namespace arcticdb::entity;
4739

4840
// N.B. this will not catch all the things that C++ considers to be narrowing conversions, because
@@ -383,10 +375,9 @@ class Column {
383375
return std::move(shapes_.buffer());
384376
}
385377

386-
template<class T, template<class> class Tensor, std::enable_if_t<
387-
std::is_integral_v<T> || std::is_floating_point_v<T>,
388-
int> = 0>
389-
void set_array(ssize_t row_offset, Tensor<T> &val) {
378+
template<arithmetic_tensor TensorType>
379+
void set_array(ssize_t row_offset, TensorType& val) {
380+
using value_type = typename TensorType::value_type;
390381
ARCTICDB_SAMPLE(ColumnSetArray, RMTSF_Aggregate)
391382
magic_.check();
392383
util::check_arg(last_logical_row_ + 1 == row_offset, "set_array expected row {}, actual {} ", last_logical_row_ + 1, row_offset);
@@ -395,26 +386,8 @@ class Column {
395386
memcpy(shapes_.cursor(), val.shape(), val.ndim() * sizeof(shape_t));
396387
auto info = val.request();
397388
util::FlattenHelper flatten(val);
398-
auto data_ptr = reinterpret_cast<T*>(data_.cursor());
399-
flatten.flatten(data_ptr, reinterpret_cast<const T *>(info.ptr));
400-
update_offsets(val.nbytes());
401-
data_.commit();
402-
shapes_.commit();
403-
++last_logical_row_;
404-
}
405-
406-
template<class T, std::enable_if_t< std::is_integral_v<T> || std::is_floating_point_v<T>, int> = 0>
407-
void set_array(ssize_t row_offset, py::array_t<T>& val) {
408-
ARCTICDB_SAMPLE(ColumnSetArray, RMTSF_Aggregate)
409-
magic_.check();
410-
util::check_arg(last_logical_row_ + 1 == row_offset, "set_array expected row {}, actual {} ", last_logical_row_ + 1, row_offset);
411-
data_.ensure_bytes(val.nbytes());
412-
shapes_.ensure<shape_t>(val.ndim());
413-
memcpy(shapes_.cursor(), val.shape(), val.ndim() * sizeof(shape_t));
414-
auto info = val.request();
415-
util::FlattenHelper<T, py_array_t> flatten(val);
416-
auto data_ptr = reinterpret_cast<T*>(data_.cursor());
417-
flatten.flatten(data_ptr, reinterpret_cast<const T*>(info.ptr));
389+
auto data_ptr = reinterpret_cast<value_type*>(data_.cursor());
390+
flatten.flatten(data_ptr, reinterpret_cast<const value_type*>(info.ptr));
418391
update_offsets(val.nbytes());
419392
data_.commit();
420393
shapes_.commit();

cpp/arcticdb/column_store/column_utils.hpp

Lines changed: 0 additions & 155 deletions
This file was deleted.

cpp/arcticdb/column_store/memory_segment.hpp

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -126,15 +126,8 @@ class SegmentInMemory {
126126
impl_->init_column_map();
127127
}
128128

129-
template<class T, template<class> class Tensor>
130-
requires std::integral<T> || std::floating_point<T>
131-
void set_array(position_t pos, Tensor<T> &val) {
132-
impl_->set_array(pos, val);
133-
}
134-
135-
template<class T>
136-
requires std::integral<T> || std::floating_point<T>
137-
void set_array(position_t pos, py::array_t<T>& val) {
129+
template<arithmetic_tensor TensorType>
130+
void set_array(position_t pos, TensorType &val) {
138131
impl_->set_array(pos, val);
139132
}
140133

cpp/arcticdb/column_store/memory_segment_impl.hpp

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
#include <arcticdb/entity/types.hpp>
1111
#include <arcticdb/column_store/column.hpp>
12+
#include <arcticdb/util/concepts.hpp>
1213
#include <arcticdb/util/offset_string.hpp>
1314
#include <arcticdb/util/preconditions.hpp>
1415

@@ -506,17 +507,8 @@ class SegmentInMemoryImpl {
506507
set_string(idx, val);
507508
}
508509

509-
template<class T, template<class> class Tensor>
510-
requires std::integral<T> || std::floating_point<T>
511-
void set_array(position_t pos, Tensor<T> &val) {
512-
magic_.check();
513-
ARCTICDB_SAMPLE(MemorySegmentSetArray, 0)
514-
column_unchecked(pos).set_array(row_id_ + 1, val);
515-
}
516-
517-
template<class T>
518-
requires std::integral<T> || std::floating_point<T>
519-
void set_array(position_t pos, py::array_t<T>& val) {
510+
template<arithmetic_tensor TensorType>
511+
void set_array(position_t pos, TensorType &val) {
520512
magic_.check();
521513
ARCTICDB_SAMPLE(MemorySegmentSetArray, 0)
522514
column_unchecked(pos).set_array(row_id_ + 1, val);

cpp/arcticdb/column_store/python_bindings.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,15 @@ void register_column_store(py::module &m) {
2828
py::class_<StringPool>(m, "StringPool")
2929
.def(py::init())
3030
.def_property_readonly("nbytes", &StringPool::size)
31-
.def("as_buffer_info", &StringPool::as_buffer_info);
31+
.def("as_buffer_info", [](const StringPool& s) {
32+
return py::buffer_info{
33+
(void *) s.get_const_view(0).data(),
34+
1,
35+
py::format_descriptor<char>::format(),
36+
ssize_t(s.get_const_view(0).size())
37+
38+
};
39+
});
3240
}
3341

3442
} // namespace arcticc::column_store

cpp/arcticdb/column_store/string_pool.cpp

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010
#include <arcticdb/column_store/segment_utils.hpp>
1111
#include <ankerl/unordered_dense.h>
1212

13-
#include <pybind11/pybind11.h>
14-
1513
namespace arcticdb {
1614

1715
/*****************
@@ -199,15 +197,6 @@ size_t StringPool::size() const {
199197
return block_.size();
200198
}
201199

202-
py::buffer_info StringPool::as_buffer_info() const {
203-
return py::buffer_info{
204-
(void *) block_.at(0).data(),
205-
1,
206-
py::format_descriptor<char>::format(),
207-
ssize_t(block_.at(0).size())
208-
};
209-
}
210-
211200
std::optional<position_t> StringPool::get_offset_for_column(std::string_view string, const Column& column) {
212201
auto unique_values = unique_values_for_string_column(column);
213202
remove_nones_and_nans(unique_values);

cpp/arcticdb/column_store/string_pool.hpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,6 @@
1818
#include <arcticdb/column_store/chunked_buffer.hpp>
1919
#include <arcticdb/column_store/column_data.hpp>
2020

21-
namespace pybind11 {
22-
struct buffer_info;
23-
}
24-
25-
namespace py = pybind11;
26-
2721
#include <ankerl/unordered_dense.h>
2822

2923
namespace arcticdb {
@@ -172,8 +166,6 @@ class StringPool {
172166

173167
[[nodiscard]] size_t num_blocks() const;
174168

175-
py::buffer_info as_buffer_info() const;
176-
177169
std::optional<position_t> get_offset_for_column(std::string_view str, const Column& column);
178170
ankerl::unordered_dense::set<position_t> get_offsets_for_column(const std::shared_ptr<std::unordered_set<std::string>>& strings, const Column& column);
179171
private:

cpp/arcticdb/entity/native_tensor.hpp

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,6 @@
1414
// for std::accumulate
1515
#include <numeric>
1616

17-
#include <pybind11/numpy.h>
18-
19-
namespace py = pybind11;
20-
2117
namespace arcticdb::entity {
2218

2319
inline ssize_t calc_elements(const shape_t* shape, ssize_t ndim) {
@@ -166,6 +162,7 @@ ssize_t byte_offset_impl(const stride_t* strides, ssize_t i, Ix... index) {
166162
//TODO is the conversion to a typed tensor really necessary for the codec part?
167163
template<typename T>
168164
struct TypedTensor : public NativeTensor {
165+
using value_type = T;
169166
static size_t itemsize() { return sizeof(T); }
170167

171168
std::array<stride_t, 2> f_style_strides() {
@@ -255,12 +252,8 @@ struct TypedTensor : public NativeTensor {
255252
}
256253
}
257254
};
258-
template<typename T>
259-
py::array to_py_array(const TypedTensor<T>& tensor) {
260-
return py::array({tensor.shape(), tensor.shape() + tensor.ndim()}, reinterpret_cast<const T*>(tensor.data()));
261-
}
262255

263256
template<typename T>
264257
using TensorType = TypedTensor<T>;
265258

266-
}//namespace arcticdb
259+
}//namespace arcticdb

0 commit comments

Comments
 (0)