Skip to content

Commit 3b2b940

Browse files
committed
Add ingestion of matrices
1 parent f344576 commit 3b2b940

File tree

6 files changed

+341
-22
lines changed

6 files changed

+341
-22
lines changed

src/include/typesr.hpp

+11
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ enum class RTypeId {
2121
LOGICAL,
2222
INTEGER,
2323
NUMERIC,
24+
COMPLEX,
2425
STRING,
2526
FACTOR,
2627
DATE,
@@ -43,12 +44,14 @@ enum class RTypeId {
4344
// No RType equivalent
4445
BYTE,
4546
LIST,
47+
MATRIX,
4648
STRUCT,
4749
};
4850

4951
struct RType {
5052
RType();
5153
RType(RTypeId id); // NOLINT: Allow implicit conversion from `RTypeId`
54+
RType(RTypeId id, R_len_t size); // NOLINT: Allow implicit conversion from `RTypeId`
5255
RType(const RType &other);
5356
RType(RType &&other) noexcept;
5457

@@ -57,12 +60,14 @@ struct RType {
5760
// copy assignment
5861
inline RType &operator=(const RType &other) {
5962
id_ = other.id_;
63+
size_ = other.size_;
6064
aux_ = other.aux_;
6165
return *this;
6266
}
6367
// move assignment
6468
inline RType &operator=(RType &&other) noexcept {
6569
id_ = other.id_;
70+
size_ = other.size_;
6671
std::swap(aux_, other.aux_);
6772
return *this;
6873
}
@@ -76,6 +81,7 @@ struct RType {
7681
static constexpr const RTypeId LOGICAL = RTypeId::LOGICAL;
7782
static constexpr const RTypeId INTEGER = RTypeId::INTEGER;
7883
static constexpr const RTypeId NUMERIC = RTypeId::NUMERIC;
84+
static constexpr const RTypeId COMPLEX = RTypeId::COMPLEX;
7985
static constexpr const RTypeId STRING = RTypeId::STRING;
8086
static constexpr const RTypeId DATE = RTypeId::DATE;
8187
static constexpr const RTypeId DATE_INTEGER = RTypeId::DATE_INTEGER;
@@ -105,8 +111,13 @@ struct RType {
105111
static RType STRUCT(child_list_t<RType> &&children);
106112
child_list_t<RType> GetStructChildTypes() const;
107113

114+
static RType MATRIX(const RType &child, R_len_t ncols);
115+
RType GetMatrixElementType() const;
116+
R_len_t GetMatrixNcols() const;
117+
108118
private:
109119
RTypeId id_;
120+
R_len_t size_;
110121
child_list_t<RType> aux_;
111122
};
112123

src/scan.cpp

+84
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using namespace duckdb;
88
using namespace cpp11;
99

10+
static
1011
data_ptr_t GetColDataPtr(const RType &rtype, SEXP coldata) {
1112
switch (rtype.id()) {
1213
case RType::LOGICAL:
@@ -51,6 +52,7 @@ data_ptr_t GetColDataPtr(const RType &rtype, SEXP coldata) {
5152
return (data_ptr_t)DATAPTR_RO(coldata);
5253
case RTypeId::LIST:
5354
return (data_ptr_t)DATAPTR_RO(coldata);
55+
case RTypeId::MATRIX:
5456
case RTypeId::STRUCT:
5557
// Will bind child columns dynamically. Could also optimize by descending early and recording.
5658
return (data_ptr_t)coldata;
@@ -83,6 +85,7 @@ static void AppendColumnSegment(SRC *source_data, idx_t sexp_offset, Vector &res
8385
}
8486
}
8587

88+
static
8689
void AppendListColumnSegment(const RType &rtype, SEXP *source_data, idx_t sexp_offset, Vector &result, idx_t count) {
8790
source_data += sexp_offset;
8891
auto &result_mask = FlatVector::Validity(result);
@@ -104,9 +107,84 @@ void AppendListColumnSegment(const RType &rtype, SEXP *source_data, idx_t sexp_o
104107
}
105108
}
106109

110+
template <class SRC, class DST, class RTYPE>
111+
static inline
112+
void AppendMatrixSegmentAtomic(SRC *src_ptr, int nrows, int ncols, idx_t sexp_offset,
113+
Vector &child_vector, idx_t count) {
114+
auto child_data = FlatVector::GetData<DST>(child_vector);
115+
auto &child_mask = FlatVector::Validity(child_vector);
116+
idx_t vector_idx = 0;
117+
for (idx_t i = 0; i < count; i++) {
118+
auto matrix_elt_idx = sexp_offset + i;
119+
for (idx_t k = 0; k < ncols; k++) {
120+
auto val = src_ptr[matrix_elt_idx];
121+
if (RTYPE::IsNull(val)) {
122+
child_mask.SetInvalid(vector_idx++);
123+
} else {
124+
child_data[vector_idx++] = RTYPE::Convert(val);
125+
}
126+
matrix_elt_idx += nrows;
127+
}
128+
}
129+
}
130+
131+
static
132+
void AppendMatrixColumnSegment(const RType &rtype, bool experimental, SEXP source_data, idx_t sexp_offset, Vector &result, idx_t count) {
133+
auto element_rtype = rtype.GetMatrixElementType();
134+
auto nrows = Rf_nrows(source_data);
135+
auto ncols = Rf_ncols(source_data);
136+
auto &child_vector = ArrayVector::GetEntry(result);
137+
138+
switch (element_rtype.id()) {
139+
case RType::LOGICAL: //LGLSXP
140+
AppendMatrixSegmentAtomic<int, bool, RBooleanType>(LOGICAL_POINTER(source_data),
141+
nrows, ncols, sexp_offset, child_vector, count);
142+
break;
143+
144+
case RType::INTEGER: //INTSXP
145+
AppendMatrixSegmentAtomic<int, int, RIntegerType>(INTEGER_POINTER(source_data),
146+
nrows, ncols, sexp_offset, child_vector, count);
147+
break;
148+
149+
case RType::INTEGER64: //REALSXP
150+
AppendMatrixSegmentAtomic<int64_t, int64_t, RInteger64Type>((int64_t *)NUMERIC_POINTER(source_data),
151+
nrows, ncols, sexp_offset, child_vector, count);
152+
break;
153+
154+
case RType::NUMERIC: //REALSXP
155+
AppendMatrixSegmentAtomic<double, double, RDoubleType>(NUMERIC_POINTER(source_data),
156+
nrows, ncols, sexp_offset, child_vector, count);
157+
break;
158+
159+
case RType::COMPLEX: //CPLXSXP
160+
cpp11::stop("Matrix with complex numbers are not supported.");
161+
break;
162+
163+
case RTypeId::BYTE: // RAWSXP
164+
cpp11::stop("Matrix of type raw is not supported.");
165+
break;
166+
167+
case RType::STRING: //STRSXP
168+
if (experimental) {
169+
D_ASSERT(result.GetType().id() == LogicalTypeId::POINTER);
170+
AppendMatrixSegmentAtomic<SEXP, uintptr_t, DedupPointerEnumType>((SEXP *)DATAPTR_RO(source_data),
171+
nrows, ncols, sexp_offset, child_vector, count);
172+
} else {
173+
AppendMatrixSegmentAtomic<SEXP, string_t, RStringSexpType>((SEXP *)DATAPTR_RO(source_data),
174+
nrows, ncols, sexp_offset, child_vector, count);
175+
}
176+
break;
177+
178+
default:
179+
cpp11::stop("AppendMatrixColumnSegment: Unsupported matrix type for scan");
180+
}
181+
}
182+
183+
static
107184
void AppendAnyColumnSegment(const RType &rtype, bool experimental, data_ptr_t coldata_ptr, idx_t sexp_offset, Vector &v,
108185
idx_t this_count);
109186

187+
static
110188
void AppendStructColumnSegment(const RType &rtype, bool experimental, SEXP source_data, idx_t sexp_offset,
111189
Vector &result, idx_t count) {
112190
// No NULL values for STRUCTs.
@@ -120,6 +198,7 @@ void AppendStructColumnSegment(const RType &rtype, bool experimental, SEXP sourc
120198
}
121199
}
122200

201+
static
123202
void AppendAnyColumnSegment(const RType &rtype, bool experimental, data_ptr_t coldata_ptr, idx_t sexp_offset, Vector &v,
124203
idx_t this_count) {
125204
switch (rtype.id()) {
@@ -253,6 +332,11 @@ void AppendAnyColumnSegment(const RType &rtype, bool experimental, data_ptr_t co
253332
AppendListColumnSegment(rtype, data_ptr, sexp_offset, v, this_count);
254333
break;
255334
}
335+
case RTypeId::MATRIX: {
336+
auto data_ptr = (SEXP)coldata_ptr;
337+
AppendMatrixColumnSegment(rtype, experimental, data_ptr, sexp_offset, v, this_count);
338+
break;
339+
}
256340
case RTypeId::STRUCT: {
257341
auto data_ptr = (SEXP)coldata_ptr;
258342
AppendStructColumnSegment(rtype, experimental, data_ptr, sexp_offset, v, this_count);

src/types.cpp

+46-4
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,24 @@ using namespace duckdb;
1212
// Default-constructs an UNKNOWN type. `size_` is zero-initialised like in the RType(RTypeId)
// constructor, because operator== and the copy/move operations read it.
RType::RType() : id_(RTypeId::UNKNOWN), size_(0) {
}
1414

15-
RType::RType(RTypeId id) : id_(id) {
15+
RType::RType(RTypeId id) : id_(id), size_(0) {
1616
}
1717

18-
RType::RType(const RType &other) : id_(other.id_), aux_(other.aux_) {
18+
RType::RType(RTypeId id, R_len_t size) : id_(id), size_(size) {
1919
}
2020

21-
RType::RType(RType &&other) noexcept : id_(other.id_), aux_(std::move(other.aux_)) {
21+
RType::RType(const RType &other) : id_(other.id_), size_(other.size_), aux_(other.aux_) {
22+
}
23+
24+
RType::RType(RType &&other) noexcept : id_(other.id_), size_(other.size_), aux_(std::move(other.aux_)) {
2225
}
2326

2427
RTypeId RType::id() const {
2528
return id_;
2629
}
2730

2831
bool RType::operator==(const RType &rhs) const {
29-
return id_ == rhs.id_ && aux_ == rhs.aux_;
32+
return id_ == rhs.id_ && size_ == rhs.size_ && aux_ == rhs.aux_;
3033
}
3134

3235
RType RType::FACTOR(cpp11::strings levels) {
@@ -74,6 +77,22 @@ RType RType::GetListChildType() const {
7477
return aux_.front().second;
7578
}
7679

80+
// Builds a MATRIX type: `ncols` is stored as the type's size and `child` (the element type)
// becomes the single, unnamed entry of the auxiliary child list.
RType RType::MATRIX(const RType &child, R_len_t ncols) {
	RType matrix_type(RTypeId::MATRIX, ncols);
	matrix_type.aux_.emplace_back("", child);
	return matrix_type;
}
85+
86+
// Returns a copy of the element type of a MATRIX type, i.e. the type stored in the first
// entry of the auxiliary child list. Only valid for MATRIX types (asserted).
RType RType::GetMatrixElementType() const {
	D_ASSERT(RTypeId::MATRIX == id_);
	const auto &element_entry = aux_.front();
	return element_entry.second;
}
90+
91+
// Returns the column count recorded for a MATRIX type (the value held in `size_`).
// Only valid for MATRIX types (asserted).
R_len_t RType::GetMatrixNcols() const {
	D_ASSERT(RTypeId::MATRIX == id_);
	const R_len_t column_count = size_;
	return column_count;
}
95+
7796
RType RType::STRUCT(child_list_t<RType> &&children) {
7897
RType out = RType(RTypeId::STRUCT);
7998
std::swap(out.aux_, children);
@@ -132,6 +151,23 @@ RType RApiTypes::DetectRType(SEXP v, bool integer64) {
132151
}
133152
} else if (Rf_isFactor(v) && TYPEOF(v) == INTSXP) {
134153
return RType::FACTOR(GET_LEVELS(v));
154+
} else if (Rf_isMatrix(v)) {
155+
if (TYPEOF(v) == LGLSXP) {
156+
return RType::MATRIX(RType::LOGICAL, Rf_ncols(v));
157+
} else if (TYPEOF(v) == INTSXP) {
158+
return RType::MATRIX(RType::INTEGER, Rf_ncols(v));
159+
} else if (TYPEOF(v) == REALSXP) {
160+
if (integer64 && Rf_inherits(v, "integer64")) {
161+
return RType::MATRIX(RType::INTEGER64, Rf_ncols(v));
162+
}
163+
return RType::MATRIX(RType::NUMERIC, Rf_ncols(v));
164+
} else if (TYPEOF(v) == CPLXSXP) {
165+
return RType::MATRIX(RType::COMPLEX, Rf_ncols(v));
166+
} else if (TYPEOF(v) == STRSXP) {
167+
return RType::MATRIX(RType::STRING, Rf_ncols(v));
168+
} else {
169+
return RType::UNKNOWN;
170+
}
135171
} else if (TYPEOF(v) == LGLSXP) {
136172
return RType::LOGICAL;
137173
} else if (TYPEOF(v) == INTSXP) {
@@ -145,6 +181,8 @@ RType RApiTypes::DetectRType(SEXP v, bool integer64) {
145181
return RType::NUMERIC;
146182
} else if (TYPEOF(v) == STRSXP) {
147183
return RType::STRING;
184+
} else if (TYPEOF(v) == CPLXSXP) {
185+
return RType::COMPLEX;
148186
} else if (TYPEOF(v) == VECSXP) {
149187
if (Rf_inherits(v, "blob")) {
150188
return RType::BLOB;
@@ -211,6 +249,8 @@ LogicalType RApiTypes::LogicalTypeFromRType(const RType &rtype, bool experimenta
211249
return LogicalType::DOUBLE;
212250
case RType::INTEGER64:
213251
return LogicalType::BIGINT;
252+
case RType::COMPLEX:
253+
return LogicalType::ARRAY(LogicalType::DOUBLE, 2);
214254
case RTypeId::FACTOR: {
215255
auto duckdb_levels = rtype.GetFactorLevels();
216256
return LogicalType::ENUM(duckdb_levels, rtype.GetFactorLevelsCount());
@@ -244,6 +284,8 @@ LogicalType RApiTypes::LogicalTypeFromRType(const RType &rtype, bool experimenta
244284
return LogicalType::BLOB;
245285
case RTypeId::LIST:
246286
return LogicalType::LIST(RApiTypes::LogicalTypeFromRType(rtype.GetListChildType(), experimental));
287+
case RTypeId::MATRIX:
288+
return LogicalType::ARRAY(RApiTypes::LogicalTypeFromRType(rtype.GetMatrixElementType(), experimental), rtype.GetMatrixNcols());
247289
case RTypeId::STRUCT: {
248290
child_list_t<LogicalType> children;
249291
for (const auto &child : rtype.GetStructChildTypes()) {

src/utils.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ R_len_t RApiTypes::GetVecSize(RType rtype, SEXP coldata) {
116116
D_ASSERT(TYPEOF(coldata) == VECSXP);
117117
coldata = VECTOR_ELT(coldata, 0);
118118
}
119+
120+
if (rtype.id() == RTypeId::MATRIX) {
121+
return Rf_nrows(coldata);
122+
}
119123
// This still isn't quite accurate, but good enough for the types we support.
120124
return Rf_length(coldata);
121125
}

tests/testthat/_snaps/array.md

+11
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,14 @@
2222
Error in `duckdb_result()`:
2323
! Use `dbConnect(array = "matrix")` to enable arrays to be returned to R.
2424

25+
# array errors when writing matrix of complex numbers
26+
27+
Code
28+
dbWriteTable(con, "tbl", df)
29+
Condition
30+
Error in `duckdb_result()`:
31+
! Matrix with complex numbers are not supported.
32+
Error in `duckdb_result()`:
33+
! rapi_execute: Failed to run query
34+
Error: Invalid Error: std::exception
35+

0 commit comments

Comments
 (0)