Skip to content

Commit 1c6d04f

Browse files
authored
fix: Fail with non-UTF8-encoded strings during data frame scan instead of attempting to reencode (#1795)
* fix: Fail with non-UTF8-encoded strings during data frame scan instead of attempting to reencode
* chore: Auto-update from GitHub Actions
  Run: https://github.com/duckdb/duckdb-r/actions/runs/19836209158
* Revert "chore: Auto-update from GitHub Actions"
  This reverts commit 12a4af8.
* Avoid […] (message truncated in the page view)
1 parent 774e685 commit 1c6d04f

File tree

2 files changed

+10
-9
lines changed

2 files changed

+10
-9
lines changed

src/include/rapi.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,6 @@ struct ReplacementDataDBWrapper : public ReplacementScanData {
155155

156156
cpp11::strings StringsToSexp(vector<std::string> s);
157157

158-
SEXP ToUtf8(SEXP string_sexp);
159-
160158
static constexpr char R_STRING_TYPE_NAME[] = "r_string";
161159

162160
struct RStringsType {

src/utils.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,6 @@ using namespace duckdb;
1111
return R_MakeExternalPtrFn((DL_FUNC)duckdb_adbc_init, R_NilValue, R_NilValue);
1212
}
1313

14-
SEXP duckdb::ToUtf8(SEXP string_sexp) {
15-
cpp11::function enc2utf8 = RStrings::get().enc2utf8_sym;
16-
return enc2utf8(string_sexp);
17-
}
18-
1914
[[cpp11::register]] cpp11::r_string rapi_ptr_to_str(SEXP extptr) {
2015
if (TYPEOF(extptr) != EXTPTRSXP) {
2116
rapi_error_with_context("rapi_ptr_to_str", "Need external pointer parameter");
@@ -151,8 +146,16 @@ Value RApiTypes::SexpToValue(SEXP valsexp, R_len_t idx, bool typed_logical_null)
151146
}
152147
}
153148
case RType::STRING: {
154-
auto str_val = STRING_ELT(ToUtf8(valsexp), idx);
155-
return str_val == NA_STRING ? Value(LogicalType::VARCHAR) : Value(CHAR(str_val));
149+
auto str_val = STRING_ELT(valsexp, idx);
150+
if (str_val == NA_STRING) {
151+
return Value(LogicalType::VARCHAR);
152+
}
153+
154+
auto ce = Rf_getCharCE(str_val);
155+
if (ce != CE_UTF8 && ce != CE_NATIVE) {
156+
rapi_error_with_context("SexpToValue", "Only UTF-8 encoded strings are supported for the data frame scan.");
157+
}
158+
return Value(CHAR(str_val));
156159
}
157160
case RTypeId::FACTOR: {
158161
auto int_val = INTEGER_POINTER(valsexp)[idx];

0 commit comments

Comments
 (0)