Skip to content

GH-39138: [R] Fix implicit conversion warnings #39250

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 22, 2023
56 changes: 38 additions & 18 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
auto altrep_data =
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
auto resolve = altrep_data->locate(i);
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
const auto& array =
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
auto j = resolve.index_in_chunk;

return array->IsNull(j) ? cpp11::na<c_type>()
Expand Down Expand Up @@ -466,10 +467,10 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
std::unique_ptr<arrow::DictionaryUnifier> unifier_ =
ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));

size_t n_arrays = chunked_array->num_chunks();
int n_arrays = chunked_array->num_chunks();
BufferVector arrays_transpose(n_arrays);

for (size_t i = 0; i < n_arrays; i++) {
for (int i = 0; i < n_arrays; i++) {
const auto& dict_i =
*internal::checked_cast<const DictionaryArray&>(*chunked_array->chunk(i))
.dictionary();
Expand Down Expand Up @@ -559,17 +560,14 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
return dup;
}

// The value at position i
static int Elt(SEXP alt, R_xlen_t i) {
if (Base::IsMaterialized(alt)) {
return INTEGER_ELT(Representation(alt), i);
}

// The value at position i as an int64_t (to make bounds checking less verbose)
static int64_t Elt64(SEXP alt, R_xlen_t i) {
auto altrep_data =
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
auto resolve = altrep_data->locate(i);

const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
const auto& array =
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
auto j = resolve.index_in_chunk;

if (!array->IsNull(j)) {
Expand All @@ -578,7 +576,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {

if (WasUnified(alt)) {
const auto* transpose_data = reinterpret_cast<const int32_t*>(
GetArrayTransposed(alt, resolve.chunk_index)->data());
GetArrayTransposed(alt, static_cast<int>(resolve.chunk_index))->data());

switch (indices->type_id()) {
case Type::UINT8:
Expand Down Expand Up @@ -617,7 +615,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
case Type::INT64:
return indices->data()->GetValues<int64_t>(1)[j] + 1;
case Type::UINT64:
return indices->data()->GetValues<uint64_t>(1)[j] + 1;
return static_cast<int64_t>(indices->data()->GetValues<uint64_t>(1)[j] + 1);
default:
break;
}
Expand All @@ -628,6 +626,18 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
return NA_INTEGER;
}

// The value at position i as an int (which R needs because this is a factor)
static int Elt(SEXP alt, R_xlen_t i) {
if (Base::IsMaterialized(alt)) {
return INTEGER_ELT(Representation(alt), i);
}

int64_t elt64 = Elt64(alt, i);
ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1);
ARROW_R_DCHECK(elt64 <= std::numeric_limits<int>::max());
return static_cast<int>(elt64);
}

static R_xlen_t Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) {
// If we have data2, we can just copy the region into buf
// using the standard Get_region for this R type
Expand Down Expand Up @@ -667,7 +677,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
// using the transpose data for this chunk
const auto* transpose_data =
reinterpret_cast<const int32_t*>(GetArrayTransposed(alt, j)->data());
auto transpose = [transpose_data](int x) { return transpose_data[x]; };
auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; };

GetRegionDispatch(array, indices, transpose, out);

Expand All @@ -677,7 +687,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {

} else {
// simpler case, identity transpose
auto transpose = [](int x) { return x; };
auto transpose = [](int64_t x) { return static_cast<int>(x); };

int* out = buf;
for (const auto& array : slice->chunks()) {
Expand Down Expand Up @@ -718,7 +728,13 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {

VisitArraySpanInline<Type>(
*array->data(),
/*valid_func=*/[&](index_type index) { *out++ = transpose(index) + 1; },
/*valid_func=*/
[&](index_type index) {
int64_t transposed = transpose(index) + 1;
ARROW_R_DCHECK(transposed >= 1);
ARROW_R_DCHECK(transposed <= std::numeric_limits<int>::max());
*out++ = static_cast<int>(transposed);
},
/*null_func=*/[&]() { *out++ = cpp11::na<int>(); });
}

Expand Down Expand Up @@ -765,7 +781,8 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end();

if (no_nul) {
return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8);
ARROW_R_DCHECK(view_.size() <= std::numeric_limits<int>::max());
return Rf_mkCharLenCE(view_.data(), static_cast<int>(view_.size()), CE_UTF8);
} else if (strip_out_nuls_) {
return ConvertStripNul();
} else {
Expand Down Expand Up @@ -802,7 +819,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
}

nul_was_stripped_ = true;
return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8);
ARROW_R_DCHECK(stripped_len <= std::numeric_limits<int>::max());
return Rf_mkCharLenCE(stripped_string_.data(), static_cast<int>(stripped_len),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, a DCHECK_LE(stripped_len, std::numeric_limits<int>::max()) would be useful since there is no way to change what Rf_mkCharLenCE receives.

CE_UTF8);
}

bool nul_was_stripped() const { return nul_was_stripped_; }
Expand Down Expand Up @@ -847,7 +866,8 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
auto altrep_data =
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
auto resolve = altrep_data->locate(i);
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
const auto& array =
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
auto j = resolve.index_in_chunk;

SEXP s = NA_STRING;
Expand Down
18 changes: 11 additions & 7 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ std::shared_ptr<arrow::Array> Array__Slice2(const std::shared_ptr<arrow::Array>&
return array->Slice(offset, length);
}

void arrow::r::validate_index(int i, int len) {
void arrow::r::validate_index(int64_t i, int64_t len) {
if (i == NA_INTEGER) {
cpp11::stop("'i' cannot be NA");
}
Expand All @@ -119,10 +119,14 @@ r_vec_size Array__length(const std::shared_ptr<arrow::Array>& x) {
}

// [[arrow::export]]
int Array__offset(const std::shared_ptr<arrow::Array>& x) { return x->offset(); }
r_vec_size Array__offset(const std::shared_ptr<arrow::Array>& x) {
return r_vec_size(x->offset());
}

// [[arrow::export]]
int Array__null_count(const std::shared_ptr<arrow::Array>& x) { return x->null_count(); }
r_vec_size Array__null_count(const std::shared_ptr<arrow::Array>& x) {
return r_vec_size(x->null_count());
}

// [[arrow::export]]
std::shared_ptr<arrow::DataType> Array__type(const std::shared_ptr<arrow::Array>& x) {
Expand Down Expand Up @@ -263,9 +267,9 @@ r_vec_size LargeListArray__value_length(
}

// [[arrow::export]]
r_vec_size FixedSizeListArray__value_length(
int FixedSizeListArray__value_length(
const std::shared_ptr<arrow::FixedSizeListArray>& array, int64_t i) {
return r_vec_size(array->value_length(i));
return array->value_length(i);
}

// [[arrow::export]]
Expand Down Expand Up @@ -294,10 +298,10 @@ cpp11::writable::integers ListArray__raw_value_offsets(
}

// [[arrow::export]]
cpp11::writable::integers LargeListArray__raw_value_offsets(
cpp11::writable::doubles LargeListArray__raw_value_offsets(
const std::shared_ptr<arrow::LargeListArray>& array) {
auto offsets = array->raw_value_offsets();
return cpp11::writable::integers(offsets, offsets + array->length());
return cpp11::writable::doubles(offsets, offsets + array->length());
}

// [[arrow::export]]
Expand Down
14 changes: 7 additions & 7 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ struct Converter_String : public Converter {

private:
static SEXP r_string_from_view(std::string_view view) {
return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8);
return Rf_mkCharLenCE(view.data(), static_cast<int>(view.size()), CE_UTF8);
}

static SEXP r_string_from_view_strip_nul(std::string_view view,
Expand Down Expand Up @@ -576,10 +576,10 @@ class Converter_Dictionary : public Converter {
const auto& arr_type = checked_cast<const DictionaryType&>(*chunked_array->type());
unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));

size_t n_arrays = chunked_array->num_chunks();
int n_arrays = chunked_array->num_chunks();
arrays_transpose_.resize(n_arrays);

for (size_t i = 0; i < n_arrays; i++) {
for (int i = 0; i < n_arrays; i++) {
const auto& dict_i =
*checked_cast<const DictionaryArray&>(*chunked_array->chunk(i)).dictionary();
StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i]));
Expand Down Expand Up @@ -748,15 +748,15 @@ class Converter_Struct : public Converter {
auto colnames = arrow::r::to_r_strings(
type->fields(),
[](const std::shared_ptr<Field>& field) { return field->name(); });
out.attr(symbols::row_names) = arrow::r::short_row_names(n);
out.attr(symbols::row_names) = arrow::r::short_row_names(static_cast<int>(n));
out.attr(R_NamesSymbol) = colnames;
out.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;

return out;
}

Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const {
int nf = converters.size();
int nf = static_cast<int>(converters.size());
for (int i = 0; i < nf; i++) {
SEXP data_i = VECTOR_ELT(data, i);

Expand All @@ -771,7 +771,7 @@ class Converter_Struct : public Converter {
Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
auto struct_array = checked_cast<const arrow::StructArray*>(array.get());
int nf = converters.size();
int nf = static_cast<int>(converters.size());
// Flatten() deals with merging of nulls
auto arrays = ValueOrStop(struct_array->Flatten(gc_memory_pool()));
for (int i = 0; i < nf; i++) {
Expand Down Expand Up @@ -1384,7 +1384,7 @@ cpp11::writable::list to_data_frame(const std::shared_ptr<Rectangle>& data,

tbl.attr(R_NamesSymbol) = names;
tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;
tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr);
tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(static_cast<int>(nr));

return tbl;
}
Expand Down
12 changes: 6 additions & 6 deletions r/src/arraydata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,18 @@ std::shared_ptr<arrow::DataType> ArrayData__get_type(
}

// [[arrow::export]]
int ArrayData__get_length(const std::shared_ptr<arrow::ArrayData>& x) {
return x->length;
r_vec_size ArrayData__get_length(const std::shared_ptr<arrow::ArrayData>& x) {
return r_vec_size(x->length);
}

// [[arrow::export]]
int ArrayData__get_null_count(const std::shared_ptr<arrow::ArrayData>& x) {
return x->null_count;
r_vec_size ArrayData__get_null_count(const std::shared_ptr<arrow::ArrayData>& x) {
return r_vec_size(x->null_count);
}

// [[arrow::export]]
int ArrayData__get_offset(const std::shared_ptr<arrow::ArrayData>& x) {
return x->offset;
r_vec_size ArrayData__get_offset(const std::shared_ptr<arrow::ArrayData>& x) {
return r_vec_size(x->offset);
}

// [[arrow::export]]
Expand Down
Loading