Skip to content

Commit b607ccc

Browse files
paleolimbotassignUser
authored andcommitted
GH-39138: [R] Fix implicit conversion warnings (#39250)
### Rationale for this change We have failing CRAN checks because this warning occurs on one check machine. ### What changes are included in this PR? Implicit integer casts are made explicit and/or variable declarations were fixed so that fewer implicit integer casts were performed. Fully solving the warnings also requires r-lib/cpp11#349 since some errors occur in those headers. ### Are these changes tested? This particular test we can't do on CI because the MacOS runner we have doesn't have a new enough `clang` to support the requisite `-W` flags. I tested this locally by adding `PKG_CXXFLAGS=-Wconversion -Wno-sign-conversion -Wsign-compare -Werror` to `Makevars.in`. ### Are there any user-facing changes? No * Closes: #39138 Authored-by: Dewey Dunnington <[email protected]> Signed-off-by: Dewey Dunnington <[email protected]>
1 parent 93207fb commit b607ccc

18 files changed

+165
-124
lines changed

r/src/altrep.cpp

+38-18
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,8 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex
275275
auto altrep_data =
276276
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
277277
auto resolve = altrep_data->locate(i);
278-
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
278+
const auto& array =
279+
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
279280
auto j = resolve.index_in_chunk;
280281

281282
return array->IsNull(j) ? cpp11::na<c_type>()
@@ -466,10 +467,10 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
466467
std::unique_ptr<arrow::DictionaryUnifier> unifier_ =
467468
ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));
468469

469-
size_t n_arrays = chunked_array->num_chunks();
470+
int n_arrays = chunked_array->num_chunks();
470471
BufferVector arrays_transpose(n_arrays);
471472

472-
for (size_t i = 0; i < n_arrays; i++) {
473+
for (int i = 0; i < n_arrays; i++) {
473474
const auto& dict_i =
474475
*internal::checked_cast<const DictionaryArray&>(*chunked_array->chunk(i))
475476
.dictionary();
@@ -559,17 +560,14 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
559560
return dup;
560561
}
561562

562-
// The value at position i
563-
static int Elt(SEXP alt, R_xlen_t i) {
564-
if (Base::IsMaterialized(alt)) {
565-
return INTEGER_ELT(Representation(alt), i);
566-
}
567-
563+
// The value at position i as an int64_t (to make bounds checking less verbose)
564+
static int64_t Elt64(SEXP alt, R_xlen_t i) {
568565
auto altrep_data =
569566
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
570567
auto resolve = altrep_data->locate(i);
571568

572-
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
569+
const auto& array =
570+
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
573571
auto j = resolve.index_in_chunk;
574572

575573
if (!array->IsNull(j)) {
@@ -578,7 +576,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
578576

579577
if (WasUnified(alt)) {
580578
const auto* transpose_data = reinterpret_cast<const int32_t*>(
581-
GetArrayTransposed(alt, resolve.chunk_index)->data());
579+
GetArrayTransposed(alt, static_cast<int>(resolve.chunk_index))->data());
582580

583581
switch (indices->type_id()) {
584582
case Type::UINT8:
@@ -617,7 +615,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
617615
case Type::INT64:
618616
return indices->data()->GetValues<int64_t>(1)[j] + 1;
619617
case Type::UINT64:
620-
return indices->data()->GetValues<uint64_t>(1)[j] + 1;
618+
return static_cast<int64_t>(indices->data()->GetValues<uint64_t>(1)[j] + 1);
621619
default:
622620
break;
623621
}
@@ -628,6 +626,18 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
628626
return NA_INTEGER;
629627
}
630628

629+
// The value at position i as an int (which R needs because this is a factor)
630+
static int Elt(SEXP alt, R_xlen_t i) {
631+
if (Base::IsMaterialized(alt)) {
632+
return INTEGER_ELT(Representation(alt), i);
633+
}
634+
635+
int64_t elt64 = Elt64(alt, i);
636+
ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1);
637+
ARROW_R_DCHECK(elt64 <= std::numeric_limits<int>::max());
638+
return static_cast<int>(elt64);
639+
}
640+
631641
static R_xlen_t Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) {
632642
// If we have data2, we can just copy the region into buf
633643
// using the standard Get_region for this R type
@@ -667,7 +677,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
667677
// using the transpose data for this chunk
668678
const auto* transpose_data =
669679
reinterpret_cast<const int32_t*>(GetArrayTransposed(alt, j)->data());
670-
auto transpose = [transpose_data](int x) { return transpose_data[x]; };
680+
auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; };
671681

672682
GetRegionDispatch(array, indices, transpose, out);
673683

@@ -677,7 +687,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
677687

678688
} else {
679689
// simpler case, identity transpose
680-
auto transpose = [](int x) { return x; };
690+
auto transpose = [](int64_t x) { return static_cast<int>(x); };
681691

682692
int* out = buf;
683693
for (const auto& array : slice->chunks()) {
@@ -718,7 +728,13 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> {
718728

719729
VisitArraySpanInline<Type>(
720730
*array->data(),
721-
/*valid_func=*/[&](index_type index) { *out++ = transpose(index) + 1; },
731+
/*valid_func=*/
732+
[&](index_type index) {
733+
int64_t transposed = transpose(index) + 1;
734+
ARROW_R_DCHECK(transposed >= 1);
735+
ARROW_R_DCHECK(transposed <= std::numeric_limits<int>::max());
736+
*out++ = static_cast<int>(transposed);
737+
},
722738
/*null_func=*/[&]() { *out++ = cpp11::na<int>(); });
723739
}
724740

@@ -765,7 +781,8 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
765781
bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end();
766782

767783
if (no_nul) {
768-
return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8);
784+
ARROW_R_DCHECK(view_.size() <= std::numeric_limits<int>::max());
785+
return Rf_mkCharLenCE(view_.data(), static_cast<int>(view_.size()), CE_UTF8);
769786
} else if (strip_out_nuls_) {
770787
return ConvertStripNul();
771788
} else {
@@ -802,7 +819,9 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
802819
}
803820

804821
nul_was_stripped_ = true;
805-
return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8);
822+
ARROW_R_DCHECK(stripped_len <= std::numeric_limits<int>::max());
823+
return Rf_mkCharLenCE(stripped_string_.data(), static_cast<int>(stripped_len),
824+
CE_UTF8);
806825
}
807826

808827
bool nul_was_stripped() const { return nul_was_stripped_; }
@@ -847,7 +866,8 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
847866
auto altrep_data =
848867
reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt)));
849868
auto resolve = altrep_data->locate(i);
850-
const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index);
869+
const auto& array =
870+
altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index));
851871
auto j = resolve.index_in_chunk;
852872

853873
SEXP s = NA_STRING;

r/src/array.cpp

+11-7
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ std::shared_ptr<arrow::Array> Array__Slice2(const std::shared_ptr<arrow::Array>&
9292
return array->Slice(offset, length);
9393
}
9494

95-
void arrow::r::validate_index(int i, int len) {
95+
void arrow::r::validate_index(int64_t i, int64_t len) {
9696
if (i == NA_INTEGER) {
9797
cpp11::stop("'i' cannot be NA");
9898
}
@@ -119,10 +119,14 @@ r_vec_size Array__length(const std::shared_ptr<arrow::Array>& x) {
119119
}
120120

121121
// [[arrow::export]]
122-
int Array__offset(const std::shared_ptr<arrow::Array>& x) { return x->offset(); }
122+
r_vec_size Array__offset(const std::shared_ptr<arrow::Array>& x) {
123+
return r_vec_size(x->offset());
124+
}
123125

124126
// [[arrow::export]]
125-
int Array__null_count(const std::shared_ptr<arrow::Array>& x) { return x->null_count(); }
127+
r_vec_size Array__null_count(const std::shared_ptr<arrow::Array>& x) {
128+
return r_vec_size(x->null_count());
129+
}
126130

127131
// [[arrow::export]]
128132
std::shared_ptr<arrow::DataType> Array__type(const std::shared_ptr<arrow::Array>& x) {
@@ -263,9 +267,9 @@ r_vec_size LargeListArray__value_length(
263267
}
264268

265269
// [[arrow::export]]
266-
r_vec_size FixedSizeListArray__value_length(
270+
int FixedSizeListArray__value_length(
267271
const std::shared_ptr<arrow::FixedSizeListArray>& array, int64_t i) {
268-
return r_vec_size(array->value_length(i));
272+
return array->value_length(i);
269273
}
270274

271275
// [[arrow::export]]
@@ -294,10 +298,10 @@ cpp11::writable::integers ListArray__raw_value_offsets(
294298
}
295299

296300
// [[arrow::export]]
297-
cpp11::writable::integers LargeListArray__raw_value_offsets(
301+
cpp11::writable::doubles LargeListArray__raw_value_offsets(
298302
const std::shared_ptr<arrow::LargeListArray>& array) {
299303
auto offsets = array->raw_value_offsets();
300-
return cpp11::writable::integers(offsets, offsets + array->length());
304+
return cpp11::writable::doubles(offsets, offsets + array->length());
301305
}
302306

303307
// [[arrow::export]]

r/src/array_to_vector.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ struct Converter_String : public Converter {
375375

376376
private:
377377
static SEXP r_string_from_view(std::string_view view) {
378-
return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8);
378+
return Rf_mkCharLenCE(view.data(), static_cast<int>(view.size()), CE_UTF8);
379379
}
380380

381381
static SEXP r_string_from_view_strip_nul(std::string_view view,
@@ -576,10 +576,10 @@ class Converter_Dictionary : public Converter {
576576
const auto& arr_type = checked_cast<const DictionaryType&>(*chunked_array->type());
577577
unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type()));
578578

579-
size_t n_arrays = chunked_array->num_chunks();
579+
int n_arrays = chunked_array->num_chunks();
580580
arrays_transpose_.resize(n_arrays);
581581

582-
for (size_t i = 0; i < n_arrays; i++) {
582+
for (int i = 0; i < n_arrays; i++) {
583583
const auto& dict_i =
584584
*checked_cast<const DictionaryArray&>(*chunked_array->chunk(i)).dictionary();
585585
StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i]));
@@ -748,15 +748,15 @@ class Converter_Struct : public Converter {
748748
auto colnames = arrow::r::to_r_strings(
749749
type->fields(),
750750
[](const std::shared_ptr<Field>& field) { return field->name(); });
751-
out.attr(symbols::row_names) = arrow::r::short_row_names(n);
751+
out.attr(symbols::row_names) = arrow::r::short_row_names(static_cast<int>(n));
752752
out.attr(R_NamesSymbol) = colnames;
753753
out.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;
754754

755755
return out;
756756
}
757757

758758
Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const {
759-
int nf = converters.size();
759+
int nf = static_cast<int>(converters.size());
760760
for (int i = 0; i < nf; i++) {
761761
SEXP data_i = VECTOR_ELT(data, i);
762762

@@ -771,7 +771,7 @@ class Converter_Struct : public Converter {
771771
Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
772772
R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
773773
auto struct_array = checked_cast<const arrow::StructArray*>(array.get());
774-
int nf = converters.size();
774+
int nf = static_cast<int>(converters.size());
775775
// Flatten() deals with merging of nulls
776776
auto arrays = ValueOrStop(struct_array->Flatten(gc_memory_pool()));
777777
for (int i = 0; i < nf; i++) {
@@ -1384,7 +1384,7 @@ cpp11::writable::list to_data_frame(const std::shared_ptr<Rectangle>& data,
13841384

13851385
tbl.attr(R_NamesSymbol) = names;
13861386
tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df;
1387-
tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr);
1387+
tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(static_cast<int>(nr));
13881388

13891389
return tbl;
13901390
}

r/src/arraydata.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,18 @@ std::shared_ptr<arrow::DataType> ArrayData__get_type(
2626
}
2727

2828
// [[arrow::export]]
29-
int ArrayData__get_length(const std::shared_ptr<arrow::ArrayData>& x) {
30-
return x->length;
29+
r_vec_size ArrayData__get_length(const std::shared_ptr<arrow::ArrayData>& x) {
30+
return r_vec_size(x->length);
3131
}
3232

3333
// [[arrow::export]]
34-
int ArrayData__get_null_count(const std::shared_ptr<arrow::ArrayData>& x) {
35-
return x->null_count;
34+
r_vec_size ArrayData__get_null_count(const std::shared_ptr<arrow::ArrayData>& x) {
35+
return r_vec_size(x->null_count);
3636
}
3737

3838
// [[arrow::export]]
39-
int ArrayData__get_offset(const std::shared_ptr<arrow::ArrayData>& x) {
40-
return x->offset;
39+
r_vec_size ArrayData__get_offset(const std::shared_ptr<arrow::ArrayData>& x) {
40+
return r_vec_size(x->offset);
4141
}
4242

4343
// [[arrow::export]]

0 commit comments

Comments
 (0)