Skip to content

Commit 521f429

Browse files
kevinwilfongmeta-codesync[bot]
authored andcommitted
fix: FlatVector::resizeValues can lose existing values for opaque types (facebookincubator#15487)
Summary: Pull Request resolved: facebookincubator#15487 There is a bug in FlatVector::resizeValues where for non-POD types (I think this is just opaque types) if the Buffer is immutable, there's a typo where it copies values from the new Buffer into itself, losing the existing values. Fixing this bug exposes another bug that FlatVector::resizeValues uses BaseVector::length_ as the previous size, however, by this point BaseVector::resize has already been invoked, so it should be equal to new size (leading to reading off the end of the Buffer after the fix for the first issue). To address this I pass the previous size to FlatVector::resizeValues as relying on the order of invoking this function and BaseVector::resize seems error prone. Another option would be to rely on the Buffer sizes, but then we may be copying more values than necessary. Along these lines I updated the logic for POD types (other types) to use the Vector size to determine how many values to copy over (any other values would have been set to initialValue already if one is needed). I noticed a similar bug in the bool template specialization of FlatVector::resizeValues where it was using BaseVector::length_ for deciding how many values to set to initialValue if we are just updating the size of an existing Buffer with sufficient capacity. I opted to just delete this code since it is in a private function and there is no invocation that passes an initialValue for a bool, and it's inconsistent with the base implementation of FlatVector::resizeValues which only sets values to initialValue if the Buffer is reallocated. Reviewed By: bikramSingh91 Differential Revision: D86920858 fbshipit-source-id: 83bd8bdc121cc7998f94b412191dfba1e0c74513
1 parent 646ea2a commit 521f429

File tree

3 files changed

+17
-18
lines changed

3 files changed

+17
-18
lines changed

velox/vector/FlatVector-inl.h

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ void FlatVector<T>::resize(vector_size_t newSize, bool setNotNull) {
456456
}
457457

458458
if constexpr (std::is_same_v<T, StringView>) {
459-
resizeValues(newSize, StringView());
459+
resizeValues(newSize, previousSize, StringView());
460460
if (newSize < previousSize) {
461461
// If we downsize, just invalidate ascii, because we might have become
462462
// 'all ascii' from 'not all ascii'.
@@ -476,7 +476,7 @@ void FlatVector<T>::resize(vector_size_t newSize, bool setNotNull) {
476476
keepAtMostOneStringBuffer();
477477
}
478478
} else {
479-
resizeValues(newSize, std::nullopt);
479+
resizeValues(newSize, previousSize, std::nullopt);
480480
}
481481
}
482482

@@ -537,6 +537,7 @@ void FlatVector<T>::prepareForReuse() {
537537
template <typename T>
538538
void FlatVector<T>::resizeValues(
539539
vector_size_t newSize,
540+
vector_size_t previousSize,
540541
const std::optional<T>& initialValue) {
541542
// TODO: change this to isMutable(). See
542543
// https://github.com/facebookincubator/velox/issues/6562.
@@ -554,16 +555,16 @@ void FlatVector<T>::resizeValues(
554555
AlignedBuffer::allocate<T>(newSize, BaseVector::pool_, initialValue);
555556

556557
if (values_) {
558+
auto len = std::min(newSize, previousSize);
559+
557560
if constexpr (Buffer::is_pod_like_v<T>) {
558561
auto dst = newValues->asMutable<T>();
559562
auto src = values_->as<T>();
560-
auto len = std::min(values_->size(), newValues->size());
561-
memcpy(dst, src, len);
563+
auto byteLen = BaseVector::byteSize<T>(len);
564+
memcpy(dst, src, byteLen);
562565
} else {
563-
const vector_size_t previousSize = BaseVector::length_;
564-
auto* rawOldValues = newValues->asMutable<T>();
566+
const auto* rawOldValues = values_->as<T>();
565567
auto* rawNewValues = newValues->asMutable<T>();
566-
const auto len = std::min<vector_size_t>(newSize, previousSize);
567568
for (vector_size_t row = 0; row < len; ++row) {
568569
rawNewValues[row] = rawOldValues[row];
569570
}
@@ -576,6 +577,7 @@ void FlatVector<T>::resizeValues(
576577
template <>
577578
inline void FlatVector<bool>::resizeValues(
578579
vector_size_t newSize,
580+
vector_size_t previousSize,
579581
const std::optional<bool>& initialValue) {
580582
// TODO: change this to isMutable(). See
581583
// https://github.com/facebookincubator/velox/issues/6562.
@@ -586,13 +588,6 @@ inline void FlatVector<bool>::resizeValues(
586588
} else {
587589
values_->setSize(newByteSize);
588590
}
589-
// ensure that the newly added positions have the right initial value for
590-
// the case where changes in size don't result in change in the size of
591-
// the underlying buffer.
592-
if (initialValue.has_value() && length_ < newSize) {
593-
auto rawData = values_->asMutable<uint64_t>();
594-
bits::fillBits(rawData, length_, newSize, initialValue.value());
595-
}
596591
rawValues_ = values_->asMutable<bool>();
597592
return;
598593
}
@@ -602,7 +597,7 @@ inline void FlatVector<bool>::resizeValues(
602597
if (values_) {
603598
auto dst = newValues->asMutable<char>();
604599
auto src = values_->as<char>();
605-
auto len = std::min(values_->size(), newValues->size());
600+
auto len = BaseVector::byteSize<bool>(std::min(newSize, previousSize));
606601
memcpy(dst, src, len);
607602
}
608603
values_ = std::move(newValues);

velox/vector/FlatVector.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,10 +574,11 @@ class FlatVector final : public SimpleVector<T> {
574574
const vector_size_t* toSourceRow);
575575

576576
// Ensures that the values buffer has space for 'newSize' elements and is
577-
// mutable. Sets elements between the old and new sizes to 'initialValue' if
578-
// the new size > old size.
577+
// mutable. Sets elements between the previous and new sizes to 'initialValue'
578+
// if the new size > previous size.
579579
void resizeValues(
580580
vector_size_t newSize,
581+
vector_size_t previousSize,
581582
const std::optional<T>& initialValue);
582583

583584
// Check string buffers. Keep at most one singly-referenced buffer if it is

velox/vector/tests/VectorTest.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,14 +749,17 @@ class VectorTest : public testing::Test, public velox::test::VectorTestBase {
749749
// immutable. This is true for a slice since it creates buffer views over
750750
// the buffers of the original vector that it sliced.
751751
auto newSize = slice->size() * 2;
752+
auto sliceCopy = BaseVector::copy(*slice);
752753
slice->resize(newSize);
753754
EXPECT_EQ(slice->size(), newSize);
755+
slice->resize(sliceCopy->size());
756+
test::assertEqualVectors(slice, sliceCopy);
754757
}
755758
}
756759

757760
static void testSlices(const VectorPtr& slice, int level = 2) {
758761
for (vector_size_t offset : {0, 16, 17}) {
759-
for (vector_size_t length : {0, 1, 83}) {
762+
for (vector_size_t length : {0, 1, 83, 100}) {
760763
if (offset + length <= slice->size()) {
761764
testSlice(slice, level, offset, length);
762765
}

0 commit comments

Comments
 (0)