Skip to content

Commit 2fa5b0a

Browse files
rui-mozhouyuan
authored andcommitted
Support struct schema evolution matching by name
Signed-off-by: Yuan <[email protected]>
1 parent 21af185 commit 2fa5b0a

17 files changed

+327
-41
lines changed

velox/connectors/hive/SplitReader.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,12 +451,17 @@ std::vector<TypePtr> SplitReader::adaptColumns(
451451
auto fileTypeIdx = fileType->getChildIdxIfExists(fieldName);
452452
if (!fileTypeIdx.has_value()) {
453453
// Column is missing. Most likely due to schema evolution.
454-
VELOX_CHECK(tableSchema, "Unable to resolve column '{}'", fieldName);
455-
childSpec->setConstantValue(
456-
BaseVector::createNullConstant(
457-
tableSchema->findChild(fieldName),
458-
1,
459-
connectorQueryCtx_->memoryPool()));
454+
auto outputTypeIdx = readerOutputType_->getChildIdxIfExists(fieldName);
455+
TypePtr fieldType;
456+
if (outputTypeIdx.has_value()) {
457+
// Field name exists in the user-specified output type.
458+
fieldType = readerOutputType_->childAt(outputTypeIdx.value());
459+
} else {
460+
VELOX_CHECK(tableSchema, "Unable to resolve column '{}'", fieldName);
461+
fieldType = tableSchema->findChild(fieldName);
462+
}
463+
childSpec->setConstantValue(BaseVector::createNullConstant(
464+
fieldType, 1, connectorQueryCtx_->memoryPool()));
460465
} else {
461466
// Column no longer missing, reset constant value set on the spec.
462467
childSpec->setConstantValue(nullptr);

velox/dwio/common/ScanSpec.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ bool ScanSpec::hasFilter() const {
157157
if (hasFilter_.has_value()) {
158158
return hasFilter_.value();
159159
}
160-
if (!isConstant() && filter()) {
160+
if (filter()) {
161161
hasFilter_ = true;
162162
return true;
163163
}

velox/dwio/common/SelectiveFlatMapColumnReader.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@ namespace facebook::velox::dwio::common {
2424
class SelectiveFlatMapColumnReader : public SelectiveStructColumnReaderBase {
2525
protected:
2626
SelectiveFlatMapColumnReader(
27+
const dwio::common::ColumnReaderOptions& columnReaderOptions,
2728
const TypePtr& requestedType,
2829
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
2930
FormatParams& params,
3031
velox::common::ScanSpec& scanSpec)
3132
: SelectiveStructColumnReaderBase(
33+
columnReaderOptions,
3234
requestedType,
3335
fileType,
3436
params,

velox/dwio/common/SelectiveStructColumnReader.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,6 @@ void SelectiveStructColumnReaderBase::read(
426426
}
427427

428428
const auto& childSpecs = scanSpec_->children();
429-
VELOX_CHECK(!childSpecs.empty());
430429
for (size_t i = 0; i < childSpecs.size(); ++i) {
431430
const auto& childSpec = childSpecs[i];
432431
VELOX_TRACE_HISTORY_PUSH("read %s", childSpec->fieldName().c_str());
@@ -526,9 +525,12 @@ bool SelectiveStructColumnReaderBase::isChildMissing(
526525
// row type that doesn't exist
527526
// in the output.
528527
fileType_->type()->kind() !=
529-
TypeKind::MAP && // If this is the case it means this is a flat map,
530-
// so it can't have "missing" fields.
531-
childSpec.channel() >= fileType_->size());
528+
TypeKind::MAP // If this is the case it means this is a flat map,
529+
// so it can't have "missing" fields.
530+
) &&
531+
(columnReaderOptions_.useColumnNamesForColumnMapping_
532+
? !asRowType(fileType_->type())->containsChild(childSpec.fieldName())
533+
: childSpec.channel() >= fileType_->size());
532534
}
533535

534536
std::unique_ptr<velox::dwio::common::ColumnLoader>
@@ -540,7 +542,6 @@ SelectiveStructColumnReaderBase::makeColumnLoader(vector_size_t index) {
540542
void SelectiveStructColumnReaderBase::getValues(
541543
const RowSet& rows,
542544
VectorPtr* result) {
543-
VELOX_CHECK(!scanSpec_->children().empty());
544545
VELOX_CHECK_NOT_NULL(
545546
*result, "SelectiveStructColumnReaderBase expects a non-null result");
546547
VELOX_CHECK(

velox/dwio/common/SelectiveStructColumnReader.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#pragma once
1818

19+
#include "velox/dwio/common/Options.h"
1920
#include "velox/dwio/common/SelectiveColumnReaderInternal.h"
2021

2122
namespace facebook::velox::dwio::common {
@@ -113,13 +114,15 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
113114
static constexpr int32_t kConstantChildSpecSubscript = -1;
114115

115116
SelectiveStructColumnReaderBase(
117+
const dwio::common::ColumnReaderOptions& columnReaderOptions,
116118
const TypePtr& requestedType,
117119
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
118120
FormatParams& params,
119121
velox::common::ScanSpec& scanSpec,
120122
bool isRoot = false,
121123
bool generateLazyChildren = true)
122124
: SelectiveColumnReader(requestedType, fileType, params, scanSpec),
125+
columnReaderOptions_(columnReaderOptions),
123126
debugString_(
124127
getExceptionContext().message(VeloxException::Type::kSystem)),
125128
isRoot_(isRoot),
@@ -180,6 +183,8 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
180183
}
181184
}
182185

186+
const dwio::common::ColumnReaderOptions& columnReaderOptions_;
187+
183188
// Context information obtained from ExceptionContext. Stored here
184189
// so that LazyVector readers under this can add this to their
185190
// ExceptionContext. Allows contextualizing reader errors to split

velox/dwio/dwrf/reader/SelectiveFlatMapColumnReader.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ class SelectiveFlatMapAsStructReader : public SelectiveStructColumnReaderBase {
203203
DwrfParams& params,
204204
common::ScanSpec& scanSpec)
205205
: SelectiveStructColumnReaderBase(
206+
columnReaderOptions,
206207
requestedType,
207208
fileType,
208209
params,
@@ -242,6 +243,7 @@ class SelectiveFlatMapAsMapReader : public SelectiveStructColumnReaderBase {
242243
DwrfParams& params,
243244
common::ScanSpec& scanSpec)
244245
: SelectiveStructColumnReaderBase(
246+
columnReaderOptions,
245247
requestedType,
246248
fileType,
247249
params,
@@ -286,6 +288,7 @@ class SelectiveFlatMapReader
286288
DwrfParams& params,
287289
common::ScanSpec& scanSpec)
288290
: dwio::common::SelectiveFlatMapColumnReader(
291+
columnReaderOptions,
289292
requestedType,
290293
fileType,
291294
params,

velox/dwio/dwrf/reader/SelectiveStructColumnReader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ SelectiveStructColumnReader::SelectiveStructColumnReader(
3131
common::ScanSpec& scanSpec,
3232
bool isRoot)
3333
: SelectiveStructColumnReaderBase(
34+
columnReaderOptions,
3435
requestedType,
3536
fileType,
3637
params,

velox/dwio/dwrf/reader/SelectiveStructColumnReader.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@ class SelectiveStructColumnReaderBase
2525
: public dwio::common::SelectiveStructColumnReaderBase {
2626
public:
2727
SelectiveStructColumnReaderBase(
28+
const dwio::common::ColumnReaderOptions& columnReaderOptions,
2829
const TypePtr& requestedType,
2930
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
3031
DwrfParams& params,
3132
common::ScanSpec& scanSpec,
3233
bool isRoot = false,
3334
bool generateLazyChildren = true)
3435
: dwio::common::SelectiveStructColumnReaderBase(
36+
columnReaderOptions,
3537
requestedType,
3638
fileType,
3739
params,

velox/dwio/parquet/reader/ParquetColumnReader.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ std::unique_ptr<dwio::common::SelectiveColumnReader> ParquetColumnReader::build(
3838
const TypePtr& requestedType,
3939
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
4040
ParquetParams& params,
41-
common::ScanSpec& scanSpec) {
41+
common::ScanSpec& scanSpec,
42+
memory::MemoryPool& pool) {
4243
auto colName = scanSpec.fieldName();
4344

4445
switch (fileType->type()->kind()) {
@@ -59,7 +60,7 @@ std::unique_ptr<dwio::common::SelectiveColumnReader> ParquetColumnReader::build(
5960

6061
case TypeKind::ROW:
6162
return std::make_unique<StructColumnReader>(
62-
columnReaderOptions, requestedType, fileType, params, scanSpec);
63+
columnReaderOptions, requestedType, fileType, params, scanSpec, pool);
6364

6465
case TypeKind::VARBINARY:
6566
case TypeKind::VARCHAR:
@@ -68,12 +69,11 @@ std::unique_ptr<dwio::common::SelectiveColumnReader> ParquetColumnReader::build(
6869
case TypeKind::ARRAY: {
6970
VELOX_CHECK(requestedType->isArray(), "Requested type must be array");
7071
return std::make_unique<ListColumnReader>(
71-
columnReaderOptions, requestedType, fileType, params, scanSpec);
72-
}
72+
columnReaderOptions, requestedType, fileType, params, scanSpec, pool);
7373

7474
case TypeKind::MAP:
7575
return std::make_unique<MapColumnReader>(
76-
columnReaderOptions, requestedType, fileType, params, scanSpec);
76+
columnReaderOptions, requestedType, fileType, params, scanSpec, pool);
7777

7878
case TypeKind::BOOLEAN:
7979
return std::make_unique<BooleanColumnReader>(

velox/dwio/parquet/reader/ParquetColumnReader.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class ParquetColumnReader {
4747
const TypePtr& requestedType,
4848
const std::shared_ptr<const dwio::common::TypeWithId>& fileType,
4949
ParquetParams& params,
50-
common::ScanSpec& scanSpec);
50+
common::ScanSpec& scanSpec,
51+
memory::MemoryPool& pool);
5152
};
5253
} // namespace facebook::velox::parquet

0 commit comments

Comments
 (0)