Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions velox/connectors/lakehouse/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ velox_add_library(
IcebergSplitReader.cpp
IcebergSplit.cpp
IcebergTableHandle.cpp
PartitionSpec.cpp
PositionalDeleteFileReader.cpp
IcebergDataSink.cpp)

Expand Down
161 changes: 161 additions & 0 deletions velox/connectors/lakehouse/iceberg/PartitionSpec.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/connectors/lakehouse/iceberg/PartitionSpec.h"

namespace facebook::velox::connector::lakehouse::iceberg {

namespace {
// Returns the lowercase name used for `type` in the serialized
// "transformType" entry. Raises via VELOX_UNREACHABLE when `type` matches none
// of the enumerators handled below.
std::string transformTypeToString(TransformType type) {
  const char* label = nullptr;
  // No `default:` label on purpose, so the compiler can still flag a newly
  // added enumerator that is not handled here.
  switch (type) {
    case TransformType::kIdentity:
      label = "identity";
      break;
    case TransformType::kHour:
      label = "hour";
      break;
    case TransformType::kDay:
      label = "day";
      break;
    case TransformType::kMonth:
      label = "month";
      break;
    case TransformType::kYear:
      label = "year";
      break;
    case TransformType::kBucket:
      label = "bucket";
      break;
    case TransformType::kTruncate:
      label = "truncate";
      break;
  }
  if (label == nullptr) {
    VELOX_UNREACHABLE("Unknown TransformType");
  }
  return label;
}

// Parses the name produced by transformTypeToString() back into its
// TransformType enumerator. The comparison is exact (case-sensitive); any
// other input raises a user error that echoes the offending string.
TransformType transformTypeFromString(const std::string& str) {
  struct NamedTransform {
    const char* name;
    TransformType type;
  };
  static constexpr NamedTransform kKnownTransforms[] = {
      {"identity", TransformType::kIdentity},
      {"hour", TransformType::kHour},
      {"day", TransformType::kDay},
      {"month", TransformType::kMonth},
      {"year", TransformType::kYear},
      {"bucket", TransformType::kBucket},
      {"truncate", TransformType::kTruncate},
  };

  for (const auto& candidate : kKnownTransforms) {
    if (str == candidate.name) {
      return candidate.type;
    }
  }
  VELOX_USER_FAIL("Unknown TransformType: {}", str);
}
} // anonymous namespace

// Serializes this partition field into a dynamic object with the entries:
//   "name"          - the constant "Field",
//   "fieldName"     - the column name,
//   "transformType" - the transform's string name,
//   "parameter"     - the transform parameter, or null when it is not set.
folly::dynamic IcebergPartitionSpec::Field::serialize() const {
  folly::dynamic result = folly::dynamic::object("name", "Field")(
      "fieldName", name)("transformType", transformTypeToString(transformType));
  // The key is always emitted; an absent parameter is encoded as null.
  result["parameter"] =
      parameter ? folly::dynamic(*parameter) : folly::dynamic(nullptr);
  return result;
}

// Rebuilds a Field from the dynamic object written by Field::serialize().
//
// Expected entries:
//   "fieldName"     - required string,
//   "transformType" - required string naming a known transform,
//   "parameter"     - optional; missing or null yields std::nullopt, otherwise
//                     it must be an integer that fits in int32_t.
//
// The deserialization context argument is not used. Any malformed entry
// raises a Velox check failure; an unrecognized transform name raises a user
// error from transformTypeFromString().
std::shared_ptr<const ISerializable> IcebergPartitionSpec::Field::create(
    const folly::dynamic& obj,
    void* /*context*/) {
  VELOX_CHECK(obj.isObject(), "Field::create expects object");

  const auto* fieldNamePtr = obj.get_ptr("fieldName");
  VELOX_CHECK(fieldNamePtr, "Field::create: missing 'fieldName'");
  VELOX_CHECK(
      fieldNamePtr->isString(), "Field::create: 'fieldName' must be string");
  auto fieldName = fieldNamePtr->asString();

  const auto* transformTypePtr = obj.get_ptr("transformType");
  VELOX_CHECK(transformTypePtr, "Field::create: missing 'transformType'");
  VELOX_CHECK(
      transformTypePtr->isString(),
      "Field::create: 'transformType' must be string");
  auto transformType = transformTypeFromString(transformTypePtr->asString());

  std::optional<int32_t> parameter = std::nullopt;
  const auto* parameterPtr = obj.get_ptr("parameter");
  if (parameterPtr && !parameterPtr->isNull()) {
    VELOX_CHECK(
        parameterPtr->isInt(),
        "Field::create: 'parameter' must be integer if present");
    // asInt() yields a 64-bit value. Reject anything that does not survive a
    // round trip through int32_t instead of silently truncating it.
    const auto rawParameter = parameterPtr->asInt();
    const auto narrowedParameter = static_cast<int32_t>(rawParameter);
    VELOX_CHECK(
        narrowedParameter == rawParameter,
        "Field::create: 'parameter' is out of int32 range: {}",
        rawParameter);
    parameter = narrowedParameter;
  }

  return std::make_shared<const Field>(fieldName, transformType, parameter);
}

// Registers Field::create under the key "Field", the same string that
// Field::serialize() writes into the "name" entry.
// NOTE(review): "Field" is a very generic key; confirm it cannot collide with
// another type registered in the same registry.
void IcebergPartitionSpec::Field::registerSerDe() {
  DeserializationWithContextRegistryForSharedPtr().Register(
      "Field", Field::create);
}

// Serializes the partition spec into a dynamic object with the entries:
//   "name"   - the constant "IcebergPartitionSpec",
//   "specId" - the spec id,
//   "fields" - an array holding each field's serialized form, in the same
//              order as `fields`.
folly::dynamic IcebergPartitionSpec::serialize() const {
  folly::dynamic serializedFields = folly::dynamic::array();
  serializedFields.reserve(fields.size());
  for (const auto& partitionField : fields) {
    serializedFields.push_back(partitionField.serialize());
  }

  folly::dynamic result =
      folly::dynamic::object("name", "IcebergPartitionSpec")("specId", specId);
  result["fields"] = std::move(serializedFields);
  return result;
}

// Rebuilds an IcebergPartitionSpec from the dynamic object written by
// IcebergPartitionSpec::serialize().
//
// Expected entries:
//   "specId" - required integer that fits in int32_t,
//   "fields" - required array; each element is handed to Field::create()
//              together with `context`.
//
// Any malformed entry raises a Velox check failure.
std::shared_ptr<const ISerializable> IcebergPartitionSpec::create(
    const folly::dynamic& obj,
    void* context) {
  VELOX_CHECK(obj.isObject(), "IcebergPartitionSpec::create expects object");

  const auto* specIdPtr = obj.get_ptr("specId");
  VELOX_CHECK(specIdPtr, "IcebergPartitionSpec::create: missing 'specId'");
  VELOX_CHECK(
      specIdPtr->isInt(),
      "IcebergPartitionSpec::create: 'specId' must be integer");
  // asInt() yields a 64-bit value. Reject anything that does not survive a
  // round trip through int32_t instead of silently truncating it.
  const auto rawSpecId = specIdPtr->asInt();
  const auto specId = static_cast<int32_t>(rawSpecId);
  VELOX_CHECK(
      specId == rawSpecId,
      "IcebergPartitionSpec::create: 'specId' is out of int32 range: {}",
      rawSpecId);

  const auto* fieldsPtr = obj.get_ptr("fields");
  VELOX_CHECK(fieldsPtr, "IcebergPartitionSpec::create: missing 'fields'");
  VELOX_CHECK(
      fieldsPtr->isArray(),
      "IcebergPartitionSpec::create: 'fields' must be array");

  std::vector<Field> deserializedFields;
  deserializedFields.reserve(fieldsPtr->size());
  for (const auto& fieldObj : *fieldsPtr) {
    // Field::create() returns the ISerializable base; it always constructs a
    // Field, so the static downcast is safe.
    auto fieldPtr = Field::create(fieldObj, context);
    auto field = std::static_pointer_cast<const Field>(fieldPtr);
    deserializedFields.push_back(*field);
  }

  return std::make_shared<const IcebergPartitionSpec>(
      specId, std::move(deserializedFields));
}

// Registers the deserializers for the partition spec and its nested Field
// type. The spec is registered under "IcebergPartitionSpec", the same string
// that serialize() writes into the "name" entry.
void IcebergPartitionSpec::registerSerDe() {
  // Register the nested Field type first, same as before.
  Field::registerSerDe();
  DeserializationWithContextRegistryForSharedPtr().Register(
      "IcebergPartitionSpec", IcebergPartitionSpec::create);
}

} // namespace facebook::velox::connector::lakehouse::iceberg
21 changes: 19 additions & 2 deletions velox/connectors/lakehouse/iceberg/PartitionSpec.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <optional>
#include <string>
#include <vector>
#include "velox/common/serialization/Serializable.h"

namespace facebook::velox::connector::lakehouse::iceberg {

Expand All @@ -32,8 +33,8 @@ enum class TransformType {
kTruncate
};

struct IcebergPartitionSpec {
struct Field {
struct IcebergPartitionSpec : public ISerializable {
struct Field : public ISerializable {
// The column name and type of this partition field as it appears in the
// partition spec. The column can be a nested column in struct field.
std::string name;
Expand All @@ -51,13 +52,29 @@ struct IcebergPartitionSpec {
TransformType _transform,
std::optional<int32_t> _parameter)
: name(_name), transformType(_transform), parameter(_parameter) {}

folly::dynamic serialize() const override;

static std::shared_ptr<const ISerializable> create(
const folly::dynamic& obj,
void* context);

static void registerSerDe();
};

const int32_t specId;
const std::vector<Field> fields;

IcebergPartitionSpec(int32_t _specId, const std::vector<Field>& _fields)
: specId(_specId), fields(_fields) {}

folly::dynamic serialize() const override;

static std::shared_ptr<const ISerializable> create(
const folly::dynamic& obj,
void* context);

static void registerSerDe();
};

} // namespace facebook::velox::connector::lakehouse::iceberg
1 change: 1 addition & 0 deletions velox/connectors/lakehouse/iceberg/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ if(VELOX_BUILD_TESTING AND (NOT VELOX_DISABLE_GOOGLETEST))
IcebergSplitReaderBenchmarkTest.cpp
IcebergTableHandleTest.cpp
IcebergTestBase.cpp
PartitionSpecTest.cpp
Main.cpp)
add_test(velox_lakehouse_iceberg_test velox_lakehouse_iceberg_test)

Expand Down
Loading