Skip to content

Commit 9e54c79

Browse files
committed
Separate iceberg from Hive
1 parent c0807b8 commit 9e54c79

File tree

171 files changed

+29517
-2
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

171 files changed

+29517
-2
lines changed

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,9 @@ set(VELOX_GFLAGS_TYPE
127127
)
128128
option(VELOX_ENABLE_EXEC "Build exec." ON)
129129
option(VELOX_ENABLE_AGGREGATES "Build aggregates." ON)
130-
option(VELOX_ENABLE_HIVE_CONNECTOR "Build Hive connector." ON)
130+
option(VELOX_ENABLE_HIVE_CONNECTOR "Build the Hive connector." ON)
131+
#option(VELOX_ENABLE_HIVE_NEW_CONNECTOR "Build the new Hive connector." ON)
132+
#option(VELOX_ENABLE_ICEBERG_CONNECTOR "Build the ICEBERG connector that does NOT depend on the new Hive connector." ON)
131133
option(VELOX_ENABLE_TPCH_CONNECTOR "Build TPC-H connector." ON)
132134
option(VELOX_ENABLE_PRESTO_FUNCTIONS "Build Presto SQL functions." ON)
133135
option(VELOX_ENABLE_SPARK_FUNCTIONS "Build Spark SQL functions." ON)
@@ -663,6 +665,7 @@ install(FILES velox/type/Type.h DESTINATION "include/velox")
663665

664666
# Adding this down here prevents warnings in dependencies from stopping the
665667
# build
668+
# Honour the caller-provided TREAT_WARNINGS_AS_ERRORS setting. The variable
# must not be force-set immediately before this check: doing so makes the
# branch below unreachable and silently disables -Werror for every build.
if("${TREAT_WARNINGS_AS_ERRORS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()

velox/connectors/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ velox_link_libraries(velox_connector velox_common_config velox_vector)
1717

1818
add_subdirectory(fuzzer)
1919

20+
add_subdirectory(lakehouse)
21+
2022
if(${VELOX_ENABLE_HIVE_CONNECTOR})
2123
add_subdirectory(hive)
2224
endif()

velox/connectors/Connector.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ class ColumnHandle : public ISerializable {
9494
VELOX_UNSUPPORTED();
9595
}
9696

97+
/// Returns a string description of this column handle. The base
/// implementation only invokes VELOX_NYI() and never reaches a return
/// statement; presumably concrete handles are expected to override it
/// (TODO confirm against the subclasses).
virtual std::string toString() const {
98+
VELOX_NYI();
99+
}
100+
97101
folly::dynamic serialize() const override;
98102

99103
protected:
@@ -199,7 +203,7 @@ class DataSink {
199203
uint64_t recodeTimeNs{0};
200204
uint64_t compressionTimeNs{0};
201205

202-
common::SpillStats spillStats;
206+
velox::common::SpillStats spillStats;
203207

204208
bool empty() const;
205209

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
#velox_add_library(velox_connector Connector.cpp)
16+
#
17+
#velox_link_libraries(velox_connector velox_common_config velox_vector)
18+
19+
add_subdirectory(base)
20+
21+
22+
add_subdirectory(hiveV2)
23+
24+
25+
add_subdirectory(iceberg)
26+
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
#if(${VELOX_BUILD_TESTING})
16+
# add_subdirectory(tests)
17+
#elseif(${VELOX_BUILD_TEST_UTILS})
18+
# add_subdirectory(tests/utils)
19+
#endif()
20+
21+
22+
23+
velox_add_library(
24+
velox_connector_lakehouse_base
25+
STATIC
26+
ConnectorConfigBase.cpp
27+
DataSourceBase.cpp
28+
ConnectorSplitBase.cpp
29+
ConnectorUtil.cpp
30+
FileHandle.cpp
31+
SplitReaderBase.cpp
32+
TableHandleBase.cpp)
33+
34+
#velox_include_directories(velox_connectors_common PRIVATE ${Protobuf_INCLUDE_DIRS})
35+
36+
velox_link_libraries(
37+
velox_connector_lakehouse_base
38+
velox_buffer
39+
velox_caching
40+
velox_common_io
41+
velox_common_compression
42+
velox_common_config
43+
velox_dwio_common_encryption
44+
velox_dwio_common_exception
45+
velox_exception
46+
velox_expression
47+
velox_memory
48+
velox_type_tz
49+
Boost::regex
50+
Folly::folly
51+
glog::glog
52+
protobuf::libprotobuf)
53+
54+
if(${VELOX_BUILD_TESTING})
55+
add_subdirectory(tests)
56+
endif()
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "ConnectorConfigBase.h"
18+
19+
#include "velox/common/config/Config.h"
20+
21+
namespace facebook::velox::connector::lakehouse::base {
22+
23+
std::string ConnectorConfigBase::gcsEndpoint() const {
24+
return config_->get<std::string>(kGcsEndpoint, std::string(""));
25+
}
26+
27+
std::string ConnectorConfigBase::gcsCredentialsPath() const {
28+
return config_->get<std::string>(kGcsCredentialsPath, std::string(""));
29+
}
30+
31+
std::optional<int> ConnectorConfigBase::gcsMaxRetryCount() const {
32+
return static_cast<std::optional<int>>(config_->get<int>(kGcsMaxRetryCount));
33+
}
34+
35+
std::optional<std::string> ConnectorConfigBase::gcsMaxRetryTime() const {
36+
return static_cast<std::optional<std::string>>(
37+
config_->get<std::string>(kGcsMaxRetryTime));
38+
}
39+
40+
bool ConnectorConfigBase::isOrcUseColumnNames(
41+
const config::ConfigBase* session) const {
42+
return session->get<bool>(
43+
kOrcUseColumnNamesSession, config_->get<bool>(kOrcUseColumnNames, false));
44+
}
45+
46+
bool ConnectorConfigBase::isParquetUseColumnNames(
47+
const config::ConfigBase* session) const {
48+
return session->get<bool>(
49+
kParquetUseColumnNamesSession,
50+
config_->get<bool>(kParquetUseColumnNames, false));
51+
}
52+
53+
bool ConnectorConfigBase::isFileColumnNamesReadAsLowerCase(
54+
const config::ConfigBase* session) const {
55+
return session->get<bool>(
56+
kFileColumnNamesReadAsLowerCaseSession,
57+
config_->get<bool>(kFileColumnNamesReadAsLowerCase, false));
58+
}
59+
60+
bool ConnectorConfigBase::isPartitionPathAsLowerCase(
61+
const config::ConfigBase* session) const {
62+
return session->get<bool>(kPartitionPathAsLowerCaseSession, true);
63+
}
64+
65+
bool ConnectorConfigBase::allowNullPartitionKeys(
66+
const config::ConfigBase* session) const {
67+
return session->get<bool>(
68+
kAllowNullPartitionKeysSession,
69+
config_->get<bool>(kAllowNullPartitionKeys, true));
70+
}
71+
72+
int64_t ConnectorConfigBase::maxCoalescedBytes(
73+
const config::ConfigBase* session) const {
74+
return session->get<int64_t>(
75+
kMaxCoalescedBytesSession,
76+
config_->get<int64_t>(kMaxCoalescedBytes, 128 << 20)); // 128MB
77+
}
78+
79+
int32_t ConnectorConfigBase::maxCoalescedDistanceBytes(
80+
const config::ConfigBase* session) const {
81+
const auto distance = config::toCapacity(
82+
session->get<std::string>(
83+
kMaxCoalescedDistanceSession,
84+
config_->get<std::string>(kMaxCoalescedDistance, "512kB")),
85+
config::CapacityUnit::BYTE);
86+
VELOX_USER_CHECK_LE(
87+
distance,
88+
std::numeric_limits<int32_t>::max(),
89+
"The max merge distance to combine read requests must be less than 2GB."
90+
" Got {} bytes.",
91+
distance);
92+
return int32_t(distance);
93+
}
94+
95+
int32_t ConnectorConfigBase::prefetchRowGroups() const {
96+
return config_->get<int32_t>(kPrefetchRowGroups, 1);
97+
}
98+
99+
// Looks up kLoadQuantumSession in `session`, passing the connector-level
// kLoadQuantum value (itself defaulting to 8 << 20, i.e. 8MB) as the default.
int32_t ConnectorConfigBase::loadQuantum(
    const config::ConfigBase* session) const {
  const int32_t defaultQuantum = 8 << 20;
  const int32_t connectorValue =
      config_->get<int32_t>(kLoadQuantum, defaultQuantum);
  return session->get<int32_t>(kLoadQuantumSession, connectorValue);
}
103+
104+
int32_t ConnectorConfigBase::numCacheFileHandles() const {
105+
return config_->get<int32_t>(kNumCacheFileHandles, 20'000);
106+
}
107+
108+
uint64_t ConnectorConfigBase::fileHandleExpirationDurationMs() const {
109+
return config_->get<uint64_t>(kFileHandleExpirationDurationMs, 0);
110+
}
111+
112+
bool ConnectorConfigBase::isFileHandleCacheEnabled() const {
113+
return config_->get<bool>(kEnableFileHandleCache, true);
114+
}
115+
116+
std::string ConnectorConfigBase::writeFileCreateConfig() const {
117+
return config_->get<std::string>(kWriteFileCreateConfig, "");
118+
}
119+
120+
uint64_t ConnectorConfigBase::footerEstimatedSize() const {
121+
return config_->get<uint64_t>(kFooterEstimatedSize, 256UL << 10);
122+
}
123+
124+
uint64_t ConnectorConfigBase::filePreloadThreshold() const {
125+
return config_->get<uint64_t>(kFilePreloadThreshold, 8UL << 20);
126+
}
127+
128+
uint8_t ConnectorConfigBase::readTimestampUnit(
129+
const config::ConfigBase* session) const {
130+
const auto unit = session->get<uint8_t>(
131+
kReadTimestampUnitSession,
132+
config_->get<uint8_t>(kReadTimestampUnit, 3 /*milli*/));
133+
VELOX_CHECK(
134+
unit == 3 || unit == 6 /*micro*/ || unit == 9 /*nano*/,
135+
"Invalid timestamp unit.");
136+
return unit;
137+
}
138+
139+
bool ConnectorConfigBase::readTimestampPartitionValueAsLocalTime(
140+
const config::ConfigBase* session) const {
141+
return session->get<bool>(
142+
kReadTimestampPartitionValueAsLocalTimeSession,
143+
config_->get<bool>(kReadTimestampPartitionValueAsLocalTime, true));
144+
}
145+
146+
bool ConnectorConfigBase::readStatsBasedFilterReorderDisabled(
147+
const config::ConfigBase* session) const {
148+
return session->get<bool>(
149+
kReadStatsBasedFilterReorderDisabledSession,
150+
config_->get<bool>(kReadStatsBasedFilterReorderDisabled, false));
151+
}
152+
153+
} // namespace facebook::velox::connector::lakehouse::base

0 commit comments

Comments
 (0)