diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml
index 9a43ae090..740d7bf83 100644
--- a/.github/workflows/MainDistributionPipeline.yml
+++ b/.github/workflows/MainDistributionPipeline.yml
@@ -14,11 +14,11 @@ concurrency:
 jobs:
   duckdb-stable-build:
     name: Build extension binaries
-    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.2
+    uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
     with:
       extension_name: iceberg
       duckdb_version: v1.4.2
-      ci_tools_version: v1.4.2
+      ci_tools_version: main
       exclude_archs: 'windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads'
       extra_toolchains: 'python3'
 
@@ -30,7 +30,7 @@ jobs:
     with:
       extension_name: iceberg
       duckdb_version: v1.4.2
-      ci_tools_version: v1.4.2
+      ci_tools_version: main
       exclude_archs: 'windows_amd64_mingw;wasm_mvp;wasm_eh;wasm_threads'
       deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }}
       deploy_versioned: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 98f2ada4e..2662ecaa4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,87 +2,12 @@ cmake_minimum_required(VERSION 3.5...3.29)
 
 # Set extension name here
 set(TARGET_NAME iceberg)
-project(${TARGET_NAME})
-
-set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard")
-set(CMAKE_CXX_STANDARD_REQUIRED True)
 
 set(EXTENSION_NAME ${TARGET_NAME}_extension)
-include_directories(src/include)
-
-
-set(EXTENSION_SOURCES
-    src/iceberg_extension.cpp
-    src/iceberg_functions.cpp
-    src/iceberg_manifest.cpp
-    src/iceberg_manifest_list.cpp
-    src/avro_scan.cpp
-    src/iceberg_snapshot_lookup.cpp
-    src/catalog_api.cpp
-    src/iceberg_logging.cpp
-    src/catalog_utils.cpp
-    src/storage/iceberg_insert.cpp
-    src/storage/iceberg_delete.cpp
-    src/storage/iceberg_update.cpp
-    src/aws.cpp
-    src/hash_utils.cpp
-    src/base_manifest_reader.cpp
-    src/manifest_list_reader.cpp
-    src/manifest_file_reader.cpp
-    src/deletes/equality_delete.cpp
-    src/deletes/positional_delete.cpp
-    src/deletes/deletion_vector.cpp
-    src/metadata/iceberg_transform.cpp
-    src/metadata/iceberg_table_schema.cpp
-    src/metadata/iceberg_partition_spec.cpp
-    src/metadata/iceberg_sort_order.cpp
-    src/metadata/iceberg_snapshot.cpp
-    src/metadata/iceberg_field_mapping.cpp
-    src/metadata/iceberg_column_definition.cpp
-    src/metadata/iceberg_table_metadata.cpp
-    src/iceberg_predicate.cpp
-    src/iceberg_value.cpp
-    src/common/utils.cpp
-    src/common/url_utils.cpp
-    src/common/iceberg.cpp
-    src/common/api_utils.cpp
-    src/iceberg_functions/iceberg_multi_file_reader.cpp
-    src/iceberg_functions/iceberg_avro_multi_file_reader.cpp
-    src/iceberg_functions/iceberg_deletes_file_reader.cpp
-    src/iceberg_functions/iceberg_multi_file_list.cpp
-    src/iceberg_functions/iceberg_snapshots.cpp
-    src/iceberg_functions/iceberg_scan.cpp
-    src/iceberg_functions/iceberg_metadata.cpp
-    src/iceberg_functions/iceberg_column_stats.cpp
-    src/iceberg_functions/iceberg_partition_stats.cpp
-    src/iceberg_functions/iceberg_table_properties_functions.cpp
-    src/iceberg_functions/iceberg_to_ducklake.cpp
-    src/storage/authorization/sigv4.cpp
-    src/storage/authorization/none.cpp
-    src/storage/authorization/oauth2.cpp
-    src/storage/iceberg_transaction_data.cpp
-    src/storage/irc_authorization.cpp
-    src/storage/irc_catalog.cpp
-    src/storage/irc_schema_entry.cpp
-    src/storage/irc_schema_set.cpp
-    src/storage/irc_table_entry.cpp
-    src/storage/irc_table_set.cpp
-    src/storage/irc_transaction.cpp
-    src/storage/irc_authorization.cpp
-    src/storage/irc_transaction_manager.cpp
-    src/utils/iceberg_type.cpp
-    src/storage/table_update/iceberg_add_snapshot.cpp
-    src/storage/table_update/common.cpp
-    src/storage/iceberg_table_information.cpp
-    src/storage/create_table/iceberg_create_table_request.cpp
-)
-
-add_subdirectory(src/rest_catalog/objects)
-
-add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES} ${ALL_OBJECT_FILES})
+set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
 
-set(PARAMETERS "-warnings")
-build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES} ${ALL_OBJECT_FILES})
+set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard")
+set(CMAKE_CXX_STANDARD_REQUIRED True)
 
 if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   find_package(CURL REQUIRED)
@@ -93,6 +18,53 @@ endif()
 # Roaring is installed via vcpkg and used here
 find_package(roaring CONFIG REQUIRED)
 
+# Project-wide public include directory (use absolute path for clarity in IDEs)
+set(PROJECT_INC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/include)
+
+add_subdirectory(src)
+set(EXTENSION_SOURCES ${ALL_OBJECT_FILES})
+
+set(ICEBERG_OBJECT_LIBS
+  iceberg_src_objects
+  iceberg_common_objects
+  iceberg_deletes_objects
+  iceberg_metadata_objects
+  iceberg_functions_objects
+  iceberg_utils_objects
+  iceberg_storage_objects
+  iceberg_storage_auth_objects
+  iceberg_storage_table_update_objects
+  iceberg_storage_create_table_objects
+  rest_catalog_objects
+)
+
+foreach(tgt IN LISTS ICEBERG_OBJECT_LIBS)
+  if(TARGET ${tgt})
+    if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+      # AWSSDK variables come from find_package(AWSSDK ...)
+      if(DEFINED AWSSDK_LINK_LIBRARIES)
+        target_link_libraries(${tgt} PUBLIC ${AWSSDK_LINK_LIBRARIES})
+      endif()
+      if(DEFINED CURL_LIBRARIES)
+        target_link_libraries(${tgt} PUBLIC ${CURL_LIBRARIES})
+      endif()
+    endif()
+
+    # Link roaring imported targets (these are imported targets provided by the package)
+    # This will propagate include directories for the roaring headers
+    target_link_libraries(${tgt} PUBLIC roaring::roaring roaring::roaring-headers roaring::roaring-headers-cpp)
+
+    # Ensure sources in these OBJECT libraries see headers in src/include
+    target_include_directories(${tgt} PUBLIC ${PROJECT_INC_DIR})
+  endif()
+endforeach()
+
+add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES})
+
+set(PARAMETERS "-warnings")
+build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES})
+
+
 # Reset the TARGET_NAME, the AWS find_package build could bleed into our build -
 # overriding `TARGET_NAME`
 set(TARGET_NAME iceberg)
@@ -116,6 +88,10 @@ endif()
 target_link_libraries(${EXTENSION_NAME} PUBLIC roaring::roaring roaring::roaring-headers roaring::roaring-headers-cpp)
 target_link_libraries(${TARGET_NAME}_loadable_extension roaring::roaring roaring::roaring-headers roaring::roaring-headers-cpp)
 
+# Attach the project include directory to the final targets so IDEs index correctly
+target_include_directories(${EXTENSION_NAME} PUBLIC ${PROJECT_INC_DIR})
+target_include_directories(${TARGET_NAME}_loadable_extension PUBLIC ${PROJECT_INC_DIR})
+
 install(
   TARGETS ${EXTENSION_NAME} ${TARGET_NAME}_loadable_extension
   EXPORT "${DUCKDB_EXPORT_SET}"
diff --git a/extension_config.cmake b/extension_config.cmake
index a65e0f795..66a164e2c 100644
--- a/extension_config.cmake
+++ b/extension_config.cmake
@@ -1,3 +1,9 @@
+# Extension from this repo
+duckdb_extension_load(iceberg
+    SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
+    LOAD_TESTS
+)
+
 # This file is included by DuckDB's build system. It specifies which extension to load
 if (NOT EMSCRIPTEN)
 duckdb_extension_load(avro
@@ -7,12 +13,6 @@ duckdb_extension_load(avro
 )
 endif()
 
-# Extension from this repo
-duckdb_extension_load(iceberg
-    SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
-    LOAD_TESTS
-)
-
 if (NOT EMSCRIPTEN)
     duckdb_extension_load(tpch)
     duckdb_extension_load(icu)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 000000000..9e9504192
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,31 @@
+add_subdirectory(common)
+add_subdirectory(deletes)
+add_subdirectory(metadata)
+add_subdirectory(iceberg_functions)
+add_subdirectory(utils)
+add_subdirectory(storage)
+add_subdirectory(rest_catalog/objects)
+
+# Top-level src/ translation units (those living directly under src/)
+add_library(
+  iceberg_src_objects OBJECT
+  iceberg_extension.cpp
+  iceberg_functions.cpp
+  iceberg_manifest.cpp
+  iceberg_manifest_list.cpp
+  avro_scan.cpp
+  iceberg_snapshot_lookup.cpp
+  catalog_api.cpp
+  iceberg_logging.cpp
+  catalog_utils.cpp
+  aws.cpp
+  hash_utils.cpp
+  base_manifest_reader.cpp
+  manifest_list_reader.cpp
+  manifest_file_reader.cpp
+  iceberg_predicate.cpp
+  iceberg_value.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_src_objects>
+    PARENT_SCOPE)
diff --git a/src/catalog_api.cpp b/src/catalog_api.cpp
index fd1029a67..3383bca02 100644
--- a/src/catalog_api.cpp
+++ b/src/catalog_api.cpp
@@ -1,6 +1,4 @@
 #include "catalog_api.hpp"
-#include "include/catalog_api.hpp"
-
 #include "catalog_utils.hpp"
 #include "iceberg_logging.hpp"
 #include "storage/irc_catalog.hpp"
@@ -14,9 +12,7 @@
 #include "duckdb/common/error_data.hpp"
 #include "duckdb/common/http_util.hpp"
 #include "duckdb/common/exception/http_exception.hpp"
-#include "include/storage/irc_authorization.hpp"
-#include "include/storage/irc_catalog.hpp"
-
+#include "storage/irc_authorization.hpp"
 #include "rest_catalog/objects/list.hpp"
 
 using namespace duckdb_yyjson;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
new file mode 100644
index 000000000..2a845b9f5
--- /dev/null
+++ b/src/common/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_library(iceberg_common_objects OBJECT utils.cpp url_utils.cpp iceberg.cpp
+                                          api_utils.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_common_objects>
+    PARENT_SCOPE)
diff --git a/src/deletes/CMakeLists.txt b/src/deletes/CMakeLists.txt
new file mode 100644
index 000000000..7fad018db
--- /dev/null
+++ b/src/deletes/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_library(iceberg_deletes_objects OBJECT
+            equality_delete.cpp positional_delete.cpp deletion_vector.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_deletes_objects>
+    PARENT_SCOPE)
diff --git a/src/iceberg_functions/CMakeLists.txt b/src/iceberg_functions/CMakeLists.txt
new file mode 100644
index 000000000..689f675c8
--- /dev/null
+++ b/src/iceberg_functions/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_library(
+  iceberg_functions_objects OBJECT
+  iceberg_multi_file_reader.cpp
+  iceberg_avro_multi_file_reader.cpp
+  iceberg_deletes_file_reader.cpp
+  iceberg_multi_file_list.cpp
+  iceberg_snapshots.cpp
+  iceberg_scan.cpp
+  iceberg_metadata.cpp
+  iceberg_table_properties_functions.cpp
+  iceberg_to_ducklake.cpp
+  iceberg_column_stats.cpp
+  iceberg_partition_stats.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_functions_objects>
+    PARENT_SCOPE)
diff --git a/src/metadata/CMakeLists.txt b/src/metadata/CMakeLists.txt
new file mode 100644
index 000000000..813523eac
--- /dev/null
+++ b/src/metadata/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_library(
+  iceberg_metadata_objects OBJECT
+  iceberg_transform.cpp
+  iceberg_table_schema.cpp
+  iceberg_partition_spec.cpp
+  iceberg_sort_order.cpp
+  iceberg_snapshot.cpp
+  iceberg_field_mapping.cpp
+  iceberg_column_definition.cpp
+  iceberg_table_metadata.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_metadata_objects>
+    PARENT_SCOPE)
diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt
new file mode 100644
index 000000000..71dc41287
--- /dev/null
+++ b/src/storage/CMakeLists.txt
@@ -0,0 +1,23 @@
+add_subdirectory(authorization)
+add_subdirectory(table_update)
+add_subdirectory(create_table)
+
+add_library(
+  iceberg_storage_objects OBJECT
+  iceberg_insert.cpp
+  iceberg_delete.cpp
+  iceberg_update.cpp
+  iceberg_transaction_data.cpp
+  irc_authorization.cpp
+  irc_catalog.cpp
+  irc_schema_entry.cpp
+  irc_schema_set.cpp
+  irc_table_entry.cpp
+  irc_table_set.cpp
+  irc_transaction.cpp
+  irc_transaction_manager.cpp
+  iceberg_table_information.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_storage_objects>
+    PARENT_SCOPE)
diff --git a/src/storage/authorization/CMakeLists.txt b/src/storage/authorization/CMakeLists.txt
new file mode 100644
index 000000000..d80cf8776
--- /dev/null
+++ b/src/storage/authorization/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_library(iceberg_storage_auth_objects OBJECT sigv4.cpp none.cpp oauth2.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_storage_auth_objects>
+    PARENT_SCOPE)
diff --git a/src/storage/create_table/CMakeLists.txt b/src/storage/create_table/CMakeLists.txt
new file mode 100644
index 000000000..b36d1f36b
--- /dev/null
+++ b/src/storage/create_table/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_library(iceberg_storage_create_table_objects OBJECT
+            iceberg_create_table_request.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_storage_create_table_objects>
+    PARENT_SCOPE)
diff --git a/src/storage/table_update/CMakeLists.txt b/src/storage/table_update/CMakeLists.txt
new file mode 100644
index 000000000..0f9751655
--- /dev/null
+++ b/src/storage/table_update/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_library(iceberg_storage_table_update_objects OBJECT iceberg_add_snapshot.cpp
+                                                        common.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_storage_table_update_objects>
+    PARENT_SCOPE)
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
new file mode 100644
index 000000000..32e32e884
--- /dev/null
+++ b/src/utils/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_library(iceberg_utils_objects OBJECT iceberg_type.cpp)
+
+set(ALL_OBJECT_FILES
+    ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:iceberg_utils_objects>
+    PARENT_SCOPE)