diff --git a/.gersemirc b/.gersemirc index 6d6ca521020..ebf5981dbfd 100644 --- a/.gersemirc +++ b/.gersemirc @@ -5,7 +5,6 @@ indent: 2 definitions: - CMake/third-party/FBCMakeParseArgs.cmake - CMake/third-party/FBThriftCppLibrary.cmake - - CMake/FindThrift.cmake - velox/experimental/breeze/cmake - velox/experimental/breeze/test - velox/experimental/breeze/perftest diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 00000000000..2ccc0476447 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,140 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Ubuntu Benchmark + +on: + pull_request: + paths: + - .github/workflows/benchmark.yml + - scripts/ci/benchmark-requirements.txt + - scripts/setup-ubuntu.sh + + push: + branches: [main] + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.sha }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +jobs: + benchmark: + if: github.repository == 'facebookincubator/velox' + runs-on: 8-core-ubuntu-22.04 + env: + CCACHE_DIR: ${{ github.workspace }}/ccache/ + CCACHE_BASEDIR: ${{ github.workspace }} + BINARY_DIR: ${{ github.workspace }}/benchmarks/ + CONTENDER_OUTPUT_PATH: ${{ github.workspace }}/benchmark-results/contender/ + INSTALL_PREFIX: ${{ github.workspace }}/dependencies + steps: + + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + path: velox + + persist-credentials: false + + - name: Restore Dependencies + uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + id: restore-deps + with: + path: ${{ env.INSTALL_PREFIX }} + key: dependencies-benchmark-${{ hashFiles('velox/scripts/setup-ubuntu.sh') }} + + - name: Install apt dependencies + env: + UV_TOOL_BIN_DIR: /usr/local/bin + run: | + source velox/scripts/setup-ubuntu.sh + install_apt_deps + + - name: Install compiled dependencies + if: ${{ steps.restore-deps.outputs.cache-hit != 'true' }} + env: + CCACHE_DISABLE: 'true' + run: | + source velox/scripts/setup-ubuntu.sh + run_and_time install_fmt + run_and_time install_protobuf + run_and_time install_boost + run_and_time install_fast_float + run_and_time install_folly + run_and_time install_fizz + run_and_time install_wangle + run_and_time install_mvfst + run_and_time install_fbthrift + run_and_time install_stemmer + run_and_time install_arrow + + - name: Save Dependencies + if: ${{ steps.restore-deps.outputs.cache-hit != 'true' }} + uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 + with: + path: ${{ 
env.INSTALL_PREFIX }} + key: dependencies-benchmark-${{ hashFiles('velox/scripts/setup-ubuntu.sh') }} + + - name: Restore ccache + uses: apache/infrastructure-actions/stash/restore@3354c1565d4b0e335b78a76aedd82153a9e144d4 + id: restore-cache + with: + clean: true + path: ccache + key: ccache-benchmark + + - name: Clear CCache Statistics + run: | + ccache -sz + + - name: Build Contender Benchmarks + working-directory: velox + run: | + n_cores=$(nproc) + make benchmarks-basic-build NUM_THREADS=$n_cores MAX_HIGH_MEM_JOBS=$n_cores MAX_LINK_JOBS=$n_cores + mkdir -p ${BINARY_DIR}/contender/ + cp -r --verbose _build/release/velox/benchmarks/basic/* ${BINARY_DIR}/contender/ + + - name: CCache after + run: | + ccache -vs + + - name: Save ccache + uses: apache/infrastructure-actions/stash/save@3354c1565d4b0e335b78a76aedd82153a9e144d4 + with: + path: ccache + key: ccache-benchmark + + - name: Install benchmark dependencies + run: | + python3 -m pip install -r velox/scripts/ci/benchmark-requirements.txt + + - name: Run Benchmarks - Contender + working-directory: velox + run: | + make benchmarks-basic-run \ + EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/contender/ --output_path ${CONTENDER_OUTPUT_PATH}" + + - name: Upload result artifact + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + path: benchmark-results + name: benchmark-results + retention-days: 5 diff --git a/.github/workflows/linux-build-base.yml b/.github/workflows/linux-build-base.yml index 401f3fa48f5..39fb9b1ce29 100644 --- a/.github/workflows/linux-build-base.yml +++ b/.github/workflows/linux-build-base.yml @@ -111,7 +111,7 @@ jobs: - name: Install Dependencies env: VELOX_BUILD_SHARED: "ON" - VELOX_ARROW_CMAKE_PATCH: ${{ github.workspace }}/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch + VELOX_FBTHRIFT_CMAKE_PATCH: ${{ github.workspace }}/CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch run: | if git diff --name-only 
HEAD^1 HEAD | grep -q "scripts/setup-"; then echo "Removing previous AWS SDK and s2n installations to avoid conflicts..." @@ -123,6 +123,7 @@ jobs: mkdir /tmp/build cd /tmp/build source /opt/rh/gcc-toolset-12/enable + export VELOX_ARROW_CMAKE_PATCH="${GITHUB_WORKSPACE}/CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch ${GITHUB_WORKSPACE}/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch" # Install basic deps with GCC. Some deps have problems (e.g. folly missing atomic lib). USE_CLANG=false bash /setup-centos9.sh @@ -434,7 +435,8 @@ jobs: - name: Install Dependencies env: - VELOX_ARROW_CMAKE_PATCH: ${{ github.workspace }}/velox/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch + VELOX_BUILD_SHARED: "ON" + VELOX_FBTHRIFT_CMAKE_PATCH: ${{ github.workspace }}/velox/CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch run: | if git diff --name-only HEAD^1 HEAD | grep -q "scripts/setup-"; then # Overwrite old setup scripts with changed versions @@ -443,6 +445,7 @@ jobs: mkdir /tmp/build cd /tmp/build + export VELOX_ARROW_CMAKE_PATCH="${GITHUB_WORKSPACE}/velox/CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch ${GITHUB_WORKSPACE}/velox/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch" USE_CLANG=false bash /setup-ubuntu.sh cd / @@ -460,6 +463,26 @@ jobs: run: | mkdir -p "$CCACHE_DIR" + - uses: actions/checkout@v5 + with: + path: velox + persist-credentials: false + + - name: Install Dependencies + run: | + source scripts/setup-ubuntu.sh + install_apt_deps + install_faiss_deps + # We can remove them once we bundle FBThrift. + install_fmt + install_boost + install_fast_float + install_folly + install_fizz + install_wangle + install_mvfst + install_fbthrift + - name: Clear CCache Statistics run: | ccache -sz @@ -469,10 +492,14 @@ jobs: env: VELOX_DEPENDENCY_SOURCE: SYSTEM ICU_SOURCE: SYSTEM - # Use BUNDLED gflags to provide PIC static gflags for .so plugins. 
- # The container's folly is built with -DGFLAGS_SHARED=FALSE so its - # exported config references gflags_static which BUNDLED gflags provides. - gflags_SOURCE: BUNDLED + # FBThrift has transitive Boost dependency. We can't mix + # bundled Boost and system Boost. + Boost_SOURCE: SYSTEM + # Recent FBThrift needs recent Folly. + folly_SOURCE: SYSTEM + # System FBThrift is built with system GFlags. We need to + # use system GFlags with system FBThrift. + gflags_SOURCE: SYSTEM # Keep system glog (container's glog is built against the same gflags # version). Without this, BUNDLED gflags cascades to BUNDLED glog # which conflicts with system glog loaded transitively. @@ -597,7 +624,7 @@ jobs: - name: Install Dependencies env: VELOX_BUILD_SHARED: "ON" - VELOX_ARROW_CMAKE_PATCH: ${{ github.workspace }}/velox/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch + VELOX_FBTHRIFT_CMAKE_PATCH: ${{ github.workspace }}/velox/CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch run: | if git diff --name-only HEAD^1 HEAD | grep -q "scripts/setup-"; then # Overwrite old setup scripts with changed versions @@ -606,7 +633,9 @@ jobs: mkdir /tmp/build cd /tmp/build - # Install basic deps with GCC. + # Use absolute path within the container for patches + export VELOX_ARROW_CMAKE_PATCH="${GITHUB_WORKSPACE}/velox/CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch ${GITHUB_WORKSPACE}/velox/CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch" + # Install basic deps with GCC. Some deps have problems (e.g. folly missing atomic lib). 
USE_CLANG=false bash /setup-fedora.sh cd / @@ -633,13 +662,11 @@ jobs: env: VELOX_DEPENDENCY_SOURCE: SYSTEM faiss_SOURCE: BUNDLED - fmt_SOURCE: BUNDLED simdjson_SOURCE: BUNDLED gRPC_SOURCE: SYSTEM MAKEFLAGS: NUM_THREADS=32 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=6 EXTRA_CMAKE_FLAGS: >- -DVELOX_ENABLE_PARQUET=ON - -DARROW_THRIFT_USE_SHARED=ON -DVELOX_ENABLE_EXAMPLES=ON run: | uv tool install --force cmake@3.31.1 diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 69a45d4950e..e81ec9d7ab5 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -66,6 +66,12 @@ jobs: with: persist-credentials: false + - name: Cache ccache + uses: apache/infrastructure-actions/stash/restore@3354c1565d4b0e335b78a76aedd82153a9e144d4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-macos-1-macos-15-${{ matrix.type }} + - name: Install Dependencies env: HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: 'TRUE' @@ -76,6 +82,17 @@ jobs: install_velox_deps_from_brew install_gflags install_glog + # We can use Homebrew's FBThrift once Homebrew's Folly + # enables 128bit integer support. We need to fix + # https://github.com/facebook/folly/issues/1666 for it. 
+ install_boost + install_fmt + install_fast_float + install_folly + install_fizz + install_wangle + install_mvfst + install_fbthrift # Needed for faiss to find BLAS install_faiss_deps install_double_conversion @@ -88,15 +105,8 @@ jobs: fi echo "$INSTALL_PREFIX/bin" >> $GITHUB_PATH - - name: Cache ccache - uses: apache/infrastructure-actions/stash/restore@3354c1565d4b0e335b78a76aedd82153a9e144d4 - with: - path: ${{ env.CCACHE_DIR }} - key: ccache-macos-1-macos-15-${{ matrix.type }} - - name: Configure Build env: - fmt_SOURCE: BUNDLED #brew fmt11 is not supported faiss_SOURCE: BUNDLED #brew faiss is not supported CMAKE_POLICY_VERSION_MINIMUM: '3.5' run: | diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 8b774149d4f..49ea1d00ab0 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -95,7 +95,7 @@ jobs: # prevent errors when forks ff their main branch if: ${{ github.repository == 'facebookincubator/velox' }} runs-on: 32-core-ubuntu - container: ghcr.io/facebookincubator/velox-dev:centos9 + container: ghcr.io/czentgr/czentgr-test:dependencies timeout-minutes: 120 env: CCACHE_DIR: ${{ github.workspace }}/ccache @@ -664,7 +664,7 @@ jobs: spark-aggregate-fuzzer-run: name: Spark Aggregate Fuzzer runs-on: 4-core-ubuntu - container: ghcr.io/facebookincubator/velox-dev:spark-server + container: ghcr.io/czentgr/czentgr-test:spark-java needs: compile timeout-minutes: 30 steps: diff --git a/.github/workflows/ubuntu-bundled-deps.yml b/.github/workflows/ubuntu-bundled-deps.yml index 801e93d7d14..735eb340ccc 100644 --- a/.github/workflows/ubuntu-bundled-deps.yml +++ b/.github/workflows/ubuntu-bundled-deps.yml @@ -70,7 +70,11 @@ jobs: - name: Install Dependencies run: | - source scripts/setup-ubuntu.sh && install_apt_deps && install_faiss_deps + source scripts/setup-ubuntu.sh && \ + install_apt_deps && install_faiss_deps && \ + install_fmt && install_boost && install_fast_float && \ + install_folly && install_fizz 
&& install_wangle && install_mvfst && \ + install_fbthrift - name: Clear CCache Statistics run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3124eae3d29..6cad5b27ccf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -76,7 +76,7 @@ repos: require_serial: true exclude: | (?x)^( - CMake/Find(Snappy|Sodium|Thrift|double-conversion)\.cmake| + CMake/Find(Snappy|Sodium|double-conversion)\.cmake| velox/docs/affiliations_map.txt| velox/.*/bitpacking\.(cpp|h)| velox/.*/Lemire/.*| diff --git a/CMake/FindArrow.cmake b/CMake/FindArrow.cmake index 99bd6412f6f..0c195c0841a 100644 --- a/CMake/FindArrow.cmake +++ b/CMake/FindArrow.cmake @@ -17,7 +17,6 @@ include(FindPackageHandleStandardArgs) find_library(ARROW_LIB libarrow.a) find_library(ARROW_TESTING_LIB libarrow_testing.a) find_path(ARROW_INCLUDE_PATH arrow/api.h) -find_package(Thrift) find_package_handle_standard_args( Arrow @@ -25,23 +24,18 @@ find_package_handle_standard_args( ARROW_LIB ARROW_TESTING_LIB ARROW_INCLUDE_PATH - Thrift_FOUND ) # Only add the libraries once. if(Arrow_FOUND AND NOT TARGET arrow) add_library(arrow STATIC IMPORTED GLOBAL) add_library(arrow_testing STATIC IMPORTED GLOBAL) - add_library(thrift ALIAS thrift::thrift) set_target_properties( arrow arrow_testing PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ARROW_INCLUDE_PATH} ) - set_target_properties( - arrow - PROPERTIES IMPORTED_LOCATION ${ARROW_LIB} INTERFACE_LINK_LIBRARIES thrift - ) + set_target_properties(arrow PROPERTIES IMPORTED_LOCATION ${ARROW_LIB}) set_target_properties(arrow_testing PROPERTIES IMPORTED_LOCATION ${ARROW_TESTING_LIB}) endif() diff --git a/CMake/FindThrift.cmake b/CMake/FindThrift.cmake deleted file mode 100644 index 48c1a84b1d1..00000000000 --- a/CMake/FindThrift.cmake +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2012 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# * Find Thrift (a cross platform RPC lib/tool) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# Thrift_ROOT - When set, this path is inspected instead of standard library -# locations as the root of the Thrift installation. The environment variable -# THRIFT_HOME overrides this variable. -# -# This module defines Thrift_FOUND, whether Thrift is found or not -# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not -# -# thrift::thrift, a library target to use Thrift thrift::compiler, a executable -# target to use Thrift compiler - -function(EXTRACT_THRIFT_VERSION) - if(THRIFT_INCLUDE_DIR) - file(READ "${THRIFT_INCLUDE_DIR}/thrift/config.h" THRIFT_CONFIG_H_CONTENT) - string( - REGEX MATCH - "#define PACKAGE_VERSION \"[0-9.]+\"" - THRIFT_VERSION_DEFINITION - "${THRIFT_CONFIG_H_CONTENT}" - ) - string(REGEX MATCH "[0-9.]+" Thrift_VERSION "${THRIFT_VERSION_DEFINITION}") - set(Thrift_VERSION "${Thrift_VERSION}" PARENT_SCOPE) - else() - set(Thrift_VERSION "" PARENT_SCOPE) - endif() -endfunction(EXTRACT_THRIFT_VERSION) - -if(MSVC_TOOLCHAIN AND NOT DEFINED THRIFT_MSVC_LIB_SUFFIX) - if(NOT ARROW_THRIFT_USE_SHARED) - if(ARROW_USE_STATIC_CRT) - if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(THRIFT_MSVC_LIB_SUFFIX "mtd") - else() - set(THRIFT_MSVC_LIB_SUFFIX "mt") - endif() - else() - if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") - set(THRIFT_MSVC_LIB_SUFFIX "mdd") - else() - set(THRIFT_MSVC_LIB_SUFFIX "md") - endif() - endif() - endif() -endif() -set(THRIFT_LIB_NAME_BASE 
"thrift${THRIFT_MSVC_LIB_SUFFIX}") - -if(ARROW_THRIFT_USE_SHARED) - set(THRIFT_LIB_NAMES thrift) - if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list( - APPEND - THRIFT_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" - ) - endif() - list( - APPEND - THRIFT_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) -else() - set( - THRIFT_LIB_NAMES - "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) -endif() - -if(Thrift_ROOT) - find_library( - THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" - ) - find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATHS ${Thrift_ROOT} PATH_SUFFIXES "include") - find_program(THRIFT_COMPILER thrift PATHS ${Thrift_ROOT} PATH_SUFFIXES "bin") - EXTRACT_THRIFT_VERSION() -else() - # THRIFT-4760: The pkgconfig files are currently only installed when using - # autotools. Starting with 0.13, they are also installed for the CMake-based - # installations of Thrift. 
- find_package(PkgConfig QUIET) - pkg_check_modules(THRIFT_PC thrift) - if(THRIFT_PC_FOUND) - set(THRIFT_INCLUDE_DIR "${THRIFT_PC_INCLUDEDIR}") - - list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}") - - find_library( - THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATHS ${THRIFT_PC_LIBRARY_DIRS} - NO_DEFAULT_PATH - ) - find_program( - THRIFT_COMPILER - thrift - HINTS ${THRIFT_PC_PREFIX} - NO_DEFAULT_PATH - PATH_SUFFIXES "bin" - ) - set(Thrift_VERSION ${THRIFT_PC_VERSION}) - else() - find_library( - THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib" - ) - find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include") - find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin") - EXTRACT_THRIFT_VERSION() - endif() -endif() - -if(THRIFT_COMPILER) - set(Thrift_COMPILER_FOUND TRUE) -else() - set(Thrift_COMPILER_FOUND FALSE) -endif() - -find_package_handle_standard_args( - Thrift - REQUIRED_VARS THRIFT_LIB THRIFT_INCLUDE_DIR - VERSION_VAR Thrift_VERSION - HANDLE_COMPONENTS -) - -if(Thrift_FOUND) - if(ARROW_THRIFT_USE_SHARED) - add_library(thrift::thrift SHARED IMPORTED) - else() - add_library(thrift::thrift STATIC IMPORTED) - endif() - set_target_properties( - thrift::thrift - PROPERTIES - IMPORTED_LOCATION "${THRIFT_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${THRIFT_INCLUDE_DIR}" - ) - if(WIN32 AND NOT MSVC_TOOLCHAIN) - # We don't need this for Visual C++ because Thrift uses "#pragma - # comment(lib, "Ws2_32.lib")" in thrift/windows/config.h for Visual C++. 
- set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32") - endif() - - if(Thrift_COMPILER_FOUND) - add_executable(thrift::compiler IMPORTED) - set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION "${THRIFT_COMPILER}") - endif() -endif() diff --git a/CMake/VeloxConfig.cmake.in b/CMake/VeloxConfig.cmake.in index 158f47277b4..bac5d755d68 100644 --- a/CMake/VeloxConfig.cmake.in +++ b/CMake/VeloxConfig.cmake.in @@ -57,9 +57,6 @@ block() if("@simdjson_SOURCE@" STREQUAL "SYSTEM") find_dependency(simdjson) endif() - if("@Thrift_FOUND@") - find_dependency(Thrift) - endif() if("@xsimd_SOURCE@" STREQUAL "SYSTEM") find_dependency(xsimd) endif() diff --git a/CMake/VeloxUtils.cmake b/CMake/VeloxUtils.cmake index 5095b987438..1368fdd8379 100644 --- a/CMake/VeloxUtils.cmake +++ b/CMake/VeloxUtils.cmake @@ -173,7 +173,7 @@ function(velox_add_library TARGET) ) endif() endif() - # create alias for compatability + # create alias for compatibility if(NOT TARGET ${TARGET}) add_library(${TARGET} ALIAS velox) endif() diff --git a/CMake/resolve_dependency_modules/README.md b/CMake/resolve_dependency_modules/README.md index 83d70a2c54e..ff6e184ea19 100644 --- a/CMake/resolve_dependency_modules/README.md +++ b/CMake/resolve_dependency_modules/README.md @@ -43,7 +43,6 @@ by Velox. See details on bundling below. | geos | 3.10.7 | Yes || | fast_float | v8.0.2 | Yes || | xxhash | default | No || -| thrift | 0.16 | No || # Bundled Dependency Management This module provides a dependency management system that allows us to automatically fetch and build dependencies from source if needed. 
diff --git a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt index 4f4e4031934..64d0c99508d 100644 --- a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt +++ b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt @@ -14,12 +14,6 @@ project(Arrow) if(VELOX_ENABLE_ARROW) - if(Thrift_FOUND) - set(THRIFT_SOURCE "SYSTEM") - else() - set(THRIFT_SOURCE "BUNDLED") - endif() - # Avoid issues in finding the boost headers and libraries # by setting the BOOST_ROOT to the install prefix. # The same logic is used in the setup script-common.sh to install boost. @@ -38,7 +32,6 @@ if(VELOX_ENABLE_ARROW) ARROW_CMAKE_ARGS -DARROW_PARQUET=OFF -DARROW_DEPENDENCY_SOURCE=AUTO - -DARROW_WITH_THRIFT=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_ZLIB=ON @@ -52,27 +45,15 @@ if(VELOX_ENABLE_ARROW) -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DARROW_BUILD_STATIC=ON - -DThrift_SOURCE=${THRIFT_SOURCE} -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} -DBOOST_ROOT=${BOOST_ROOT} + -DBoost_NO_BOOST_CMAKE=TRUE -DCMAKE_POLICY_VERSION_MINIMUM=3.5 # Remove with Arrow upgrade to Arrow 20. 
-DARROW_CXXFLAGS=-Wno-documentation ) set(ARROW_LIBDIR ${ARROW_PREFIX}/install/lib) - add_library(thrift STATIC IMPORTED GLOBAL) - if(NOT Thrift_FOUND) - set(THRIFT_ROOT ${ARROW_PREFIX}/src/arrow_ep-build/thrift_ep-install) - set(THRIFT_LIB ${THRIFT_ROOT}/lib/libthrift.a) - - file(MAKE_DIRECTORY ${THRIFT_ROOT}/include) - set(THRIFT_INCLUDE_DIR ${THRIFT_ROOT}/include) - endif() - - set_property(TARGET thrift PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${THRIFT_INCLUDE_DIR}) - set_property(TARGET thrift PROPERTY IMPORTED_LOCATION ${THRIFT_LIB}) - set(VELOX_ARROW_BUILD_VERSION 18.0.0) set( VELOX_ARROW_BUILD_SHA256_CHECKSUM @@ -92,10 +73,10 @@ if(VELOX_ENABLE_ARROW) URL_HASH ${VELOX_ARROW_BUILD_SHA256_CHECKSUM} SOURCE_SUBDIR cpp CMAKE_ARGS ${ARROW_CMAKE_ARGS} - BUILD_BYPRODUCTS ${ARROW_LIBDIR}/libarrow.a ${ARROW_LIBDIR}/libarrow_testing.a ${THRIFT_LIB} + BUILD_BYPRODUCTS ${ARROW_LIBDIR}/libarrow.a ${ARROW_LIBDIR}/libarrow_testing.a PATCH_COMMAND - git apply ${CMAKE_CURRENT_LIST_DIR}/cmake-compatibility.patch && git apply - ${CMAKE_CURRENT_LIST_DIR}/thrift-download.patch + patch -p1 -i ${CMAKE_CURRENT_LIST_DIR}/cmake-compatibility.patch && patch -p1 -i + ${CMAKE_CURRENT_LIST_DIR}/arrow-testing-boost.patch ) add_library(arrow STATIC IMPORTED GLOBAL) @@ -109,7 +90,7 @@ if(VELOX_ENABLE_ARROW) PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ARROW_PREFIX}/install/include ) set_target_properties(arrow PROPERTIES IMPORTED_LOCATION ${ARROW_LIBDIR}/libarrow.a) - set_property(TARGET arrow PROPERTY INTERFACE_LINK_LIBRARIES ${RE2} thrift) + set_property(TARGET arrow PROPERTY INTERFACE_LINK_LIBRARIES ${RE2}) set_target_properties( arrow_testing PROPERTIES IMPORTED_LOCATION ${ARROW_LIBDIR}/libarrow_testing.a diff --git a/CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch b/CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch new file mode 100644 index 00000000000..110bb066fae --- /dev/null +++ b/CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch @@ -0,0 
+1,70 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +This is based on +https://github.com/apache/arrow/pull/45424 . + +diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake +index db151b4..1da99f1 100644 +--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake ++++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake +@@ -1266,13 +1266,19 @@ endif() + # - Gandiva has a compile-time (header-only) dependency on Boost, not runtime. + # - Tests need Boost at runtime. + # - S3FS and Flight benchmarks need Boost at runtime. ++# - arrow_testing uses boost::filesystem. So arrow_testing requires ++# Boost library. (boost::filesystem isn't header-only.) But if we ++# use arrow_testing as a static library without ++# using arrow::util::Process, we don't need boost::filesystem. 
+ if(ARROW_BUILD_INTEGRATION + OR ARROW_BUILD_TESTS + OR (ARROW_FLIGHT AND (ARROW_TESTING OR ARROW_BUILD_BENCHMARKS)) +- OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS)) ++ OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS) ++ OR (ARROW_TESTING AND ARROW_BUILD_SHARED)) + set(ARROW_USE_BOOST TRUE) + set(ARROW_BOOST_REQUIRE_LIBRARY TRUE) + elseif(ARROW_GANDIVA ++ OR ARROW_TESTING + OR ARROW_WITH_THRIFT + OR (NOT ARROW_USE_NATIVE_INT128)) + set(ARROW_USE_BOOST TRUE) +diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt +index c911f0f..84673d4 100644 +--- a/cpp/src/arrow/CMakeLists.txt ++++ b/cpp/src/arrow/CMakeLists.txt +@@ -645,11 +645,10 @@ else() + endif() + + set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) +-set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process) ++set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) + set(ARROW_TESTING_STATIC_LINK_LIBS + arrow::flatbuffers + RapidJSON +- Boost::process + arrow_static + ${ARROW_GTEST_GTEST}) + set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) +@@ -674,7 +673,6 @@ set(ARROW_TESTING_SRCS + testing/fixed_width_test_util.cc + testing/generator.cc + testing/gtest_util.cc +- testing/process.cc + testing/random.cc + testing/util.cc) + diff --git a/CMake/resolve_dependency_modules/arrow/thrift-download.patch b/CMake/resolve_dependency_modules/arrow/thrift-download.patch deleted file mode 100644 index 92b8d87dd08..00000000000 --- a/CMake/resolve_dependency_modules/arrow/thrift-download.patch +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- a/cpp/thirdparty/versions.txt -+++ b/cpp/thirdparty/versions.txt -@@ -108,7 +108,7 @@ - ARROW_S2N_TLS_BUILD_VERSION=v1.3.35 - ARROW_S2N_TLS_BUILD_SHA256_CHECKSUM=9d32b26e6bfcc058d98248bf8fc231537e347395dd89cf62bb432b55c5da990d - ARROW_THRIFT_BUILD_VERSION=0.16.0 --ARROW_THRIFT_BUILD_SHA256_CHECKSUM=f460b5c1ca30d8918ff95ea3eb6291b3951cf518553566088f3f2be8981f6209 -+ARROW_THRIFT_BUILD_SHA256_CHECKSUM=df2931de646a366c2e5962af679018bca2395d586e00ba82d09c0379f14f8e7b - ARROW_UCX_BUILD_VERSION=1.12.1 - ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71 - ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 -@@ -164,7 +164,7 @@ - "ARROW_RE2_URL re2-${ARROW_RE2_BUILD_VERSION}.tar.gz https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" - "ARROW_S2N_TLS_URL s2n-${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz https://github.com/aws/s2n-tls/archive/${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz" - "ARROW_SNAPPY_URL snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" -- "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://archive.apache.org/dist/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -+ "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://github.com/apache/thrift/archive/refs/tags/v${ARROW_THRIFT_BUILD_VERSION}.tar.gz" - "ARROW_UCX_URL ucx-${ARROW_UCX_BUILD_VERSION}.tar.gz https://github.com/openucx/ucx/archive/v${ARROW_UCX_BUILD_VERSION}.tar.gz" - "ARROW_UTF8PROC_URL 
utf8proc-${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" - "ARROW_XSIMD_URL xsimd-${ARROW_XSIMD_BUILD_VERSION}.tar.gz https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz" ---- a/cpp/cmake_modules/ThirdpartyToolchain.cmake -+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake -@@ -817,20 +817,7 @@ - set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}") - else() - set_urls(THRIFT_SOURCE_URL -- "https://www.apache.org/dyn/closer.cgi?action=download&filename=/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://downloads.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://apache.claz.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://apache.cs.utah.edu/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://apache.mirrors.lucidnetworks.net/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://apache.osuosl.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://ftp.wayne.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://mirror.olnevhost.net/pub/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://mirrors.gigenet.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://mirrors.koehn.com/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://mirrors.ocf.berkeley.edu/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "https://mirrors.sonic.net/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- 
"https://us.mirrors.quenda.co/apache/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" -- "${THIRDPARTY_MIRROR_URL}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz") -+ "https://github.com/apache/thrift/archive/refs/tags/v${ARROW_THRIFT_BUILD_VERSION}.tar.gz") - endif() - - if(DEFINED ENV{ARROW_UCX_URL}) diff --git a/CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch b/CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch new file mode 100644 index 00000000000..d1f57260d35 --- /dev/null +++ b/CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch @@ -0,0 +1,27 @@ +--- a/thrift/lib/cpp2/protocol/CompactProtocol.h ++++ b/thrift/lib/cpp2/protocol/CompactProtocol.h +@@ -380,6 +380,7 @@ class CompactProtocolReader : public detail::ProtocolBase { + template + friend class ProtocolReaderWithRefill; + friend class CompactProtocolReaderWithRefill; ++ friend class CompactV1ProtocolReaderWithRefill; + }; + + struct CompactProtocolReader::StructReadState { +--- a/thrift/lib/cpp2/protocol/CompactV1Protocol.h ++++ b/thrift/lib/cpp2/protocol/CompactV1Protocol.h +@@ -140,7 +140,14 @@ class CompactV1ProtocolReader : protected CompactProtocolReader { + using CompactProtocolReader::getCursor; + using CompactProtocolReader::getCursorPosition; + ++ using CompactProtocolReader::kOmitsStringSizes; ++ using CompactProtocolReader::kHasDeferredRead; ++ + static constexpr bool kSupportsArithmeticVectors() { return false; } ++ ++ template ++ friend class ProtocolReaderWithRefill; ++ friend class CompactV1ProtocolReaderWithRefill; + }; + + } // namespace apache::thrift diff --git a/CMake/third-party/FBThriftCppLibrary.cmake b/CMake/third-party/FBThriftCppLibrary.cmake index 416a88b752f..4807f90c26e 100644 --- a/CMake/third-party/FBThriftCppLibrary.cmake +++ b/CMake/third-party/FBThriftCppLibrary.cmake @@ -114,6 +114,7 @@ function(add_fbthrift_cpp_library LIB_NAME THRIFT_FILE) COMMAND 
"${CMAKE_COMMAND}" -E make_directory "${output_dir}" COMMAND + "${CMAKE_COMMAND}" -E env "LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$ENV{LD_LIBRARY_PATH}" "${FBTHRIFT_COMPILER}" --legacy-strict --gen "mstch_cpp2:${GEN_ARG_STR}" @@ -137,10 +138,7 @@ function(add_fbthrift_cpp_library LIB_NAME THRIFT_FILE) set(LIB_TYPE STATIC) endif () - add_library( - "${LIB_NAME}" ${LIB_TYPE} - ${generated_sources} - ) + add_library("${LIB_NAME}" STATIC ${generated_sources}) target_include_directories( "${LIB_NAME}" diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d3e37397ae..77721983f4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -339,8 +339,8 @@ endif() if(VELOX_ENABLE_PARQUET) add_definitions(-DVELOX_ENABLE_PARQUET) - # Native Parquet reader requires Apache Thrift and Arrow Parquet writer, which - # are included in Arrow. + # Native Parquet reader requires Arrow Parquet writer, which are + # included in Arrow. set(VELOX_ENABLE_ARROW ON) endif() @@ -404,9 +404,9 @@ endif() message("Setting CMAKE_CXX_FLAGS=${SCRIPT_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SCRIPT_CXX_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D USE_VELOX_COMMON_BASE") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_VELOX_COMMON_BASE") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D HAS_UNCAUGHT_EXCEPTIONS") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAS_UNCAUGHT_EXCEPTIONS") if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char") endif() @@ -547,19 +547,31 @@ velox_resolve_dependency( ) set( - BOOST_INCLUDE_LIBRARIES + BOOST_COMPONENTS atomic context date_time filesystem program_options regex - system thread ) +set( + BOOST_OPTIONAL_COMPONENTS + system # Boost.System is header only since Boost 1.89 +) velox_set_source(Boost) -velox_resolve_dependency(Boost 1.77.0 COMPONENTS ${BOOST_INCLUDE_LIBRARIES}) +velox_resolve_dependency( + Boost + 1.77.0 + MODULE + COMPONENTS ${BOOST_COMPONENTS} + OPTIONAL_COMPONENTS ${BOOST_OPTIONAL_COMPONENTS} 
+) +if(NOT TARGET Boost::system) + add_library(Boost::system ALIAS Boost::headers) +endif() velox_set_source(gflags) @@ -653,7 +665,7 @@ if(${VELOX_BUILD_TESTING}) velox_resolve_dependency(gRPC) endif() -if(VELOX_ENABLE_REMOTE_FUNCTIONS) +if(VELOX_ENABLE_REMOTE_FUNCTIONS OR VELOX_ENABLE_PARQUET) # TODO: Move this to use resolve_dependency(). For some reason, FBThrift # requires clients to explicitly install fizz and wangle. find_package(fizz CONFIG REQUIRED) diff --git a/scripts/docker/centos-multi.dockerfile b/scripts/docker/centos-multi.dockerfile index 93f499c8cf5..53be372a3aa 100644 --- a/scripts/docker/centos-multi.dockerfile +++ b/scripts/docker/centos-multi.dockerfile @@ -28,7 +28,9 @@ COPY scripts/setup-helper-functions.sh / COPY scripts/setup-versions.sh / COPY scripts/setup-common.sh / COPY scripts/setup-centos9.sh / +COPY CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch / COPY CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch / +COPY CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch / ARG VELOX_BUILD_SHARED=ON # Building libvelox.so requires folly and gflags to be built shared as well for now @@ -43,9 +45,15 @@ ENV UV_TOOL_BIN_DIR=/usr/local/bin \ UV_INSTALL_DIR=/usr/local/bin \ INSTALL_PREFIX=/deps -# CMake 4.0 removed support for cmake minimums of <=3.5 and will fail builds, this overrides it +# CMake 4.0 removed support for cmake minimums of <=3.5 and will fail +# builds, CMAKE_POLICY_VERSION_MINIMUM and cmake-compatibility.path +# override it. 
+# Apache Arrow 18.0.0 has a problem that Boost isn't detected when +# ARROW_TESTING=ON is only used: +# https://github.com/apache/arrow/pull/45424 ENV CMAKE_POLICY_VERSION_MINIMUM="3.5" \ - VELOX_ARROW_CMAKE_PATCH=/cmake-compatibility.patch + VELOX_ARROW_CMAKE_PATCH="/arrow-testing-boost.patch /cmake-compatibility.patch" \ + VELOX_FBTHRIFT_CMAKE_PATCH="/compactv1-protocol-refiller.patch" # Ensure libraries installed to INSTALL_PREFIX are found at runtime (e.g. # thrift1 needs libgflags.so.2.2 when folly links gflags statically but diff --git a/scripts/docker/fedora.dockerfile b/scripts/docker/fedora.dockerfile index 28478c5857b..0e1428fe91b 100644 --- a/scripts/docker/fedora.dockerfile +++ b/scripts/docker/fedora.dockerfile @@ -24,6 +24,8 @@ COPY scripts/setup-common.sh / COPY scripts/setup-centos9.sh / COPY scripts/setup-fedora.sh / COPY CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch / +COPY CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch / +COPY CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch / ARG VELOX_BUILD_SHARED=ON # Building libvelox.so requires folly and gflags to be built shared as well for now @@ -39,7 +41,8 @@ ENV UV_TOOL_BIN_DIR=/usr/local/bin \ # CMake 4.0 removed support for cmake minimums of <=3.5 and will fail builds, this overrides it ENV CMAKE_POLICY_VERSION_MINIMUM="3.5" \ - VELOX_ARROW_CMAKE_PATCH=/cmake-compatibility.patch + VELOX_ARROW_CMAKE_PATCH="/cmake-compatibility.patch /arrow-testing-boost.patch" \ + VELOX_FBTHRIFT_CMAKE_PATCH="/compactv1-protocol-refiller.patch" # Some CMake configs contain the hard coded prefix '/deps', we need to replace that with # the future location to avoid build errors in the base-image diff --git a/scripts/docker/ubuntu-22.04-cpp.dockerfile b/scripts/docker/ubuntu-22.04-cpp.dockerfile index b383e385337..ad96d5ddedb 100644 --- a/scripts/docker/ubuntu-22.04-cpp.dockerfile +++ b/scripts/docker/ubuntu-22.04-cpp.dockerfile @@ -23,8 +23,11 @@ RUN apt 
update && \ COPY scripts /velox/scripts/ COPY CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch / +COPY CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch / +COPY CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch / -ENV VELOX_ARROW_CMAKE_PATCH=/cmake-compatibility.patch \ +ENV VELOX_ARROW_CMAKE_PATCH="/cmake-compatibility.patch /arrow-testing-boost.patch" \ + VELOX_FBTHRIFT_CMAKE_PATCH="/compactv1-protocol-refiller.patch" \ UV_TOOL_BIN_DIR=/usr/local/bin \ UV_INSTALL_DIR=/usr/local/bin diff --git a/scripts/setup-centos9.sh b/scripts/setup-centos9.sh index 9853280b099..4ca9323fb17 100755 --- a/scripts/setup-centos9.sh +++ b/scripts/setup-centos9.sh @@ -115,7 +115,6 @@ function install_velox_deps { run_and_time install_fbthrift run_and_time install_duckdb run_and_time install_stemmer - run_and_time install_thrift run_and_time install_arrow run_and_time install_xsimd run_and_time install_simdjson diff --git a/scripts/setup-common.sh b/scripts/setup-common.sh index 9e98442574c..4e7d8528a1b 100755 --- a/scripts/setup-common.sh +++ b/scripts/setup-common.sh @@ -21,6 +21,7 @@ source "$SCRIPT_DIR"/setup-versions.sh VELOX_BUILD_SHARED=${VELOX_BUILD_SHARED:-"OFF"} #Build folly and gflags shared for use in libvelox.so. VELOX_ARROW_CMAKE_PATCH=${VELOX_ARROW_CMAKE_PATCH:-""} # avoid error due to +u +VELOX_FBTHRIFT_CMAKE_PATCH=${VELOX_FBTHRIFT_CMAKE_PATCH:-""} CMAKE_BUILD_TYPE="${BUILD_TYPE:-Release}" DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)} BUILD_GEOS="${BUILD_GEOS:-true}" @@ -79,6 +80,17 @@ function install_mvfst { function install_fbthrift { wget_and_untar https://github.com/facebook/fbthrift/archive/refs/tags/"${FB_OS_VERSION}".tar.gz fbthrift + + if [ -z "${VELOX_FBTHRIFT_CMAKE_PATCH}" ]; then + # We need to set a different path when building the Dockerfile. 
+ ABSOLUTE_SCRIPTDIR=$(realpath "${SCRIPT_DIR}") + + VELOX_FBTHRIFT_CMAKE_PATCH="${ABSOLUTE_SCRIPTDIR}/../CMake/resolve_dependency_modules/fbthrift/compactv1-protocol-refiller.patch" + fi + ( + cd "$DEPENDENCY_DIR"/fbthrift || exit 1 + git apply "${VELOX_FBTHRIFT_CMAKE_PATCH}" || exit 1 + ) cmake_install_dir fbthrift -Denable_tests=OFF -DBUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF } @@ -205,20 +217,23 @@ function install_arrow { if [ -z "$VELOX_ARROW_CMAKE_PATCH" ]; then # We need to set a different path when building the Dockerfile. ABSOLUTE_SCRIPTDIR=$(realpath "$SCRIPT_DIR") - VELOX_ARROW_CMAKE_PATCH="$ABSOLUTE_SCRIPTDIR/../CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch" + + VELOX_ARROW_CMAKE_PATCH="$ABSOLUTE_SCRIPTDIR/../CMake/resolve_dependency_modules/arrow/arrow-testing-boost.patch" + VELOX_ARROW_CMAKE_PATCH+=" $ABSOLUTE_SCRIPTDIR/../CMake/resolve_dependency_modules/arrow/cmake-compatibility.patch" fi cd "$DEPENDENCY_DIR"/arrow || exit 1 - git apply "$VELOX_ARROW_CMAKE_PATCH" + for patch in $VELOX_ARROW_CMAKE_PATCH; do + git apply "$patch" || exit 1 + done # Presto needs this for Arrow Flight if [[ -n $EXTRA_ARROW_PATCH ]]; then - git apply "$EXTRA_ARROW_PATCH" + git apply "$EXTRA_ARROW_PATCH" || exit 1 fi ) || exit 1 cmake_install_dir arrow/cpp \ -DARROW_PARQUET=OFF \ - -DARROW_WITH_THRIFT=ON \ -DARROW_WITH_LZ4=ON \ -DARROW_WITH_SNAPPY=ON \ -DARROW_WITH_ZLIB=ON \ @@ -235,33 +250,6 @@ function install_arrow { $EXTRA_ARROW_OPTIONS } -function install_thrift { - wget_and_untar https://github.com/apache/thrift/archive/"${THRIFT_VERSION}".tar.gz thrift - - EXTRA_CXXFLAGS="-O3 -fPIC" - # Clang will generate warnings and they need to be suppressed, otherwise the build will fail. 
- if [[ ${USE_CLANG} != "false" ]]; then - EXTRA_CXXFLAGS="-O3 -fPIC -Wno-inconsistent-missing-override -Wno-unused-but-set-variable" - fi - - CXX_FLAGS="$EXTRA_CXXFLAGS" cmake_install_dir thrift \ - -DBUILD_SHARED_LIBS=OFF \ - -DBUILD_COMPILER=ON \ - -DBUILD_EXAMPLES=OFF \ - -DBUILD_TUTORIALS=OFF \ - -DCMAKE_DEBUG_POSTFIX= \ - -DWITH_AS3=OFF \ - -DWITH_CPP=ON \ - -DWITH_C_GLIB=OFF \ - -DWITH_JAVA=OFF \ - -DWITH_JAVASCRIPT=OFF \ - -DWITH_LIBEVENT=OFF \ - -DWITH_NODEJS=OFF \ - -DWITH_PYTHON=OFF \ - -DWITH_QT5=OFF \ - -DWITH_ZLIB=OFF -} - function install_stemmer { wget_and_untar https://snowballstem.org/dist/libstemmer_c-"${STEMMER_VERSION}".tar.gz stemmer ( diff --git a/scripts/setup-fedora.sh b/scripts/setup-fedora.sh index 80d958f19a4..f9d10c414e7 100755 --- a/scripts/setup-fedora.sh +++ b/scripts/setup-fedora.sh @@ -58,10 +58,10 @@ function install_build_prerequisites { function install_velox_deps_from_dnf { dnf_install \ bison boost-devel c-ares-devel curl-devel double-conversion-devel \ - elfutils-libelf-devel flex fmt-devel gflags-devel glog-devel gmock-devel \ + elfutils-libelf-devel flex gflags-devel glog-devel gmock-devel \ gtest-devel libdwarf-devel libevent-devel libicu-devel \ libsodium-devel libzstd-devel lz4-devel openssl-devel-engine \ - re2-devel snappy-devel thrift-devel xxhash-devel zlib-devel grpc-devel grpc-plugins + re2-devel snappy-devel xxhash-devel zlib-devel grpc-devel grpc-plugins install_faiss_deps } @@ -69,6 +69,7 @@ function install_velox_deps_from_dnf { function install_velox_deps { run_and_time install_velox_deps_from_dnf run_and_time install_gcs_sdk_cpp #grpc, abseil, protobuf + run_and_time install_fmt run_and_time install_fast_float run_and_time install_folly run_and_time install_fizz diff --git a/scripts/setup-helper-functions.sh b/scripts/setup-helper-functions.sh index a50fb02ae0e..9a2187be31c 100755 --- a/scripts/setup-helper-functions.sh +++ b/scripts/setup-helper-functions.sh @@ -264,6 +264,11 @@ function cmake_install { 
COMPILER_FLAGS+=${OS_CXXFLAGS} COMPILER_FLAGS+=${EXTRA_PKG_CXXFLAGS} + local CCACHE= + if [ "${NAME}" != "duckdb" ] && command -v ccache >/dev/null 2>&1; then + # DuckDB sets ccache automatically in its CMakeLists.txt. + CCACHE=ccache + fi # CMAKE_POSITION_INDEPENDENT_CODE is required so that Velox can be built into dynamic libraries \ cmake -Wno-dev "${CMAKE_OPTIONS}" -B"${BINARY_DIR}" \ -GNinja \ @@ -272,6 +277,8 @@ function cmake_install { "${INSTALL_PREFIX+-DCMAKE_PREFIX_PATH=}${INSTALL_PREFIX-}" \ "${INSTALL_PREFIX+-DCMAKE_INSTALL_PREFIX=}${INSTALL_PREFIX-}" \ -DCMAKE_CXX_FLAGS="$COMPILER_FLAGS" \ + -DCMAKE_C_COMPILER_LAUNCHER=${CCACHE} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=${CCACHE} \ -DBUILD_TESTING=OFF \ "$@" # Exit if the build fails. diff --git a/scripts/setup-macos.sh b/scripts/setup-macos.sh index 74be0cbd1bf..0c8e22b3082 100755 --- a/scripts/setup-macos.sh +++ b/scripts/setup-macos.sh @@ -43,7 +43,7 @@ export CMAKE_POLICY_VERSION_MINIMUM="3.5" DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)} # gflags and glog are installed from source to ensure version compatibility. # Homebrew's glog 0.7.x has breaking API changes that are incompatible with folly. -MACOS_VELOX_DEPS="bison flex googletest icu4c libevent libsodium lz4 openssl protobuf@21 simdjson snappy xz xxhash zstd" +MACOS_VELOX_DEPS="bison double-conversion fast_float flex googletest icu4c libevent libsodium lz4 openssl protobuf@21 simdjson snappy xz xxhash zstd" MACOS_BUILD_DEPS="ninja cmake" SUDO="${SUDO:-""}" @@ -194,9 +194,6 @@ function install_velox_deps { run_and_time install_fbthrift run_and_time install_xsimd run_and_time install_stemmer - # We allow arrow to bundle thrift on MacOS due to issues with bison and flex. - # See https://github.com/facebook/fbthrift/pull/317 for an explanation. 
- # run_and_time install_thrift run_and_time install_arrow run_and_time install_duckdb_clang run_and_time install_geos diff --git a/scripts/setup-manylinux.sh b/scripts/setup-manylinux.sh index 424db964cce..b5995bdb5d3 100755 --- a/scripts/setup-manylinux.sh +++ b/scripts/setup-manylinux.sh @@ -38,8 +38,6 @@ USE_CLANG="${USE_CLANG:-false}" export INSTALL_PREFIX=${INSTALL_PREFIX:-"/usr/local"} DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)/deps-download} -export THRIFT_VERSION="v0.21.0" - # CMake 4.0 removed support for cmake minimums of <=3.5 and will fail builds, this overrides it export CMAKE_POLICY_VERSION_MINIMUM="3.5" @@ -122,7 +120,6 @@ function install_velox_deps { run_and_time install_fbthrift run_and_time install_duckdb run_and_time install_stemmer - run_and_time install_thrift run_and_time install_arrow } diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index f055b812e23..f3154bd63f1 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -84,6 +84,7 @@ function install_build_prerequisites { ninja-build \ checkinstall \ git \ + patch \ pkg-config \ libtool \ wget @@ -243,7 +244,6 @@ function install_velox_deps { run_and_time install_conda run_and_time install_duckdb run_and_time install_stemmer - run_and_time install_thrift run_and_time install_arrow run_and_time install_xsimd run_and_time install_simdjson diff --git a/scripts/setup-versions.sh b/scripts/setup-versions.sh index 13b7d81143b..01d75bac0b7 100755 --- a/scripts/setup-versions.sh +++ b/scripts/setup-versions.sh @@ -41,7 +41,6 @@ GFLAGS_VERSION="v2.2.2" GLOG_VERSION="v0.6.0" LZO_VERSION="2.10" SNAPPY_VERSION="1.1.8" -THRIFT_VERSION="${THRIFT_VERSION:-v0.16.0}" STEMMER_VERSION="2.2.0" GEOS_VERSION="3.10.7" # shellcheck disable=SC2034 diff --git a/velox/common/dynamic_registry/tests/CMakeLists.txt b/velox/common/dynamic_registry/tests/CMakeLists.txt index 40ccba9a989..b1c2253af43 100644 --- a/velox/common/dynamic_registry/tests/CMakeLists.txt +++ 
b/velox/common/dynamic_registry/tests/CMakeLists.txt @@ -24,50 +24,59 @@ add_library(velox_overload_int_function_dynamic SHARED DynamicIntFunctionOverloa add_library(velox_overload_varchar_function_dynamic SHARED DynamicVarcharFunctionOverload.cpp) add_library(velox_function_non_default_dynamic SHARED DynamicFunctionNonDefault.cpp) -set(CMAKE_DYLIB_TEST_LINK_LIBRARIES fmt::fmt Folly::folly glog::glog xsimd) - -target_link_libraries(velox_function_dynamic PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES}) +add_library(velox_function_dynamic_link_test_common INTERFACE) +target_link_libraries(velox_function_dynamic_link_test_common INTERFACE fmt::fmt glog::glog xsimd) +if(folly_SOURCE STREQUAL BUNDLED) + # We can prevent errors related to 'is being linked both statically + # and dynamically into this executable,' particularly for + # folly::hazptr_use_executor(), by "-Wl,--exclude-libs,ALL". But it + # works only for bundled Folly. + target_link_libraries(velox_function_dynamic_link_test_common INTERFACE Folly::Folly) +else() + # We use only for Folly headers for system Folly. If we link Folly + # (static library) to shared libraries, multiple Folly exist in the + # same process. + target_include_directories( + velox_function_dynamic_link_test_common + INTERFACE $ + ) +endif() +if(APPLE) + target_link_options( + velox_function_dynamic_link_test_common + INTERFACE "-Wl,-undefined,dynamic_lookup" + ) +else() + # This ensures compatibility during Linux compilation by preventing errors + # related to 'is being linked both statically and dynamically into this + # executable,' particularly for folly::hazptr_use_executor(). + # + # This works for bundled Folly but not work with system Folly. 
+ target_link_options(velox_function_dynamic_link_test_common INTERFACE "-Wl,--exclude-libs,ALL") +endif() +target_link_libraries(velox_function_dynamic PRIVATE velox_function_dynamic_link_test_common) target_link_libraries( velox_overwrite_int_function_dynamic - PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES} + PRIVATE velox_function_dynamic_link_test_common ) - target_link_libraries( velox_overwrite_varchar_function_dynamic - PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES} + PRIVATE velox_function_dynamic_link_test_common ) - -target_link_libraries(velox_function_err_dynamic PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES}) - +target_link_libraries(velox_function_err_dynamic PRIVATE velox_function_dynamic_link_test_common) target_link_libraries( velox_overload_int_function_dynamic - PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES} + PRIVATE velox_function_dynamic_link_test_common ) - target_link_libraries( velox_overload_varchar_function_dynamic - PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES} + PRIVATE velox_function_dynamic_link_test_common +) +target_link_libraries( + velox_function_non_default_dynamic + PRIVATE velox_function_dynamic_link_test_common ) - -target_link_libraries(velox_function_non_default_dynamic PRIVATE ${CMAKE_DYLIB_TEST_LINK_LIBRARIES}) - -if(APPLE) - set(COMMON_LIBRARY_LINK_OPTIONS "-Wl,-undefined,dynamic_lookup") -else() - # This ensures compatibility during Linux compilation by preventing errors - # related to 'is being linked both statically and dynamically into this - # executable,' particularly for folly_hazptr_use_executor." 
- set(COMMON_LIBRARY_LINK_OPTIONS "-Wl,--exclude-libs,ALL") -endif() - -target_link_options(velox_function_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) -target_link_options(velox_overwrite_int_function_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) -target_link_options(velox_overwrite_varchar_function_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) -target_link_options(velox_function_err_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) -target_link_options(velox_overload_int_function_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) -target_link_options(velox_overload_varchar_function_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) -target_link_options(velox_function_non_default_dynamic PRIVATE ${COMMON_LIBRARY_LINK_OPTIONS}) # Here's the actual test which will dynamically load the library defined above. add_executable(velox_function_dynamic_link_test DynamicLinkTest.cpp) diff --git a/velox/dwio/parquet/common/BloomFilter.cpp b/velox/dwio/parquet/common/BloomFilter.cpp index 211b842cf9e..5b631fb9185 100644 --- a/velox/dwio/parquet/common/BloomFilter.cpp +++ b/velox/dwio/parquet/common/BloomFilter.cpp @@ -18,11 +18,7 @@ #include "velox/dwio/parquet/common/BloomFilter.h" #include "velox/dwio/parquet/common/XxHasher.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" -#include "velox/dwio/parquet/thrift/ThriftTransport.h" - -#include -#include +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include #include @@ -74,27 +70,29 @@ void BlockSplitBloomFilter::init(const uint8_t* bitset, uint32_t numBytes) { static void validateBloomFilterHeader(const thrift::BloomFilterHeader& header) { std::stringstream error; - if (!header.algorithm.__isset.BLOCK) { - error << "Unsupported Bloom filter algorithm: "; - error << header.algorithm; + if (header.algorithm()->getType() != + thrift::BloomFilterAlgorithm::Type::BLOCK) { + error << "Unsupported Bloom filter algorithm: " + << header.algorithm()->getType(); VELOX_FAIL(error.str()); } - if 
(!header.hash.__isset.XXHASH) { - error << "Unsupported Bloom filter hash: ", error << header.hash; + if (header.hash()->getType() != thrift::BloomFilterHash::Type::XXHASH) { + error << "Unsupported Bloom filter hash: " << header.hash()->getType(); VELOX_FAIL(error.str()); } - if (!header.compression.__isset.UNCOMPRESSED) { - error << "Unsupported Bloom filter compression: ", - error << header.compression; + if (header.compression()->getType() != + thrift::BloomFilterCompression::Type::UNCOMPRESSED) { + error << "Unsupported Bloom filter compression: " + << header.compression()->getType(); VELOX_FAIL(error.str()); } - if (header.numBytes <= 0 || - static_cast(header.numBytes) > + if (*header.numBytes() <= 0 || + static_cast(*header.numBytes()) > BloomFilter::kMaximumBloomFilterBytes) { - error << "Bloom filter size is incorrect: " << header.numBytes + error << "Bloom filter size is incorrect: " << *header.numBytes() << ". Must be in range (" << 0 << ", " << BloomFilter::kMaximumBloomFilterBytes << "]."; VELOX_FAIL(error.str()); @@ -104,47 +102,32 @@ static void validateBloomFilterHeader(const thrift::BloomFilterHeader& header) { BlockSplitBloomFilter BlockSplitBloomFilter::deserialize( dwio::common::SeekableInputStream* input, memory::MemoryPool& pool) { - const void* headerBuffer; - int32_t size; - input->Next(&headerBuffer, &size); - const char* bufferStart = reinterpret_cast(headerBuffer); - const char* bufferEnd = bufferStart + size; - - std::shared_ptr transport = - std::make_shared( - input, bufferStart, bufferEnd); - apache::thrift::protocol::TCompactProtocolT protocol( - transport); thrift::BloomFilterHeader header; - uint32_t headerSize = header.read(&protocol); + auto result = thrift::deserialize(&header, input, nullptr, 0); validateBloomFilterHeader(header); - const int32_t bloomFilterSize = header.numBytes; - if (bloomFilterSize + headerSize <= size) { + auto data = result.remainedData; + const auto dataSize = result.remainedDataBytes; + const int32_t 
bloomFilterSize = *header.numBytes(); + if (bloomFilterSize <= dataSize) { // The bloom filter data is entirely contained in the buffer we just read // => just return it. BlockSplitBloomFilter bloomFilter(&pool); - bloomFilter.init( - reinterpret_cast(headerBuffer) + headerSize, - bloomFilterSize); + bloomFilter.init(data, bloomFilterSize); return bloomFilter; } // We have read a part of the bloom filter already, copy it to the target // buffer and read the remaining part from the InputStream. auto buffer = AlignedBuffer::allocate(bloomFilterSize, &pool); - const auto bloomFilterSizeInHeaderBuffer = size - headerSize; - if (bloomFilterSizeInHeaderBuffer > 0) { - std::memcpy( - buffer->asMutable(), - reinterpret_cast(headerBuffer) + headerSize, - bloomFilterSizeInHeaderBuffer); + const auto bloomFilterSizeInData = dataSize; + if (bloomFilterSizeInData > 0) { + std::memcpy(buffer->asMutable(), data, bloomFilterSizeInData); } - const auto requiredReadSize = bloomFilterSize - bloomFilterSizeInHeaderBuffer; + const auto requiredReadSize = bloomFilterSize - bloomFilterSizeInData; input->readFully( - buffer->asMutable() + bloomFilterSizeInHeaderBuffer, - requiredReadSize); + buffer->asMutable() + bloomFilterSizeInData, requiredReadSize); VELOX_CHECK_EQ( buffer->size(), bloomFilterSize, @@ -165,38 +148,30 @@ void BlockSplitBloomFilter::writeTo( if (algorithm_ != BloomFilter::Algorithm::BLOCK) { VELOX_FAIL("BloomFilter does not support Algorithm other than BLOCK"); } - header.algorithm.__set_BLOCK(thrift::SplitBlockAlgorithm()); + header.algorithm()->set_BLOCK(thrift::SplitBlockAlgorithm()); if (hashStrategy_ != HashStrategy::XXHASH) { VELOX_FAIL("BloomFilter does not support Hash other than XXHASH"); } - header.hash.__set_XXHASH(thrift::XxHash()); + header.hash()->set_XXHASH(thrift::XxHash()); if (compressionStrategy_ != CompressionStrategy::UNCOMPRESSED) { VELOX_FAIL( "BloomFilter does not support Compression other than UNCOMPRESSED"); } - 
header.compression.__set_UNCOMPRESSED(thrift::Uncompressed()); - header.__set_numBytes(numBytes_); + header.compression()->set_UNCOMPRESSED(thrift::Uncompressed()); + header.numBytes() = numBytes_; - std::shared_ptr memBuffer = - std::make_shared(); - apache::thrift::protocol::TCompactProtocolFactoryT< - apache::thrift::transport::TMemoryBuffer> - factory; - std::shared_ptr protocol = - factory.getProtocol(memBuffer); + folly::IOBufQueue buffer; try { - memBuffer->resetBuffer(); - header.write(protocol.get()); + thrift::serialize(header, &buffer); } catch (std::exception& e) { std::stringstream ss; ss << "Couldn't serialize thrift: " << e.what() << "\n"; VELOX_FAIL(ss.str()); } - uint8_t* outBuffer; - uint32_t outLength; - memBuffer->getBuffer(&outBuffer, &outLength); + std::string output; + buffer.appendToString(output); // write header - sink->write(reinterpret_cast(outBuffer), outLength); + sink->write(output.data(), output.size()); // write bitset sink->write(data_->as(), numBytes_); } diff --git a/velox/dwio/parquet/common/CMakeLists.txt b/velox/dwio/parquet/common/CMakeLists.txt index 4f1256edd75..d100d1d4a5d 100644 --- a/velox/dwio/parquet/common/CMakeLists.txt +++ b/velox/dwio/parquet/common/CMakeLists.txt @@ -39,6 +39,5 @@ velox_link_libraries( fmt::fmt Folly::folly Snappy::snappy - thrift zstd::zstd ) diff --git a/velox/dwio/parquet/reader/CMakeLists.txt b/velox/dwio/parquet/reader/CMakeLists.txt index d73c8a31c67..9107201f1a1 100644 --- a/velox/dwio/parquet/reader/CMakeLists.txt +++ b/velox/dwio/parquet/reader/CMakeLists.txt @@ -54,7 +54,6 @@ velox_add_library( velox_link_libraries( velox_dwio_native_parquet_reader - velox_dwio_parquet_thrift velox_dwio_parquet_common velox_type velox_dwio_common @@ -62,6 +61,5 @@ velox_link_libraries( fmt::fmt arrow Snappy::snappy - thrift zstd::zstd ) diff --git a/velox/dwio/parquet/reader/IntegerColumnReader.h b/velox/dwio/parquet/reader/IntegerColumnReader.h index 01f45694ab1..c583b3f5fce 100644 --- 
a/velox/dwio/parquet/reader/IntegerColumnReader.h +++ b/velox/dwio/parquet/reader/IntegerColumnReader.h @@ -17,6 +17,9 @@ #pragma once #include "velox/dwio/common/SelectiveIntegerColumnReader.h" +#include "velox/dwio/parquet/reader/ParquetColumnReader.h" +#include "velox/dwio/parquet/reader/ParquetTypeWithId.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include "velox/type/DecimalUtil.h" namespace facebook::velox::parquet { @@ -57,8 +60,9 @@ class IntegerColumnReader : public dwio::common::SelectiveIntegerColumnReader { void getValues(const RowSet& rows, VectorPtr* result) override { auto& fileType = static_cast(*fileType_); auto logicalType = fileType.logicalType_; - if (logicalType.has_value() && logicalType.value().__isset.INTEGER && - !logicalType.value().INTEGER.isSigned) { + if (logicalType && + logicalType->getType() == thrift::LogicalType::Type::INTEGER && + !*logicalType->get_INTEGER().isSigned()) { getUnsignedIntValues(rows, requestedType_, result); } else { getIntValues(rows, requestedType_, result); diff --git a/velox/dwio/parquet/reader/Metadata.cpp b/velox/dwio/parquet/reader/Metadata.cpp index 114ea92e92c..98304808909 100644 --- a/velox/dwio/parquet/reader/Metadata.cpp +++ b/velox/dwio/parquet/reader/Metadata.cpp @@ -15,7 +15,9 @@ */ #include "velox/dwio/parquet/reader/Metadata.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" + +#include namespace facebook::velox::parquet { @@ -42,70 +44,67 @@ inline std::optional decodeInt64Stat(const std::string& bytes) { template inline std::optional getMin(const thrift::Statistics& columnChunkStats) { - return columnChunkStats.__isset.min_value - ? load(columnChunkStats.min_value.data()) - : (columnChunkStats.__isset.min - ? std::optional(load(columnChunkStats.min.data())) + return columnChunkStats.min_value() + ? load(columnChunkStats.min_value()->data()) + : (columnChunkStats.min() + ? 
std::optional(load(columnChunkStats.min()->data())) : std::nullopt); } template inline std::optional getMax(const thrift::Statistics& columnChunkStats) { - return columnChunkStats.__isset.max_value - ? std::optional(load(columnChunkStats.max_value.data())) - : (columnChunkStats.__isset.max - ? std::optional(load(columnChunkStats.max.data())) + return columnChunkStats.max_value() + ? std::optional(load(columnChunkStats.max_value()->data())) + : (columnChunkStats.max() + ? std::optional(load(columnChunkStats.max()->data())) : std::nullopt); } template <> inline std::optional getMin( const thrift::Statistics& columnChunkStats) { - return columnChunkStats.__isset.min_value - ? decodeInt64Stat(columnChunkStats.min_value) - : (columnChunkStats.__isset.min ? decodeInt64Stat(columnChunkStats.min) - : std::nullopt); + return columnChunkStats.min_value() + ? decodeInt64Stat(*columnChunkStats.min_value()) + : (columnChunkStats.min() ? decodeInt64Stat(*columnChunkStats.min()) + : std::nullopt); } template <> inline std::optional getMax( const thrift::Statistics& columnChunkStats) { - return columnChunkStats.__isset.max_value - ? decodeInt64Stat(columnChunkStats.max_value) - : (columnChunkStats.__isset.max ? decodeInt64Stat(columnChunkStats.max) - : std::nullopt); + return columnChunkStats.max_value() + ? decodeInt64Stat(*columnChunkStats.max_value()) + : (columnChunkStats.max() ? decodeInt64Stat(*columnChunkStats.max()) + : std::nullopt); } template <> inline std::optional getMin( const thrift::Statistics& columnChunkStats) { - return columnChunkStats.__isset.min_value - ? std::optional(columnChunkStats.min_value) - : (columnChunkStats.__isset.min ? std::optional(columnChunkStats.min) - : std::nullopt); + return columnChunkStats.min_value() + ? columnChunkStats.min_value().to_optional() + : columnChunkStats.min().to_optional(); } template <> inline std::optional getMax( const thrift::Statistics& columnChunkStats) { - return columnChunkStats.__isset.max_value - ? 
std::optional(columnChunkStats.max_value) - : (columnChunkStats.__isset.max ? std::optional(columnChunkStats.max) - : std::nullopt); + return columnChunkStats.max_value() + ? columnChunkStats.max_value().to_optional() + : columnChunkStats.max().to_optional(); } std::unique_ptr buildColumnStatisticsFromThrift( const thrift::Statistics& columnChunkStats, const velox::Type& type, uint64_t numRowsInRowGroup) { - std::optional nullCount = columnChunkStats.__isset.null_count - ? std::optional(columnChunkStats.null_count) - : std::nullopt; - std::optional valueCount = nullCount.has_value() + std::optional nullCount = + columnChunkStats.null_count().to_optional(); + std::optional valueCount = nullCount ? std::optional(numRowsInRowGroup - nullCount.value()) : std::nullopt; - std::optional hasNull = columnChunkStats.__isset.null_count - ? std::optional(columnChunkStats.null_count > 0) + std::optional hasNull = columnChunkStats.null_count() + ? std::optional(*columnChunkStats.null_count() > 0) : std::nullopt; switch (type.kind()) { @@ -184,7 +183,7 @@ std::unique_ptr buildColumnStatisticsFromThrift( } common::CompressionKind thriftCodecToCompressionKind( - thrift::CompressionCodec::type codec) { + thrift::CompressionCodec codec) { switch (codec) { case thrift::CompressionCodec::UNCOMPRESSED: return common::CompressionKind::CompressionKind_NONE; @@ -203,7 +202,7 @@ common::CompressionKind thriftCodecToCompressionKind( default: VELOX_UNSUPPORTED( "Unsupported compression type: " + - facebook::velox::parquet::thrift::to_string(codec)); + std::to_string(static_cast(codec))); break; } } @@ -219,21 +218,26 @@ FOLLY_ALWAYS_INLINE const thrift::ColumnChunk* thriftColumnChunkPtr( } int64_t ColumnChunkMetaDataPtr::numValues() const { - return thriftColumnChunkPtr(ptr_)->meta_data.num_values; + return apache::thrift::can_throw( + *thriftColumnChunkPtr(ptr_)->meta_data()->num_values()); } bool ColumnChunkMetaDataPtr::hasMetadata() const { - return 
thriftColumnChunkPtr(ptr_)->__isset.meta_data; + return thriftColumnChunkPtr(ptr_)->meta_data().has_value(); } bool ColumnChunkMetaDataPtr::hasStatistics() const { return hasMetadata() && - thriftColumnChunkPtr(ptr_)->meta_data.__isset.statistics; + apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->statistics() + .has_value(); } bool ColumnChunkMetaDataPtr::hasDictionaryPageOffset() const { return hasMetadata() && - thriftColumnChunkPtr(ptr_)->meta_data.__isset.dictionary_page_offset; + apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->dictionary_page_offset() + .has_value(); } std::unique_ptr @@ -242,44 +246,70 @@ ColumnChunkMetaDataPtr::getColumnStatistics( int64_t numRows) { VELOX_CHECK(hasStatistics()); return buildColumnStatisticsFromThrift( - thriftColumnChunkPtr(ptr_)->meta_data.statistics, *type, numRows); + apache::thrift::can_throw( + *apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->statistics()), + *type, + numRows); }; std::string ColumnChunkMetaDataPtr::getColumnMetadataStatsMinValue() { VELOX_CHECK(hasStatistics()); - return thriftColumnChunkPtr(ptr_)->meta_data.statistics.min_value; + return apache::thrift::can_throw( + *apache::thrift::can_throw( + apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->statistics()) + ->min_value()); } std::string ColumnChunkMetaDataPtr::getColumnMetadataStatsMaxValue() { VELOX_CHECK(hasStatistics()); - return thriftColumnChunkPtr(ptr_)->meta_data.statistics.max_value; + return apache::thrift::can_throw( + *apache::thrift::can_throw( + apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->statistics()) + ->max_value()); } int64_t ColumnChunkMetaDataPtr::getColumnMetadataStatsNullCount() { VELOX_CHECK(hasStatistics()); - return thriftColumnChunkPtr(ptr_)->meta_data.statistics.null_count; + return apache::thrift::can_throw( + *apache::thrift::can_throw( + apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) 
+ ->statistics()) + ->null_count()); } int64_t ColumnChunkMetaDataPtr::dataPageOffset() const { - return thriftColumnChunkPtr(ptr_)->meta_data.data_page_offset; + return apache::thrift::can_throw( + *apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->data_page_offset()); } int64_t ColumnChunkMetaDataPtr::dictionaryPageOffset() const { VELOX_CHECK(hasDictionaryPageOffset()); - return thriftColumnChunkPtr(ptr_)->meta_data.dictionary_page_offset; + return apache::thrift::can_throw( + *apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->dictionary_page_offset()); } common::CompressionKind ColumnChunkMetaDataPtr::compression() const { return thriftCodecToCompressionKind( - thriftColumnChunkPtr(ptr_)->meta_data.codec); + apache::thrift::can_throw( + *apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->codec())); } int64_t ColumnChunkMetaDataPtr::totalCompressedSize() const { - return thriftColumnChunkPtr(ptr_)->meta_data.total_compressed_size; + return apache::thrift::can_throw( + *apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->total_compressed_size()); } int64_t ColumnChunkMetaDataPtr::totalUncompressedSize() const { - return thriftColumnChunkPtr(ptr_)->meta_data.total_uncompressed_size; + return apache::thrift::can_throw( + *apache::thrift::can_throw(thriftColumnChunkPtr(ptr_)->meta_data()) + ->total_uncompressed_size()); } FOLLY_ALWAYS_INLINE const thrift::RowGroup* thriftRowGroupPtr( @@ -293,48 +323,55 @@ RowGroupMetaDataPtr::RowGroupMetaDataPtr(const void* metadata) RowGroupMetaDataPtr::~RowGroupMetaDataPtr() = default; int RowGroupMetaDataPtr::numColumns() const { - return thriftRowGroupPtr(ptr_)->columns.size(); + return thriftRowGroupPtr(ptr_)->columns()->size(); } int32_t RowGroupMetaDataPtr::sortingColumnIdx(int i) const { - return thriftRowGroupPtr(ptr_)->sorting_columns[i].column_idx; + return apache::thrift::can_throw( + 
*apache::thrift::can_throw(*thriftRowGroupPtr(ptr_)->sorting_columns())[i] + .column_idx()); } bool RowGroupMetaDataPtr::sortingColumnDescending(int i) const { - return thriftRowGroupPtr(ptr_)->sorting_columns[i].descending; + return apache::thrift::can_throw( + *apache::thrift::can_throw(*thriftRowGroupPtr(ptr_)->sorting_columns())[i] + .descending()); } bool RowGroupMetaDataPtr::sortingColumnNullsFirst(int i) const { - return thriftRowGroupPtr(ptr_)->sorting_columns[i].nulls_first; + return apache::thrift::can_throw( + *apache::thrift::can_throw(*thriftRowGroupPtr(ptr_)->sorting_columns())[i] + .nulls_first()); } int64_t RowGroupMetaDataPtr::numRows() const { - return thriftRowGroupPtr(ptr_)->num_rows; + return *thriftRowGroupPtr(ptr_)->num_rows(); } int64_t RowGroupMetaDataPtr::totalByteSize() const { - return thriftRowGroupPtr(ptr_)->total_byte_size; + return *thriftRowGroupPtr(ptr_)->total_byte_size(); } bool RowGroupMetaDataPtr::hasFileOffset() const { - return thriftRowGroupPtr(ptr_)->__isset.file_offset; + return thriftRowGroupPtr(ptr_)->file_offset().has_value(); } int64_t RowGroupMetaDataPtr::fileOffset() const { - return thriftRowGroupPtr(ptr_)->file_offset; + return apache::thrift::can_throw(*thriftRowGroupPtr(ptr_)->file_offset()); } bool RowGroupMetaDataPtr::hasTotalCompressedSize() const { - return thriftRowGroupPtr(ptr_)->__isset.total_compressed_size; + return thriftRowGroupPtr(ptr_)->total_compressed_size().has_value(); } int64_t RowGroupMetaDataPtr::totalCompressedSize() const { - return thriftRowGroupPtr(ptr_)->total_compressed_size; + return apache::thrift::can_throw( + *thriftRowGroupPtr(ptr_)->total_compressed_size()); } ColumnChunkMetaDataPtr RowGroupMetaDataPtr::columnChunk(int i) const { return ColumnChunkMetaDataPtr( - reinterpret_cast(&thriftRowGroupPtr(ptr_)->columns[i])); + reinterpret_cast(&(*thriftRowGroupPtr(ptr_)->columns())[i])); } FOLLY_ALWAYS_INLINE const thrift::FileMetaData* thriftFileMetaDataPtr( @@ -349,26 +386,29 @@ 
FileMetaDataPtr::~FileMetaDataPtr() = default; RowGroupMetaDataPtr FileMetaDataPtr::rowGroup(int i) const { return RowGroupMetaDataPtr( reinterpret_cast( - &thriftFileMetaDataPtr(ptr_)->row_groups[i])); + &(*thriftFileMetaDataPtr(ptr_)->row_groups())[i])); } int64_t FileMetaDataPtr::numRows() const { - return thriftFileMetaDataPtr(ptr_)->num_rows; + return *thriftFileMetaDataPtr(ptr_)->num_rows(); } int FileMetaDataPtr::numRowGroups() const { - return thriftFileMetaDataPtr(ptr_)->row_groups.size(); + return thriftFileMetaDataPtr(ptr_)->row_groups()->size(); } int64_t FileMetaDataPtr::keyValueMetadataSize() const { - return thriftFileMetaDataPtr(ptr_)->key_value_metadata.size(); + return apache::thrift::can_throw( + thriftFileMetaDataPtr(ptr_)->key_value_metadata()) + ->size(); } bool FileMetaDataPtr::keyValueMetadataContains( const std::string_view key) const { - auto thriftKeyValueMeta = thriftFileMetaDataPtr(ptr_)->key_value_metadata; + auto thriftKeyValueMeta = apache::thrift::can_throw( + *thriftFileMetaDataPtr(ptr_)->key_value_metadata()); for (const auto& kv : thriftKeyValueMeta) { - if (kv.key == key) { + if (*kv.key() == key) { return true; } } @@ -379,15 +419,22 @@ std::string FileMetaDataPtr::keyValueMetadataValue( const std::string_view key) const { int thriftKeyValueMetaSize = keyValueMetadataSize(); for (size_t i = 0; i < thriftKeyValueMetaSize; i++) { - if (key == thriftFileMetaDataPtr(ptr_)->key_value_metadata[i].key) { - return thriftFileMetaDataPtr(ptr_)->key_value_metadata[i].value; + if (key == + apache::thrift::can_throw( + *apache::thrift::can_throw( + *thriftFileMetaDataPtr(ptr_)->key_value_metadata())[i] + .key())) { + return apache::thrift::can_throw( + *apache::thrift::can_throw( + *thriftFileMetaDataPtr(ptr_)->key_value_metadata())[i] + .value()); } } VELOX_FAIL(fmt::format("Input key {} is not in the key value metadata", key)); } std::string FileMetaDataPtr::createdBy() const { - return thriftFileMetaDataPtr(ptr_)->created_by; + return 
apache::thrift::can_throw(*thriftFileMetaDataPtr(ptr_)->created_by()); } } // namespace facebook::velox::parquet diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index 0163dced799..2d1f5d6c3bc 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -16,16 +16,15 @@ #include "velox/dwio/parquet/reader/PageReader.h" +#include #include "velox/common/testutil/TestValue.h" #include "velox/common/time/Timer.h" #include "velox/dwio/common/BufferUtil.h" #include "velox/dwio/common/ColumnVisitors.h" #include "velox/dwio/parquet/common/LevelConversion.h" -#include "velox/dwio/parquet/thrift/ThriftTransport.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include "velox/vector/FlatVector.h" -#include // @manual - using facebook::velox::common::testutil::TestValue; namespace facebook::velox::parquet { @@ -51,9 +50,9 @@ void PageReader::seekToPage(int64_t row) { break; } PageHeader pageHeader = readPageHeader(); - pageStart_ = pageDataStart_ + pageHeader.compressed_page_size; + pageStart_ = pageDataStart_ + *pageHeader.compressed_page_size(); - switch (pageHeader.type) { + switch (*pageHeader.type()) { case thrift::PageType::DATA_PAGE: prepareDataPageV1(pageHeader, row); break; @@ -63,7 +62,7 @@ void PageReader::seekToPage(int64_t row) { case thrift::PageType::DICTIONARY_PAGE: if (row == kRepDefOnly) { skipBytes( - pageHeader.compressed_page_size, + *pageHeader.compressed_page_size(), inputStream_.get(), bufferStart_, bufferEnd_); @@ -84,32 +83,57 @@ void PageReader::seekToPage(int64_t row) { PageHeader PageReader::readPageHeader() { TestValue::adjust( "facebook::velox::parquet::PageReader::readPageHeader", this); - if (bufferEnd_ == bufferStart_) { - const void* buffer; - int32_t size; - uint64_t readUs{0}; - { - MicrosecondTimer timer(&readUs); - inputStream_->Next(&buffer, &size); - } - stats_.pageLoadTimeNs.increment(readUs * 1'000); - bufferStart_ = 
reinterpret_cast(buffer); - bufferEnd_ = bufferStart_ + size; - } - - std::shared_ptr transport = - std::make_shared( - inputStream_.get(), bufferStart_, bufferEnd_); - apache::thrift::protocol::TCompactProtocolT protocol( - transport); PageHeader pageHeader; - uint64_t readBytes; - readBytes = pageHeader.read(&protocol); + auto result = thrift::deserialize( + &pageHeader, + inputStream_.get(), + reinterpret_cast(bufferStart_), + bufferEnd_ - bufferStart_); + pageDataStart_ = pageStart_ + result.readBytes; - pageDataStart_ = pageStart_ + readBytes; + // Keep buffer alive so deserialized pageHeader data remains valid. + thriftBuffer_ = std::move(result.lastBuffer); + + updateBufferPointersAfterDeserialization(result); + + stats_.pageLoadTimeNs.increment(result.readUs * 1'000); return pageHeader; } +void PageReader::updateBufferPointersAfterDeserialization( + const thrift::DeserializeResult& result) { + // No refiller used - remainedData points to unconsumed data in original + // buffer + if (!result.usedRefiller) { + bufferStart_ = toCharPtr(result.remainedData); + bufferEnd_ = bufferStart_ + result.remainedDataBytes; + return; + } + + // Refiller was used - position pointers to remaining stream data + // The refiller read new data from the stream. We need to calculate how much + // of that stream data was consumed and position our pointers accordingly. + // + // result.readBytes - total bytes consumed from the stream + // initialDataBytes - how many bytes were in the initial buffer + // Bytes consumed from new stream data = result.readBytes - initialDataBytes + + const size_t initialDataBytes = bufferEnd_ - bufferStart_; + const char* streamStart = toCharPtr(result.streamData); + + if (result.readBytes > initialDataBytes) { + // We consumed some bytes from the new stream data. 
+ const size_t bytesConsumedFromNewStream = + result.readBytes - initialDataBytes; + bufferStart_ = streamStart + bytesConsumedFromNewStream; + bufferEnd_ = streamStart + result.streamDataBytes; + } else { + // We only consumed from initial buffer, stream data is untouched. + bufferStart_ = streamStart; + bufferEnd_ = streamStart + result.streamDataBytes; + } +} + const char* PageReader::readBytes(int32_t size, BufferPtr& copy) { uint64_t readUs{0}; { @@ -212,27 +236,27 @@ void PageReader::updateRowInfoAfterPageSkipped() { void PageReader::prepareDataPageV1(const PageHeader& pageHeader, int64_t row) { VELOX_CHECK( - pageHeader.type == thrift::PageType::DATA_PAGE && - pageHeader.__isset.data_page_header); - numRepDefsInPage_ = pageHeader.data_page_header.num_values; + *pageHeader.type() == thrift::PageType::DATA_PAGE && + pageHeader.data_page_header()); + numRepDefsInPage_ = *pageHeader.data_page_header()->num_values(); setPageRowInfo(row == kRepDefOnly); if (row != kRepDefOnly && numRowsInPage_ != kRowsUnknown && numRowsInPage_ + rowOfPage_ <= row) { dwio::common::skipBytes( - pageHeader.compressed_page_size, + *pageHeader.compressed_page_size(), inputStream_.get(), bufferStart_, bufferEnd_); return; } - pageData_ = readBytes(pageHeader.compressed_page_size, pageBuffer_); + pageData_ = readBytes(*pageHeader.compressed_page_size(), pageBuffer_); pageData_ = decompressData( pageData_, - pageHeader.compressed_page_size, - pageHeader.uncompressed_page_size); - auto pageEnd = pageData_ + pageHeader.uncompressed_page_size; - auto remainingBytes = pageHeader.uncompressed_page_size; + *pageHeader.compressed_page_size(), + *pageHeader.uncompressed_page_size()); + auto pageEnd = pageData_ + *pageHeader.uncompressed_page_size(); + auto remainingBytes = *pageHeader.uncompressed_page_size(); if (maxRepeat_ > 0) { VELOX_CHECK_GE( remainingBytes, @@ -282,7 +306,7 @@ void PageReader::prepareDataPageV1(const PageHeader& pageHeader, int64_t row) { } encodedDataSize_ = pageEnd - 
pageData_; - encoding_ = pageHeader.data_page_header.encoding; + encoding_ = *pageHeader.data_page_header()->encoding(); if (!hasChunkRepDefs_ && (numRowsInPage_ == kRowsUnknown || maxDefine_ > 1)) { readPageDefLevels(); } @@ -293,13 +317,13 @@ void PageReader::prepareDataPageV1(const PageHeader& pageHeader, int64_t row) { } void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) { - VELOX_CHECK(pageHeader.__isset.data_page_header_v2); - numRepDefsInPage_ = pageHeader.data_page_header_v2.num_values; + VELOX_CHECK(pageHeader.data_page_header_v2().has_value()); + numRepDefsInPage_ = *pageHeader.data_page_header_v2()->num_values(); setPageRowInfo(row == kRepDefOnly); if (row != kRepDefOnly && numRowsInPage_ != kRowsUnknown && numRowsInPage_ + rowOfPage_ <= row) { skipBytes( - pageHeader.compressed_page_size, + *pageHeader.compressed_page_size(), inputStream_.get(), bufferStart_, bufferEnd_); @@ -307,11 +331,11 @@ void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) { } uint32_t defineLength = - pageHeader.data_page_header_v2.definition_levels_byte_length; + *pageHeader.data_page_header_v2()->definition_levels_byte_length(); uint32_t repeatLength = - pageHeader.data_page_header_v2.repetition_levels_byte_length; + *pageHeader.data_page_header_v2()->repetition_levels_byte_length(); - auto bytes = pageHeader.compressed_page_size; + auto bytes = *pageHeader.compressed_page_size(); VELOX_CHECK_LE( static_cast(repeatLength) + defineLength, bytes, @@ -342,21 +366,24 @@ void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) { } auto levelsSize = repeatLength + defineLength; pageData_ += levelsSize; - if (pageHeader.data_page_header_v2.__isset.is_compressed && - pageHeader.data_page_header_v2.is_compressed && - (pageHeader.compressed_page_size - levelsSize > 0)) { + // parquet.thrift uses "7: optional bool is_compressed = true;" but + // FBThrift doesn't support "optional" and default value. 
(The + // default value isn't used for missing is_compressed.) So we need + // to use value_or(true) here. + if (pageHeader.data_page_header_v2()->is_compressed().value_or(true) && + (*pageHeader.compressed_page_size() - levelsSize > 0)) { pageData_ = decompressData( pageData_, - pageHeader.compressed_page_size - levelsSize, - pageHeader.uncompressed_page_size - levelsSize); + *pageHeader.compressed_page_size() - levelsSize, + *pageHeader.uncompressed_page_size() - levelsSize); } if (row == kRepDefOnly) { skipBytes(bytes, inputStream_.get(), bufferStart_, bufferEnd_); return; } - encodedDataSize_ = pageHeader.uncompressed_page_size - levelsSize; - encoding_ = pageHeader.data_page_header_v2.encoding; + encodedDataSize_ = *pageHeader.uncompressed_page_size() - levelsSize; + encoding_ = *pageHeader.data_page_header_v2()->encoding(); if (numRowsInPage_ == kRowsUnknown) { readPageDefLevels(); } @@ -366,20 +393,20 @@ void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) { } void PageReader::prepareDictionary(const PageHeader& pageHeader) { - dictionary_.numValues = pageHeader.dictionary_page_header.num_values; - dictionaryEncoding_ = pageHeader.dictionary_page_header.encoding; - dictionary_.sorted = pageHeader.dictionary_page_header.__isset.is_sorted && - pageHeader.dictionary_page_header.is_sorted; + dictionary_.numValues = *pageHeader.dictionary_page_header()->num_values(); + dictionaryEncoding_ = *pageHeader.dictionary_page_header()->encoding(); + dictionary_.sorted = pageHeader.dictionary_page_header()->is_sorted() && + *pageHeader.dictionary_page_header()->is_sorted(); VELOX_CHECK( dictionaryEncoding_ == Encoding::PLAIN_DICTIONARY || dictionaryEncoding_ == Encoding::PLAIN); if (codec_ != common::CompressionKind::CompressionKind_NONE) { - pageData_ = readBytes(pageHeader.compressed_page_size, pageBuffer_); + pageData_ = readBytes(*pageHeader.compressed_page_size(), pageBuffer_); pageData_ = decompressData( pageData_, - 
pageHeader.compressed_page_size, - pageHeader.uncompressed_page_size); + *pageHeader.compressed_page_size(), + *pageHeader.uncompressed_page_size()); } auto parquetType = type_->parquetType_.value(); @@ -479,7 +506,7 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { case thrift::Type::BYTE_ARRAY: { dictionary_.values = AlignedBuffer::allocate(dictionary_.numValues, &pool_); - auto numBytes = pageHeader.uncompressed_page_size; + auto numBytes = *pageHeader.uncompressed_page_size(); auto values = dictionary_.values->asMutable(); dictionary_.strings = AlignedBuffer::allocate(numBytes, &pool_); auto strings = dictionary_.strings->asMutable(); @@ -595,7 +622,7 @@ void PageReader::makeFilterCache(dwio::common::ScanState& state) { } namespace { -int32_t parquetTypeBytes(thrift::Type::type type) { +int32_t parquetTypeBytes(thrift::Type type) { switch (type) { case thrift::Type::INT32: case thrift::Type::FLOAT: @@ -971,6 +998,10 @@ bool PageReader::rowsForPage( auto rowZero = visitBase_ + visitorRows_[currentVisitorRow_]; if (rowZero >= rowOfPage_ + numRowsInPage_) { seekToPage(rowZero); + // If seekToPage set numRowsInPage_=0, we've reached the end of the chunk + if (numRowsInPage_ == 0) { + return false; + } if (hasChunkRepDefs_) { numLeafNullsConsumed_ = rowOfPage_; } diff --git a/velox/dwio/parquet/reader/PageReader.h b/velox/dwio/parquet/reader/PageReader.h index 9d3969ef75e..13a474e62d6 100644 --- a/velox/dwio/parquet/reader/PageReader.h +++ b/velox/dwio/parquet/reader/PageReader.h @@ -216,6 +216,19 @@ class PageReader { // 'hasChunkRepDefs_' is false. void readPageDefLevels(); + // Updates bufferStart_ and bufferEnd_ based on deserialization result. + // Handles both refiller and non-refiller cases. 
+ void updateBufferPointersAfterDeserialization( + const thrift::DeserializeResult& result); + + static inline const char* toCharPtr(const uint8_t* ptr) { + return reinterpret_cast(ptr); + } + + static inline const char* toCharPtr(const void* ptr) { + return static_cast(ptr); + } + // Returns a pointer to contiguous space for the next 'size' bytes // from current position. Copies data into 'copy' if the range // straddles buffers. Allocates or resizes 'copy' as needed. @@ -390,6 +403,9 @@ class PageReader { const int64_t chunkSize_; const char* bufferStart_{nullptr}; const char* bufferEnd_{nullptr}; + // Holds the buffer from the last Thrift deserialization to keep + // deserialized data pointers valid + std::unique_ptr thriftBuffer_; BufferPtr tempNulls_; BufferPtr nullsInReadRange_; BufferPtr multiPageNulls_; @@ -434,7 +450,7 @@ class PageReader { raw_vector leafNulls_; // Encoding of current page. - thrift::Encoding::type encoding_; + thrift::Encoding encoding_; // Row number of first value in current page from start of ColumnChunk. int64_t rowOfPage_{0}; @@ -458,7 +474,7 @@ class PageReader { // Dictionary contents. dwio::common::DictionaryValues dictionary_; - thrift::Encoding::type dictionaryEncoding_; + thrift::Encoding dictionaryEncoding_; // Offset of current page's header from start of ColumnChunk. 
uint64_t pageStart_{0}; diff --git a/velox/dwio/parquet/reader/ParquetColumnReader.cpp b/velox/dwio/parquet/reader/ParquetColumnReader.cpp index 2fbd90d6560..83ae2903e48 100644 --- a/velox/dwio/parquet/reader/ParquetColumnReader.cpp +++ b/velox/dwio/parquet/reader/ParquetColumnReader.cpp @@ -29,7 +29,7 @@ #include "velox/dwio/parquet/reader/StructColumnReader.h" #include "velox/dwio/parquet/reader/TimeColumnReader.h" #include "velox/dwio/parquet/reader/TimestampColumnReader.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" namespace facebook::velox::parquet { diff --git a/velox/dwio/parquet/reader/ParquetReader.cpp b/velox/dwio/parquet/reader/ParquetReader.cpp index 7e141096810..30ded8d3714 100644 --- a/velox/dwio/parquet/reader/ParquetReader.cpp +++ b/velox/dwio/parquet/reader/ParquetReader.cpp @@ -16,13 +16,13 @@ #include "velox/dwio/parquet/reader/ParquetReader.h" -#include //@manual +#include #include "velox/dwio/common/StatisticsBuilder.h" #include "velox/dwio/parquet/reader/ParquetColumnReader.h" #include "velox/dwio/parquet/reader/ParquetStatsContext.h" #include "velox/dwio/parquet/reader/StructColumnReader.h" -#include "velox/dwio/parquet/thrift/ThriftTransport.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include "velox/functions/lib/string/StringImpl.h" namespace facebook::velox::parquet { @@ -308,31 +308,33 @@ void ReaderBase::loadFileMetaData() { missingLength, stream.get(), copy.data(), bufferStart, bufferEnd); } - std::shared_ptr thriftTransport = - std::make_shared( - copy.data() + footerOffsetInBuffer, footerLength); - auto thriftProtocol = std::make_unique< - apache::thrift::protocol::TCompactProtocolT>( - thriftTransport); fileMetaData_ = std::make_unique(); - fileMetaData_->read(thriftProtocol.get()); + thrift::deserialize( + fileMetaData_.get(), + std::string_view( + reinterpret_cast(copy.data() + footerOffsetInBuffer), + footerLength)); } void 
ReaderBase::initializeSchema() { - if (fileMetaData_->__isset.encryption_algorithm) { + if (fileMetaData_->encryption_algorithm()) { VELOX_UNSUPPORTED("Encrypted Parquet files are not supported"); } VELOX_CHECK_GT( - fileMetaData_->schema.size(), + fileMetaData_->schema()->size(), 1, "Invalid Parquet schema: Need at least one non-root column in the file"); + // parquet.thrift says "The root of the schema does not have a + // repetition_type" but there are Parquet files that specify + // REQUIRED explicitly. VELOX_CHECK_EQ( - fileMetaData_->schema[0].repetition_type, + (*fileMetaData_->schema())[0].repetition_type().value_or( + thrift::FieldRepetitionType::REQUIRED), thrift::FieldRepetitionType::REQUIRED, "Invalid Parquet schema: root element must be REQUIRED"); VELOX_CHECK_GT( - fileMetaData_->schema[0].num_children, + *(*fileMetaData_->schema())[0].num_children(), 0, "Invalid Parquet schema: root element must have at least 1 child"); @@ -340,7 +342,7 @@ void ReaderBase::initializeSchema() { uint32_t maxRepeat = 0; uint32_t schemaIdx = 0; uint32_t columnIdx = 0; - uint32_t maxSchemaElementIdx = fileMetaData_->schema.size() - 1; + uint32_t maxSchemaElementIdx = fileMetaData_->schema()->size() - 1; std::vector columnNames; // Setting the parent schema index of the root("hive_schema") to be 0, which // is the root itself. 
This is ok because it's never required to check the @@ -360,7 +362,11 @@ void ReaderBase::initializeSchema() { } void ReaderBase::initializeVersion() { - version_ = SemanticVersion::parse(fileMetaData_->created_by); + if (fileMetaData_->created_by()) { + version_ = SemanticVersion::parse(*fileMetaData_->created_by()); + } else { + version_ = std::nullopt; + } } std::unique_ptr ReaderBase::getParquetColumnInfo( @@ -374,31 +380,31 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( const TypePtr& parentRequestedType, std::vector& columnNames) const { VELOX_CHECK(fileMetaData_ != nullptr); - VELOX_CHECK_LT(schemaIdx, fileMetaData_->schema.size()); + VELOX_CHECK_LT(schemaIdx, fileMetaData_->schema()->size()); - auto& schema = fileMetaData_->schema; + auto& schema = *fileMetaData_->schema(); uint32_t curSchemaIdx = schemaIdx; auto& schemaElement = schema[curSchemaIdx]; bool isRepeated = false; bool isOptional = false; - if (schemaElement.__isset.repetition_type) { - if (schemaElement.repetition_type != + if (schemaElement.repetition_type()) { + if (*schemaElement.repetition_type() != thrift::FieldRepetitionType::REQUIRED) { maxDefine++; } - if (schemaElement.repetition_type == + if (apache::thrift::can_throw(*schemaElement.repetition_type()) == thrift::FieldRepetitionType::REPEATED) { maxRepeat++; isRepeated = true; } - if (schemaElement.repetition_type == + if (*schemaElement.repetition_type() == thrift::FieldRepetitionType::OPTIONAL) { isOptional = true; } } - auto name = schemaElement.name; + auto name = *schemaElement.name(); if (isFileColumnNamesReadAsLowerCase()) { name = functions::stringImpl::utf8StrToLowerCopy(name); } @@ -411,9 +417,9 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( columnNames.push_back(name); } - if (!schemaElement.__isset.type) { // inner node + if (!schemaElement.type()) { // inner node VELOX_CHECK( - schemaElement.__isset.num_children && schemaElement.num_children > 0, + schemaElement.num_children() && *schemaElement.num_children() > 
0, "Node has no children but should"); VELOX_CHECK( !requestedType || requestedType->isRow() || requestedType->isArray() || @@ -422,9 +428,9 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( std::vector> children; auto curSchemaIdx = schemaIdx; - for (int32_t i = 0; i < schemaElement.num_children; i++) { + for (int32_t i = 0; i < *schemaElement.num_children(); i++) { ++schemaIdx; - auto childName = schema[schemaIdx].name; + auto childName = *schema[schemaIdx].name(); if (isFileColumnNamesReadAsLowerCase()) { childName = functions::stringImpl::utf8StrToLowerCopy(childName); } @@ -498,8 +504,8 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( VELOX_CHECK(!children.empty()); name = columnNames.at(curSchemaIdx); - if (schemaElement.__isset.converted_type) { - switch (schemaElement.converted_type) { + if (schemaElement.converted_type()) { + switch (*schemaElement.converted_type()) { case thrift::ConvertedType::LIST: { VELOX_CHECK_EQ(children.size(), 1); const auto& child = children[0]; @@ -507,7 +513,7 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( // In case the child is a MAP or current element is repeated then // wrap child around additional ARRAY if (child->type()->kind() == TypeKind::MAP || - schemaElement.repetition_type == + apache::thrift::can_throw(*schemaElement.repetition_type()) == thrift::FieldRepetitionType::REPEATED) { return std::make_unique( TypeFactory::create(child->type()), @@ -532,14 +538,15 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( // If the MAP_KEY_VALUE annotated group's parent is a MAP, it should // be the repeated key_value group that directly contains the key and // value children. - if (schema[parentSchemaIdx].converted_type == - thrift::ConvertedType::MAP) { + if (schema[parentSchemaIdx].converted_type() && + *schema[parentSchemaIdx].converted_type() == + thrift::ConvertedType::MAP) { // TODO: the group names need to be checked. 
According to the spec, // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps // the name of the schema element being 'key_value' is // also an indication of this is a map type VELOX_CHECK_EQ( - schemaElement.repetition_type, + *schemaElement.repetition_type(), thrift::FieldRepetitionType::REPEATED); VELOX_CHECK_EQ(children.size(), 2); @@ -595,14 +602,15 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( default: VELOX_UNREACHABLE( "Invalid SchemaElement converted_type: {}, name: {}", - schemaElement.converted_type, + *schemaElement.converted_type(), name); } } else { - if (schemaElement.repetition_type == - thrift::FieldRepetitionType::REPEATED) { - if (schema[parentSchemaIdx].converted_type == - thrift::ConvertedType::LIST) { + if (schemaElement.repetition_type() && + *schemaElement.repetition_type() == + thrift::FieldRepetitionType::REPEATED) { + auto converted_type = schema[parentSchemaIdx].converted_type(); + if (converted_type && *converted_type == thrift::ConvertedType::LIST) { // TODO: the group names need to be checked. 
According to spec, // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists // the name of the schema element being 'array' is @@ -610,7 +618,7 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( // child of LIST VELOX_CHECK_GE(children.size(), 1); if (children.size() == 1 && name != "array" && - name != schema[parentSchemaIdx].name + "_tuple") { + name != *schema[parentSchemaIdx].name() + "_tuple") { auto type = TypeFactory::create(children[0]->type()); return std::make_unique( @@ -676,10 +684,9 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( return res; } } else if ( - schema[parentSchemaIdx].converted_type == - thrift::ConvertedType::MAP || - schema[parentSchemaIdx].converted_type == - thrift::ConvertedType::MAP_KEY_VALUE) { + converted_type && + (*converted_type == thrift::ConvertedType::MAP || + *converted_type == thrift::ConvertedType::MAP_KEY_VALUE)) { // children of MAP VELOX_CHECK_EQ(children.size(), 2); auto type = TypeFactory::create( @@ -761,21 +768,14 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( } else { // leaf node name = columnNames.at(curSchemaIdx); const auto veloxType = convertType(schemaElement, requestedType); - int32_t precision = - schemaElement.__isset.precision ? schemaElement.precision : 0; - int32_t scale = schemaElement.__isset.scale ? schemaElement.scale : 0; - int32_t type_length = - schemaElement.__isset.type_length ? schemaElement.type_length : 0; + int32_t precision = schemaElement.precision().value_or(0); + int32_t scale = schemaElement.scale().value_or(0); + int32_t type_length = schemaElement.type_length().value_or(0); std::vector> children; const std::optional logicalType_ = - schemaElement.__isset.logicalType - ? std::optional(schemaElement.logicalType) - : std::nullopt; - const std::optional convertedType = - schemaElement.__isset.converted_type - ? 
std::optional( - schemaElement.converted_type) - : std::nullopt; + schemaElement.logicalType().to_optional(); + const std::optional convertedType = + schemaElement.converted_type().to_optional(); auto leafTypePtr = std::make_unique( veloxType, @@ -784,7 +784,7 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( maxSchemaElementIdx, columnIdx++, name, - schemaElement.type, + *schemaElement.type(), logicalType_, convertedType, maxRepeat, @@ -795,7 +795,7 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( scale, type_length); - if (schemaElement.repetition_type == + if (apache::thrift::can_throw(*schemaElement.repetition_type()) == thrift::FieldRepetitionType::REPEATED) { // Array children.clear(); @@ -826,28 +826,27 @@ std::unique_ptr ReaderBase::getParquetColumnInfo( TypePtr ReaderBase::convertType( const thrift::SchemaElement& schemaElement, const TypePtr& requestedType) const { - VELOX_CHECK(schemaElement.__isset.type && schemaElement.num_children == 0); VELOX_CHECK( - schemaElement.type != thrift::Type::FIXED_LEN_BYTE_ARRAY || - schemaElement.__isset.type_length, + schemaElement.type() && schemaElement.num_children().value_or(0) == 0); + VELOX_CHECK( + *schemaElement.type() != thrift::Type::FIXED_LEN_BYTE_ARRAY || + schemaElement.type_length(), "FIXED_LEN_BYTE_ARRAY requires length to be set"); static constexpr const char* kTypeMappingErrorFmtStr = "Converted type {} is not allowed for requested type {}"; - - const bool isRepeated = schemaElement.__isset.repetition_type && - schemaElement.repetition_type == thrift::FieldRepetitionType::REPEATED; + const bool isRepeated = schemaElement.repetition_type() && + *schemaElement.repetition_type() == thrift::FieldRepetitionType::REPEATED; const bool allowNarrowing = options_.allowInt32Narrowing(); - - if (schemaElement.__isset.converted_type) { - switch (schemaElement.converted_type) { + if (schemaElement.converted_type()) { + switch (*schemaElement.converted_type()) { case thrift::ConvertedType::INT_8: case 
thrift::ConvertedType::UINT_8: VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), thrift::Type::INT32, "{} converted type can only be set for value of thrift::Type::INT32", - schemaElement.converted_type); + *schemaElement.converted_type()); VELOX_CHECK( !requestedType || isCompatible( @@ -865,10 +864,10 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::INT_16: case thrift::ConvertedType::UINT_16: VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), thrift::Type::INT32, "{} converted type can only be set for value of thrift::Type::INT32", - schemaElement.converted_type); + *schemaElement.converted_type()); VELOX_CHECK( !requestedType || isCompatible( @@ -886,10 +885,10 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::INT_32: case thrift::ConvertedType::UINT_32: VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), thrift::Type::INT32, "{} converted type can only be set for value of thrift::Type::INT32", - schemaElement.converted_type); + *schemaElement.converted_type()); VELOX_CHECK( !requestedType || isCompatible( @@ -907,10 +906,10 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::INT_64: case thrift::ConvertedType::UINT_64: VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), thrift::Type::INT64, "{} converted type can only be set for value of thrift::Type::INT64", - schemaElement.converted_type); + *schemaElement.converted_type()); VELOX_CHECK( !requestedType || isCompatible(requestedType, isRepeated, isInt64Compatible), @@ -921,7 +920,7 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::DATE: VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), thrift::Type::INT32, "DATE converted type can only be set for value of thrift::Type::INT32"); VELOX_CHECK( @@ -938,7 +937,7 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::TIMESTAMP_MICROS: case thrift::ConvertedType::TIMESTAMP_MILLIS: VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), 
thrift::Type::INT64, "TIMESTAMP_MICROS or TIMESTAMP_MILLIS converted type can only be set for value of thrift::Type::INT64"); VELOX_CHECK( @@ -956,10 +955,10 @@ TypePtr ReaderBase::convertType( case thrift::ConvertedType::DECIMAL: { VELOX_CHECK( - schemaElement.__isset.precision && schemaElement.__isset.scale, + schemaElement.precision() && schemaElement.scale(), "DECIMAL requires a length and scale specifier!"); - const auto schemaElementPrecision = schemaElement.precision; - const auto schemaElementScale = schemaElement.scale; + const auto schemaElementPrecision = *schemaElement.precision(); + const auto schemaElementScale = *schemaElement.scale(); auto type = DECIMAL(schemaElementPrecision, schemaElementScale); if (requestedType) { VELOX_CHECK( @@ -992,7 +991,7 @@ TypePtr ReaderBase::convertType( } case thrift::ConvertedType::UTF8: - switch (schemaElement.type) { + switch (*schemaElement.type()) { case thrift::Type::BYTE_ARRAY: case thrift::Type::FIXED_LEN_BYTE_ARRAY: VELOX_CHECK( @@ -1013,7 +1012,7 @@ TypePtr ReaderBase::convertType( } case thrift::ConvertedType::ENUM: { VELOX_CHECK_EQ( - schemaElement.type, + *schemaElement.type(), thrift::Type::BYTE_ARRAY, "ENUM converted type can only be set for value of thrift::Type::BYTE_ARRAY"); VELOX_CHECK( @@ -1031,8 +1030,8 @@ TypePtr ReaderBase::convertType( } case thrift::ConvertedType::TIME_MILLIS: VELOX_CHECK_EQ( - schemaElement.type, - thrift::Type::INT32, + static_cast(*schemaElement.type()), + static_cast(thrift::Type::INT32), "TIME_MILLIS converted type can only be set for value of thrift::Type::INT32"); VELOX_CHECK( !requestedType || @@ -1057,11 +1056,11 @@ TypePtr ReaderBase::convertType( default: VELOX_FAIL( "Unsupported Parquet SchemaElement converted type: {}", - schemaElement.converted_type); + *schemaElement.converted_type()); } } else { - switch (schemaElement.type) { - case thrift::Type::type::BOOLEAN: + switch (*schemaElement.type()) { + case thrift::Type::BOOLEAN: VELOX_CHECK( !requestedType || 
isCompatible( @@ -1074,7 +1073,7 @@ TypePtr ReaderBase::convertType( "BOOLEAN", requestedType->toString()); return BOOLEAN(); - case thrift::Type::type::INT32: + case thrift::Type::INT32: VELOX_CHECK( !requestedType || isCompatible( @@ -1088,10 +1087,11 @@ TypePtr ReaderBase::convertType( "INTEGER", requestedType->toString()); return INTEGER(); - case thrift::Type::type::INT64: + case thrift::Type::INT64: // For Int64 Timestamp in nano precision - if (schemaElement.__isset.logicalType && - schemaElement.logicalType.__isset.TIMESTAMP) { + if (schemaElement.logicalType() && + schemaElement.logicalType()->getType() == + thrift::LogicalType::Type::TIMESTAMP) { VELOX_CHECK( !requestedType || isCompatible( @@ -1112,7 +1112,7 @@ TypePtr ReaderBase::convertType( "BIGINT", requestedType->toString()); return BIGINT(); - case thrift::Type::type::INT96: + case thrift::Type::INT96: VELOX_CHECK( !requestedType || isCompatible( @@ -1125,7 +1125,7 @@ TypePtr ReaderBase::convertType( "TIMESTAMP", requestedType->toString()); return TIMESTAMP(); // INT96 only maps to a timestamp - case thrift::Type::type::FLOAT: + case thrift::Type::FLOAT: VELOX_CHECK( !requestedType || isCompatible( @@ -1139,7 +1139,7 @@ TypePtr ReaderBase::convertType( "REAL", requestedType->toString()); return REAL(); - case thrift::Type::type::DOUBLE: + case thrift::Type::DOUBLE: VELOX_CHECK( !requestedType || isCompatible( @@ -1152,8 +1152,8 @@ TypePtr ReaderBase::convertType( "DOUBLE", requestedType->toString()); return DOUBLE(); - case thrift::Type::type::BYTE_ARRAY: - case thrift::Type::type::FIXED_LEN_BYTE_ARRAY: + case thrift::Type::BYTE_ARRAY: + case thrift::Type::FIXED_LEN_BYTE_ARRAY: if (requestedType && isCompatible(requestedType, isRepeated, [](const TypePtr& type) { return type->isVarchar(); @@ -1174,7 +1174,8 @@ TypePtr ReaderBase::convertType( default: VELOX_FAIL( - "Unknown Parquet SchemaElement type: {}", schemaElement.type); + "Unknown Parquet SchemaElement type: {}", + 
static_cast(*schemaElement.type())); } } } @@ -1220,12 +1221,20 @@ int64_t ReaderBase::rowGroupUncompressedSize( int32_t rowGroupIndex, const dwio::common::TypeWithId& type) const { if (type.column() != ParquetTypeWithId::kNonLeaf) { - VELOX_CHECK_LT(rowGroupIndex, fileMetaData_->row_groups.size()); + VELOX_CHECK_LT(rowGroupIndex, fileMetaData_->row_groups()->size()); VELOX_CHECK_LT( - type.column(), fileMetaData_->row_groups[rowGroupIndex].columns.size()); - return fileMetaData_->row_groups[rowGroupIndex] - .columns[type.column()] - .meta_data.total_uncompressed_size; + type.column(), + apache::thrift::can_throw(*fileMetaData_->row_groups())[rowGroupIndex] + .columns() + ->size()); + return apache::thrift::can_throw( + *apache::thrift::can_throw( + *fileMetaData_->row_groups())[rowGroupIndex] + .columns() + .value()[type.column()] + .meta_data()) + .total_uncompressed_size() + .value(); } int64_t sum = 0; for (auto child : type.getChildren()) { @@ -1246,7 +1255,7 @@ class ParquetRowReader::Impl { : pool_{readerBase->getMemoryPool()}, readerBase_{readerBase}, options_{options}, - rowGroups_{readerBase_->thriftFileMetaData().row_groups}, + rowGroups_{*readerBase_->thriftFileMetaData().row_groups()}, nextRowGroupIdsIdx_{0}, currentRowGroupPtr_{nullptr}, rowsInCurrentRowGroup_{0}, @@ -1308,20 +1317,29 @@ class ParquetRowReader::Impl { uint64_t rowNumber = 0; for (auto i = 0; i < rowGroups_.size(); i++) { - VELOX_CHECK_GT(rowGroups_[i].columns.size(), 0); + VELOX_CHECK_GT(rowGroups_[i].columns()->size(), 0); auto fileOffset = - (rowGroups_[i].__isset.file_offset && rowGroups_[i].file_offset != 0) - ? rowGroups_[i].file_offset - : rowGroups_[i].columns[0].meta_data.__isset.dictionary_page_offset - ? rowGroups_[i].columns[0].meta_data.dictionary_page_offset - : rowGroups_[i].columns[0].meta_data.data_page_offset; + (rowGroups_[i].file_offset() && + apache::thrift::can_throw(*rowGroups_[i].file_offset()) != 0) + ? 
apache::thrift::can_throw(*rowGroups_[i].file_offset()) + : apache::thrift::can_throw( + rowGroups_[i].columns().value()[0].meta_data()) + ->dictionary_page_offset() + ? apache::thrift::can_throw( + *apache::thrift::can_throw( + rowGroups_[i].columns().value()[0].meta_data()) + ->dictionary_page_offset()) + : apache::thrift::can_throw( + *apache::thrift::can_throw( + rowGroups_[i].columns().value()[0].meta_data()) + ->data_page_offset()); VELOX_CHECK_GT(fileOffset, 0); auto rowGroupInRange = (fileOffset >= options_.offset() && fileOffset < options_.limit()); auto isExcluded = (i < res.totalCount && bits::isBitSet(res.filterResult.data(), i)); - auto isEmpty = rowGroups_[i].num_rows == 0; + auto isEmpty = apache::thrift::can_throw(*rowGroups_[i].num_rows()) == 0; // Add a row group to read if it is within range and not empty and not in // the excluded list. @@ -1333,14 +1351,14 @@ class ParquetRowReader::Impl { // Clear the metadata of row groups that are not read. This helps // reduce the memory consumption. ColumnChunks consume the most // memory. Skip the 0th RowGroup as it is used by estimatedRowSize(). 
- rowGroups_[i].columns.clear(); + rowGroups_[i].columns()->clear(); } if (rowGroupInRange) { skippedStrides_++; } } - rowNumber += rowGroups_[i].num_rows; + rowNumber += *rowGroups_[i].num_rows(); } } @@ -1394,7 +1412,7 @@ class ParquetRowReader::Impl { } estimatedRowSize_ = readerBase_->rowGroupUncompressedSize( index, *readerBase_->schemaWithId()) / - rowGroups_[index].num_rows; + *rowGroups_[index].num_rows(); lastRowGroupWithRowEstimate_ = index; return estimatedRowSize_; } @@ -1426,7 +1444,7 @@ class ParquetRowReader::Impl { nextRowGroupIdsIdx_, static_cast(*columnReader_)); currentRowGroupPtr_ = &rowGroups_[rowGroupIds_[nextRowGroupIdsIdx_]]; - rowsInCurrentRowGroup_ = currentRowGroupPtr_->num_rows; + rowsInCurrentRowGroup_ = *currentRowGroupPtr_->num_rows(); currentRowInGroup_ = 0; nextRowGroupIdsIdx_++; columnReader_->seekToRowGroup(nextRowGroupIndex); @@ -1508,7 +1526,7 @@ ParquetReader::ParquetReader( : readerBase_(std::make_shared(std::move(input), options)) {} std::optional ParquetReader::numberOfRows() const { - return readerBase_->thriftFileMetaData().num_rows; + return *readerBase_->thriftFileMetaData().num_rows(); } std::unique_ptr ParquetReader::columnStatistics( diff --git a/velox/dwio/parquet/reader/ParquetStatsContext.h b/velox/dwio/parquet/reader/ParquetStatsContext.h index ab3a9364e32..529485e88a9 100644 --- a/velox/dwio/parquet/reader/ParquetStatsContext.h +++ b/velox/dwio/parquet/reader/ParquetStatsContext.h @@ -18,7 +18,7 @@ #include "velox/dwio/common/Statistics.h" #include "velox/dwio/parquet/reader/SemanticVersion.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" namespace facebook::velox::parquet { @@ -29,7 +29,7 @@ struct ParquetStatsContext : dwio::common::StatsContext { ParquetStatsContext(const std::optional& version) : parquetVersion(version) {} - bool shouldIgnoreStatistics(thrift::Type::type type) const { + bool shouldIgnoreStatistics(thrift::Type type) const { if 
(!parquetVersion.has_value()) { return true; } diff --git a/velox/dwio/parquet/reader/ParquetTypeWithId.h b/velox/dwio/parquet/reader/ParquetTypeWithId.h index fda748becd1..57be3e639b1 100644 --- a/velox/dwio/parquet/reader/ParquetTypeWithId.h +++ b/velox/dwio/parquet/reader/ParquetTypeWithId.h @@ -18,7 +18,7 @@ #include "velox/dwio/common/TypeWithId.h" #include "velox/dwio/parquet/common/LevelConversion.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" namespace facebook::velox::parquet { @@ -41,9 +41,9 @@ class ParquetTypeWithId : public dwio::common::TypeWithId { uint32_t maxId, uint32_t column, std::string name, - std::optional parquetType, + std::optional parquetType, std::optional logicalType, - std::optional convertedType, + std::optional convertedType, uint32_t maxRepeat, uint32_t maxDefine, bool isOptional, @@ -84,9 +84,9 @@ class ParquetTypeWithId : public dwio::common::TypeWithId { const&&; const std::string name_; - const std::optional parquetType_; + const std::optional parquetType_; const std::optional logicalType_; - const std::optional convertedType_; + const std::optional convertedType_; const uint32_t maxRepeat_; const uint32_t maxDefine_; const bool isOptional_; diff --git a/velox/dwio/parquet/reader/SemanticVersion.cpp b/velox/dwio/parquet/reader/SemanticVersion.cpp index a7c1892f7cf..622ee416827 100644 --- a/velox/dwio/parquet/reader/SemanticVersion.cpp +++ b/velox/dwio/parquet/reader/SemanticVersion.cpp @@ -59,7 +59,7 @@ std::optional SemanticVersion::parse( } } -bool SemanticVersion::shouldIgnoreStatistics(thrift::Type::type type) const { +bool SemanticVersion::shouldIgnoreStatistics(thrift::Type type) const { if (type != thrift::Type::BYTE_ARRAY && type != thrift::Type::FIXED_LEN_BYTE_ARRAY) { return false; diff --git a/velox/dwio/parquet/reader/SemanticVersion.h b/velox/dwio/parquet/reader/SemanticVersion.h index 7ae18618bc8..86cdb293a87 100644 --- 
a/velox/dwio/parquet/reader/SemanticVersion.h +++ b/velox/dwio/parquet/reader/SemanticVersion.h @@ -16,7 +16,7 @@ #pragma once -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include #include @@ -34,7 +34,7 @@ class SemanticVersion { static std::optional parse(const std::string& input); - bool shouldIgnoreStatistics(thrift::Type::type type) const; + bool shouldIgnoreStatistics(thrift::Type type) const; std::string toString() const; diff --git a/velox/dwio/parquet/reader/TimeColumnReader.h b/velox/dwio/parquet/reader/TimeColumnReader.h index a6ae23ae202..4321b8b1975 100644 --- a/velox/dwio/parquet/reader/TimeColumnReader.h +++ b/velox/dwio/parquet/reader/TimeColumnReader.h @@ -18,7 +18,7 @@ #include "velox/dwio/parquet/reader/IntegerColumnReader.h" #include "velox/dwio/parquet/reader/ParquetColumnReader.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" namespace facebook::velox::parquet { @@ -36,14 +36,14 @@ class TimeColumnReader : public IntegerColumnReader { const auto typeWithId = std::static_pointer_cast(fileType_); if (auto logicalType = typeWithId->logicalType_) { - VELOX_CHECK(logicalType->__isset.TIME); - const auto unit = logicalType->TIME.unit; + VELOX_CHECK(logicalType->getType() == thrift::LogicalType::Type::TIME); + const auto unit = logicalType->get_TIME().unit(); VELOX_CHECK( - unit.__isset.MILLIS, + unit->getType() == thrift::TimeUnit::Type::MILLIS, "TIME precision other than milliseconds is not supported"); } else if (auto convertedType = typeWithId->convertedType_) { VELOX_CHECK( - convertedType == thrift::ConvertedType::type::TIME_MILLIS, + convertedType == thrift::ConvertedType::TIME_MILLIS, "TIME converted type other than TIME_MILLIS is not supported"); } else { VELOX_NYI("Logical type and converted type are not provided for TIME."); diff --git a/velox/dwio/parquet/reader/TimestampColumnReader.h 
b/velox/dwio/parquet/reader/TimestampColumnReader.h index 6ee0cf7fc5e..197b159523b 100644 --- a/velox/dwio/parquet/reader/TimestampColumnReader.h +++ b/velox/dwio/parquet/reader/TimestampColumnReader.h @@ -18,7 +18,7 @@ #include "velox/dwio/parquet/reader/IntegerColumnReader.h" #include "velox/dwio/parquet/reader/ParquetColumnReader.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" namespace facebook::velox::parquet { namespace { @@ -96,22 +96,22 @@ class TimestampColumnReader : public IntegerColumnReader { const auto typeWithId = std::static_pointer_cast(fileType_); if (auto logicalType = typeWithId->logicalType_) { - VELOX_CHECK(logicalType->__isset.TIMESTAMP); - const auto unit = logicalType->TIMESTAMP.unit; - if (unit.__isset.MILLIS) { + VELOX_CHECK( + logicalType->getType() == thrift::LogicalType::Type::TIMESTAMP); + const auto& unit = logicalType->get_TIMESTAMP().unit(); + if (unit->getType() == thrift::TimeUnit::Type::MILLIS) { filePrecision_ = TimestampPrecision::kMilliseconds; - } else if (unit.__isset.MICROS) { + } else if (unit->getType() == thrift::TimeUnit::Type::MICROS) { filePrecision_ = TimestampPrecision::kMicroseconds; - } else if (unit.__isset.NANOS) { + } else if (unit->getType() == thrift::TimeUnit::Type::NANOS) { filePrecision_ = TimestampPrecision::kNanoseconds; } else { VELOX_UNREACHABLE(); } } else if (auto convertedType = typeWithId->convertedType_) { - if (convertedType == thrift::ConvertedType::type::TIMESTAMP_MILLIS) { + if (convertedType == thrift::ConvertedType::TIMESTAMP_MILLIS) { filePrecision_ = TimestampPrecision::kMilliseconds; - } else if ( - convertedType == thrift::ConvertedType::type::TIMESTAMP_MICROS) { + } else if (convertedType == thrift::ConvertedType::TIMESTAMP_MICROS) { filePrecision_ = TimestampPrecision::kMicroseconds; } else { VELOX_UNREACHABLE(); diff --git a/velox/dwio/parquet/tests/CMakeLists.txt b/velox/dwio/parquet/tests/CMakeLists.txt index 
de1431e8866..26313ecd44d 100644 --- a/velox/dwio/parquet/tests/CMakeLists.txt +++ b/velox/dwio/parquet/tests/CMakeLists.txt @@ -29,7 +29,6 @@ set( add_subdirectory(common) add_subdirectory(reader) -add_subdirectory(thrift) add_subdirectory(writer) add_executable(velox_dwio_parquet_tpch_test ParquetTpchTest.cpp) diff --git a/velox/dwio/parquet/tests/common/CMakeLists.txt b/velox/dwio/parquet/tests/common/CMakeLists.txt index 19cf296e68f..6f9607fee77 100644 --- a/velox/dwio/parquet/tests/common/CMakeLists.txt +++ b/velox/dwio/parquet/tests/common/CMakeLists.txt @@ -19,7 +19,6 @@ target_link_libraries( velox_dwio_parquet_common_test velox_dwio_parquet_common arrow - thrift velox_link_libs velox_exec GTest::gtest diff --git a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp index e5ef4e5e294..a2c7acae8c4 100644 --- a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp +++ b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp @@ -869,11 +869,11 @@ TEST_F(E2EFilterTest, writeDecimalAsInteger) { auto types = parquetReader.typeWithId()->getChildren(); auto c0 = std::dynamic_pointer_cast(types[0]); - EXPECT_EQ(c0->parquetType_.value(), thrift::Type::type::INT32); + EXPECT_EQ(c0->parquetType_.value(), thrift::Type::INT32); auto c1 = std::dynamic_pointer_cast(types[1]); - EXPECT_EQ(c1->parquetType_.value(), thrift::Type::type::INT64); + EXPECT_EQ(c1->parquetType_.value(), thrift::Type::INT64); auto c2 = std::dynamic_pointer_cast(types[2]); - EXPECT_EQ(c2->parquetType_.value(), thrift::Type::type::FIXED_LEN_BYTE_ARRAY); + EXPECT_EQ(c2->parquetType_.value(), thrift::Type::FIXED_LEN_BYTE_ARRAY); } TEST_F(E2EFilterTest, configurableWriteSchema) { diff --git a/velox/dwio/parquet/tests/reader/ParquetPageReaderTest.cpp b/velox/dwio/parquet/tests/reader/ParquetPageReaderTest.cpp index bda28a60ee1..2ebe7aa9d9e 100644 --- a/velox/dwio/parquet/tests/reader/ParquetPageReaderTest.cpp +++ 
b/velox/dwio/parquet/tests/reader/ParquetPageReaderTest.cpp @@ -16,11 +16,10 @@ #include "velox/dwio/parquet/reader/PageReader.h" -#include -#include +#include #include "velox/common/base/tests/GTestUtils.h" #include "velox/dwio/parquet/tests/ParquetTestBase.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" using namespace facebook::velox; using namespace facebook::velox::common; @@ -44,17 +43,17 @@ TEST_F(ParquetPageReaderTest, smallPage) { headerSize, stats); auto header = pageReader->readPageHeader(); - EXPECT_EQ(header.type, thrift::PageType::type::DATA_PAGE); - EXPECT_EQ(header.uncompressed_page_size, 16950); - EXPECT_EQ(header.compressed_page_size, 10759); - EXPECT_EQ(header.data_page_header.num_values, 21738); + EXPECT_EQ(*header.type(), thrift::PageType::DATA_PAGE); + EXPECT_EQ(*header.uncompressed_page_size(), 16950); + EXPECT_EQ(*header.compressed_page_size(), 10759); + EXPECT_EQ(*header.data_page_header()->num_values(), 21738); // expectedMinValue: "aaaa...aaaa" std::string expectedMinValue(39, 'a'); // expectedMaxValue: "zzzz...zzzz" std::string expectedMaxValue(49, 'z'); - auto minValue = header.data_page_header.statistics.min_value; - auto maxValue = header.data_page_header.statistics.max_value; + auto minValue = *header.data_page_header()->statistics()->min_value(); + auto maxValue = *header.data_page_header()->statistics()->max_value(); EXPECT_EQ(minValue, expectedMinValue); EXPECT_EQ(maxValue, expectedMaxValue); EXPECT_GT(stats.pageLoadTimeNs.sum(), 0); @@ -76,17 +75,17 @@ TEST_F(ParquetPageReaderTest, largePage) { stats); auto header = pageReader->readPageHeader(); - EXPECT_EQ(header.type, thrift::PageType::type::DATA_PAGE); - EXPECT_EQ(header.uncompressed_page_size, 1050822); - EXPECT_EQ(header.compressed_page_size, 66759); - EXPECT_EQ(header.data_page_header.num_values, 970); + EXPECT_EQ(*header.type(), thrift::PageType::DATA_PAGE); + EXPECT_EQ(*header.uncompressed_page_size(), 
1050822); + EXPECT_EQ(*header.compressed_page_size(), 66759); + EXPECT_EQ(*header.data_page_header()->num_values(), 970); // expectedMinValue: "aaaa...aaaa" std::string expectedMinValue(1295, 'a'); // expectedMinValue: "zzzz...zzzz" std::string expectedMaxValue(2255, 'z'); - auto minValue = header.data_page_header.statistics.min_value; - auto maxValue = header.data_page_header.statistics.max_value; + auto minValue = *header.data_page_header()->statistics()->min_value(); + auto maxValue = *header.data_page_header()->statistics()->max_value(); EXPECT_EQ(minValue, expectedMinValue); EXPECT_EQ(maxValue, expectedMaxValue); EXPECT_GT(stats.pageLoadTimeNs.sum(), 0); @@ -122,8 +121,80 @@ TEST(CompressionOptionsTest, testCompressionOptions) { dwio::common::compression::Compressor::PARQUET_ZLIB_WINDOW_BITS); } +namespace { + +// Helper to serialize a PageHeader using Thrift compact protocol. +std::string serializePageHeader(const thrift::PageHeader& header) { + return apache::thrift::CompactSerializer::serialize(header); +} + +// Helper to create a DATA_PAGE header with specified sizes. +thrift::PageHeader createDataPageV1Header( + int32_t uncompressedSize, + int32_t compressedSize, + int32_t numValues) { + thrift::PageHeader header; + header.type() = thrift::PageType::DATA_PAGE; + header.uncompressed_page_size() = uncompressedSize; + header.compressed_page_size() = compressedSize; + + thrift::DataPageHeader dataHeader; + dataHeader.num_values() = numValues; + dataHeader.encoding() = thrift::Encoding::PLAIN; + dataHeader.definition_level_encoding() = thrift::Encoding::RLE; + dataHeader.repetition_level_encoding() = thrift::Encoding::RLE; + header.data_page_header() = dataHeader; + + return header; +} + +// Helper to create a DATA_PAGE_V2 header with specified sizes. 
+thrift::PageHeader createDataPageV2Header( + int32_t uncompressedSize, + int32_t compressedSize, + int32_t numValues, + int32_t definitionLevelsByteLength, + int32_t repetitionLevelsByteLength) { + thrift::PageHeader header; + header.type() = thrift::PageType::DATA_PAGE_V2; + header.uncompressed_page_size() = uncompressedSize; + header.compressed_page_size() = compressedSize; + + thrift::DataPageHeaderV2 dataHeader; + dataHeader.num_values() = numValues; + dataHeader.num_nulls() = 0; + dataHeader.num_rows() = numValues; + dataHeader.encoding() = thrift::Encoding::PLAIN; + dataHeader.definition_levels_byte_length() = definitionLevelsByteLength; + dataHeader.repetition_levels_byte_length() = repetitionLevelsByteLength; + dataHeader.is_compressed() = false; + header.data_page_header_v2() = dataHeader; + + return header; +} + +/// Helper to create a DICTIONARY_PAGE header with specified sizes. +thrift::PageHeader createDictionaryPageHeader( + int32_t uncompressedSize, + int32_t compressedSize, + int32_t numValues) { + thrift::PageHeader header; + header.type() = thrift::PageType::DICTIONARY_PAGE; + header.uncompressed_page_size() = uncompressedSize; + header.compressed_page_size() = compressedSize; + + thrift::DictionaryPageHeader dictPageHeader; + dictPageHeader.num_values() = numValues; + dictPageHeader.encoding() = thrift::Encoding::PLAIN; + header.dictionary_page_header() = dictPageHeader; + + return header; +} + +} // namespace + // Test that prepareDictionary rejects FIXED_LEN_BYTE_ARRAY dictionary pages -// where the Parquet type length exceeds the Velox type length. This guards +// where the Parquet type length exceeds the Velox type length. This guards // against heap buffer overflow from malicious Parquet files that have a patched // precision (e.g. decimal128 with typeLength=16 but precision lowered to make // Velox choose int64_t with cppSizeInBytes=8). 
@@ -137,21 +208,9 @@ TEST_F(ParquetPageReaderTest, fixedLenByteArrayDictOverflow) { constexpr int32_t kDictPageSize = kNumDictValues * kParquetTypeLength; // Create a DICTIONARY_PAGE header. - thrift::PageHeader dictHeader; - dictHeader.__set_type(thrift::PageType::DICTIONARY_PAGE); - dictHeader.__set_uncompressed_page_size(kDictPageSize); - dictHeader.__set_compressed_page_size(kDictPageSize); - thrift::DictionaryPageHeader dictPageHeader; - dictPageHeader.__set_num_values(kNumDictValues); - dictPageHeader.__set_encoding(thrift::Encoding::PLAIN); - dictHeader.__set_dictionary_page_header(dictPageHeader); - - auto transport = std::make_shared(); - apache::thrift::protocol::TCompactProtocolT< - apache::thrift::transport::TMemoryBuffer> - protocol(transport); - dictHeader.write(&protocol); - std::string dictHeaderBytes = transport->getBufferAsString(); + auto dictHeader = + createDictionaryPageHeader(kDictPageSize, kDictPageSize, kNumDictValues); + std::string dictHeaderBytes = serializePageHeader(dictHeader); // Dictionary page data (content doesn't matter, check fires before read). std::string dictPageData(kDictPageSize, '\0'); @@ -160,23 +219,17 @@ TEST_F(ParquetPageReaderTest, fixedLenByteArrayDictOverflow) { // dictionary page. 
constexpr int32_t kDataPageSize = 8; thrift::PageHeader dataHeader; - dataHeader.__set_type(thrift::PageType::DATA_PAGE); - dataHeader.__set_uncompressed_page_size(kDataPageSize); - dataHeader.__set_compressed_page_size(kDataPageSize); + dataHeader.type() = thrift::PageType::DATA_PAGE; + dataHeader.uncompressed_page_size() = kDataPageSize; + dataHeader.compressed_page_size() = kDataPageSize; thrift::DataPageHeader dataPageHeader; - dataPageHeader.__set_num_values(1); - dataPageHeader.__set_encoding(thrift::Encoding::RLE_DICTIONARY); - dataPageHeader.__set_definition_level_encoding(thrift::Encoding::RLE); - dataPageHeader.__set_repetition_level_encoding(thrift::Encoding::RLE); - dataHeader.__set_data_page_header(dataPageHeader); - - auto transport2 = - std::make_shared(); - apache::thrift::protocol::TCompactProtocolT< - apache::thrift::transport::TMemoryBuffer> - protocol2(transport2); - dataHeader.write(&protocol2); - std::string dataHeaderBytes = transport2->getBufferAsString(); + dataPageHeader.num_values() = 1; + dataPageHeader.encoding() = thrift::Encoding::RLE_DICTIONARY; + dataPageHeader.definition_level_encoding() = thrift::Encoding::RLE; + dataPageHeader.repetition_level_encoding() = thrift::Encoding::RLE; + dataHeader.data_page_header() = dataPageHeader; + + std::string dataHeaderBytes = serializePageHeader(dataHeader); std::string dataPageData(kDataPageSize, '\0'); @@ -223,65 +276,30 @@ TEST_F(ParquetPageReaderTest, fixedLenByteArrayDictOverflow) { VELOX_ASSERT_THROW(pageReader->skip(1), ""); } -namespace { - -// Helper to serialize a PageHeader using Thrift compact protocol. -std::string serializePageHeader(const thrift::PageHeader& header) { - auto transport = std::make_shared(); - apache::thrift::protocol::TCompactProtocolT< - apache::thrift::transport::TMemoryBuffer> - protocol(transport); - header.write(&protocol); - return transport->getBufferAsString(); -} - -// Helper to create a DATA_PAGE header with specified sizes. 
-thrift::PageHeader createDataPageV1Header( - int32_t uncompressedSize, - int32_t compressedSize, - int32_t numValues) { - thrift::PageHeader header; - header.__set_type(thrift::PageType::DATA_PAGE); - header.__set_uncompressed_page_size(uncompressedSize); - header.__set_compressed_page_size(compressedSize); - - thrift::DataPageHeader dataHeader; - dataHeader.__set_num_values(numValues); - dataHeader.__set_encoding(thrift::Encoding::PLAIN); - dataHeader.__set_definition_level_encoding(thrift::Encoding::RLE); - dataHeader.__set_repetition_level_encoding(thrift::Encoding::RLE); - header.__set_data_page_header(dataHeader); - - return header; -} - -// Helper to create a DATA_PAGE_V2 header with specified sizes. -thrift::PageHeader createDataPageV2Header( - int32_t uncompressedSize, - int32_t compressedSize, - int32_t numValues, - int32_t definitionLevelsByteLength, - int32_t repetitionLevelsByteLength) { - thrift::PageHeader header; - header.__set_type(thrift::PageType::DATA_PAGE_V2); - header.__set_uncompressed_page_size(uncompressedSize); - header.__set_compressed_page_size(compressedSize); - - thrift::DataPageHeaderV2 dataHeader; - dataHeader.__set_num_values(numValues); - dataHeader.__set_num_nulls(0); - dataHeader.__set_num_rows(numValues); - dataHeader.__set_encoding(thrift::Encoding::PLAIN); - dataHeader.__set_definition_levels_byte_length(definitionLevelsByteLength); - dataHeader.__set_repetition_levels_byte_length(repetitionLevelsByteLength); - dataHeader.__set_is_compressed(false); - header.__set_data_page_header_v2(dataHeader); +// Example test demonstrating proper FBThrift dictionary page creation. +// This serves as a reference for converting any OSS-specific tests. 
+TEST_F(ParquetPageReaderTest, dictionaryPageExample) { + constexpr int32_t kDictPageSize = 100; + constexpr int32_t kNumDictValues = 10; + + // Create dictionary page header using FBThrift API + auto dictHeader = + createDictionaryPageHeader(kDictPageSize, kDictPageSize, kNumDictValues); + + // Verify the header was created correctly + EXPECT_EQ(*dictHeader.type(), thrift::PageType::DICTIONARY_PAGE); + EXPECT_EQ(*dictHeader.uncompressed_page_size(), kDictPageSize); + EXPECT_EQ(*dictHeader.compressed_page_size(), kDictPageSize); + EXPECT_EQ(*dictHeader.dictionary_page_header()->num_values(), kNumDictValues); + EXPECT_EQ( + *dictHeader.dictionary_page_header()->encoding(), + thrift::Encoding::PLAIN); - return header; + // Serialize using FBThrift CompactSerializer + std::string serialized = serializePageHeader(dictHeader); + EXPECT_GT(serialized.size(), 0); } -} // namespace - // Test that prepareDataPageV1 rejects pages with defineLength exceeding page // size. This guards against heap buffer overflow from corrupt Parquet files. 
TEST_F(ParquetPageReaderTest, corruptDefineLengthV1) { diff --git a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp index bc4e1a710f2..abd9143c389 100644 --- a/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp +++ b/velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp @@ -19,7 +19,7 @@ #include "velox/dwio/parquet/reader/ParquetStatsContext.h" #include "velox/dwio/parquet/reader/SemanticVersion.h" #include "velox/dwio/parquet/tests/ParquetTestBase.h" -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include "velox/expression/ExprToSubfieldFilter.h" #include "velox/vector/tests/utils/VectorMaker.h" diff --git a/velox/dwio/parquet/tests/thrift/CMakeLists.txt b/velox/dwio/parquet/tests/thrift/CMakeLists.txt deleted file mode 100644 index 65f89cc6935..00000000000 --- a/velox/dwio/parquet/tests/thrift/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -add_executable(velox_dwio_parquet_thrift_test ThriftTransportTest.cpp) - -add_test(velox_dwio_parquet_thrift_test velox_dwio_parquet_thrift_test) -target_link_libraries( - velox_dwio_parquet_thrift_test - arrow - thrift - velox_link_libs - GTest::gtest - GTest::gtest_main -) diff --git a/velox/dwio/parquet/tests/thrift/ThriftTransportTest.cpp b/velox/dwio/parquet/tests/thrift/ThriftTransportTest.cpp deleted file mode 100644 index 489b721aff6..00000000000 --- a/velox/dwio/parquet/tests/thrift/ThriftTransportTest.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "velox/dwio/parquet/thrift/ThriftTransport.h" -#include -#include - -using namespace facebook::velox; -using namespace facebook::velox::dwio::common; -using namespace facebook::velox::parquet::thrift; - -class ThriftTransportTest : public testing::Test { - protected: - void SetUp() override { - input_.resize(bufferSize_); - output_.resize(bufferSize_); - for (size_t i = 0; i < input_.size(); ++i) { - input_[i] = static_cast(i); - } - } - - void prepareThriftStreamingTransport() { - inputStream_ = std::make_shared( - input_.data(), input_.size(), 20); - int32_t batchSize_; - const void* bufferPointer; - if (!inputStream_->Next(&bufferPointer, &batchSize_)) { - VELOX_CHECK(false, "Reading past end"); - } - bufferStart_ = static_cast(bufferPointer); - bufferEnd_ = bufferStart_ + batchSize_; - transport_ = std::make_shared( - inputStream_.get(), bufferStart_, bufferEnd_); - } - - void prepareThriftBufferedTransport() { - transport_ = - std::make_shared(input_.data(), bufferSize_); - } - - static constexpr uint32_t bufferSize_ = 200; - static constexpr uint32_t batchSize_ = 20; - std::vector input_; - std::vector output_; - const char* bufferStart_{nullptr}; - const char* bufferEnd_{nullptr}; - std::shared_ptr inputStream_; - std::shared_ptr transport_; -}; - -TEST_F(ThriftTransportTest, streaming) { - prepareThriftStreamingTransport(); - transport_->read(output_.data(), 10); - transport_->read(output_.data() + 10, 50); - transport_->read(output_.data() + 60, 140); - - for (size_t i = 0; i < input_.size(); ++i) { - VELOX_CHECK_EQ(input_[i], output_[i]); - } -} - -TEST_F(ThriftTransportTest, streamingOutOfBoundry) { - prepareThriftStreamingTransport(); - transport_->read(output_.data(), 10); - transport_->read(output_.data() + 10, 50); - transport_->read(output_.data() + 60, 140); - - // The whole inputStream_ is consumed. 
- EXPECT_ANY_THROW(transport_->read(output_.data() + bufferSize_, 1)); -} - -TEST_F(ThriftTransportTest, buffered) { - prepareThriftBufferedTransport(); - transport_->read(output_.data(), 10); - transport_->read(output_.data() + 10, 50); - transport_->read(output_.data() + 60, 140); - - for (size_t i = 0; i < input_.size(); ++i) { - VELOX_CHECK_EQ(input_[i], output_[i]); - } -} - -TEST_F(ThriftTransportTest, bufferedOutOfBoundry) { - prepareThriftStreamingTransport(); - transport_->read(output_.data(), 10); - transport_->read(output_.data() + 10, 50); - transport_->read(output_.data() + 60, 140); - - // The whole inputStream_ is consumed. - EXPECT_ANY_THROW(transport_->read(output_.data() + bufferSize_, 1)); -} - -// Define main so that gflags get processed. -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - folly::Init init{&argc, &argv, false}; - return RUN_ALL_TESTS(); -} diff --git a/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp b/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp index 6a0a0c18cc1..976b28bc6d6 100644 --- a/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp +++ b/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp @@ -214,15 +214,15 @@ TEST_F(ParquetWriterTest, dictionaryEncodingWithDictionaryPageSize) { testEnableDictionaryAndDictionaryPageSizeToGetPageHeader( defaultConfigFromFile, defaultSessionPropertiesFromFile, true); // We use the default version of data page (V1) - EXPECT_EQ(defaultHeader.type, thrift::PageType::type::DATA_PAGE); + EXPECT_EQ(*defaultHeader.type(), thrift::PageType::DATA_PAGE); // Dictionary encoding is enabled as default EXPECT_EQ( - defaultHeader.data_page_header.encoding, + *defaultHeader.data_page_header()->encoding(), thrift::Encoding::RLE_DICTIONARY); // Default dictionary page size is 1MB (same as data page size), so it can // contain a dictionary for all values. 
So all data will be in the first // data page - EXPECT_EQ(defaultHeader.data_page_header.num_values, kRows); + EXPECT_EQ(*defaultHeader.data_page_header()->num_values(), kRows); // Test normal config @@ -252,9 +252,10 @@ TEST_F(ParquetWriterTest, dictionaryEncodingWithDictionaryPageSize) { normalConfigFromFile, normalSessionProperties, false); // We use the default version of data page (V1) - EXPECT_EQ(normalHeader.type, thrift::PageType::type::DATA_PAGE); + EXPECT_EQ(*normalHeader.type(), thrift::PageType::DATA_PAGE); // The second data page will fall back to PLAIN encoding - EXPECT_EQ(normalHeader.data_page_header.encoding, thrift::Encoding::PLAIN); + EXPECT_EQ( + *normalHeader.data_page_header()->encoding(), thrift::Encoding::PLAIN); // Test incorrect enable dictionary config @@ -338,14 +339,15 @@ TEST_F(ParquetWriterTest, dictionaryEncodingOff) { withoutPageSizeConfigFromFile, withoutPageSizeSessionProperties); // We use the default version of data page (V1) - EXPECT_EQ(withoutPageSizeHeader.type, thrift::PageType::type::DATA_PAGE); + EXPECT_EQ(*withoutPageSizeHeader.type(), thrift::PageType::DATA_PAGE); // Since we turn off the dictionary encoding, and the default data page size // is 1MB, there is only one page, and its encoding should be PLAIN, which // means the configuration is applied EXPECT_EQ( - withoutPageSizeHeader.data_page_header.encoding, thrift::Encoding::PLAIN); + *withoutPageSizeHeader.data_page_header()->encoding(), + thrift::Encoding::PLAIN); // All rows will be on the only data page, this is a sanity check - EXPECT_EQ(withoutPageSizeHeader.data_page_header.num_values, kRows); + EXPECT_EQ(*withoutPageSizeHeader.data_page_header()->num_values(), kRows); // Test dictionary off but with dictionary page size configured @@ -370,10 +372,11 @@ TEST_F(ParquetWriterTest, dictionaryEncodingOff) { // Should be the same as without dictionary page size configured, because // when the dictionary is disabled, the dictionary page silze is meaningless - 
EXPECT_EQ(withPageSizeHeader.type, thrift::PageType::type::DATA_PAGE); + EXPECT_EQ(*withPageSizeHeader.type(), thrift::PageType::DATA_PAGE); EXPECT_EQ( - withPageSizeHeader.data_page_header.encoding, thrift::Encoding::PLAIN); - EXPECT_EQ(withPageSizeHeader.data_page_header.num_values, kRows); + *withPageSizeHeader.data_page_header()->encoding(), + thrift::Encoding::PLAIN); + EXPECT_EQ(*withPageSizeHeader.data_page_header()->num_values(), kRows); } TEST_F(ParquetWriterTest, compression) { @@ -448,16 +451,17 @@ TEST_F(ParquetWriterTest, testPageSizeAndBatchSizeConfiguration) { const auto defaultHeader = testPageSizeAndBatchSizeToGetPageHeader( defaultConfigFromFile, defaultSessionPropertiesFromFile); // We use the default version of data page (V1) - EXPECT_EQ(defaultHeader.type, thrift::PageType::type::DATA_PAGE); + EXPECT_EQ(*defaultHeader.type(), thrift::PageType::DATA_PAGE); // We don't use compressor here EXPECT_EQ( - defaultHeader.uncompressed_page_size, defaultHeader.compressed_page_size); + *defaultHeader.uncompressed_page_size(), + *defaultHeader.compressed_page_size()); // The default page size is 1MB, which can actually contains all data in one // page - EXPECT_EQ(defaultHeader.compressed_page_size, 17529); + EXPECT_EQ(*defaultHeader.compressed_page_size(), 17529); // As mentioned above, the default page size can contain all data in one page // so the number of values of the first page equals to the total number - EXPECT_EQ(defaultHeader.data_page_header.num_values, kRows); + EXPECT_EQ(*defaultHeader.data_page_header()->num_values(), kRows); // Test normal config @@ -479,14 +483,15 @@ TEST_F(ParquetWriterTest, testPageSizeAndBatchSizeConfiguration) { const auto normalHeader = testPageSizeAndBatchSizeToGetPageHeader( normalConfigFromFile, normalSessionProperties); // We use the default version of data page (V1) - EXPECT_EQ(normalHeader.type, thrift::PageType::type::DATA_PAGE); + EXPECT_EQ(*normalHeader.type(), thrift::PageType::DATA_PAGE); // We don't use 
compressor here EXPECT_EQ( - normalHeader.uncompressed_page_size, normalHeader.compressed_page_size); + *normalHeader.uncompressed_page_size(), + *normalHeader.compressed_page_size()); // 1485B < 2KB < 1MB, which means the page size is applied (default is 1MB) - EXPECT_EQ(normalHeader.compressed_page_size, 1485); + EXPECT_EQ(*normalHeader.compressed_page_size(), 1485); // 1067 % 97 == 0, which means the batch size is applied (default is 1024) - EXPECT_EQ(normalHeader.data_page_header.num_values, 1067); + EXPECT_EQ(*normalHeader.data_page_header()->num_values(), 1067); // Test incorrect page size config @@ -545,14 +550,15 @@ TEST_F(ParquetWriterTest, toggleDataPageVersion) { // (thrift::PageType::type) used. const auto testDataPageVersion = [&](std::unordered_map configFromFile, - std::unordered_map sessionProperties) { - auto* sinkPtr = write( - data, std::move(configFromFile), std::move(sessionProperties)); - return readPageHeader(sinkPtr, 0).type; - }; + std::unordered_map sessionProperties) + -> thrift::PageType { + auto* sinkPtr = + write(data, std::move(configFromFile), std::move(sessionProperties)); + return *readPageHeader(sinkPtr, 0).type(); + }; // Test default behavior - DataPage should be V1. - ASSERT_EQ(testDataPageVersion({}, {}), thrift::PageType::type::DATA_PAGE); + ASSERT_EQ(testDataPageVersion({}, {}), thrift::PageType::DATA_PAGE); // Simulate setting DataPage version to V2 via Hive config from file. std::unordered_map configFromFile = { @@ -561,8 +567,7 @@ TEST_F(ParquetWriterTest, toggleDataPageVersion) { "V2"}}; ASSERT_EQ( - testDataPageVersion(configFromFile, {}), - thrift::PageType::type::DATA_PAGE_V2); + testDataPageVersion(configFromFile, {}), thrift::PageType::DATA_PAGE_V2); // Simulate setting DataPage version to V1 via Hive config from file. 
configFromFile = { @@ -571,8 +576,7 @@ TEST_F(ParquetWriterTest, toggleDataPageVersion) { "V1"}}; ASSERT_EQ( - testDataPageVersion(configFromFile, {}), - thrift::PageType::type::DATA_PAGE); + testDataPageVersion(configFromFile, {}), thrift::PageType::DATA_PAGE); // Simulate setting DataPage version to V2 via connector session property. std::unordered_map sessionProperties = { @@ -580,14 +584,13 @@ TEST_F(ParquetWriterTest, toggleDataPageVersion) { ASSERT_EQ( testDataPageVersion({}, sessionProperties), - thrift::PageType::type::DATA_PAGE_V2); + thrift::PageType::DATA_PAGE_V2); // Simulate setting DataPage version to V1 via connector session property. sessionProperties = {{parquet::WriterOptions::kParquetDataPageVersion, "V1"}}; ASSERT_EQ( - testDataPageVersion({}, sessionProperties), - thrift::PageType::type::DATA_PAGE); + testDataPageVersion({}, sessionProperties), thrift::PageType::DATA_PAGE); // Simulate setting DataPage version to V1 via connector session property, // and to V2 via Hive config from file. Session property should take @@ -599,8 +602,7 @@ TEST_F(ParquetWriterTest, toggleDataPageVersion) { "V2"}}; ASSERT_EQ( - testDataPageVersion({}, sessionProperties), - thrift::PageType::type::DATA_PAGE); + testDataPageVersion({}, sessionProperties), thrift::PageType::DATA_PAGE); // Simulate setting DataPage version to V2 via connector session property, // and to V1 via Hive config from file. 
Session property should take @@ -613,7 +615,7 @@ TEST_F(ParquetWriterTest, toggleDataPageVersion) { ASSERT_EQ( testDataPageVersion({}, sessionProperties), - thrift::PageType::type::DATA_PAGE_V2); + thrift::PageType::DATA_PAGE_V2); } DEBUG_ONLY_TEST_F(ParquetWriterTest, unitFromWriterOptions) { diff --git a/velox/dwio/parquet/thrift/CMakeLists.txt b/velox/dwio/parquet/thrift/CMakeLists.txt index 57aed73c01a..0bb6a9c74ea 100644 --- a/velox/dwio/parquet/thrift/CMakeLists.txt +++ b/velox/dwio/parquet/thrift/CMakeLists.txt @@ -12,11 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -velox_add_library( +include(FBThriftCppLibrary) + +add_fbthrift_cpp_library(velox_dwio_parquet_thrift_raw parquet.thrift) +add_library(velox_dwio_parquet_thrift INTERFACE) +target_link_libraries(velox_dwio_parquet_thrift INTERFACE velox_dwio_parquet_thrift_raw) +velox_add_test_headers( velox_dwio_parquet_thrift - ParquetThriftTypes.cpp - HEADERS - ParquetThriftTypes.h - ThriftTransport.h + CompactV1ProtocolReaderWithRefill.h + ParquetThrift.h ) -velox_link_libraries(velox_dwio_parquet_thrift arrow thrift Boost::headers fmt::fmt) +if(VELOX_MONO_LIBRARY) + target_link_libraries(velox velox_dwio_parquet_thrift_raw) +endif() + +velox_install_library_headers() +get_target_property(generated_headers velox_dwio_parquet_thrift_raw PUBLIC_HEADER) +get_target_property(header_install_dir velox_dwio_parquet_thrift_raw HEADER_INSTALL_DIR) +install(FILES ${generated_headers} DESTINATION ${header_install_dir}) diff --git a/velox/dwio/parquet/thrift/CompactV1ProtocolReaderWithRefill.h b/velox/dwio/parquet/thrift/CompactV1ProtocolReaderWithRefill.h new file mode 100644 index 00000000000..2dea4c674a6 --- /dev/null +++ b/velox/dwio/parquet/thrift/CompactV1ProtocolReaderWithRefill.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace apache::thrift { + +using VirtualCompactV1Reader = + ProtocolReaderWithRefill; + +// Note this is a copy of the CompactProtocolReaderWithRefill with a change to +// the template of the actual protocol reader to use CompactV1ProtocolReader. +// All other code is identical. +class CompactV1ProtocolReaderWithRefill : public VirtualCompactV1Reader { + public: + explicit CompactV1ProtocolReaderWithRefill(Refiller refiller) + : VirtualCompactV1Reader(std::move(refiller)) {} + + inline void readMessageBegin( + std::string& /* name */, + MessageType& /* messageType */, + int32_t& /* seqid */) override { + // Only called in python so leave it unimplemented. 
+ throw std::runtime_error("not implemented"); + } + + inline void readFieldBegin( + std::string& name, + TType& fieldType, + int16_t& fieldId) override { + ensureFieldBegin(); + protocol_.readFieldBegin(name, fieldType, fieldId); + } + + inline void readMapBegin(TType& keyType, TType& valType, uint32_t& size) + override { + ensureMapBegin(); + protocol_.readMapBegin(keyType, valType, size); + } + + inline void readListBegin(TType& elemType, uint32_t& size) override { + ensureListBegin(); + protocol_.readListBegin(elemType, size); + } + + inline void readBool(bool& value) override { + if (!protocol_.boolValue_.hasBoolValue) { + ensureBuffer(1); + } + protocol_.readBool(value); + } + + inline void readBool(std::vector::reference value) override { + bool ret = false; + readBool(ret); + value = ret; + } + + inline void readByte(int8_t& byte) override { + ensureBuffer(1); + protocol_.readByte(byte); + } + + inline void readI16(int16_t& i16) override { + ensureInteger(); + protocol_.readI16(i16); + } + + inline void readI32(int32_t& i32) override { + ensureInteger(); + protocol_.readI32(i32); + } + + inline void readI64(int64_t& i64) override { + ensureInteger(); + protocol_.readI64(i64); + } + + inline void readDouble(double& dub) override { + ensureBuffer(8); + protocol_.readDouble(dub); + } + + inline void readFloat(float& flt) override { + ensureBuffer(4); + protocol_.readFloat(flt); + } + + inline void readString(std::string& str) override { + readStringImpl(str); + } + + inline void readString(folly::fbstring& str) override { + readStringImpl(str); + } + + inline void readBinary(std::string& str) override { + readStringImpl(str); + } + + inline void readBinary(folly::fbstring& str) override { + readStringImpl(str); + } + + inline void readBinary(apache::thrift::detail::SkipNoopString& str) override { + readStringImpl(str); + } + + inline void readBinary(std::unique_ptr& str) override { + readBinaryIOBufImpl(str); + } + + inline void readBinary(folly::IOBuf& str) 
override { + readBinaryIOBufImpl(str); + } + + inline void skip(TType type, int depth = 0) override { + apache::thrift::skip(*this, type, depth); + } + + inline void skipBytes(size_t bytes) override { + ensureBuffer(bytes); + protocol_.skipBytes(bytes); + } + + private: + /** + * Make sure a varint can be read from the current buffer after idx bytes. + * If not, call the refiller to read more bytes. + * + * A varint is stored with up to 10 bytes and only the last byte's + * MSB isn't set. If the current buffer size is >= idx + 10, return. The + * following call to readVarint may still fail if the first 10 bytes + * all have MSB set, but it's not the problem to be addressed here. + * + * Otherwise, check if a byte with MSB not set can be found. If so, return. + * Otherwise, call the refiller to ask for 1 more byte because the exact + * size of the varint is still unknown but at least 1 more byte is required. + * A sane transport reads more data even if asked for just 1 byte so this + * should not cause any performance problem. After the new buffer is ready, + * start all over again. + **/ + void ensureInteger(size_t idx = 0) { + while (protocol_.in_.length() - idx < 10) { + if (protocol_.in_.length() <= idx) { + ensureBuffer(idx + 1); + } else { + auto avail = protocol_.in_.peekBytes(); + const uint8_t* b = avail.data() + idx; + while (idx < avail.size()) { + if (!(*b++ & 0x80)) { + return; + } + idx++; + } + + ensureBuffer(avail.size() + 1); + } + } + } + + void ensureFieldBegin() { + // Fast path: at most 4 bytes are needed to read field begin. + if (protocol_.in_.length() >= 4) { + return; + } + + // At least 1 byte is needed to read ftype. 
+ ensureBuffer(1); + if (protocol_.in_.length() >= 4) { + return; + } + auto avail = protocol_.in_.peekBytes(); + const uint8_t* b = avail.data(); + int8_t byte = folly::Endian::big(*b); + int8_t type = (byte & 0x0f); + + if (type == TType::T_STOP) { + return; + } + + int16_t modifier = (int16_t)(((uint8_t)byte & 0xf0) >> 4); + if (modifier == 0) { + ensureInteger(1); + } + } + + void ensureMapBegin() { + // Fast path: at most 11 bytes are needed to read map begin. + if (protocol_.in_.length() >= 11) { + return; + } + + ensureInteger(); + if (protocol_.in_.length() >= 11) { + return; + } + + auto avail = protocol_.in_.peekBytes(); + const uint8_t* b = avail.data(); + size_t bytes = 1; + while (bytes <= avail.size()) { + if (!(*b++ & 0x80)) { + break; + } + bytes++; + } + // Non-empty maps have an additional byte for the key/value type. + if (bytes == avail.size() && *avail.data()) { + ensureBuffer(avail.size() + 1); + } + } + + void ensureListBegin() { + // Fast path: at most 11 bytes are needed to read list begin. 
+ if (protocol_.in_.length() >= 11) { + return; + } + + ensureBuffer(1); + auto avail = protocol_.in_.peekBytes(); + const uint8_t* b = avail.data(); + int8_t size_and_type = folly::Endian::big(*b); + int32_t lsize = ((uint8_t)size_and_type >> 4) & 0x0f; + if (lsize == 15) { + ensureInteger(1); + } + } + + template + void readStringImpl(StrType& str) { + ensureInteger(); + int32_t size = 0; + protocol_.readStringSize(size); + + ensureBuffer(size); + protocol_.readStringBody(str, size); + } + + template + void readBinaryIOBufImpl(StrType& str) { + ensureInteger(); + int32_t size = 0; + protocol_.readStringSize(size); + + ensureBuffer(size); + protocol_.in_.clone(str, size); + } +}; + +template <> +inline bool canReadNElements( + CompactV1ProtocolReaderWithRefill& /* prot */, + uint32_t /* n */, + std::initializer_list /* types */) { + return true; +} + +} // namespace apache::thrift diff --git a/velox/dwio/parquet/thrift/ParquetThrift.h b/velox/dwio/parquet/thrift/ParquetThrift.h new file mode 100644 index 00000000000..96d867d4a35 --- /dev/null +++ b/velox/dwio/parquet/thrift/ParquetThrift.h @@ -0,0 +1,336 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include "velox/common/base/Exceptions.h" +#include "velox/dwio/common/SeekableInputStream.h" +#include "velox/dwio/parquet/thrift/CompactV1ProtocolReaderWithRefill.h" +#include "velox/dwio/parquet/thrift/gen-cpp2/parquet_types.h" +#include "velox/dwio/parquet/thrift/gen-cpp2/parquet_types_custom_protocol.h" + +namespace facebook::velox::parquet::thrift { +template < + typename Enum, + bool IsEnum = + std::is_same_v::type, Enum>> +fmt::underlying_t format_as(Enum value) { + return fmt::underlying(value); +} + +template < + typename Enum, + bool IsEnum = + std::is_same_v::type, Enum>> +std::ostream& operator<<(std::ostream& os, const Enum& value) { + std::string_view name; + if (apache::thrift::TEnumTraits::findName(value, &name)) { + os.write(name.data(), name.size()); + } else { + os << static_cast(value); + } + return os; +} + +template +unsigned long deserialize(ThriftStruct* thriftStruct, std::string_view data) { + apache::thrift::CompactV1ProtocolReader reader; + folly::IOBuf buffer( + folly::IOBuf::WRAP_BUFFER, + folly::ByteRange( + reinterpret_cast(data.data()), data.size())); + reader.setInput(&buffer); + try { + return thriftStruct->read(&reader); + } catch (apache::thrift::protocol::TProtocolException& e) { + VELOX_FAIL("Thrift deserialize error: {}", e.what()); + } +} + +struct DeserializeResult { + unsigned long readBytes; + const uint8_t* remainedData; + size_t remainedDataBytes; + uint64_t readUs; + // Holds the last buffer read from the refiller to keep remainedData valid. + std::unique_ptr lastBuffer; + // Track if we consumed data from the initial buffer or needed refills. + bool usedRefiller; + // If we used the refiller, store the actual stream position data. 
+ const void* streamData; + int32_t streamDataBytes; +}; + +struct StreamReader { + facebook::velox::dwio::common::SeekableInputStream* input; + uint64_t& totalReadUs; + const void*& lastStreamData; + int32_t& lastStreamDataBytes; + + bool readNext(const void** data, int32_t* dataBytes) { + bool haveData; + uint64_t readUs{0}; + { + MicrosecondTimer timer(&readUs); + haveData = input->Next(data, dataBytes); + } + totalReadUs += readUs; + // Track the last data read from stream + if (haveData) { + lastStreamData = *data; + lastStreamDataBytes = *dataBytes; + } + return haveData; + } +}; + +// Ensures we have initial data to start deserialization. +// If no initial data is provided, reads from the stream. +// Returns pair of (data pointer, size); outputs are pre-initialized so a failed read never yields indeterminate values. +inline std::pair ensureInitialData( + StreamReader& reader, + const uint8_t* initialData, + size_t initialDataBytes) { + if (initialDataBytes > 0) { + return {initialData, initialDataBytes}; + } + + const void* buffer{nullptr}; + int32_t size{0}; + reader.readNext(&buffer, &size); + return {reinterpret_cast(buffer), static_cast(size)}; +} + +inline size_t calculateConsumedBytes( + bool usedRefiller, + size_t readBytes, + int32_t totalBytesReadBeforeRefill, + const uint8_t* coalescedBufferStart, + size_t coalescedBufferSize, + const uint8_t* remainedData) { + if (!usedRefiller) { + return readBytes; + } + + if (!coalescedBufferStart || coalescedBufferSize == 0) { + return readBytes; + } + + const auto coalescedEnd = coalescedBufferStart + coalescedBufferSize; // One past the last byte; a fully consumed buffer leaves the cursor here. 
+ VELOX_CHECK( + remainedData >= coalescedBufferStart && remainedData <= coalescedEnd, + "Cursor not in coalesced buffer range"); + + size_t bytesConsumedFromCoalesced = remainedData - coalescedBufferStart; + + return totalBytesReadBeforeRefill + bytesConsumedFromCoalesced; +} + +// Manages buffer refilling for Thrift deserialization with +// CompactProtocolReaderWithRefill. 
Ensures all deserialized data points to a +// single contiguous buffer by coalescing unconsumed bytes with newly read data. +// +// When the protocol reader needs more data, this refiller: +// 1. Reads new data from the stream +// 2. Creates a contiguous buffer containing unconsumed bytes + new data +// 3. Continues reading until requested bytes are available +// +// The coalesced buffer is necessary because Thrift deserialization may create +// pointers into the buffer that must remain valid throughout deserialization. +// +// Tracks metrics to calculate total bytes consumed from the stream: +// - totalBytesReadBeforeRefill: Bytes consumed from initial buffer +// - currentDataBytesInRefill: Unconsumed bytes when refiller was called +// - coalescedBufferStart/Size: Address range of the coalesced buffer +class ThriftStreamRefiller { + public: + ThriftStreamRefiller( + StreamReader& streamReader, + bool& usedRefiller, + int32_t& totalBytesReadBeforeRefill, + int32_t& currentDataBytesInRefill, + const uint8_t*& coalescedBufferStart, + size_t& coalescedBufferSize, + std::unique_ptr& lastRefillBuffer) + : streamReader_(streamReader), + usedRefiller_(usedRefiller), + totalBytesReadBeforeRefill_(totalBytesReadBeforeRefill), + currentDataBytesInRefill_(currentDataBytesInRefill), + coalescedBufferStart_(coalescedBufferStart), + coalescedBufferSize_(coalescedBufferSize), + lastRefillBuffer_(lastRefillBuffer) {} + + std::unique_ptr operator()( + const uint8_t* currentData, + int32_t currentDataBytes, + int32_t totalBytesRead, + int32_t requestedBytes) { + usedRefiller_ = true; + totalBytesReadBeforeRefill_ = totalBytesRead; + currentDataBytesInRefill_ = currentDataBytes; + + const void* data; + int32_t dataBytes{0}; + if (!streamReader_.readNext(&data, &dataBytes) || dataBytes == 0) { + // Return nullptr to signal end of stream + return nullptr; + } + + auto coalescedBuffer = createCoalescedBuffer( + currentData, currentDataBytes, data, dataBytes, requestedBytes); + + 
coalescedBufferStart_ = coalescedBuffer->data(); + coalescedBufferSize_ = coalescedBuffer->length(); + + lastRefillBuffer_ = std::move(coalescedBuffer); + return lastRefillBuffer_->clone(); + } + + private: + static void appendToContiguousBuffer( + folly::IOBuf* buffer, + const void* data, + size_t dataBytes) { + buffer->reserve(0, dataBytes); + memcpy(buffer->writableTail(), data, dataBytes); + buffer->append(dataBytes); + } + + // Creates a contiguous buffer that includes: + // 1. The unconsumed bytes from currentData + // 2. The new data just read from the stream + // 3. Additional data read until requestedBytes is satisfied + // This ensures all deserialized data points to a single stable buffer. + std::unique_ptr createCoalescedBuffer( + const uint8_t* currentData, + int32_t currentDataBytes, + const void* initialData, + int32_t initialDataBytes, + int32_t requestedBytes) { + std::unique_ptr coalescedBuffer; + size_t totalSize = currentDataBytes + initialDataBytes; + + if (currentDataBytes > 0) { + coalescedBuffer = folly::IOBuf::copyBuffer(currentData, currentDataBytes); + appendToContiguousBuffer( + coalescedBuffer.get(), initialData, initialDataBytes); + } else { + coalescedBuffer = folly::IOBuf::copyBuffer(initialData, initialDataBytes); + } + + while (totalSize < requestedBytes) { + const void* data = nullptr; + int32_t dataBytes = 0; + if (!streamReader_.readNext(&data, &dataBytes) || dataBytes == 0) { + break; + } + + appendToContiguousBuffer(coalescedBuffer.get(), data, dataBytes); + totalSize += dataBytes; + } + + return coalescedBuffer; + } + + StreamReader& streamReader_; + bool& usedRefiller_; + int32_t& totalBytesReadBeforeRefill_; + int32_t& currentDataBytesInRefill_; + const uint8_t*& coalescedBufferStart_; + size_t& coalescedBufferSize_; + std::unique_ptr& lastRefillBuffer_; +}; + +template +DeserializeResult deserialize( + ThriftStruct* thriftStruct, + facebook::velox::dwio::common::SeekableInputStream* input, + const uint8_t* initialData, + 
size_t initialDataBytes) { + uint64_t totalReadUs{0}; + std::unique_ptr lastRefillBuffer; + bool usedRefiller = false; + const void* lastStreamData = initialData; + int totalBytesReadBeforeRefill = 0; + int currentDataBytesInRefill = 0; + const uint8_t* coalescedBufferStart = nullptr; + size_t coalescedBufferSize = 0; + + VELOX_CHECK_LE(initialDataBytes, std::numeric_limits::max()); + int32_t lastStreamDataBytes = initialDataBytes; + + StreamReader streamReader{ + input, totalReadUs, lastStreamData, lastStreamDataBytes}; + + auto [data, size] = + ensureInitialData(streamReader, initialData, initialDataBytes); + initialData = data; + initialDataBytes = size; + + ThriftStreamRefiller refiller( + streamReader, + usedRefiller, + totalBytesReadBeforeRefill, + currentDataBytesInRefill, + coalescedBufferStart, + coalescedBufferSize, + lastRefillBuffer); + + apache::thrift::CompactV1ProtocolReaderWithRefill reader(std::ref(refiller)); + folly::IOBuf initialBuffer( + folly::IOBuf::WRAP_BUFFER, initialData, initialDataBytes); + + reader.setInput(&initialBuffer); + try { + DeserializeResult result; + result.readBytes = thriftStruct->read(&reader); + + auto cursor = reader.getCursor(); + result.remainedData = cursor.data(); + result.remainedDataBytes = cursor.length(); + result.readUs = totalReadUs; + result.lastBuffer = std::move(lastRefillBuffer); + result.usedRefiller = usedRefiller; + result.streamData = lastStreamData; + result.streamDataBytes = lastStreamDataBytes; + + result.readBytes = calculateConsumedBytes( + usedRefiller, + result.readBytes, + totalBytesReadBeforeRefill, + coalescedBufferStart, + coalescedBufferSize, + result.remainedData); + + return result; + } catch (const std::exception& e) { + VELOX_FAIL("Thrift deserialize error: {}", e.what()); + } +} + +template +uint32_t serialize( + const ThriftStruct& thriftStruct, + folly::IOBufQueue* buffer) { + apache::thrift::CompactV1ProtocolWriter writer; + writer.setOutput(buffer); + return 
thriftStruct.write(&writer); +} +}; // namespace facebook::velox::parquet::thrift diff --git a/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp b/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp deleted file mode 100644 index 674c99300f7..00000000000 --- a/velox/dwio/parquet/thrift/ParquetThriftTypes.cpp +++ /dev/null @@ -1,7638 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Autogenerated by Thrift Compiler (0.14.1) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#include "ParquetThriftTypes.h" - -#include -#include - -#include - -namespace facebook::velox::parquet::thrift { - -int _kTypeValues[] = { - Type::BOOLEAN, - Type::INT32, - Type::INT64, - Type::INT96, - Type::FLOAT, - Type::DOUBLE, - Type::BYTE_ARRAY, - Type::FIXED_LEN_BYTE_ARRAY}; -const char* _kTypeNames[] = { - "BOOLEAN", - "INT32", - "INT64", - "INT96", - "FLOAT", - "DOUBLE", - "BYTE_ARRAY", - "FIXED_LEN_BYTE_ARRAY"}; -const std::map _Type_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const Type::type& val) { - std::map::const_iterator it = - _Type_VALUES_TO_NAMES.find(val); - if (it != _Type_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const 
Type::type& val) { - std::map::const_iterator it = - _Type_VALUES_TO_NAMES.find(val); - if (it != _Type_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -int _kConvertedTypeValues[] = { - /** - * a BYTE_ARRAY actually contains UTF8 encoded chars - */ - ConvertedType::UTF8, - /** - * a map is converted as an optional field containing a repeated key/value - * pair - */ - ConvertedType::MAP, - /** - * a key/value pair is converted into a group of two fields - */ - ConvertedType::MAP_KEY_VALUE, - /** - * a list is converted into an optional field containing a repeated field - * for its values - */ - ConvertedType::LIST, - /** - * an enum is converted into a binary field - */ - ConvertedType::ENUM, - /** - * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's - * complement using big-endian byte order (the most significant byte is the - * zeroth element). The value of the decimal is the value * 10^{-scale}. - * - * This must be accompanied by a (maximum) precision and a scale in the - * SchemaElement. The precision specifies the number of digits in the - * decimal and the scale stores the location of the decimal point. For - * example 1.23 would have precision 3 (3 total digits) and scale 2 (the - * decimal point is 2 digits over). - */ - ConvertedType::DECIMAL, - /** - * A Date - * - * Stored as days since Unix epoch, encoded as the INT32 physical type. - * - */ - ConvertedType::DATE, - /** - * A time - * - * The total number of milliseconds since midnight. The value is stored - * as an INT32 physical type. - */ - ConvertedType::TIME_MILLIS, - /** - * A time. - * - * The total number of microseconds since midnight. The value is stored as - * an INT64 physical type. 
- */ - ConvertedType::TIME_MICROS, - /** - * A date/time combination - * - * Date and time recorded as milliseconds since the Unix epoch. Recorded as - * a physical type of INT64. - */ - ConvertedType::TIMESTAMP_MILLIS, - /** - * A date/time combination - * - * Date and time recorded as microseconds since the Unix epoch. The value - * is stored as an INT64 physical type. - */ - ConvertedType::TIMESTAMP_MICROS, - /** - * An unsigned integer value. - * - * The number describes the maximum number of meaningful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - ConvertedType::UINT_8, - ConvertedType::UINT_16, - ConvertedType::UINT_32, - ConvertedType::UINT_64, - /** - * A signed integer value. - * - * The number describes the maximum number of meaningful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - ConvertedType::INT_8, - ConvertedType::INT_16, - ConvertedType::INT_32, - ConvertedType::INT_64, - /** - * An embedded JSON document - * - * A JSON document embedded within a single UTF8 column. - */ - ConvertedType::JSON, - /** - * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. - */ - ConvertedType::BSON, - /** - * An interval of time - * - * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 - * This data is composed of three separate little endian unsigned - * integers. Each stores a component of a duration of time. The first - * integer identifies the number of months associated with the duration, - * the second identifies the number of days associated with the duration - * and the third identifies the number of milliseconds associated with - * the provided duration. This duration of time is independent of any - * particular timezone or date. 
- */ - ConvertedType::INTERVAL}; -const char* _kConvertedTypeNames[] = { - /** - * a BYTE_ARRAY actually contains UTF8 encoded chars - */ - "UTF8", - /** - * a map is converted as an optional field containing a repeated key/value - * pair - */ - "MAP", - /** - * a key/value pair is converted into a group of two fields - */ - "MAP_KEY_VALUE", - /** - * a list is converted into an optional field containing a repeated field - * for its values - */ - "LIST", - /** - * an enum is converted into a binary field - */ - "ENUM", - /** - * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's - * complement using big-endian byte order (the most significant byte is the - * zeroth element). The value of the decimal is the value * 10^{-scale}. - * - * This must be accompanied by a (maximum) precision and a scale in the - * SchemaElement. The precision specifies the number of digits in the - * decimal and the scale stores the location of the decimal point. For - * example 1.23 would have precision 3 (3 total digits) and scale 2 (the - * decimal point is 2 digits over). - */ - "DECIMAL", - /** - * A Date - * - * Stored as days since Unix epoch, encoded as the INT32 physical type. - * - */ - "DATE", - /** - * A time - * - * The total number of milliseconds since midnight. The value is stored - * as an INT32 physical type. - */ - "TIME_MILLIS", - /** - * A time. - * - * The total number of microseconds since midnight. The value is stored as - * an INT64 physical type. - */ - "TIME_MICROS", - /** - * A date/time combination - * - * Date and time recorded as milliseconds since the Unix epoch. Recorded as - * a physical type of INT64. - */ - "TIMESTAMP_MILLIS", - /** - * A date/time combination - * - * Date and time recorded as microseconds since the Unix epoch. The value - * is stored as an INT64 physical type. - */ - "TIMESTAMP_MICROS", - /** - * An unsigned integer value. 
- * - * The number describes the maximum number of meaningful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - "UINT_8", - "UINT_16", - "UINT_32", - "UINT_64", - /** - * A signed integer value. - * - * The number describes the maximum number of meaningful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - "INT_8", - "INT_16", - "INT_32", - "INT_64", - /** - * An embedded JSON document - * - * A JSON document embedded within a single UTF8 column. - */ - "JSON", - /** - * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. - */ - "BSON", - /** - * An interval of time - * - * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 - * This data is composed of three separate little endian unsigned - * integers. Each stores a component of a duration of time. The first - * integer identifies the number of months associated with the duration, - * the second identifies the number of days associated with the duration - * and the third identifies the number of milliseconds associated with - * the provided duration. This duration of time is independent of any - * particular timezone or date. 
- */ - "INTERVAL"}; -const std::map _ConvertedType_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator( - 22, - _kConvertedTypeValues, - _kConvertedTypeNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) { - std::map::const_iterator it = - _ConvertedType_VALUES_TO_NAMES.find(val); - if (it != _ConvertedType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const ConvertedType::type& val) { - std::map::const_iterator it = - _ConvertedType_VALUES_TO_NAMES.find(val); - if (it != _ConvertedType_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -int _kFieldRepetitionTypeValues[] = { - /** - * This field is required (can not be null) and each record has exactly 1 - * value. - */ - FieldRepetitionType::REQUIRED, - /** - * The field is optional (can be null) and each record has 0 or 1 values. - */ - FieldRepetitionType::OPTIONAL, - /** - * The field is repeated and can contain 0 or more values - */ - FieldRepetitionType::REPEATED}; -const char* _kFieldRepetitionTypeNames[] = { - /** - * This field is required (can not be null) and each record has exactly 1 - * value. - */ - "REQUIRED", - /** - * The field is optional (can be null) and each record has 0 or 1 values. 
- */ - "OPTIONAL", - /** - * The field is repeated and can contain 0 or more values - */ - "REPEATED"}; -const std::map _FieldRepetitionType_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator( - 3, - _kFieldRepetitionTypeValues, - _kFieldRepetitionTypeNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<( - std::ostream& out, - const FieldRepetitionType::type& val) { - std::map::const_iterator it = - _FieldRepetitionType_VALUES_TO_NAMES.find(val); - if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const FieldRepetitionType::type& val) { - std::map::const_iterator it = - _FieldRepetitionType_VALUES_TO_NAMES.find(val); - if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -int _kEncodingValues[] = { - /** - * Default encoding. - * BOOLEAN - 1 bit per value. 0 is false; 1 is true. - * INT32 - 4 bytes per value. Stored as little-endian. - * INT64 - 8 bytes per value. Stored as little-endian. - * FLOAT - 4 bytes per value. IEEE. Stored as little-endian. - * DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. - * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. - * FIXED_LEN_BYTE_ARRAY - Just the bytes. - */ - Encoding::PLAIN, - /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded - * in the plain type. in a data page use RLE_DICTIONARY instead. in a - * Dictionary page use PLAIN instead - */ - Encoding::PLAIN_DICTIONARY, - /** - * Group packed run length encoding. Usable for definition/repetition levels - * encoding and Booleans (on one bit: 0 is false; 1 is true.) - */ - Encoding::RLE, - /** - * Bit packed encoding. This can only be used if the data has a known max - * width. Usable for definition/repetition levels encoding. 
- */ - Encoding::BIT_PACKED, - /** - * Delta encoding for integers. This can be used for int columns and works - * best on sorted data - */ - Encoding::DELTA_BINARY_PACKED, - /** - * Encoding for byte arrays to separate the length values and the data. The - * lengths are encoded using DELTA_BINARY_PACKED - */ - Encoding::DELTA_LENGTH_BYTE_ARRAY, - /** - * Incremental-encoded byte array. Prefix lengths are encoded using - * DELTA_BINARY_PACKED. Suffixes are stored as delta length byte arrays. - */ - Encoding::DELTA_BYTE_ARRAY, - /** - * Dictionary encoding: the ids are encoded using the RLE encoding - */ - Encoding::RLE_DICTIONARY, - /** - * Encoding for floating-point data. - * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding - * stream and the streams are concatenated. This itself does not reduce the - * size of the data but can lead to better compression afterwards. - */ - Encoding::BYTE_STREAM_SPLIT}; -const char* _kEncodingNames[] = { - /** - * Default encoding. - * BOOLEAN - 1 bit per value. 0 is false; 1 is true. - * INT32 - 4 bytes per value. Stored as little-endian. - * INT64 - 8 bytes per value. Stored as little-endian. - * FLOAT - 4 bytes per value. IEEE. Stored as little-endian. - * DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. - * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. - * FIXED_LEN_BYTE_ARRAY - Just the bytes. - */ - "PLAIN", - /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded - * in the plain type. in a data page use RLE_DICTIONARY instead. in a - * Dictionary page use PLAIN instead - */ - "PLAIN_DICTIONARY", - /** - * Group packed run length encoding. Usable for definition/repetition levels - * encoding and Booleans (on one bit: 0 is false; 1 is true.) - */ - "RLE", - /** - * Bit packed encoding. This can only be used if the data has a known max - * width. 
Usable for definition/repetition levels encoding. - */ - "BIT_PACKED", - /** - * Delta encoding for integers. This can be used for int columns and works - * best on sorted data - */ - "DELTA_BINARY_PACKED", - /** - * Encoding for byte arrays to separate the length values and the data. The - * lengths are encoded using DELTA_BINARY_PACKED - */ - "DELTA_LENGTH_BYTE_ARRAY", - /** - * Incremental-encoded byte array. Prefix lengths are encoded using - * DELTA_BINARY_PACKED. Suffixes are stored as delta length byte arrays. - */ - "DELTA_BYTE_ARRAY", - /** - * Dictionary encoding: the ids are encoded using the RLE encoding - */ - "RLE_DICTIONARY", - /** - * Encoding for floating-point data. - * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding - * stream and the streams are concatenated. This itself does not reduce the - * size of the data but can lead to better compression afterwards. - */ - "BYTE_STREAM_SPLIT"}; -const std::map _Encoding_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator(9, _kEncodingValues, _kEncodingNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const Encoding::type& val) { - std::map::const_iterator it = - _Encoding_VALUES_TO_NAMES.find(val); - if (it != _Encoding_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const Encoding::type& val) { - std::map::const_iterator it = - _Encoding_VALUES_TO_NAMES.find(val); - if (it != _Encoding_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -int _kCompressionCodecValues[] = { - CompressionCodec::UNCOMPRESSED, - CompressionCodec::SNAPPY, - CompressionCodec::GZIP, - CompressionCodec::LZO, - CompressionCodec::BROTLI, - CompressionCodec::LZ4, - CompressionCodec::ZSTD, - CompressionCodec::LZ4_RAW}; 
-const char* _kCompressionCodecNames[] = { - "UNCOMPRESSED", - "SNAPPY", - "GZIP", - "LZO", - "BROTLI", - "LZ4", - "ZSTD", - "LZ4_RAW"}; -const std::map _CompressionCodec_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator( - 8, - _kCompressionCodecValues, - _kCompressionCodecNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) { - std::map::const_iterator it = - _CompressionCodec_VALUES_TO_NAMES.find(val); - if (it != _CompressionCodec_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const CompressionCodec::type& val) { - std::map::const_iterator it = - _CompressionCodec_VALUES_TO_NAMES.find(val); - if (it != _CompressionCodec_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -int _kPageTypeValues[] = { - PageType::DATA_PAGE, - PageType::INDEX_PAGE, - PageType::DICTIONARY_PAGE, - PageType::DATA_PAGE_V2}; -const char* _kPageTypeNames[] = { - "DATA_PAGE", - "INDEX_PAGE", - "DICTIONARY_PAGE", - "DATA_PAGE_V2"}; -const std::map _PageType_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const PageType::type& val) { - std::map::const_iterator it = - _PageType_VALUES_TO_NAMES.find(val); - if (it != _PageType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const PageType::type& val) { - std::map::const_iterator it = - _PageType_VALUES_TO_NAMES.find(val); - if (it != _PageType_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -int _kBoundaryOrderValues[] = { - BoundaryOrder::UNORDERED, - BoundaryOrder::ASCENDING, - 
BoundaryOrder::DESCENDING}; -const char* _kBoundaryOrderNames[] = {"UNORDERED", "ASCENDING", "DESCENDING"}; -const std::map _BoundaryOrder_VALUES_TO_NAMES( - ::apache::thrift::TEnumIterator( - 3, - _kBoundaryOrderValues, - _kBoundaryOrderNames), - ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) { - std::map::const_iterator it = - _BoundaryOrder_VALUES_TO_NAMES.find(val); - if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const BoundaryOrder::type& val) { - std::map::const_iterator it = - _BoundaryOrder_VALUES_TO_NAMES.find(val); - if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - -Statistics::~Statistics() noexcept {} - -void Statistics::__set_max(const std::string& val) { - this->max = val; - __isset.max = true; -} - -void Statistics::__set_min(const std::string& val) { - this->min = val; - __isset.min = true; -} - -void Statistics::__set_null_count(const int64_t val) { - this->null_count = val; - __isset.null_count = true; -} - -void Statistics::__set_distinct_count(const int64_t val) { - this->distinct_count = val; - __isset.distinct_count = true; -} - -void Statistics::__set_max_value(const std::string& val) { - this->max_value = val; - __isset.max_value = true; -} - -void Statistics::__set_min_value(const std::string& val) { - this->min_value = val; - __isset.min_value = true; -} -std::ostream& operator<<(std::ostream& out, const Statistics& obj) { - obj.printTo(out); - return out; -} - -uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - 
using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - this->__isset.min_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); - - if (this->__isset.max) { - xfer += - oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += - 
oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin( - "null_count", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin( - "distinct_count", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin( - "max_value", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin( - "min_value", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(Statistics& a, Statistics& b) { - using ::std::swap; - swap(a.max, b.max); - swap(a.min, b.min); - swap(a.null_count, b.null_count); - swap(a.distinct_count, b.distinct_count); - swap(a.max_value, b.max_value); - swap(a.min_value, b.min_value); - swap(a.__isset, b.__isset); -} - -Statistics::Statistics(const Statistics& other0) { - max = other0.max; - min = other0.min; - null_count = other0.null_count; - distinct_count = other0.distinct_count; - max_value = other0.max_value; - min_value = other0.min_value; - __isset = other0.__isset; -} -Statistics& Statistics::operator=(const Statistics& other1) { - max = other1.max; - min = other1.min; - null_count = other1.null_count; - distinct_count = other1.distinct_count; - max_value = other1.max_value; - min_value = other1.min_value; - __isset = other1.__isset; - return *this; -} -void Statistics::printTo(std::ostream& out) const { - using 
::apache::thrift::to_string; - out << "Statistics("; - out << "max="; - (__isset.max ? (out << to_string(max)) : (out << "")); - out << ", " << "min="; - (__isset.min ? (out << to_string(min)) : (out << "")); - out << ", " << "null_count="; - (__isset.null_count ? (out << to_string(null_count)) : (out << "")); - out << ", " << "distinct_count="; - (__isset.distinct_count ? (out << to_string(distinct_count)) - : (out << "")); - out << ", " << "max_value="; - (__isset.max_value ? (out << to_string(max_value)) : (out << "")); - out << ", " << "min_value="; - (__isset.min_value ? (out << to_string(min_value)) : (out << "")); - out << ")"; -} - -StringType::~StringType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const StringType& obj) { - obj.printTo(out); - return out; -} - -uint32_t StringType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t StringType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(StringType& /*a*/, StringType& /*b*/) {} - -StringType::StringType(const StringType& /*other2*/) {} - -StringType& StringType::operator=(const StringType& /*other3*/) { - return *this; -} - -void StringType::printTo(std::ostream& out) const { - using 
::apache::thrift::to_string; - out << "StringType("; - out << ")"; -} - -UUIDType::~UUIDType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const UUIDType& obj) { - obj.printTo(out); - return out; -} - -uint32_t UUIDType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t UUIDType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(UUIDType& /*a*/, UUIDType& /*b*/) {} - -UUIDType::UUIDType(const UUIDType& /*other4*/) {} - -UUIDType& UUIDType::operator=(const UUIDType& /*other5*/) { - return *this; -} - -void UUIDType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "UUIDType("; - out << ")"; -} - -MapType::~MapType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const MapType& obj) { - obj.printTo(out); - return out; -} - -uint32_t MapType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, 
fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MapType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MapType& /*a*/, MapType& /*b*/) {} - -MapType::MapType(const MapType& /*other6*/) {} - -MapType& MapType::operator=(const MapType& /*other7*/) { - return *this; -} - -void MapType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "MapType("; - out << ")"; -} - -ListType::~ListType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const ListType& obj) { - obj.printTo(out); - return out; -} - -uint32_t ListType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ListType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ListType& /*a*/, ListType& /*b*/) {} - -ListType::ListType(const ListType& /*other8*/) {} - -ListType& 
ListType::operator=(const ListType& /*other9*/) { - return *this; -} - -void ListType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "ListType("; - out << ")"; -} - -EnumType::~EnumType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const EnumType& obj) { - obj.printTo(out); - return out; -} - -uint32_t EnumType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EnumType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EnumType& /*a*/, EnumType& /*b*/) {} - -EnumType::EnumType(const EnumType& /*other10*/) {} - -EnumType& EnumType::operator=(const EnumType& /*other11*/) { - return *this; -} - -void EnumType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "EnumType("; - out << ")"; -} - -DateType::~DateType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const DateType& obj) { - obj.printTo(out); - return out; -} - -uint32_t DateType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += 
iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t DateType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DateType& /*a*/, DateType& /*b*/) {} - -DateType::DateType(const DateType& /*other12*/) {} - -DateType& DateType::operator=(const DateType& /*other13*/) { - return *this; -} - -void DateType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "DateType("; - out << ")"; -} - -NullType::~NullType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const NullType& obj) { - obj.printTo(out); - return out; -} - -uint32_t NullType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NullType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); - - xfer += oprot->writeFieldStop(); - xfer += 
oprot->writeStructEnd(); - return xfer; -} - -void swap(NullType& /*a*/, NullType& /*b*/) {} - -NullType::NullType(const NullType& /*other14*/) {} - -NullType& NullType::operator=(const NullType& /*other15*/) { - return *this; -} - -void NullType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "NullType("; - out << ")"; -} - -DecimalType::~DecimalType() noexcept {} - -void DecimalType::__set_scale(const int32_t val) { - this->scale = val; -} - -void DecimalType::__set_precision(const int32_t val) { - this->precision = val; -} -std::ostream& operator<<(std::ostream& out, const DecimalType& obj) { - obj.printTo(out); - return out; -} - -uint32_t DecimalType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_scale = false; - bool isset_precision = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DecimalType::write( - ::apache::thrift::protocol::TProtocol* oprot) 
const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DecimalType& a, DecimalType& b) { - using ::std::swap; - swap(a.scale, b.scale); - swap(a.precision, b.precision); -} - -DecimalType::DecimalType(const DecimalType& other16) { - scale = other16.scale; - precision = other16.precision; -} -DecimalType& DecimalType::operator=(const DecimalType& other17) { - scale = other17.scale; - precision = other17.precision; - return *this; -} -void DecimalType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "DecimalType("; - out << "scale=" << to_string(scale); - out << ", " << "precision=" << to_string(precision); - out << ")"; -} - -MilliSeconds::~MilliSeconds() noexcept {} - -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) { - obj.printTo(out); - return out; -} - -uint32_t MilliSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MilliSeconds::write( - 
::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MilliSeconds& /*a*/, MilliSeconds& /*b*/) {} - -MilliSeconds::MilliSeconds(const MilliSeconds& /*other18*/) {} - -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& /*other19*/) { - return *this; -} - -void MilliSeconds::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "MilliSeconds("; - out << ")"; -} - -MicroSeconds::~MicroSeconds() noexcept {} - -std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) { - obj.printTo(out); - return out; -} - -uint32_t MicroSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MicroSeconds::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MicroSeconds& /*a*/, MicroSeconds& /*b*/) {} - -MicroSeconds::MicroSeconds(const MicroSeconds& /*other20*/) {} - -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& /*other21*/) { - return *this; -} - -void MicroSeconds::printTo(std::ostream& out) 
const { - using ::apache::thrift::to_string; - out << "MicroSeconds("; - out << ")"; -} - -NanoSeconds::~NanoSeconds() noexcept {} - -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) { - obj.printTo(out); - return out; -} - -uint32_t NanoSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NanoSeconds::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NanoSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(NanoSeconds& /*a*/, NanoSeconds& /*b*/) {} - -NanoSeconds::NanoSeconds(const NanoSeconds& /*other22*/) {} - -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& /*other23*/) { - return *this; -} - -void NanoSeconds::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "NanoSeconds("; - out << ")"; -} - -TimeUnit::~TimeUnit() noexcept {} - -void TimeUnit::__set_MILLIS(const MilliSeconds& val) { - this->MILLIS = val; - __isset.MILLIS = true; -} - -void TimeUnit::__set_MICROS(const MicroSeconds& val) { - this->MICROS = val; - __isset.MICROS = true; -} - -void TimeUnit::__set_NANOS(const NanoSeconds& val) { - this->NANOS = val; - __isset.NANOS = true; -} -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) { - obj.printTo(out); - return out; -} - -uint32_t 
TimeUnit::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TimeUnit::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin( - "MILLIS", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin( - "MICROS", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin( - "NANOS", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += 
oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimeUnit& a, TimeUnit& b) { - using ::std::swap; - swap(a.MILLIS, b.MILLIS); - swap(a.MICROS, b.MICROS); - swap(a.NANOS, b.NANOS); - swap(a.__isset, b.__isset); -} - -TimeUnit::TimeUnit(const TimeUnit& other24) { - MILLIS = other24.MILLIS; - MICROS = other24.MICROS; - NANOS = other24.NANOS; - __isset = other24.__isset; -} -TimeUnit& TimeUnit::operator=(const TimeUnit& other25) { - MILLIS = other25.MILLIS; - MICROS = other25.MICROS; - NANOS = other25.NANOS; - __isset = other25.__isset; - return *this; -} -void TimeUnit::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "TimeUnit("; - out << "MILLIS="; - (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "")); - out << ", " << "MICROS="; - (__isset.MICROS ? (out << to_string(MICROS)) : (out << "")); - out << ", " << "NANOS="; - (__isset.NANOS ? (out << to_string(NANOS)) : (out << "")); - out << ")"; -} - -TimestampType::~TimestampType() noexcept {} - -void TimestampType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} - -void TimestampType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimestampType& obj) { - obj.printTo(out); - return out; -} - -uint32_t TimestampType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += 
iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimestampType::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); - - xfer += oprot->writeFieldBegin( - "isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimestampType& a, TimestampType& b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} - -TimestampType::TimestampType(const TimestampType& other26) { - isAdjustedToUTC = other26.isAdjustedToUTC; - unit = other26.unit; -} -TimestampType& TimestampType::operator=(const TimestampType& other27) { - isAdjustedToUTC = other27.isAdjustedToUTC; - unit = other27.unit; - return *this; -} -void TimestampType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "TimestampType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} - -TimeType::~TimeType() noexcept {} 
- -void TimeType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} - -void TimeType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimeType& obj) { - obj.printTo(out); - return out; -} - -uint32_t TimeType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimeType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - - xfer += oprot->writeFieldBegin( - "isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("unit", 
::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimeType& a, TimeType& b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} - -TimeType::TimeType(const TimeType& other28) { - isAdjustedToUTC = other28.isAdjustedToUTC; - unit = other28.unit; -} -TimeType& TimeType::operator=(const TimeType& other29) { - isAdjustedToUTC = other29.isAdjustedToUTC; - unit = other29.unit; - return *this; -} -void TimeType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "TimeType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} - -IntType::~IntType() noexcept {} - -void IntType::__set_bitWidth(const int8_t val) { - this->bitWidth = val; -} - -void IntType::__set_isSigned(const bool val) { - this->isSigned = val; -} -std::ostream& operator<<(std::ostream& out, const IntType& obj) { - obj.printTo(out); - return out; -} - -uint32_t IntType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_bitWidth = false; - bool isset_isSigned = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = 
true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t IntType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IntType"); - - xfer += - oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(IntType& a, IntType& b) { - using ::std::swap; - swap(a.bitWidth, b.bitWidth); - swap(a.isSigned, b.isSigned); -} - -IntType::IntType(const IntType& other30) { - bitWidth = other30.bitWidth; - isSigned = other30.isSigned; -} -IntType& IntType::operator=(const IntType& other31) { - bitWidth = other31.bitWidth; - isSigned = other31.isSigned; - return *this; -} -void IntType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "IntType("; - out << "bitWidth=" << to_string(bitWidth); - out << ", " << "isSigned=" << to_string(isSigned); - out << ")"; -} - -JsonType::~JsonType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const JsonType& obj) { - obj.printTo(out); - return out; -} - -uint32_t JsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - 
int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t JsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(JsonType& /*a*/, JsonType& /*b*/) {} - -JsonType::JsonType(const JsonType& /*other32*/) {} - -JsonType& JsonType::operator=(const JsonType& /*other33*/) { - return *this; -} - -void JsonType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "JsonType("; - out << ")"; -} - -BsonType::~BsonType() noexcept {} - -std::ostream& operator<<(std::ostream& out, const BsonType& obj) { - obj.printTo(out); - return out; -} - -uint32_t BsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); - - xfer += 
oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BsonType& /*a*/, BsonType& /*b*/) {} - -BsonType::BsonType(const BsonType& /*other34*/) {} - -BsonType& BsonType::operator=(const BsonType& /*other35*/) { - return *this; -} - -void BsonType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "BsonType("; - out << ")"; -} - -LogicalType::~LogicalType() noexcept {} - -void LogicalType::__set_STRING(const StringType& val) { - this->STRING = val; - __isset.STRING = true; -} - -void LogicalType::__set_MAP(const MapType& val) { - this->MAP = val; - __isset.MAP = true; -} - -void LogicalType::__set_LIST(const ListType& val) { - this->LIST = val; - __isset.LIST = true; -} - -void LogicalType::__set_ENUM(const EnumType& val) { - this->ENUM = val; - __isset.ENUM = true; -} - -void LogicalType::__set_DECIMAL(const DecimalType& val) { - this->DECIMAL = val; - __isset.DECIMAL = true; -} - -void LogicalType::__set_DATE(const DateType& val) { - this->DATE = val; - __isset.DATE = true; -} - -void LogicalType::__set_TIME(const TimeType& val) { - this->TIME = val; - __isset.TIME = true; -} - -void LogicalType::__set_TIMESTAMP(const TimestampType& val) { - this->TIMESTAMP = val; - __isset.TIMESTAMP = true; -} - -void LogicalType::__set_INTEGER(const IntType& val) { - this->INTEGER = val; - __isset.INTEGER = true; -} - -void LogicalType::__set_UNKNOWN(const NullType& val) { - this->UNKNOWN = val; - __isset.UNKNOWN = true; -} - -void LogicalType::__set_JSON(const JsonType& val) { - this->JSON = val; - __isset.JSON = true; -} - -void LogicalType::__set_BSON(const BsonType& val) { - this->BSON = val; - __isset.BSON = true; -} - -void LogicalType::__set_UUID(const UUIDType& val) { - this->UUID = val; - __isset.UUID = true; -} -std::ostream& operator<<(std::ostream& out, const LogicalType& obj) { - obj.printTo(out); - return out; -} - -uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) { - 
::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MAP.read(iprot); - this->__isset.MAP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); 
- this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t LogicalType::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); - - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin( - "STRING", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += - oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += - oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += - oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += 
oprot->writeFieldBegin( - "DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += - oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += - oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin( - "TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin( - "INTEGER", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin( - "UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin( - "JSON", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin( - "BSON", ::apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin( - "UUID", ::apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(LogicalType& a, LogicalType& b) { - using ::std::swap; - swap(a.STRING, b.STRING); - swap(a.MAP, b.MAP); - swap(a.LIST, b.LIST); - swap(a.ENUM, b.ENUM); - swap(a.DECIMAL, b.DECIMAL); - swap(a.DATE, b.DATE); - 
swap(a.TIME, b.TIME); - swap(a.TIMESTAMP, b.TIMESTAMP); - swap(a.INTEGER, b.INTEGER); - swap(a.UNKNOWN, b.UNKNOWN); - swap(a.JSON, b.JSON); - swap(a.BSON, b.BSON); - swap(a.UUID, b.UUID); - swap(a.__isset, b.__isset); -} - -LogicalType::LogicalType(const LogicalType& other36) { - STRING = other36.STRING; - MAP = other36.MAP; - LIST = other36.LIST; - ENUM = other36.ENUM; - DECIMAL = other36.DECIMAL; - DATE = other36.DATE; - TIME = other36.TIME; - TIMESTAMP = other36.TIMESTAMP; - INTEGER = other36.INTEGER; - UNKNOWN = other36.UNKNOWN; - JSON = other36.JSON; - BSON = other36.BSON; - UUID = other36.UUID; - __isset = other36.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other37) { - STRING = other37.STRING; - MAP = other37.MAP; - LIST = other37.LIST; - ENUM = other37.ENUM; - DECIMAL = other37.DECIMAL; - DATE = other37.DATE; - TIME = other37.TIME; - TIMESTAMP = other37.TIMESTAMP; - INTEGER = other37.INTEGER; - UNKNOWN = other37.UNKNOWN; - JSON = other37.JSON; - BSON = other37.BSON; - UUID = other37.UUID; - __isset = other37.__isset; - return *this; -} -void LogicalType::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "LogicalType("; - out << "STRING="; - (__isset.STRING ? (out << to_string(STRING)) : (out << "")); - out << ", " << "MAP="; - (__isset.MAP ? (out << to_string(MAP)) : (out << "")); - out << ", " << "LIST="; - (__isset.LIST ? (out << to_string(LIST)) : (out << "")); - out << ", " << "ENUM="; - (__isset.ENUM ? (out << to_string(ENUM)) : (out << "")); - out << ", " << "DECIMAL="; - (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "")); - out << ", " << "DATE="; - (__isset.DATE ? (out << to_string(DATE)) : (out << "")); - out << ", " << "TIME="; - (__isset.TIME ? (out << to_string(TIME)) : (out << "")); - out << ", " << "TIMESTAMP="; - (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "")); - out << ", " << "INTEGER="; - (__isset.INTEGER ? 
(out << to_string(INTEGER)) : (out << "")); - out << ", " << "UNKNOWN="; - (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "")); - out << ", " << "JSON="; - (__isset.JSON ? (out << to_string(JSON)) : (out << "")); - out << ", " << "BSON="; - (__isset.BSON ? (out << to_string(BSON)) : (out << "")); - out << ", " << "UUID="; - (__isset.UUID ? (out << to_string(UUID)) : (out << "")); - out << ")"; -} - -SchemaElement::~SchemaElement() noexcept {} - -void SchemaElement::__set_type(const Type::type val) { - this->type = val; - __isset.type = true; -} - -void SchemaElement::__set_type_length(const int32_t val) { - this->type_length = val; - __isset.type_length = true; -} - -void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) { - this->repetition_type = val; - __isset.repetition_type = true; -} - -void SchemaElement::__set_name(const std::string& val) { - this->name = val; -} - -void SchemaElement::__set_num_children(const int32_t val) { - this->num_children = val; - __isset.num_children = true; -} - -void SchemaElement::__set_converted_type(const ConvertedType::type val) { - this->converted_type = val; - __isset.converted_type = true; -} - -void SchemaElement::__set_scale(const int32_t val) { - this->scale = val; - __isset.scale = true; -} - -void SchemaElement::__set_precision(const int32_t val) { - this->precision = val; - __isset.precision = true; -} - -void SchemaElement::__set_field_id(const int32_t val) { - this->field_id = val; - __isset.field_id = true; -} - -void SchemaElement::__set_logicalType(const LogicalType& val) { - this->logicalType = val; - __isset.logicalType = true; -} -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) { - obj.printTo(out); - return out; -} - -uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - 
int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_name = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast38; - xfer += iprot->readI32(ecast38); - this->type = static_cast(ecast38); - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast39; - xfer += iprot->readI32(ecast39); - this->repetition_type = - static_cast(ecast39); - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast40; - xfer += iprot->readI32(ecast40); - this->converted_type = static_cast(ecast40); - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - 
break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SchemaElement::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += - oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin( - "type_length", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin( - "repetition_type", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast(this->repetition_type)); - xfer += oprot->writeFieldEnd(); - } - xfer += - oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.num_children) { - xfer += oprot->writeFieldBegin( - "num_children", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin( - "converted_type", 
::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(static_cast(this->converted_type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += - oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin( - "precision", ::apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin( - "field_id", ::apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += oprot->writeFieldBegin( - "logicalType", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SchemaElement& a, SchemaElement& b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.type_length, b.type_length); - swap(a.repetition_type, b.repetition_type); - swap(a.name, b.name); - swap(a.num_children, b.num_children); - swap(a.converted_type, b.converted_type); - swap(a.scale, b.scale); - swap(a.precision, b.precision); - swap(a.field_id, b.field_id); - swap(a.logicalType, b.logicalType); - swap(a.__isset, b.__isset); -} - -SchemaElement::SchemaElement(const SchemaElement& other41) { - type = other41.type; - type_length = other41.type_length; - repetition_type = other41.repetition_type; - name = other41.name; - num_children = other41.num_children; - converted_type = other41.converted_type; - scale = other41.scale; - precision = other41.precision; - field_id = other41.field_id; - logicalType = other41.logicalType; - __isset = other41.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other42) { - type = 
other42.type; - type_length = other42.type_length; - repetition_type = other42.repetition_type; - name = other42.name; - num_children = other42.num_children; - converted_type = other42.converted_type; - scale = other42.scale; - precision = other42.precision; - field_id = other42.field_id; - logicalType = other42.logicalType; - __isset = other42.__isset; - return *this; -} -void SchemaElement::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "SchemaElement("; - out << "type="; - (__isset.type ? (out << to_string(type)) : (out << "")); - out << ", " << "type_length="; - (__isset.type_length ? (out << to_string(type_length)) : (out << "")); - out << ", " << "repetition_type="; - (__isset.repetition_type ? (out << to_string(repetition_type)) - : (out << "")); - out << ", " << "name=" << to_string(name); - out << ", " << "num_children="; - (__isset.num_children ? (out << to_string(num_children)) : (out << "")); - out << ", " << "converted_type="; - (__isset.converted_type ? (out << to_string(converted_type)) - : (out << "")); - out << ", " << "scale="; - (__isset.scale ? (out << to_string(scale)) : (out << "")); - out << ", " << "precision="; - (__isset.precision ? (out << to_string(precision)) : (out << "")); - out << ", " << "field_id="; - (__isset.field_id ? (out << to_string(field_id)) : (out << "")); - out << ", " << "logicalType="; - (__isset.logicalType ? 
(out << to_string(logicalType)) : (out << "")); - out << ")"; -} - -DataPageHeader::~DataPageHeader() noexcept {} - -void DataPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DataPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) { - this->definition_level_encoding = val; -} - -void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) { - this->repetition_level_encoding = val; -} - -void DataPageHeader::__set_statistics(const Statistics& val) { - this->statistics = val; - __isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) { - obj.printTo(out); - return out; -} - -uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast43; - xfer += iprot->readI32(ecast43); - this->encoding = static_cast(ecast43); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast44; - xfer += iprot->readI32(ecast44); - 
this->definition_level_encoding = - static_cast(ecast44); - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast45; - xfer += iprot->readI32(ecast45); - this->repetition_level_encoding = - static_cast(ecast45); - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeader::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - - xfer += oprot->writeFieldBegin( - "num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); - xfer += - oprot->writeI32(static_cast(this->definition_level_encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - 
"repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += - oprot->writeI32(static_cast(this->repetition_level_encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin( - "statistics", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DataPageHeader& a, DataPageHeader& b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.definition_level_encoding, b.definition_level_encoding); - swap(a.repetition_level_encoding, b.repetition_level_encoding); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} - -DataPageHeader::DataPageHeader(const DataPageHeader& other46) { - num_values = other46.num_values; - encoding = other46.encoding; - definition_level_encoding = other46.definition_level_encoding; - repetition_level_encoding = other46.repetition_level_encoding; - statistics = other46.statistics; - __isset = other46.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) { - num_values = other47.num_values; - encoding = other47.encoding; - definition_level_encoding = other47.definition_level_encoding; - repetition_level_encoding = other47.repetition_level_encoding; - statistics = other47.statistics; - __isset = other47.__isset; - return *this; -} -void DataPageHeader::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "DataPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " - << "definition_level_encoding=" << to_string(definition_level_encoding); - out << ", " - << "repetition_level_encoding=" << to_string(repetition_level_encoding); - out << ", " << "statistics="; - (__isset.statistics ? 
(out << to_string(statistics)) : (out << "")); - out << ")"; -} - -IndexPageHeader::~IndexPageHeader() noexcept {} - -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) { - obj.printTo(out); - return out; -} - -uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t IndexPageHeader::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(IndexPageHeader& /*a*/, IndexPageHeader& /*b*/) {} - -IndexPageHeader::IndexPageHeader(const IndexPageHeader& /*other48*/) {} - -IndexPageHeader& IndexPageHeader::operator=( - const IndexPageHeader& /*other49*/) { - return *this; -} - -void IndexPageHeader::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "IndexPageHeader("; - out << ")"; -} - -DictionaryPageHeader::~DictionaryPageHeader() noexcept {} - -void DictionaryPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DictionaryPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DictionaryPageHeader::__set_is_sorted(const bool val) { - this->is_sorted = val; - __isset.is_sorted = true; -} -std::ostream& operator<<(std::ostream& out, const 
DictionaryPageHeader& obj) { - obj.printTo(out); - return out; -} - -uint32_t DictionaryPageHeader::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast50; - xfer += iprot->readI32(ecast50); - this->encoding = static_cast(ecast50); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DictionaryPageHeader::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); - - xfer += oprot->writeFieldBegin( - "num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += 
oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin( - "is_sorted", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DictionaryPageHeader& a, DictionaryPageHeader& b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.is_sorted, b.is_sorted); - swap(a.__isset, b.__isset); -} - -DictionaryPageHeader::DictionaryPageHeader( - const DictionaryPageHeader& other51) { - num_values = other51.num_values; - encoding = other51.encoding; - is_sorted = other51.is_sorted; - __isset = other51.__isset; -} -DictionaryPageHeader& DictionaryPageHeader::operator=( - const DictionaryPageHeader& other52) { - num_values = other52.num_values; - encoding = other52.encoding; - is_sorted = other52.is_sorted; - __isset = other52.__isset; - return *this; -} -void DictionaryPageHeader::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "DictionaryPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "is_sorted="; - (__isset.is_sorted ? 
(out << to_string(is_sorted)) : (out << "")); - out << ")"; -} - -DataPageHeaderV2::~DataPageHeaderV2() noexcept {} - -void DataPageHeaderV2::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DataPageHeaderV2::__set_num_nulls(const int32_t val) { - this->num_nulls = val; -} - -void DataPageHeaderV2::__set_num_rows(const int32_t val) { - this->num_rows = val; -} - -void DataPageHeaderV2::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) { - this->definition_levels_byte_length = val; -} - -void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) { - this->repetition_levels_byte_length = val; -} - -void DataPageHeaderV2::__set_is_compressed(const bool val) { - this->is_compressed = val; - __isset.is_compressed = true; -} - -void DataPageHeaderV2::__set_statistics(const Statistics& val) { - this->statistics = val; - __isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) { - obj.printTo(out); - return out; -} - -uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { 
- xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast53; - xfer += iprot->readI32(ecast53); - this->encoding = static_cast(ecast53); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if 
(!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeaderV2::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); - - xfer += oprot->writeFieldBegin( - "num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin( - "is_compressed", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin( - "statistics", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += 
oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DataPageHeaderV2& a, DataPageHeaderV2& b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.num_nulls, b.num_nulls); - swap(a.num_rows, b.num_rows); - swap(a.encoding, b.encoding); - swap(a.definition_levels_byte_length, b.definition_levels_byte_length); - swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length); - swap(a.is_compressed, b.is_compressed); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} - -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) { - num_values = other54.num_values; - num_nulls = other54.num_nulls; - num_rows = other54.num_rows; - encoding = other54.encoding; - definition_levels_byte_length = other54.definition_levels_byte_length; - repetition_levels_byte_length = other54.repetition_levels_byte_length; - is_compressed = other54.is_compressed; - statistics = other54.statistics; - __isset = other54.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) { - num_values = other55.num_values; - num_nulls = other55.num_nulls; - num_rows = other55.num_rows; - encoding = other55.encoding; - definition_levels_byte_length = other55.definition_levels_byte_length; - repetition_levels_byte_length = other55.repetition_levels_byte_length; - is_compressed = other55.is_compressed; - statistics = other55.statistics; - __isset = other55.__isset; - return *this; -} -void DataPageHeaderV2::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "DataPageHeaderV2("; - out << "num_values=" << to_string(num_values); - out << ", " << "num_nulls=" << to_string(num_nulls); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_levels_byte_length=" - << to_string(definition_levels_byte_length); - out << ", " << "repetition_levels_byte_length=" - << 
to_string(repetition_levels_byte_length); - out << ", " << "is_compressed="; - (__isset.is_compressed ? (out << to_string(is_compressed)) - : (out << "")); - out << ", " << "statistics="; - (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ")"; -} - -SplitBlockAlgorithm::~SplitBlockAlgorithm() noexcept {} - -std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj) { - obj.printTo(out); - return out; -} - -uint32_t SplitBlockAlgorithm::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t SplitBlockAlgorithm::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SplitBlockAlgorithm"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SplitBlockAlgorithm& /*a*/, SplitBlockAlgorithm& /*b*/) {} - -SplitBlockAlgorithm::SplitBlockAlgorithm( - const SplitBlockAlgorithm& /*other56*/) {} - -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=( - const SplitBlockAlgorithm& /*other57*/) { - return *this; -} - -void SplitBlockAlgorithm::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "SplitBlockAlgorithm("; - out << ")"; -} - -BloomFilterAlgorithm::~BloomFilterAlgorithm() noexcept {} - -void BloomFilterAlgorithm::__set_BLOCK(const SplitBlockAlgorithm& val) { - this->BLOCK = val; 
- __isset.BLOCK = true; -} -std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj) { - obj.printTo(out); - return out; -} - -uint32_t BloomFilterAlgorithm::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BLOCK.read(iprot); - this->__isset.BLOCK = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterAlgorithm::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterAlgorithm"); - - if (this->__isset.BLOCK) { - xfer += oprot->writeFieldBegin( - "BLOCK", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->BLOCK.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BloomFilterAlgorithm& a, BloomFilterAlgorithm& b) { - using ::std::swap; - swap(a.BLOCK, b.BLOCK); - swap(a.__isset, b.__isset); -} - -BloomFilterAlgorithm::BloomFilterAlgorithm( - const BloomFilterAlgorithm& other58) { - BLOCK = other58.BLOCK; - __isset = other58.__isset; -} -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=( - const BloomFilterAlgorithm& other59) { - BLOCK = other59.BLOCK; - __isset = other59.__isset; - return *this; -} -void 
BloomFilterAlgorithm::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "BloomFilterAlgorithm("; - out << "BLOCK="; - (__isset.BLOCK ? (out << to_string(BLOCK)) : (out << "")); - out << ")"; -} - -XxHash::~XxHash() noexcept {} - -std::ostream& operator<<(std::ostream& out, const XxHash& obj) { - obj.printTo(out); - return out; -} - -uint32_t XxHash::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t XxHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("XxHash"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(XxHash& /*a*/, XxHash& /*b*/) {} - -XxHash::XxHash(const XxHash& /*other60*/) {} - -XxHash& XxHash::operator=(const XxHash& /*other61*/) { - return *this; -} - -void XxHash::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "XxHash("; - out << ")"; -} - -BloomFilterHash::~BloomFilterHash() noexcept {} - -void BloomFilterHash::__set_XXHASH(const XxHash& val) { - this->XXHASH = val; - __isset.XXHASH = true; -} -std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj) { - obj.printTo(out); - return out; -} - -uint32_t BloomFilterHash::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - 
uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->XXHASH.read(iprot); - this->__isset.XXHASH = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterHash::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHash"); - - if (this->__isset.XXHASH) { - xfer += oprot->writeFieldBegin( - "XXHASH", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->XXHASH.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BloomFilterHash& a, BloomFilterHash& b) { - using ::std::swap; - swap(a.XXHASH, b.XXHASH); - swap(a.__isset, b.__isset); -} - -BloomFilterHash::BloomFilterHash(const BloomFilterHash& other62) { - XXHASH = other62.XXHASH; - __isset = other62.__isset; -} -BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other63) { - XXHASH = other63.XXHASH; - __isset = other63.__isset; - return *this; -} -void BloomFilterHash::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "BloomFilterHash("; - out << "XXHASH="; - (__isset.XXHASH ? 
(out << to_string(XXHASH)) : (out << "")); - out << ")"; -} - -Uncompressed::~Uncompressed() noexcept {} - -std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) { - obj.printTo(out); - return out; -} - -uint32_t Uncompressed::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Uncompressed::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Uncompressed"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(Uncompressed& /*a*/, Uncompressed& /*b*/) {} - -Uncompressed::Uncompressed(const Uncompressed& /*other64*/) {} - -Uncompressed& Uncompressed::operator=(const Uncompressed& /*other65*/) { - return *this; -} - -void Uncompressed::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "Uncompressed("; - out << ")"; -} - -BloomFilterCompression::~BloomFilterCompression() noexcept {} - -void BloomFilterCompression::__set_UNCOMPRESSED(const Uncompressed& val) { - this->UNCOMPRESSED = val; - __isset.UNCOMPRESSED = true; -} -std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj) { - obj.printTo(out); - return out; -} - -uint32_t BloomFilterCompression::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - 
uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNCOMPRESSED.read(iprot); - this->__isset.UNCOMPRESSED = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterCompression::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterCompression"); - - if (this->__isset.UNCOMPRESSED) { - xfer += oprot->writeFieldBegin( - "UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->UNCOMPRESSED.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BloomFilterCompression& a, BloomFilterCompression& b) { - using ::std::swap; - swap(a.UNCOMPRESSED, b.UNCOMPRESSED); - swap(a.__isset, b.__isset); -} - -BloomFilterCompression::BloomFilterCompression( - const BloomFilterCompression& other66) { - UNCOMPRESSED = other66.UNCOMPRESSED; - __isset = other66.__isset; -} -BloomFilterCompression& BloomFilterCompression::operator=( - const BloomFilterCompression& other67) { - UNCOMPRESSED = other67.UNCOMPRESSED; - __isset = other67.__isset; - return *this; -} -void BloomFilterCompression::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "BloomFilterCompression("; - out << "UNCOMPRESSED="; - (__isset.UNCOMPRESSED ? 
(out << to_string(UNCOMPRESSED)) : (out << "")); - out << ")"; -} - -BloomFilterHeader::~BloomFilterHeader() noexcept {} - -void BloomFilterHeader::__set_numBytes(const int32_t val) { - this->numBytes = val; -} - -void BloomFilterHeader::__set_algorithm(const BloomFilterAlgorithm& val) { - this->algorithm = val; -} - -void BloomFilterHeader::__set_hash(const BloomFilterHash& val) { - this->hash = val; -} - -void BloomFilterHeader::__set_compression(const BloomFilterCompression& val) { - this->compression = val; -} -std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj) { - obj.printTo(out); - return out; -} - -uint32_t BloomFilterHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_numBytes = false; - bool isset_algorithm = false; - bool isset_hash = false; - bool isset_compression = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->numBytes); - isset_numBytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->algorithm.read(iprot); - isset_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->hash.read(iprot); - isset_hash = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->compression.read(iprot); - isset_compression = true; - } else { - xfer += iprot->skip(ftype); - } - break; - 
default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_numBytes) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_hash) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compression) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t BloomFilterHeader::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHeader"); - - xfer += - oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->numBytes); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "algorithm", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->hash.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "compression", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->compression.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BloomFilterHeader& a, BloomFilterHeader& b) { - using ::std::swap; - swap(a.numBytes, b.numBytes); - swap(a.algorithm, b.algorithm); - swap(a.hash, b.hash); - swap(a.compression, b.compression); -} - -BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other68) { - numBytes = other68.numBytes; - algorithm = other68.algorithm; - hash = other68.hash; - compression = other68.compression; -} -BloomFilterHeader& BloomFilterHeader::operator=( - const BloomFilterHeader& other69) { - numBytes = 
other69.numBytes; - algorithm = other69.algorithm; - hash = other69.hash; - compression = other69.compression; - return *this; -} -void BloomFilterHeader::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "BloomFilterHeader("; - out << "numBytes=" << to_string(numBytes); - out << ", " << "algorithm=" << to_string(algorithm); - out << ", " << "hash=" << to_string(hash); - out << ", " << "compression=" << to_string(compression); - out << ")"; -} - -PageHeader::~PageHeader() noexcept {} - -void PageHeader::__set_type(const PageType::type val) { - this->type = val; -} - -void PageHeader::__set_uncompressed_page_size(const int32_t val) { - this->uncompressed_page_size = val; -} - -void PageHeader::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} - -void PageHeader::__set_crc(const int32_t val) { - this->crc = val; - __isset.crc = true; -} - -void PageHeader::__set_data_page_header(const DataPageHeader& val) { - this->data_page_header = val; - __isset.data_page_header = true; -} - -void PageHeader::__set_index_page_header(const IndexPageHeader& val) { - this->index_page_header = val; - __isset.index_page_header = true; -} - -void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) { - this->dictionary_page_header = val; - __isset.dictionary_page_header = true; -} - -void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) { - this->data_page_header_v2 = val; - __isset.data_page_header_v2 = true; -} -std::ostream& operator<<(std::ostream& out, const PageHeader& obj) { - obj.printTo(out); - return out; -} - -uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = 
false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast70; - xfer += iprot->readI32(ecast70); - this->type = static_cast(ecast70); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += 
iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "compressed_page_size", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.crc) { - xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin( - "data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin( - "index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin( - "dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += 
oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin( - "data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageHeader& a, PageHeader& b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.uncompressed_page_size, b.uncompressed_page_size); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.crc, b.crc); - swap(a.data_page_header, b.data_page_header); - swap(a.index_page_header, b.index_page_header); - swap(a.dictionary_page_header, b.dictionary_page_header); - swap(a.data_page_header_v2, b.data_page_header_v2); - swap(a.__isset, b.__isset); -} - -PageHeader::PageHeader(const PageHeader& other71) { - type = other71.type; - uncompressed_page_size = other71.uncompressed_page_size; - compressed_page_size = other71.compressed_page_size; - crc = other71.crc; - data_page_header = other71.data_page_header; - index_page_header = other71.index_page_header; - dictionary_page_header = other71.dictionary_page_header; - data_page_header_v2 = other71.data_page_header_v2; - __isset = other71.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other72) { - type = other72.type; - uncompressed_page_size = other72.uncompressed_page_size; - compressed_page_size = other72.compressed_page_size; - crc = other72.crc; - data_page_header = other72.data_page_header; - index_page_header = other72.index_page_header; - dictionary_page_header = other72.dictionary_page_header; - data_page_header_v2 = other72.data_page_header_v2; - __isset = other72.__isset; - return *this; -} -void PageHeader::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "PageHeader("; - out << "type=" << to_string(type); - out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size); - out << ", " 
<< "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "crc="; - (__isset.crc ? (out << to_string(crc)) : (out << "")); - out << ", " << "data_page_header="; - (__isset.data_page_header ? (out << to_string(data_page_header)) - : (out << "")); - out << ", " << "index_page_header="; - (__isset.index_page_header ? (out << to_string(index_page_header)) - : (out << "")); - out << ", " << "dictionary_page_header="; - (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) - : (out << "")); - out << ", " << "data_page_header_v2="; - (__isset.data_page_header_v2 ? (out << to_string(data_page_header_v2)) - : (out << "")); - out << ")"; -} - -KeyValue::~KeyValue() noexcept {} - -void KeyValue::__set_key(const std::string& val) { - this->key = val; -} - -void KeyValue::__set_value(const std::string& val) { - this->value = val; - __isset.value = true; -} -std::ostream& operator<<(std::ostream& out, const KeyValue& obj) { - obj.printTo(out); - return out; -} - -uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_key = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - 
xfer += iprot->readStructEnd(); - - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("KeyValue"); - - xfer += - oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.value) { - xfer += oprot->writeFieldBegin( - "value", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(KeyValue& a, KeyValue& b) { - using ::std::swap; - swap(a.key, b.key); - swap(a.value, b.value); - swap(a.__isset, b.__isset); -} - -KeyValue::KeyValue(const KeyValue& other73) { - key = other73.key; - value = other73.value; - __isset = other73.__isset; -} -KeyValue& KeyValue::operator=(const KeyValue& other74) { - key = other74.key; - value = other74.value; - __isset = other74.__isset; - return *this; -} -void KeyValue::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "KeyValue("; - out << "key=" << to_string(key); - out << ", " << "value="; - (__isset.value ? 
(out << to_string(value)) : (out << "")); - out << ")"; -} - -SortingColumn::~SortingColumn() noexcept {} - -void SortingColumn::__set_column_idx(const int32_t val) { - this->column_idx = val; -} - -void SortingColumn::__set_descending(const bool val) { - this->descending = val; -} - -void SortingColumn::__set_nulls_first(const bool val) { - this->nulls_first = val; -} -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) { - obj.printTo(out); - return out; -} - -uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_column_idx = false; - bool isset_descending = false; - bool isset_nulls_first = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_idx); - isset_column_idx = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->descending); - isset_descending = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->nulls_first); - isset_nulls_first = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_column_idx) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_descending) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_nulls_first) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SortingColumn::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SortingColumn"); - - xfer += oprot->writeFieldBegin( - "column_idx", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->column_idx); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "descending", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->descending); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "nulls_first", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->nulls_first); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SortingColumn& a, SortingColumn& b) { - using ::std::swap; - swap(a.column_idx, b.column_idx); - swap(a.descending, b.descending); - swap(a.nulls_first, b.nulls_first); -} - -SortingColumn::SortingColumn(const SortingColumn& other75) { - column_idx = other75.column_idx; - descending = other75.descending; - nulls_first = other75.nulls_first; -} -SortingColumn& SortingColumn::operator=(const SortingColumn& other76) { - column_idx = other76.column_idx; - descending = other76.descending; - nulls_first = other76.nulls_first; - return *this; -} -void SortingColumn::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "SortingColumn("; - out << "column_idx=" << to_string(column_idx); - out << ", " << "descending=" << to_string(descending); - out << ", " << "nulls_first=" << to_string(nulls_first); - out << ")"; -} - -PageEncodingStats::~PageEncodingStats() noexcept {} - -void PageEncodingStats::__set_page_type(const PageType::type val) { - this->page_type = val; -} - -void PageEncodingStats::__set_encoding(const Encoding::type val) { - 
this->encoding = val; -} - -void PageEncodingStats::__set_count(const int32_t val) { - this->count = val; -} -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) { - obj.printTo(out); - return out; -} - -uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_type = false; - bool isset_encoding = false; - bool isset_count = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast77; - xfer += iprot->readI32(ecast77); - this->page_type = static_cast(ecast77); - isset_page_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast78; - xfer += iprot->readI32(ecast78); - this->encoding = static_cast(ecast78); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->count); - isset_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_count) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageEncodingStats::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - 
::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageEncodingStats"); - - xfer += - oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->page_type)); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->count); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageEncodingStats& a, PageEncodingStats& b) { - using ::std::swap; - swap(a.page_type, b.page_type); - swap(a.encoding, b.encoding); - swap(a.count, b.count); -} - -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other79) { - page_type = other79.page_type; - encoding = other79.encoding; - count = other79.count; -} -PageEncodingStats& PageEncodingStats::operator=( - const PageEncodingStats& other80) { - page_type = other80.page_type; - encoding = other80.encoding; - count = other80.count; - return *this; -} -void PageEncodingStats::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "PageEncodingStats("; - out << "page_type=" << to_string(page_type); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "count=" << to_string(count); - out << ")"; -} - -ColumnMetaData::~ColumnMetaData() noexcept {} - -void ColumnMetaData::__set_type(const Type::type val) { - this->type = val; -} - -void ColumnMetaData::__set_encodings(const std::vector& val) { - this->encodings = val; -} - -void ColumnMetaData::__set_path_in_schema(const std::vector& val) { - this->path_in_schema = val; -} - -void ColumnMetaData::__set_codec(const CompressionCodec::type val) { - this->codec = val; -} - -void 
ColumnMetaData::__set_num_values(const int64_t val) { - this->num_values = val; -} - -void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) { - this->total_uncompressed_size = val; -} - -void ColumnMetaData::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -} - -void ColumnMetaData::__set_key_value_metadata( - const std::vector& val) { - this->key_value_metadata = val; - __isset.key_value_metadata = true; -} - -void ColumnMetaData::__set_data_page_offset(const int64_t val) { - this->data_page_offset = val; -} - -void ColumnMetaData::__set_index_page_offset(const int64_t val) { - this->index_page_offset = val; - __isset.index_page_offset = true; -} - -void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) { - this->dictionary_page_offset = val; - __isset.dictionary_page_offset = true; -} - -void ColumnMetaData::__set_statistics(const Statistics& val) { - this->statistics = val; - __isset.statistics = true; -} - -void ColumnMetaData::__set_encoding_stats( - const std::vector& val) { - this->encoding_stats = val; - __isset.encoding_stats = true; -} - -void ColumnMetaData::__set_bloom_filter_offset(const int64_t val) { - this->bloom_filter_offset = val; - __isset.bloom_filter_offset = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) { - obj.printTo(out); - return out; -} - -uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_encodings = false; - bool isset_path_in_schema = false; - bool isset_codec = false; - bool isset_num_values = false; - bool isset_total_uncompressed_size = false; - bool isset_total_compressed_size = false; - bool 
isset_data_page_offset = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast81; - xfer += iprot->readI32(ecast81); - this->type = static_cast(ecast81); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->encodings.clear(); - uint32_t _size82; - ::apache::thrift::protocol::TType _etype85; - xfer += iprot->readListBegin(_etype85, _size82); - this->encodings.resize(_size82); - uint32_t _i86; - for (_i86 = 0; _i86 < _size82; ++_i86) { - int32_t ecast87; - xfer += iprot->readI32(ecast87); - this->encodings[_i86] = static_cast(ecast87); - } - xfer += iprot->readListEnd(); - } - isset_encodings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size88; - ::apache::thrift::protocol::TType _etype91; - xfer += iprot->readListBegin(_etype91, _size88); - this->path_in_schema.resize(_size88); - uint32_t _i92; - for (_i92 = 0; _i92 < _size88; ++_i92) { - xfer += iprot->readString(this->path_in_schema[_i92]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast93; - xfer += iprot->readI32(ecast93); - this->codec = static_cast(ecast93); - isset_codec = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_uncompressed_size); - 
isset_total_uncompressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - isset_total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size94; - ::apache::thrift::protocol::TType _etype97; - xfer += iprot->readListBegin(_etype97, _size94); - this->key_value_metadata.resize(_size94); - uint32_t _i98; - for (_i98 = 0; _i98 < _size94; ++_i98) { - xfer += this->key_value_metadata[_i98].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->data_page_offset); - isset_data_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->index_page_offset); - this->__isset.index_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->dictionary_page_offset); - this->__isset.dictionary_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->encoding_stats.clear(); - uint32_t _size99; - ::apache::thrift::protocol::TType _etype102; - xfer += iprot->readListBegin(_etype102, _size99); - this->encoding_stats.resize(_size99); - uint32_t _i103; - for (_i103 = 0; _i103 < _size99; ++_i103) { - xfer += 
this->encoding_stats[_i103].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.encoding_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->bloom_filter_offset); - this->__isset.bloom_filter_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encodings) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_codec) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_uncompressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_compressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_data_page_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnMetaData::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnMetaData"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "encodings", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_I32, - static_cast(this->encodings.size())); - std::vector::const_iterator _iter104; - for (_iter104 = this->encodings.begin(); _iter104 != this->encodings.end(); - ++_iter104) { - 
xfer += oprot->writeI32(static_cast(*_iter104)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "path_in_schema", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRING, - static_cast(this->path_in_schema.size())); - std::vector::const_iterator _iter105; - for (_iter105 = this->path_in_schema.begin(); - _iter105 != this->path_in_schema.end(); - ++_iter105) { - xfer += oprot->writeString((*_iter105)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->codec)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "num_values", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_uncompressed_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "total_compressed_size", ::apache::thrift::protocol::T_I64, 7); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin( - "key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->key_value_metadata.size())); - std::vector::const_iterator _iter106; - for (_iter106 = this->key_value_metadata.begin(); - _iter106 != this->key_value_metadata.end(); - ++_iter106) { - xfer += (*_iter106).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin( - "data_page_offset", ::apache::thrift::protocol::T_I64, 9); - xfer += 
oprot->writeI64(this->data_page_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.index_page_offset) { - xfer += oprot->writeFieldBegin( - "index_page_offset", ::apache::thrift::protocol::T_I64, 10); - xfer += oprot->writeI64(this->index_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_offset) { - xfer += oprot->writeFieldBegin( - "dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11); - xfer += oprot->writeI64(this->dictionary_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin( - "statistics", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encoding_stats) { - xfer += oprot->writeFieldBegin( - "encoding_stats", ::apache::thrift::protocol::T_LIST, 13); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->encoding_stats.size())); - std::vector::const_iterator _iter107; - for (_iter107 = this->encoding_stats.begin(); - _iter107 != this->encoding_stats.end(); - ++_iter107) { - xfer += (*_iter107).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.bloom_filter_offset) { - xfer += oprot->writeFieldBegin( - "bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14); - xfer += oprot->writeI64(this->bloom_filter_offset); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnMetaData& a, ColumnMetaData& b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.encodings, b.encodings); - swap(a.path_in_schema, b.path_in_schema); - swap(a.codec, b.codec); - swap(a.num_values, b.num_values); - swap(a.total_uncompressed_size, b.total_uncompressed_size); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.key_value_metadata, 
b.key_value_metadata); - swap(a.data_page_offset, b.data_page_offset); - swap(a.index_page_offset, b.index_page_offset); - swap(a.dictionary_page_offset, b.dictionary_page_offset); - swap(a.statistics, b.statistics); - swap(a.encoding_stats, b.encoding_stats); - swap(a.bloom_filter_offset, b.bloom_filter_offset); - swap(a.__isset, b.__isset); -} - -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other108) { - type = other108.type; - encodings = other108.encodings; - path_in_schema = other108.path_in_schema; - codec = other108.codec; - num_values = other108.num_values; - total_uncompressed_size = other108.total_uncompressed_size; - total_compressed_size = other108.total_compressed_size; - key_value_metadata = other108.key_value_metadata; - data_page_offset = other108.data_page_offset; - index_page_offset = other108.index_page_offset; - dictionary_page_offset = other108.dictionary_page_offset; - statistics = other108.statistics; - encoding_stats = other108.encoding_stats; - bloom_filter_offset = other108.bloom_filter_offset; - __isset = other108.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other109) { - type = other109.type; - encodings = other109.encodings; - path_in_schema = other109.path_in_schema; - codec = other109.codec; - num_values = other109.num_values; - total_uncompressed_size = other109.total_uncompressed_size; - total_compressed_size = other109.total_compressed_size; - key_value_metadata = other109.key_value_metadata; - data_page_offset = other109.data_page_offset; - index_page_offset = other109.index_page_offset; - dictionary_page_offset = other109.dictionary_page_offset; - statistics = other109.statistics; - encoding_stats = other109.encoding_stats; - bloom_filter_offset = other109.bloom_filter_offset; - __isset = other109.__isset; - return *this; -} -void ColumnMetaData::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "ColumnMetaData("; - out << "type=" << to_string(type); - out << 
", " << "encodings=" << to_string(encodings); - out << ", " << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "codec=" << to_string(codec); - out << ", " << "num_values=" << to_string(num_values); - out << ", " - << "total_uncompressed_size=" << to_string(total_uncompressed_size); - out << ", " << "total_compressed_size=" << to_string(total_compressed_size); - out << ", " << "key_value_metadata="; - (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) - : (out << "")); - out << ", " << "data_page_offset=" << to_string(data_page_offset); - out << ", " << "index_page_offset="; - (__isset.index_page_offset ? (out << to_string(index_page_offset)) - : (out << "")); - out << ", " << "dictionary_page_offset="; - (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) - : (out << "")); - out << ", " << "statistics="; - (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ", " << "encoding_stats="; - (__isset.encoding_stats ? (out << to_string(encoding_stats)) - : (out << "")); - out << ", " << "bloom_filter_offset="; - (__isset.bloom_filter_offset ? 
(out << to_string(bloom_filter_offset)) - : (out << "")); - out << ")"; -} - -EncryptionWithFooterKey::~EncryptionWithFooterKey() noexcept {} - -std::ostream& operator<<( - std::ostream& out, - const EncryptionWithFooterKey& obj) { - obj.printTo(out); - return out; -} - -uint32_t EncryptionWithFooterKey::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionWithFooterKey::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionWithFooterKey& /*a*/, EncryptionWithFooterKey& /*b*/) {} - -EncryptionWithFooterKey::EncryptionWithFooterKey( - const EncryptionWithFooterKey& /*other110*/) {} - -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=( - const EncryptionWithFooterKey& /*other111*/) { - return *this; -} - -void EncryptionWithFooterKey::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "EncryptionWithFooterKey("; - out << ")"; -} - -EncryptionWithColumnKey::~EncryptionWithColumnKey() noexcept {} - -void EncryptionWithColumnKey::__set_path_in_schema( - const std::vector& val) { - this->path_in_schema = val; -} - -void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) { - this->key_metadata = val; 
- __isset.key_metadata = true; -} -std::ostream& operator<<( - std::ostream& out, - const EncryptionWithColumnKey& obj) { - obj.printTo(out); - return out; -} - -uint32_t EncryptionWithColumnKey::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_path_in_schema = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size112; - ::apache::thrift::protocol::TType _etype115; - xfer += iprot->readListBegin(_etype115, _size112); - this->path_in_schema.resize(_size112); - uint32_t _i116; - for (_i116 = 0; _i116 < _size112; ++_i116) { - xfer += iprot->readString(this->path_in_schema[_i116]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t EncryptionWithColumnKey::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); - - xfer += oprot->writeFieldBegin( - "path_in_schema", 
::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRING, - static_cast(this->path_in_schema.size())); - std::vector::const_iterator _iter117; - for (_iter117 = this->path_in_schema.begin(); - _iter117 != this->path_in_schema.end(); - ++_iter117) { - xfer += oprot->writeString((*_iter117)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin( - "key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionWithColumnKey& a, EncryptionWithColumnKey& b) { - using ::std::swap; - swap(a.path_in_schema, b.path_in_schema); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} - -EncryptionWithColumnKey::EncryptionWithColumnKey( - const EncryptionWithColumnKey& other118) { - path_in_schema = other118.path_in_schema; - key_metadata = other118.key_metadata; - __isset = other118.__isset; -} -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=( - const EncryptionWithColumnKey& other119) { - path_in_schema = other119.path_in_schema; - key_metadata = other119.key_metadata; - __isset = other119.__isset; - return *this; -} -void EncryptionWithColumnKey::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "EncryptionWithColumnKey("; - out << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "key_metadata="; - (__isset.key_metadata ? 
(out << to_string(key_metadata)) : (out << "")); - out << ")"; -} - -ColumnCryptoMetaData::~ColumnCryptoMetaData() noexcept {} - -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY( - const EncryptionWithFooterKey& val) { - this->ENCRYPTION_WITH_FOOTER_KEY = val; - __isset.ENCRYPTION_WITH_FOOTER_KEY = true; -} - -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY( - const EncryptionWithColumnKey& val) { - this->ENCRYPTION_WITH_COLUMN_KEY = val; - __isset.ENCRYPTION_WITH_COLUMN_KEY = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) { - obj.printTo(out); - return out; -} - -uint32_t ColumnCryptoMetaData::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnCryptoMetaData::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - 
if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin( - "ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin( - "ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnCryptoMetaData& a, ColumnCryptoMetaData& b) { - using ::std::swap; - swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); - swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY); - swap(a.__isset, b.__isset); -} - -ColumnCryptoMetaData::ColumnCryptoMetaData( - const ColumnCryptoMetaData& other120) { - ENCRYPTION_WITH_FOOTER_KEY = other120.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other120.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other120.__isset; -} -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=( - const ColumnCryptoMetaData& other121) { - ENCRYPTION_WITH_FOOTER_KEY = other121.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other121.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other121.__isset; - return *this; -} -void ColumnCryptoMetaData::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "ColumnCryptoMetaData("; - out << "ENCRYPTION_WITH_FOOTER_KEY="; - (__isset.ENCRYPTION_WITH_FOOTER_KEY - ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) - : (out << "")); - out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; - (__isset.ENCRYPTION_WITH_COLUMN_KEY - ? 
(out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) - : (out << "")); - out << ")"; -} - -ColumnChunk::~ColumnChunk() noexcept {} - -void ColumnChunk::__set_file_path(const std::string& val) { - this->file_path = val; - __isset.file_path = true; -} - -void ColumnChunk::__set_file_offset(const int64_t val) { - this->file_offset = val; -} - -void ColumnChunk::__set_meta_data(const ColumnMetaData& val) { - this->meta_data = val; - __isset.meta_data = true; -} - -void ColumnChunk::__set_offset_index_offset(const int64_t val) { - this->offset_index_offset = val; - __isset.offset_index_offset = true; -} - -void ColumnChunk::__set_offset_index_length(const int32_t val) { - this->offset_index_length = val; - __isset.offset_index_length = true; -} - -void ColumnChunk::__set_column_index_offset(const int64_t val) { - this->column_index_offset = val; - __isset.column_index_offset = true; -} - -void ColumnChunk::__set_column_index_length(const int32_t val) { - this->column_index_length = val; - __isset.column_index_length = true; -} - -void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) { - this->crypto_metadata = val; - __isset.crypto_metadata = true; -} - -void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) { - this->encrypted_column_metadata = val; - __isset.encrypted_column_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) { - obj.printTo(out); - return out; -} - -uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_file_offset = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 
1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if 
(!isset_file_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnChunk::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin( - "file_path", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin( - "file_offset", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin( - "meta_data", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin( - "offset_index_offset", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin( - "offset_index_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin( - "column_index_offset", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin( - "column_index_length", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin( - "crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += 
this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin( - "encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnChunk& a, ColumnChunk& b) { - using ::std::swap; - swap(a.file_path, b.file_path); - swap(a.file_offset, b.file_offset); - swap(a.meta_data, b.meta_data); - swap(a.offset_index_offset, b.offset_index_offset); - swap(a.offset_index_length, b.offset_index_length); - swap(a.column_index_offset, b.column_index_offset); - swap(a.column_index_length, b.column_index_length); - swap(a.crypto_metadata, b.crypto_metadata); - swap(a.encrypted_column_metadata, b.encrypted_column_metadata); - swap(a.__isset, b.__isset); -} - -ColumnChunk::ColumnChunk(const ColumnChunk& other122) { - file_path = other122.file_path; - file_offset = other122.file_offset; - meta_data = other122.meta_data; - offset_index_offset = other122.offset_index_offset; - offset_index_length = other122.offset_index_length; - column_index_offset = other122.column_index_offset; - column_index_length = other122.column_index_length; - crypto_metadata = other122.crypto_metadata; - encrypted_column_metadata = other122.encrypted_column_metadata; - __isset = other122.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other123) { - file_path = other123.file_path; - file_offset = other123.file_offset; - meta_data = other123.meta_data; - offset_index_offset = other123.offset_index_offset; - offset_index_length = other123.offset_index_length; - column_index_offset = other123.column_index_offset; - column_index_length = other123.column_index_length; - crypto_metadata = other123.crypto_metadata; - encrypted_column_metadata = other123.encrypted_column_metadata; - __isset = 
other123.__isset; - return *this; -} -void ColumnChunk::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "ColumnChunk("; - out << "file_path="; - (__isset.file_path ? (out << to_string(file_path)) : (out << "")); - out << ", " << "file_offset=" << to_string(file_offset); - out << ", " << "meta_data="; - (__isset.meta_data ? (out << to_string(meta_data)) : (out << "")); - out << ", " << "offset_index_offset="; - (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) - : (out << "")); - out << ", " << "offset_index_length="; - (__isset.offset_index_length ? (out << to_string(offset_index_length)) - : (out << "")); - out << ", " << "column_index_offset="; - (__isset.column_index_offset ? (out << to_string(column_index_offset)) - : (out << "")); - out << ", " << "column_index_length="; - (__isset.column_index_length ? (out << to_string(column_index_length)) - : (out << "")); - out << ", " << "crypto_metadata="; - (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) - : (out << "")); - out << ", " << "encrypted_column_metadata="; - (__isset.encrypted_column_metadata - ? 
(out << to_string(encrypted_column_metadata)) - : (out << "")); - out << ")"; -} - -RowGroup::~RowGroup() noexcept {} - -void RowGroup::__set_columns(const std::vector& val) { - this->columns = val; -} - -void RowGroup::__set_total_byte_size(const int64_t val) { - this->total_byte_size = val; -} - -void RowGroup::__set_num_rows(const int64_t val) { - this->num_rows = val; -} - -void RowGroup::__set_sorting_columns(const std::vector& val) { - this->sorting_columns = val; - __isset.sorting_columns = true; -} - -void RowGroup::__set_file_offset(const int64_t val) { - this->file_offset = val; - __isset.file_offset = true; -} - -void RowGroup::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; - __isset.total_compressed_size = true; -} - -void RowGroup::__set_ordinal(const int16_t val) { - this->ordinal = val; - __isset.ordinal = true; -} -std::ostream& operator<<(std::ostream& out, const RowGroup& obj) { - obj.printTo(out); - return out; -} - -uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size124; - ::apache::thrift::protocol::TType _etype127; - xfer += iprot->readListBegin(_etype127, _size124); - this->columns.resize(_size124); - uint32_t _i128; - for (_i128 = 0; _i128 < _size124; ++_i128) { - xfer += this->columns[_i128].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_columns 
= true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_byte_size); - isset_total_byte_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->sorting_columns.clear(); - uint32_t _size129; - ::apache::thrift::protocol::TType _etype132; - xfer += iprot->readListBegin(_etype132, _size129); - this->sorting_columns.resize(_size129); - uint32_t _i133; - for (_i133 = 0; _i133 < _size129; ++_i133) { - xfer += this->sorting_columns[_i133].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.sorting_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - this->__isset.file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - this->__isset.total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I16) { - xfer += iprot->readI16(this->ordinal); - this->__isset.ordinal = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_columns) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_byte_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t 
RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("RowGroup"); - - xfer += - oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->columns.size())); - std::vector::const_iterator _iter134; - for (_iter134 = this->columns.begin(); _iter134 != this->columns.end(); - ++_iter134) { - xfer += (*_iter134).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "total_byte_size", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->total_byte_size); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.sorting_columns) { - xfer += oprot->writeFieldBegin( - "sorting_columns", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->sorting_columns.size())); - std::vector::const_iterator _iter135; - for (_iter135 = this->sorting_columns.begin(); - _iter135 != this->sorting_columns.end(); - ++_iter135) { - xfer += (*_iter135).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.file_offset) { - xfer += oprot->writeFieldBegin( - "file_offset", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin( - "total_compressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { 
- xfer += - oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(RowGroup& a, RowGroup& b) { - using ::std::swap; - swap(a.columns, b.columns); - swap(a.total_byte_size, b.total_byte_size); - swap(a.num_rows, b.num_rows); - swap(a.sorting_columns, b.sorting_columns); - swap(a.file_offset, b.file_offset); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.ordinal, b.ordinal); - swap(a.__isset, b.__isset); -} - -RowGroup::RowGroup(const RowGroup& other136) { - columns = other136.columns; - total_byte_size = other136.total_byte_size; - num_rows = other136.num_rows; - sorting_columns = other136.sorting_columns; - file_offset = other136.file_offset; - total_compressed_size = other136.total_compressed_size; - ordinal = other136.ordinal; - __isset = other136.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other137) { - columns = other137.columns; - total_byte_size = other137.total_byte_size; - num_rows = other137.num_rows; - sorting_columns = other137.sorting_columns; - file_offset = other137.file_offset; - total_compressed_size = other137.total_compressed_size; - ordinal = other137.ordinal; - __isset = other137.__isset; - return *this; -} -void RowGroup::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "RowGroup("; - out << "columns=" << to_string(columns); - out << ", " << "total_byte_size=" << to_string(total_byte_size); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "sorting_columns="; - (__isset.sorting_columns ? (out << to_string(sorting_columns)) - : (out << "")); - out << ", " << "file_offset="; - (__isset.file_offset ? (out << to_string(file_offset)) : (out << "")); - out << ", " << "total_compressed_size="; - (__isset.total_compressed_size ? 
(out << to_string(total_compressed_size)) - : (out << "")); - out << ", " << "ordinal="; - (__isset.ordinal ? (out << to_string(ordinal)) : (out << "")); - out << ")"; -} - -TypeDefinedOrder::~TypeDefinedOrder() noexcept {} - -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) { - obj.printTo(out); - return out; -} - -uint32_t TypeDefinedOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TypeDefinedOrder::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TypeDefinedOrder& /*a*/, TypeDefinedOrder& /*b*/) {} - -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& /*other138*/) {} - -TypeDefinedOrder& TypeDefinedOrder::operator=( - const TypeDefinedOrder& /*other139*/) { - return *this; -} - -void TypeDefinedOrder::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "TypeDefinedOrder("; - out << ")"; -} - -ColumnOrder::~ColumnOrder() noexcept {} - -void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { - this->TYPE_ORDER = val; - __isset.TYPE_ORDER = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) { - obj.printTo(out); - return out; -} - -uint32_t 
ColumnOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnOrder::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); - - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin( - "TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnOrder& a, ColumnOrder& b) { - using ::std::swap; - swap(a.TYPE_ORDER, b.TYPE_ORDER); - swap(a.__isset, b.__isset); -} - -ColumnOrder::ColumnOrder(const ColumnOrder& other140) { - TYPE_ORDER = other140.TYPE_ORDER; - __isset = other140.__isset; -} -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other141) { - TYPE_ORDER = other141.TYPE_ORDER; - __isset = other141.__isset; - return *this; -} -void ColumnOrder::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "ColumnOrder("; - out << "TYPE_ORDER="; - (__isset.TYPE_ORDER ? 
(out << to_string(TYPE_ORDER)) : (out << "")); - out << ")"; -} - -PageLocation::~PageLocation() noexcept {} - -void PageLocation::__set_offset(const int64_t val) { - this->offset = val; -} - -void PageLocation::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} - -void PageLocation::__set_first_row_index(const int64_t val) { - this->first_row_index = val; -} -std::ostream& operator<<(std::ostream& out, const PageLocation& obj) { - obj.printTo(out); - return out; -} - -uint32_t PageLocation::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageLocation::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); - - xfer += - oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "compressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "first_row_index", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageLocation& a, PageLocation& b) { - using ::std::swap; - swap(a.offset, b.offset); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.first_row_index, b.first_row_index); -} - -PageLocation::PageLocation(const PageLocation& other142) { - offset = other142.offset; - compressed_page_size = other142.compressed_page_size; - first_row_index = other142.first_row_index; -} -PageLocation& PageLocation::operator=(const PageLocation& other143) { - offset = other143.offset; - compressed_page_size = other143.compressed_page_size; - first_row_index = other143.first_row_index; - return *this; -} -void PageLocation::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "PageLocation("; - out << "offset=" << to_string(offset); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "first_row_index=" << to_string(first_row_index); - out << ")"; -} - -OffsetIndex::~OffsetIndex() noexcept {} - 
-void OffsetIndex::__set_page_locations(const std::vector& val) { - this->page_locations = val; -} -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) { - obj.printTo(out); - return out; -} - -uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_locations = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size144; - ::apache::thrift::protocol::TType _etype147; - xfer += iprot->readListBegin(_etype147, _size144); - this->page_locations.resize(_size144); - uint32_t _i148; - for (_i148 = 0; _i148 < _size144; ++_i148) { - xfer += this->page_locations[_i148].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_page_locations = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_locations) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t OffsetIndex::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("OffsetIndex"); - - xfer += oprot->writeFieldBegin( - "page_locations", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->page_locations.size())); - std::vector::const_iterator _iter149; - for (_iter149 = 
this->page_locations.begin(); - _iter149 != this->page_locations.end(); - ++_iter149) { - xfer += (*_iter149).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(OffsetIndex& a, OffsetIndex& b) { - using ::std::swap; - swap(a.page_locations, b.page_locations); -} - -OffsetIndex::OffsetIndex(const OffsetIndex& other150) { - page_locations = other150.page_locations; -} -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other151) { - page_locations = other151.page_locations; - return *this; -} -void OffsetIndex::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "OffsetIndex("; - out << "page_locations=" << to_string(page_locations); - out << ")"; -} - -ColumnIndex::~ColumnIndex() noexcept {} - -void ColumnIndex::__set_null_pages(const std::vector& val) { - this->null_pages = val; -} - -void ColumnIndex::__set_min_values(const std::vector& val) { - this->min_values = val; -} - -void ColumnIndex::__set_max_values(const std::vector& val) { - this->max_values = val; -} - -void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) { - this->boundary_order = val; -} - -void ColumnIndex::__set_null_counts(const std::vector& val) { - this->null_counts = val; - __isset.null_counts = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) { - obj.printTo(out); - return out; -} - -uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_null_pages = false; - bool isset_min_values = false; - bool isset_max_values = false; - bool isset_boundary_order = false; - - while (true) { 
- xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->null_pages.clear(); - uint32_t _size152; - ::apache::thrift::protocol::TType _etype155; - xfer += iprot->readListBegin(_etype155, _size152); - this->null_pages.resize(_size152); - uint32_t _i156; - for (_i156 = 0; _i156 < _size152; ++_i156) { - xfer += iprot->readBool(this->null_pages[_i156]); - } - xfer += iprot->readListEnd(); - } - isset_null_pages = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->min_values.clear(); - uint32_t _size157; - ::apache::thrift::protocol::TType _etype160; - xfer += iprot->readListBegin(_etype160, _size157); - this->min_values.resize(_size157); - uint32_t _i161; - for (_i161 = 0; _i161 < _size157; ++_i161) { - xfer += iprot->readBinary(this->min_values[_i161]); - } - xfer += iprot->readListEnd(); - } - isset_min_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->max_values.clear(); - uint32_t _size162; - ::apache::thrift::protocol::TType _etype165; - xfer += iprot->readListBegin(_etype165, _size162); - this->max_values.resize(_size162); - uint32_t _i166; - for (_i166 = 0; _i166 < _size162; ++_i166) { - xfer += iprot->readBinary(this->max_values[_i166]); - } - xfer += iprot->readListEnd(); - } - isset_max_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast167; - xfer += iprot->readI32(ecast167); - this->boundary_order = static_cast(ecast167); - isset_boundary_order = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->null_counts.clear(); - uint32_t _size168; - 
::apache::thrift::protocol::TType _etype171; - xfer += iprot->readListBegin(_etype171, _size168); - this->null_counts.resize(_size168); - uint32_t _i172; - for (_i172 = 0; _i172 < _size168; ++_i172) { - xfer += iprot->readI64(this->null_counts[_i172]); - } - xfer += iprot->readListEnd(); - } - this->__isset.null_counts = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_null_pages) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_min_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_max_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_boundary_order) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnIndex::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnIndex"); - - xfer += oprot->writeFieldBegin( - "null_pages", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_BOOL, - static_cast(this->null_pages.size())); - std::vector::const_iterator _iter173; - for (_iter173 = this->null_pages.begin(); - _iter173 != this->null_pages.end(); - ++_iter173) { - xfer += oprot->writeBool((*_iter173)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "min_values", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRING, - static_cast(this->min_values.size())); - std::vector::const_iterator _iter174; - for (_iter174 = this->min_values.begin(); - _iter174 != this->min_values.end(); - ++_iter174) { - xfer += oprot->writeBinary((*_iter174)); - } - xfer += 
oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "max_values", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRING, - static_cast(this->max_values.size())); - std::vector::const_iterator _iter175; - for (_iter175 = this->max_values.begin(); - _iter175 != this->max_values.end(); - ++_iter175) { - xfer += oprot->writeBinary((*_iter175)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "boundary_order", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->boundary_order)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.null_counts) { - xfer += oprot->writeFieldBegin( - "null_counts", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_I64, - static_cast(this->null_counts.size())); - std::vector::const_iterator _iter176; - for (_iter176 = this->null_counts.begin(); - _iter176 != this->null_counts.end(); - ++_iter176) { - xfer += oprot->writeI64((*_iter176)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnIndex& a, ColumnIndex& b) { - using ::std::swap; - swap(a.null_pages, b.null_pages); - swap(a.min_values, b.min_values); - swap(a.max_values, b.max_values); - swap(a.boundary_order, b.boundary_order); - swap(a.null_counts, b.null_counts); - swap(a.__isset, b.__isset); -} - -ColumnIndex::ColumnIndex(const ColumnIndex& other177) { - null_pages = other177.null_pages; - min_values = other177.min_values; - max_values = other177.max_values; - boundary_order = other177.boundary_order; - null_counts = other177.null_counts; - __isset = other177.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other178) { - null_pages = other178.null_pages; - min_values 
= other178.min_values; - max_values = other178.max_values; - boundary_order = other178.boundary_order; - null_counts = other178.null_counts; - __isset = other178.__isset; - return *this; -} -void ColumnIndex::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "ColumnIndex("; - out << "null_pages=" << to_string(null_pages); - out << ", " << "min_values=" << to_string(min_values); - out << ", " << "max_values=" << to_string(max_values); - out << ", " << "boundary_order=" << to_string(boundary_order); - out << ", " << "null_counts="; - (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); - out << ")"; -} - -AesGcmV1::~AesGcmV1() noexcept {} - -void AesGcmV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; - __isset.aad_prefix = true; -} - -void AesGcmV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; - __isset.aad_file_unique = true; -} - -void AesGcmV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; - __isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) { - obj.printTo(out); - return out; -} - -uint32_t AesGcmV1::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += 
iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin( - "aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin( - "aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin( - "supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(AesGcmV1& a, AesGcmV1& b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} - -AesGcmV1::AesGcmV1(const AesGcmV1& other179) { - aad_prefix = other179.aad_prefix; - aad_file_unique = other179.aad_file_unique; - supply_aad_prefix = other179.supply_aad_prefix; - __isset = other179.__isset; -} -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other180) { - aad_prefix = 
other180.aad_prefix; - aad_file_unique = other180.aad_file_unique; - supply_aad_prefix = other180.supply_aad_prefix; - __isset = other180.__isset; - return *this; -} -void AesGcmV1::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "AesGcmV1("; - out << "aad_prefix="; - (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; - (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) - : (out << "")); - out << ", " << "supply_aad_prefix="; - (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) - : (out << "")); - out << ")"; -} - -AesGcmCtrV1::~AesGcmCtrV1() noexcept {} - -void AesGcmCtrV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; - __isset.aad_prefix = true; -} - -void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; - __isset.aad_file_unique = true; -} - -void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; - __isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) { - obj.printTo(out); - return out; -} - -uint32_t AesGcmCtrV1::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); 
- this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmCtrV1::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin( - "aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin( - "aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin( - "supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(AesGcmCtrV1& a, AesGcmCtrV1& b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} - -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other181) { - aad_prefix = other181.aad_prefix; - aad_file_unique = other181.aad_file_unique; - supply_aad_prefix = other181.supply_aad_prefix; - __isset = other181.__isset; -} -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other182) { - aad_prefix = 
other182.aad_prefix; - aad_file_unique = other182.aad_file_unique; - supply_aad_prefix = other182.supply_aad_prefix; - __isset = other182.__isset; - return *this; -} -void AesGcmCtrV1::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "AesGcmCtrV1("; - out << "aad_prefix="; - (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; - (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) - : (out << "")); - out << ", " << "supply_aad_prefix="; - (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) - : (out << "")); - out << ")"; -} - -EncryptionAlgorithm::~EncryptionAlgorithm() noexcept {} - -void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { - this->AES_GCM_V1 = val; - __isset.AES_GCM_V1 = true; -} - -void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) { - this->AES_GCM_CTR_V1 = val; - __isset.AES_GCM_CTR_V1 = true; -} -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) { - obj.printTo(out); - return out; -} - -uint32_t EncryptionAlgorithm::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - 
default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionAlgorithm::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin( - "AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin( - "AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionAlgorithm& a, EncryptionAlgorithm& b) { - using ::std::swap; - swap(a.AES_GCM_V1, b.AES_GCM_V1); - swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1); - swap(a.__isset, b.__isset); -} - -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other183) { - AES_GCM_V1 = other183.AES_GCM_V1; - AES_GCM_CTR_V1 = other183.AES_GCM_CTR_V1; - __isset = other183.__isset; -} -EncryptionAlgorithm& EncryptionAlgorithm::operator=( - const EncryptionAlgorithm& other184) { - AES_GCM_V1 = other184.AES_GCM_V1; - AES_GCM_CTR_V1 = other184.AES_GCM_CTR_V1; - __isset = other184.__isset; - return *this; -} -void EncryptionAlgorithm::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "EncryptionAlgorithm("; - out << "AES_GCM_V1="; - (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "")); - out << ", " << "AES_GCM_CTR_V1="; - (__isset.AES_GCM_CTR_V1 ? 
(out << to_string(AES_GCM_CTR_V1)) - : (out << "")); - out << ")"; -} - -FileMetaData::~FileMetaData() noexcept {} - -void FileMetaData::__set_version(const int32_t val) { - this->version = val; -} - -void FileMetaData::__set_schema(const std::vector& val) { - this->schema = val; -} - -void FileMetaData::__set_num_rows(const int64_t val) { - this->num_rows = val; -} - -void FileMetaData::__set_row_groups(const std::vector& val) { - this->row_groups = val; -} - -void FileMetaData::__set_key_value_metadata(const std::vector& val) { - this->key_value_metadata = val; - __isset.key_value_metadata = true; -} - -void FileMetaData::__set_created_by(const std::string& val) { - this->created_by = val; - __isset.created_by = true; -} - -void FileMetaData::__set_column_orders(const std::vector& val) { - this->column_orders = val; - __isset.column_orders = true; -} - -void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; - __isset.encryption_algorithm = true; -} - -void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) { - this->footer_signing_key_metadata = val; - __isset.footer_signing_key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) { - obj.printTo(out); - return out; -} - -uint32_t FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { 
- xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size185; - ::apache::thrift::protocol::TType _etype188; - xfer += iprot->readListBegin(_etype188, _size185); - this->schema.resize(_size185); - uint32_t _i189; - for (_i189 = 0; _i189 < _size185; ++_i189) { - xfer += this->schema[_i189].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size190; - ::apache::thrift::protocol::TType _etype193; - xfer += iprot->readListBegin(_etype193, _size190); - this->row_groups.resize(_size190); - uint32_t _i194; - for (_i194 = 0; _i194 < _size190; ++_i194) { - xfer += this->row_groups[_i194].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size195; - ::apache::thrift::protocol::TType _etype198; - xfer += iprot->readListBegin(_etype198, _size195); - this->key_value_metadata.resize(_size195); - uint32_t _i199; - for (_i199 = 0; _i199 < _size195; ++_i199) { - xfer += this->key_value_metadata[_i199].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if 
(ftype == ::apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size200; - ::apache::thrift::protocol::TType _etype203; - xfer += iprot->readListBegin(_etype203, _size200); - this->column_orders.resize(_size200); - uint32_t _i204; - for (_i204 = 0; _i204 < _size200; ++_i204) { - xfer += this->column_orders[_i204].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileMetaData::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); - - xfer += - oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - 
static_cast(this->schema.size())); - std::vector::const_iterator _iter205; - for (_iter205 = this->schema.begin(); _iter205 != this->schema.end(); - ++_iter205) { - xfer += (*_iter205).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += - oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin( - "row_groups", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->row_groups.size())); - std::vector::const_iterator _iter206; - for (_iter206 = this->row_groups.begin(); - _iter206 != this->row_groups.end(); - ++_iter206) { - xfer += (*_iter206).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin( - "key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->key_value_metadata.size())); - std::vector::const_iterator _iter207; - for (_iter207 = this->key_value_metadata.begin(); - _iter207 != this->key_value_metadata.end(); - ++_iter207) { - xfer += (*_iter207).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin( - "created_by", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin( - "column_orders", ::apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin( - ::apache::thrift::protocol::T_STRUCT, - static_cast(this->column_orders.size())); - std::vector::const_iterator _iter208; - for (_iter208 = this->column_orders.begin(); - _iter208 != 
this->column_orders.end(); - ++_iter208) { - xfer += (*_iter208).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin( - "encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer += oprot->writeFieldBegin( - "footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(FileMetaData& a, FileMetaData& b) { - using ::std::swap; - swap(a.version, b.version); - swap(a.schema, b.schema); - swap(a.num_rows, b.num_rows); - swap(a.row_groups, b.row_groups); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.created_by, b.created_by); - swap(a.column_orders, b.column_orders); - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata); - swap(a.__isset, b.__isset); -} - -FileMetaData::FileMetaData(const FileMetaData& other209) { - version = other209.version; - schema = other209.schema; - num_rows = other209.num_rows; - row_groups = other209.row_groups; - key_value_metadata = other209.key_value_metadata; - created_by = other209.created_by; - column_orders = other209.column_orders; - encryption_algorithm = other209.encryption_algorithm; - footer_signing_key_metadata = other209.footer_signing_key_metadata; - __isset = other209.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other210) { - version = other210.version; - schema = other210.schema; - num_rows = other210.num_rows; - row_groups = other210.row_groups; - key_value_metadata = other210.key_value_metadata; - created_by = other210.created_by; - 
column_orders = other210.column_orders; - encryption_algorithm = other210.encryption_algorithm; - footer_signing_key_metadata = other210.footer_signing_key_metadata; - __isset = other210.__isset; - return *this; -} -void FileMetaData::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "FileMetaData("; - out << "version=" << to_string(version); - out << ", " << "schema=" << to_string(schema); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "row_groups=" << to_string(row_groups); - out << ", " << "key_value_metadata="; - (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) - : (out << "")); - out << ", " << "created_by="; - (__isset.created_by ? (out << to_string(created_by)) : (out << "")); - out << ", " << "column_orders="; - (__isset.column_orders ? (out << to_string(column_orders)) - : (out << "")); - out << ", " << "encryption_algorithm="; - (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) - : (out << "")); - out << ", " << "footer_signing_key_metadata="; - (__isset.footer_signing_key_metadata - ? 
(out << to_string(footer_signing_key_metadata)) - : (out << "")); - out << ")"; -} - -FileCryptoMetaData::~FileCryptoMetaData() noexcept {} - -void FileCryptoMetaData::__set_encryption_algorithm( - const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -} - -void FileCryptoMetaData::__set_key_metadata(const std::string& val) { - this->key_metadata = val; - __isset.key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) { - obj.printTo(out); - return out; -} - -uint32_t FileCryptoMetaData::read( - ::apache::thrift::protocol::TProtocol* iprot) { - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_encryption_algorithm = false; - - while (true) { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileCryptoMetaData::write( - ::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); - - xfer += 
oprot->writeFieldBegin( - "encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin( - "key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(FileCryptoMetaData& a, FileCryptoMetaData& b) { - using ::std::swap; - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} - -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other211) { - encryption_algorithm = other211.encryption_algorithm; - key_metadata = other211.key_metadata; - __isset = other211.__isset; -} -FileCryptoMetaData& FileCryptoMetaData::operator=( - const FileCryptoMetaData& other212) { - encryption_algorithm = other212.encryption_algorithm; - key_metadata = other212.key_metadata; - __isset = other212.__isset; - return *this; -} -void FileCryptoMetaData::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "FileCryptoMetaData("; - out << "encryption_algorithm=" << to_string(encryption_algorithm); - out << ", " << "key_metadata="; - (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; -} -} // namespace facebook::velox::parquet::thrift diff --git a/velox/dwio/parquet/thrift/ParquetThriftTypes.h b/velox/dwio/parquet/thrift/ParquetThriftTypes.h deleted file mode 100644 index 0fd63e83cc0..00000000000 --- a/velox/dwio/parquet/thrift/ParquetThriftTypes.h +++ /dev/null @@ -1,3823 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Autogenerated by Thrift Compiler (0.14.1) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace facebook::velox::parquet::thrift { - -/** - * Types supported by Parquet. These types are intended to be used in - * combination with the encodings to control the on disk storage format. For - * example INT16 is not included as a type since a good encoding of INT32 would - * handle this. - */ -struct Type { - enum type { - BOOLEAN = 0, - INT32 = 1, - INT64 = 2, - INT96 = 3, - FLOAT = 4, - DOUBLE = 5, - BYTE_ARRAY = 6, - FIXED_LEN_BYTE_ARRAY = 7 - }; -}; - -extern const std::map _Type_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Type::type& val); - -std::string to_string(const Type::type& val); - -/** - * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. - * ConvertedType is superseded by LogicalType. This enum should not be - * extended. - * - * See LogicalTypes.md for conversion between ConvertedType and LogicalType. 
- */ -struct ConvertedType { - enum type { - /** - * a BYTE_ARRAY actually contains UTF8 encoded chars - */ - UTF8 = 0, - /** - * a map is converted as an optional field containing a repeated key/value - * pair - */ - MAP = 1, - /** - * a key/value pair is converted into a group of two fields - */ - MAP_KEY_VALUE = 2, - /** - * a list is converted into an optional field containing a repeated field - * for its values - */ - LIST = 3, - /** - * an enum is converted into a binary field - */ - ENUM = 4, - /** - * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's - * complement using big-endian byte order (the most significant byte is the - * zeroth element). The value of the decimal is the value * 10^{-scale}. - * - * This must be accompanied by a (maximum) precision and a scale in the - * SchemaElement. The precision specifies the number of digits in the - * decimal and the scale stores the location of the decimal point. For - * example 1.23 would have precision 3 (3 total digits) and scale 2 (the - * decimal point is 2 digits over). - */ - DECIMAL = 5, - /** - * A Date - * - * Stored as days since Unix epoch, encoded as the INT32 physical type. - * - */ - DATE = 6, - /** - * A time - * - * The total number of milliseconds since midnight. The value is stored - * as an INT32 physical type. - */ - TIME_MILLIS = 7, - /** - * A time. - * - * The total number of microseconds since midnight. The value is stored as - * an INT64 physical type. - */ - TIME_MICROS = 8, - /** - * A date/time combination - * - * Date and time recorded as milliseconds since the Unix epoch. Recorded as - * a physical type of INT64. - */ - TIMESTAMP_MILLIS = 9, - /** - * A date/time combination - * - * Date and time recorded as microseconds since the Unix epoch. The value - * is stored as an INT64 physical type. - */ - TIMESTAMP_MICROS = 10, - /** - * An unsigned integer value. 
- * - * The number describes the maximum number of meaningful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - UINT_8 = 11, - UINT_16 = 12, - UINT_32 = 13, - UINT_64 = 14, - /** - * A signed integer value. - * - * The number describes the maximum number of meaningful data bits in - * the stored value. 8, 16 and 32 bit values are stored using the - * INT32 physical type. 64 bit values are stored using the INT64 - * physical type. - * - */ - INT_8 = 15, - INT_16 = 16, - INT_32 = 17, - INT_64 = 18, - /** - * An embedded JSON document - * - * A JSON document embedded within a single UTF8 column. - */ - JSON = 19, - /** - * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. - */ - BSON = 20, - /** - * An interval of time - * - * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 - * This data is composed of three separate little endian unsigned - * integers. Each stores a component of a duration of time. The first - * integer identifies the number of months associated with the duration, - * the second identifies the number of days associated with the duration - * and the third identifies the number of milliseconds associated with - * the provided duration. This duration of time is independent of any - * particular timezone or date. - */ - INTERVAL = 21 - }; -}; - -extern const std::map _ConvertedType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val); - -std::string to_string(const ConvertedType::type& val); - -/** - * Representation of Schemas - */ -struct FieldRepetitionType { - enum type { - /** - * This field is required (can not be null) and each record has exactly 1 - * value. - */ - REQUIRED = 0, - /** - * The field is optional (can be null) and each record has 0 or 1 values. 
- */ - OPTIONAL = 1, - /** - * The field is repeated and can contain 0 or more values - */ - REPEATED = 2 - }; -}; - -extern const std::map _FieldRepetitionType_VALUES_TO_NAMES; - -std::ostream& operator<<( - std::ostream& out, - const FieldRepetitionType::type& val); - -std::string to_string(const FieldRepetitionType::type& val); - -/** - * Encodings supported by Parquet. Not all encodings are valid for all types. - * These enums are also used to specify the encoding of definition and - * repetition levels. See the accompanying doc for the details of the more - * complicated encodings. - */ -struct Encoding { - enum type { - /** - * Default encoding. - * BOOLEAN - 1 bit per value. 0 is false; 1 is true. - * INT32 - 4 bytes per value. Stored as little-endian. - * INT64 - 8 bytes per value. Stored as little-endian. - * FLOAT - 4 bytes per value. IEEE. Stored as little-endian. - * DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. - * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. - * FIXED_LEN_BYTE_ARRAY - Just the bytes. - */ - PLAIN = 0, - /** - * Deprecated: Dictionary encoding. The values in the dictionary are encoded - * in the plain type. in a data page use RLE_DICTIONARY instead. in a - * Dictionary page use PLAIN instead - */ - PLAIN_DICTIONARY = 2, - /** - * Group packed run length encoding. Usable for definition/repetition levels - * encoding and Booleans (on one bit: 0 is false; 1 is true.) - */ - RLE = 3, - /** - * Bit packed encoding. This can only be used if the data has a known max - * width. Usable for definition/repetition levels encoding. - */ - BIT_PACKED = 4, - /** - * Delta encoding for integers. This can be used for int columns and works - * best on sorted data - */ - DELTA_BINARY_PACKED = 5, - /** - * Encoding for byte arrays to separate the length values and the data. The - * lengths are encoded using DELTA_BINARY_PACKED - */ - DELTA_LENGTH_BYTE_ARRAY = 6, - /** - * Incremental-encoded byte array. 
Prefix lengths are encoded using - * DELTA_BINARY_PACKED. Suffixes are stored as delta length byte arrays. - */ - DELTA_BYTE_ARRAY = 7, - /** - * Dictionary encoding: the ids are encoded using the RLE encoding - */ - RLE_DICTIONARY = 8, - /** - * Encoding for floating-point data. - * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding - * stream and the streams are concatenated. This itself does not reduce the - * size of the data but can lead to better compression afterwards. - */ - BYTE_STREAM_SPLIT = 9 - }; -}; - -extern const std::map _Encoding_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Encoding::type& val); - -std::string to_string(const Encoding::type& val); - -/** - * Supported compression algorithms. - * - * Codecs added in format version X.Y can be read by readers based on X.Y and - * later. Codec support may vary between readers based on the format version and - * libraries available at runtime. - * - * See Compression.md for a detailed specification of these algorithms. - */ -struct CompressionCodec { - enum type { - UNCOMPRESSED = 0, - SNAPPY = 1, - GZIP = 2, - LZO = 3, - BROTLI = 4, - LZ4 = 5, - ZSTD = 6, - LZ4_RAW = 7 - }; -}; - -extern const std::map _CompressionCodec_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val); - -std::string to_string(const CompressionCodec::type& val); - -struct PageType { - enum type { - DATA_PAGE = 0, - INDEX_PAGE = 1, - DICTIONARY_PAGE = 2, - DATA_PAGE_V2 = 3 - }; -}; - -extern const std::map _PageType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const PageType::type& val); - -std::string to_string(const PageType::type& val); - -/** - * Enum to annotate whether lists of min/max elements inside ColumnIndex - * are ordered and if so, in which direction. 
- */ -struct BoundaryOrder { - enum type { UNORDERED = 0, ASCENDING = 1, DESCENDING = 2 }; -}; - -extern const std::map _BoundaryOrder_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val); - -std::string to_string(const BoundaryOrder::type& val); - -class Statistics; - -class StringType; - -class UUIDType; - -class MapType; - -class ListType; - -class EnumType; - -class DateType; - -class NullType; - -class DecimalType; - -class MilliSeconds; - -class MicroSeconds; - -class NanoSeconds; - -class TimeUnit; - -class TimestampType; - -class TimeType; - -class IntType; - -class JsonType; - -class BsonType; - -class LogicalType; - -class SchemaElement; - -class DataPageHeader; - -class IndexPageHeader; - -class DictionaryPageHeader; - -class DataPageHeaderV2; - -class SplitBlockAlgorithm; - -class BloomFilterAlgorithm; - -class XxHash; - -class BloomFilterHash; - -class Uncompressed; - -class BloomFilterCompression; - -class BloomFilterHeader; - -class PageHeader; - -class KeyValue; - -class SortingColumn; - -class PageEncodingStats; - -class ColumnMetaData; - -class EncryptionWithFooterKey; - -class EncryptionWithColumnKey; - -class ColumnCryptoMetaData; - -class ColumnChunk; - -class RowGroup; - -class TypeDefinedOrder; - -class ColumnOrder; - -class PageLocation; - -class OffsetIndex; - -class ColumnIndex; - -class AesGcmV1; - -class AesGcmCtrV1; - -class EncryptionAlgorithm; - -class FileMetaData; - -class FileCryptoMetaData; - -typedef struct _Statistics__isset { - _Statistics__isset() - : max(false), - min(false), - null_count(false), - distinct_count(false), - max_value(false), - min_value(false) {} - bool max : 1; - bool min : 1; - bool null_count : 1; - bool distinct_count : 1; - bool max_value : 1; - bool min_value : 1; -} _Statistics__isset; - -/** - * Statistics per row group and per page - * All fields are optional. 
- */ -class Statistics : public virtual apache::thrift::TBase { - public: - Statistics(const Statistics&); - Statistics& operator=(const Statistics&); - Statistics() - : max(), - min(), - null_count(0), - distinct_count(0), - max_value(), - min_value() {} - - virtual ~Statistics() noexcept; - /** - * DEPRECATED: min and max value of the column. Use min_value and max_value. - * - * Values are encoded using PLAIN encoding, except that variable-length byte - * arrays do not include a length prefix. - * - * These fields encode min and max values determined by signed comparison - * only. New files should use the correct order for a column's logical type - * and store the values in the min_value and max_value fields. - * - * To support older readers, these may be set when the column order is - * signed. - */ - std::string max; - std::string min; - /** - * count of null value in the column - */ - int64_t null_count; - /** - * count of distinct values occurring - */ - int64_t distinct_count; - /** - * Min and max values for the column, determined by its ColumnOrder. - * - * Values are encoded using PLAIN encoding, except that variable-length byte - * arrays do not include a length prefix. 
- */ - std::string max_value; - std::string min_value; - - _Statistics__isset __isset; - - void __set_max(const std::string& val); - - void __set_min(const std::string& val); - - void __set_null_count(const int64_t val); - - void __set_distinct_count(const int64_t val); - - void __set_max_value(const std::string& val); - - void __set_min_value(const std::string& val); - - bool operator==(const Statistics& rhs) const { - if (__isset.max != rhs.__isset.max) - return false; - else if (__isset.max && !(max == rhs.max)) - return false; - if (__isset.min != rhs.__isset.min) - return false; - else if (__isset.min && !(min == rhs.min)) - return false; - if (__isset.null_count != rhs.__isset.null_count) - return false; - else if (__isset.null_count && !(null_count == rhs.null_count)) - return false; - if (__isset.distinct_count != rhs.__isset.distinct_count) - return false; - else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) - return false; - if (__isset.max_value != rhs.__isset.max_value) - return false; - else if (__isset.max_value && !(max_value == rhs.max_value)) - return false; - if (__isset.min_value != rhs.__isset.min_value) - return false; - else if (__isset.min_value && !(min_value == rhs.min_value)) - return false; - return true; - } - bool operator!=(const Statistics& rhs) const { - return !(*this == rhs); - } - - bool operator<(const Statistics&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(Statistics& a, Statistics& b); - -std::ostream& operator<<(std::ostream& out, const Statistics& obj); - -/** - * Empty structs to use as logical type annotations - */ -class StringType : public virtual ::apache::thrift::TBase { - public: - StringType(const StringType&); - StringType& operator=(const StringType&); - StringType() {} - - virtual ~StringType() noexcept; - - bool 
operator==(const StringType& /* rhs */) const { - return true; - } - bool operator!=(const StringType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const StringType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(StringType& a, StringType& b); - -std::ostream& operator<<(std::ostream& out, const StringType& obj); - -class UUIDType : public virtual ::apache::thrift::TBase { - public: - UUIDType(const UUIDType&); - UUIDType& operator=(const UUIDType&); - UUIDType() {} - - virtual ~UUIDType() noexcept; - - bool operator==(const UUIDType& /* rhs */) const { - return true; - } - bool operator!=(const UUIDType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const UUIDType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(UUIDType& a, UUIDType& b); - -std::ostream& operator<<(std::ostream& out, const UUIDType& obj); - -class MapType : public virtual ::apache::thrift::TBase { - public: - MapType(const MapType&); - MapType& operator=(const MapType&); - MapType() {} - - virtual ~MapType() noexcept; - - bool operator==(const MapType& /* rhs */) const { - return true; - } - bool operator!=(const MapType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const MapType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MapType& a, MapType& b); - -std::ostream& operator<<(std::ostream& out, const MapType& obj); - -class ListType : public virtual ::apache::thrift::TBase { - public: - ListType(const ListType&); - ListType& operator=(const ListType&); - 
ListType() {} - - virtual ~ListType() noexcept; - - bool operator==(const ListType& /* rhs */) const { - return true; - } - bool operator!=(const ListType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const ListType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ListType& a, ListType& b); - -std::ostream& operator<<(std::ostream& out, const ListType& obj); - -class EnumType : public virtual ::apache::thrift::TBase { - public: - EnumType(const EnumType&); - EnumType& operator=(const EnumType&); - EnumType() {} - - virtual ~EnumType() noexcept; - - bool operator==(const EnumType& /* rhs */) const { - return true; - } - bool operator!=(const EnumType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const EnumType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EnumType& a, EnumType& b); - -std::ostream& operator<<(std::ostream& out, const EnumType& obj); - -class DateType : public virtual ::apache::thrift::TBase { - public: - DateType(const DateType&); - DateType& operator=(const DateType&); - DateType() {} - - virtual ~DateType() noexcept; - - bool operator==(const DateType& /* rhs */) const { - return true; - } - bool operator!=(const DateType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const DateType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DateType& a, DateType& b); - -std::ostream& operator<<(std::ostream& out, const DateType& obj); - -/** - * Logical type to annotate a column that is always null. 
- * - * Sometimes when discovering the schema of existing data, values are always - * null and the physical type can't be determined. This annotation signals - * the case where the physical type was guessed from all null values. - */ -class NullType : public virtual ::apache::thrift::TBase { - public: - NullType(const NullType&); - NullType& operator=(const NullType&); - NullType() {} - - virtual ~NullType() noexcept; - - bool operator==(const NullType& /* rhs */) const { - return true; - } - bool operator!=(const NullType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const NullType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(NullType& a, NullType& b); - -std::ostream& operator<<(std::ostream& out, const NullType& obj); - -/** - * Decimal logical type annotation - * - * To maintain forward-compatibility in v1, implementations using this logical - * type must also set scale and precision on the annotated SchemaElement. 
- * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY - */ -class DecimalType : public virtual ::apache::thrift::TBase { - public: - DecimalType(const DecimalType&); - DecimalType& operator=(const DecimalType&); - DecimalType() : scale(0), precision(0) {} - - virtual ~DecimalType() noexcept; - int32_t scale; - int32_t precision; - - void __set_scale(const int32_t val); - - void __set_precision(const int32_t val); - - bool operator==(const DecimalType& rhs) const { - if (!(scale == rhs.scale)) - return false; - if (!(precision == rhs.precision)) - return false; - return true; - } - bool operator!=(const DecimalType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const DecimalType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DecimalType& a, DecimalType& b); - -std::ostream& operator<<(std::ostream& out, const DecimalType& obj); - -/** - * Time units for logical types - */ -class MilliSeconds : public virtual ::apache::thrift::TBase { - public: - MilliSeconds(const MilliSeconds&); - MilliSeconds& operator=(const MilliSeconds&); - MilliSeconds() {} - - virtual ~MilliSeconds() noexcept; - - bool operator==(const MilliSeconds& /* rhs */) const { - return true; - } - bool operator!=(const MilliSeconds& rhs) const { - return !(*this == rhs); - } - - bool operator<(const MilliSeconds&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MilliSeconds& a, MilliSeconds& b); - -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); - -class MicroSeconds : public virtual ::apache::thrift::TBase { - public: - MicroSeconds(const MicroSeconds&); - MicroSeconds& operator=(const MicroSeconds&); - MicroSeconds() {} - - 
virtual ~MicroSeconds() noexcept; - - bool operator==(const MicroSeconds& /* rhs */) const { - return true; - } - bool operator!=(const MicroSeconds& rhs) const { - return !(*this == rhs); - } - - bool operator<(const MicroSeconds&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MicroSeconds& a, MicroSeconds& b); - -std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); - -class NanoSeconds : public virtual ::apache::thrift::TBase { - public: - NanoSeconds(const NanoSeconds&); - NanoSeconds& operator=(const NanoSeconds&); - NanoSeconds() {} - - virtual ~NanoSeconds() noexcept; - - bool operator==(const NanoSeconds& /* rhs */) const { - return true; - } - bool operator!=(const NanoSeconds& rhs) const { - return !(*this == rhs); - } - - bool operator<(const NanoSeconds&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(NanoSeconds& a, NanoSeconds& b); - -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj); - -typedef struct _TimeUnit__isset { - _TimeUnit__isset() : MILLIS(false), MICROS(false), NANOS(false) {} - bool MILLIS : 1; - bool MICROS : 1; - bool NANOS : 1; -} _TimeUnit__isset; - -class TimeUnit : public virtual ::apache::thrift::TBase { - public: - TimeUnit(const TimeUnit&); - TimeUnit& operator=(const TimeUnit&); - TimeUnit() {} - - virtual ~TimeUnit() noexcept; - MilliSeconds MILLIS; - MicroSeconds MICROS; - NanoSeconds NANOS; - - _TimeUnit__isset __isset; - - void __set_MILLIS(const MilliSeconds& val); - - void __set_MICROS(const MicroSeconds& val); - - void __set_NANOS(const NanoSeconds& val); - - bool operator==(const TimeUnit& rhs) const { - if (__isset.MILLIS != rhs.__isset.MILLIS) - return false; - 
else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) - return false; - if (__isset.MICROS != rhs.__isset.MICROS) - return false; - else if (__isset.MICROS && !(MICROS == rhs.MICROS)) - return false; - if (__isset.NANOS != rhs.__isset.NANOS) - return false; - else if (__isset.NANOS && !(NANOS == rhs.NANOS)) - return false; - return true; - } - bool operator!=(const TimeUnit& rhs) const { - return !(*this == rhs); - } - - bool operator<(const TimeUnit&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimeUnit& a, TimeUnit& b); - -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); - -/** - * Timestamp logical type annotation - * - * Allowed for physical types: INT64 - */ -class TimestampType : public virtual ::apache::thrift::TBase { - public: - TimestampType(const TimestampType&); - TimestampType& operator=(const TimestampType&); - TimestampType() : isAdjustedToUTC(0) {} - - virtual ~TimestampType() noexcept; - bool isAdjustedToUTC; - TimeUnit unit; - - void __set_isAdjustedToUTC(const bool val); - - void __set_unit(const TimeUnit& val); - - bool operator==(const TimestampType& rhs) const { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } - bool operator!=(const TimestampType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const TimestampType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimestampType& a, TimestampType& b); - -std::ostream& operator<<(std::ostream& out, const TimestampType& obj); - -/** - * Time logical type annotation - * - * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) - */ -class TimeType : public 
virtual ::apache::thrift::TBase { - public: - TimeType(const TimeType&); - TimeType& operator=(const TimeType&); - TimeType() : isAdjustedToUTC(0) {} - - virtual ~TimeType() noexcept; - bool isAdjustedToUTC; - TimeUnit unit; - - void __set_isAdjustedToUTC(const bool val); - - void __set_unit(const TimeUnit& val); - - bool operator==(const TimeType& rhs) const { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } - bool operator!=(const TimeType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const TimeType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimeType& a, TimeType& b); - -std::ostream& operator<<(std::ostream& out, const TimeType& obj); - -/** - * Integer logical type annotation - * - * bitWidth must be 8, 16, 32, or 64. - * - * Allowed for physical types: INT32, INT64 - */ -class IntType : public virtual ::apache::thrift::TBase { - public: - IntType(const IntType&); - IntType& operator=(const IntType&); - IntType() : bitWidth(0), isSigned(0) {} - - virtual ~IntType() noexcept; - int8_t bitWidth; - bool isSigned; - - void __set_bitWidth(const int8_t val); - - void __set_isSigned(const bool val); - - bool operator==(const IntType& rhs) const { - if (!(bitWidth == rhs.bitWidth)) - return false; - if (!(isSigned == rhs.isSigned)) - return false; - return true; - } - bool operator!=(const IntType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const IntType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(IntType& a, IntType& b); - -std::ostream& operator<<(std::ostream& out, const IntType& obj); - -/** - * Embedded JSON logical type 
annotation - * - * Allowed for physical types: BINARY - */ -class JsonType : public virtual ::apache::thrift::TBase { - public: - JsonType(const JsonType&); - JsonType& operator=(const JsonType&); - JsonType() {} - - virtual ~JsonType() noexcept; - - bool operator==(const JsonType& /* rhs */) const { - return true; - } - bool operator!=(const JsonType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const JsonType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(JsonType& a, JsonType& b); - -std::ostream& operator<<(std::ostream& out, const JsonType& obj); - -/** - * Embedded BSON logical type annotation - * - * Allowed for physical types: BINARY - */ -class BsonType : public virtual ::apache::thrift::TBase { - public: - BsonType(const BsonType&); - BsonType& operator=(const BsonType&); - BsonType() {} - - virtual ~BsonType() noexcept; - - bool operator==(const BsonType& /* rhs */) const { - return true; - } - bool operator!=(const BsonType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const BsonType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BsonType& a, BsonType& b); - -std::ostream& operator<<(std::ostream& out, const BsonType& obj); - -typedef struct _LogicalType__isset { - _LogicalType__isset() - : STRING(false), - MAP(false), - LIST(false), - ENUM(false), - DECIMAL(false), - DATE(false), - TIME(false), - TIMESTAMP(false), - INTEGER(false), - UNKNOWN(false), - JSON(false), - BSON(false), - UUID(false) {} - bool STRING : 1; - bool MAP : 1; - bool LIST : 1; - bool ENUM : 1; - bool DECIMAL : 1; - bool DATE : 1; - bool TIME : 1; - bool TIMESTAMP : 1; - bool INTEGER : 1; - bool UNKNOWN : 1; - bool JSON : 1; - 
bool BSON : 1; - bool UUID : 1; -} _LogicalType__isset; - -/** - * LogicalType annotations to replace ConvertedType. - * - * To maintain compatibility, implementations using LogicalType for a - * SchemaElement must also set the corresponding ConvertedType (if any) - * from the following table. - */ -class LogicalType : public virtual ::apache::thrift::TBase { - public: - LogicalType(const LogicalType&); - LogicalType& operator=(const LogicalType&); - LogicalType() {} - - virtual ~LogicalType() noexcept; - StringType STRING; - MapType MAP; - ListType LIST; - EnumType ENUM; - DecimalType DECIMAL; - DateType DATE; - TimeType TIME; - TimestampType TIMESTAMP; - IntType INTEGER; - NullType UNKNOWN; - JsonType JSON; - BsonType BSON; - UUIDType UUID; - - _LogicalType__isset __isset; - - void __set_STRING(const StringType& val); - - void __set_MAP(const MapType& val); - - void __set_LIST(const ListType& val); - - void __set_ENUM(const EnumType& val); - - void __set_DECIMAL(const DecimalType& val); - - void __set_DATE(const DateType& val); - - void __set_TIME(const TimeType& val); - - void __set_TIMESTAMP(const TimestampType& val); - - void __set_INTEGER(const IntType& val); - - void __set_UNKNOWN(const NullType& val); - - void __set_JSON(const JsonType& val); - - void __set_BSON(const BsonType& val); - - void __set_UUID(const UUIDType& val); - - bool operator==(const LogicalType& rhs) const { - if (__isset.STRING != rhs.__isset.STRING) - return false; - else if (__isset.STRING && !(STRING == rhs.STRING)) - return false; - if (__isset.MAP != rhs.__isset.MAP) - return false; - else if (__isset.MAP && !(MAP == rhs.MAP)) - return false; - if (__isset.LIST != rhs.__isset.LIST) - return false; - else if (__isset.LIST && !(LIST == rhs.LIST)) - return false; - if (__isset.ENUM != rhs.__isset.ENUM) - return false; - else if (__isset.ENUM && !(ENUM == rhs.ENUM)) - return false; - if (__isset.DECIMAL != rhs.__isset.DECIMAL) - return false; - else if (__isset.DECIMAL && !(DECIMAL == 
rhs.DECIMAL)) - return false; - if (__isset.DATE != rhs.__isset.DATE) - return false; - else if (__isset.DATE && !(DATE == rhs.DATE)) - return false; - if (__isset.TIME != rhs.__isset.TIME) - return false; - else if (__isset.TIME && !(TIME == rhs.TIME)) - return false; - if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) - return false; - else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) - return false; - if (__isset.INTEGER != rhs.__isset.INTEGER) - return false; - else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) - return false; - if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) - return false; - else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) - return false; - if (__isset.JSON != rhs.__isset.JSON) - return false; - else if (__isset.JSON && !(JSON == rhs.JSON)) - return false; - if (__isset.BSON != rhs.__isset.BSON) - return false; - else if (__isset.BSON && !(BSON == rhs.BSON)) - return false; - if (__isset.UUID != rhs.__isset.UUID) - return false; - else if (__isset.UUID && !(UUID == rhs.UUID)) - return false; - return true; - } - bool operator!=(const LogicalType& rhs) const { - return !(*this == rhs); - } - - bool operator<(const LogicalType&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(LogicalType& a, LogicalType& b); - -std::ostream& operator<<(std::ostream& out, const LogicalType& obj); - -typedef struct _SchemaElement__isset { - _SchemaElement__isset() - : type(false), - type_length(false), - repetition_type(false), - num_children(false), - converted_type(false), - scale(false), - precision(false), - field_id(false), - logicalType(false) {} - bool type : 1; - bool type_length : 1; - bool repetition_type : 1; - bool num_children : 1; - bool converted_type : 1; - bool scale : 1; - bool precision : 1; - bool field_id : 1; - bool logicalType : 1; -} _SchemaElement__isset; - -/** - 
* Represents a element inside a schema definition. - * - if it is a group (inner node) then type is undefined and num_children is - * defined - * - if it is a primitive type (leaf) then type is defined and num_children is - * undefined the nodes are listed in depth first traversal order. - */ -class SchemaElement : public virtual ::apache::thrift::TBase { - public: - SchemaElement(const SchemaElement&); - SchemaElement& operator=(const SchemaElement&); - SchemaElement() - : type((Type::type)0), - type_length(0), - repetition_type((FieldRepetitionType::type)0), - name(), - num_children(0), - converted_type((ConvertedType::type)0), - scale(0), - precision(0), - field_id(0) {} - - virtual ~SchemaElement() noexcept; - /** - * Data type for this field. Not set if the current element is a non-leaf node - * - * @see Type - */ - Type::type type; - /** - * If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. - * Otherwise, if specified, this is the maximum bit length to store any of the - * values. (e.g. a low cardinality INT col could have this set to 3). Note - * that this is in the schema, and therefore fixed for the entire file. - */ - int32_t type_length; - /** - * repetition of the field. The root of the schema does not have a - * repetition_type. All other nodes must have one - * - * @see FieldRepetitionType - */ - FieldRepetitionType::type repetition_type; - /** - * Name of the field in the schema - */ - std::string name; - /** - * Nested fields. Since thrift does not support nested fields, - * the nesting is flattened to a single list by a depth-first traversal. - * The children count is used to construct the nested relationship. - * This field is not set when the element is a primitive type - */ - int32_t num_children; - /** - * DEPRECATED: When the schema is the result of a conversion from another - * model. Used to record the original type to help with cross conversion. - * - * This is superseded by logicalType. 
- * - * @see ConvertedType - */ - ConvertedType::type converted_type; - /** - * DEPRECATED: Used when this column contains decimal data. - * See the DECIMAL converted type for more details. - * - * This is superseded by using the DecimalType annotation in logicalType. - */ - int32_t scale; - int32_t precision; - /** - * When the original schema supports field ids, this will save the - * original field id in the parquet schema - */ - int32_t field_id; - /** - * The logical type of this SchemaElement - * - * LogicalType replaces ConvertedType, but ConvertedType is still required - * for some logical types to ensure forward-compatibility in format v1. - */ - LogicalType logicalType; - - _SchemaElement__isset __isset; - - void __set_type(const Type::type val); - - void __set_type_length(const int32_t val); - - void __set_repetition_type(const FieldRepetitionType::type val); - - void __set_name(const std::string& val); - - void __set_num_children(const int32_t val); - - void __set_converted_type(const ConvertedType::type val); - - void __set_scale(const int32_t val); - - void __set_precision(const int32_t val); - - void __set_field_id(const int32_t val); - - void __set_logicalType(const LogicalType& val); - - bool operator==(const SchemaElement& rhs) const { - if (__isset.type != rhs.__isset.type) - return false; - else if (__isset.type && !(type == rhs.type)) - return false; - if (__isset.type_length != rhs.__isset.type_length) - return false; - else if (__isset.type_length && !(type_length == rhs.type_length)) - return false; - if (__isset.repetition_type != rhs.__isset.repetition_type) - return false; - else if ( - __isset.repetition_type && !(repetition_type == rhs.repetition_type)) - return false; - if (!(name == rhs.name)) - return false; - if (__isset.num_children != rhs.__isset.num_children) - return false; - else if (__isset.num_children && !(num_children == rhs.num_children)) - return false; - if (__isset.converted_type != rhs.__isset.converted_type) - return 
false; - else if (__isset.converted_type && !(converted_type == rhs.converted_type)) - return false; - if (__isset.scale != rhs.__isset.scale) - return false; - else if (__isset.scale && !(scale == rhs.scale)) - return false; - if (__isset.precision != rhs.__isset.precision) - return false; - else if (__isset.precision && !(precision == rhs.precision)) - return false; - if (__isset.field_id != rhs.__isset.field_id) - return false; - else if (__isset.field_id && !(field_id == rhs.field_id)) - return false; - if (__isset.logicalType != rhs.__isset.logicalType) - return false; - else if (__isset.logicalType && !(logicalType == rhs.logicalType)) - return false; - return true; - } - bool operator!=(const SchemaElement& rhs) const { - return !(*this == rhs); - } - - bool operator<(const SchemaElement&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SchemaElement& a, SchemaElement& b); - -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj); - -typedef struct _DataPageHeader__isset { - _DataPageHeader__isset() : statistics(false) {} - bool statistics : 1; -} _DataPageHeader__isset; - -/** - * Data page header - */ -class DataPageHeader : public virtual ::apache::thrift::TBase { - public: - DataPageHeader(const DataPageHeader&); - DataPageHeader& operator=(const DataPageHeader&); - DataPageHeader() - : num_values(0), - encoding((Encoding::type)0), - definition_level_encoding((Encoding::type)0), - repetition_level_encoding((Encoding::type)0) {} - - virtual ~DataPageHeader() noexcept; - /** - * Number of values, including NULLs, in this data page. 
* - */ - int32_t num_values; - /** - * Encoding used for this data page * - * - * @see Encoding - */ - Encoding::type encoding; - /** - * Encoding used for definition levels * - * - * @see Encoding - */ - Encoding::type definition_level_encoding; - /** - * Encoding used for repetition levels * - * - * @see Encoding - */ - Encoding::type repetition_level_encoding; - /** - * Optional statistics for the data in this page* - */ - Statistics statistics; - - _DataPageHeader__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_definition_level_encoding(const Encoding::type val); - - void __set_repetition_level_encoding(const Encoding::type val); - - void __set_statistics(const Statistics& val); - - bool operator==(const DataPageHeader& rhs) const { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_level_encoding == rhs.definition_level_encoding)) - return false; - if (!(repetition_level_encoding == rhs.repetition_level_encoding)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator!=(const DataPageHeader& rhs) const { - return !(*this == rhs); - } - - bool operator<(const DataPageHeader&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DataPageHeader& a, DataPageHeader& b); - -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); - -class IndexPageHeader : public virtual ::apache::thrift::TBase { - public: - IndexPageHeader(const IndexPageHeader&); - IndexPageHeader& operator=(const IndexPageHeader&); - IndexPageHeader() {} - - virtual ~IndexPageHeader() noexcept; - - bool operator==(const 
IndexPageHeader& /* rhs */) const { - return true; - } - bool operator!=(const IndexPageHeader& rhs) const { - return !(*this == rhs); - } - - bool operator<(const IndexPageHeader&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(IndexPageHeader& a, IndexPageHeader& b); - -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj); - -typedef struct _DictionaryPageHeader__isset { - _DictionaryPageHeader__isset() : is_sorted(false) {} - bool is_sorted : 1; -} _DictionaryPageHeader__isset; - -/** - * The dictionary page must be placed at the first position of the column chunk - * if it is partly or completely dictionary encoded. At most one dictionary page - * can be placed in a column chunk. - * - */ -class DictionaryPageHeader : public virtual ::apache::thrift::TBase { - public: - DictionaryPageHeader(const DictionaryPageHeader&); - DictionaryPageHeader& operator=(const DictionaryPageHeader&); - DictionaryPageHeader() - : num_values(0), encoding((Encoding::type)0), is_sorted(0) {} - - virtual ~DictionaryPageHeader() noexcept; - /** - * Number of values in the dictionary * - */ - int32_t num_values; - /** - * Encoding using this dictionary page * - * - * @see Encoding - */ - Encoding::type encoding; - /** - * If true, the entries in the dictionary are sorted in ascending order * - */ - bool is_sorted; - - _DictionaryPageHeader__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_is_sorted(const bool val); - - bool operator==(const DictionaryPageHeader& rhs) const { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (__isset.is_sorted != rhs.__isset.is_sorted) - return false; - else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) - return false; - return 
true; - } - bool operator!=(const DictionaryPageHeader& rhs) const { - return !(*this == rhs); - } - - bool operator<(const DictionaryPageHeader&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DictionaryPageHeader& a, DictionaryPageHeader& b); - -std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj); - -typedef struct _DataPageHeaderV2__isset { - _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {} - bool is_compressed : 1; - bool statistics : 1; -} _DataPageHeaderV2__isset; - -/** - * New page format allowing reading levels without decompressing the data - * Repetition and definition levels are uncompressed - * The remaining section containing the data is compressed if is_compressed is - * true - * - */ -class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { - public: - DataPageHeaderV2(const DataPageHeaderV2&); - DataPageHeaderV2& operator=(const DataPageHeaderV2&); - DataPageHeaderV2() - : num_values(0), - num_nulls(0), - num_rows(0), - encoding((Encoding::type)0), - definition_levels_byte_length(0), - repetition_levels_byte_length(0), - is_compressed(true) {} - - virtual ~DataPageHeaderV2() noexcept; - /** - * Number of values, including NULLs, in this data page. * - */ - int32_t num_values; - /** - * Number of NULL values, in this data page. - * Number of non-null = num_values - num_nulls which is also the number of - * values in the data section * - */ - int32_t num_nulls; - /** - * Number of rows in this data page. 
which means pages change on record - * boundaries (r = 0) * - */ - int32_t num_rows; - /** - * Encoding used for data in this page * - * - * @see Encoding - */ - Encoding::type encoding; - /** - * length of the definition levels - */ - int32_t definition_levels_byte_length; - /** - * length of the repetition levels - */ - int32_t repetition_levels_byte_length; - /** - * whether the values are compressed. - * Which means the section of the page between - * definition_levels_byte_length + repetition_levels_byte_length + 1 and - * compressed_page_size (included) is compressed with the compression_codec. - * If missing it is considered compressed - */ - bool is_compressed; - /** - * optional statistics for the data in this page * - */ - Statistics statistics; - - _DataPageHeaderV2__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_num_nulls(const int32_t val); - - void __set_num_rows(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_definition_levels_byte_length(const int32_t val); - - void __set_repetition_levels_byte_length(const int32_t val); - - void __set_is_compressed(const bool val); - - void __set_statistics(const Statistics& val); - - bool operator==(const DataPageHeaderV2& rhs) const { - if (!(num_values == rhs.num_values)) - return false; - if (!(num_nulls == rhs.num_nulls)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) - return false; - if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) - return false; - if (__isset.is_compressed != rhs.__isset.is_compressed) - return false; - else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; 
- } - bool operator!=(const DataPageHeaderV2& rhs) const { - return !(*this == rhs); - } - - bool operator<(const DataPageHeaderV2&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DataPageHeaderV2& a, DataPageHeaderV2& b); - -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); - -/** - * Block-based algorithm type annotation. * - */ -class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { - public: - SplitBlockAlgorithm(const SplitBlockAlgorithm&); - SplitBlockAlgorithm& operator=(const SplitBlockAlgorithm&); - SplitBlockAlgorithm() {} - - virtual ~SplitBlockAlgorithm() noexcept; - - bool operator==(const SplitBlockAlgorithm& /* rhs */) const { - return true; - } - bool operator!=(const SplitBlockAlgorithm& rhs) const { - return !(*this == rhs); - } - - bool operator<(const SplitBlockAlgorithm&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SplitBlockAlgorithm& a, SplitBlockAlgorithm& b); - -std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj); - -typedef struct _BloomFilterAlgorithm__isset { - _BloomFilterAlgorithm__isset() : BLOCK(false) {} - bool BLOCK : 1; -} _BloomFilterAlgorithm__isset; - -/** - * The algorithm used in Bloom filter. * - */ -class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { - public: - BloomFilterAlgorithm(const BloomFilterAlgorithm&); - BloomFilterAlgorithm& operator=(const BloomFilterAlgorithm&); - BloomFilterAlgorithm() {} - - virtual ~BloomFilterAlgorithm() noexcept; - /** - * Block-based Bloom filter. 
* - */ - SplitBlockAlgorithm BLOCK; - - _BloomFilterAlgorithm__isset __isset; - - void __set_BLOCK(const SplitBlockAlgorithm& val); - - bool operator==(const BloomFilterAlgorithm& rhs) const { - if (__isset.BLOCK != rhs.__isset.BLOCK) - return false; - else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK)) - return false; - return true; - } - bool operator!=(const BloomFilterAlgorithm& rhs) const { - return !(*this == rhs); - } - - bool operator<(const BloomFilterAlgorithm&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BloomFilterAlgorithm& a, BloomFilterAlgorithm& b); - -std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); - -/** - * Hash strategy type annotation. xxHash is an extremely fast non-cryptographic - * hash algorithm. It uses 64 bits version of xxHash. - * - */ -class XxHash : public virtual ::apache::thrift::TBase { - public: - XxHash(const XxHash&); - XxHash& operator=(const XxHash&); - XxHash() {} - - virtual ~XxHash() noexcept; - - bool operator==(const XxHash& /* rhs */) const { - return true; - } - bool operator!=(const XxHash& rhs) const { - return !(*this == rhs); - } - - bool operator<(const XxHash&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(XxHash& a, XxHash& b); - -std::ostream& operator<<(std::ostream& out, const XxHash& obj); - -typedef struct _BloomFilterHash__isset { - _BloomFilterHash__isset() : XXHASH(false) {} - bool XXHASH : 1; -} _BloomFilterHash__isset; - -/** - * The hash function used in Bloom filter. This function takes the hash of a - * column value using plain encoding. 
- * - */ -class BloomFilterHash : public virtual ::apache::thrift::TBase { - public: - BloomFilterHash(const BloomFilterHash&); - BloomFilterHash& operator=(const BloomFilterHash&); - BloomFilterHash() {} - - virtual ~BloomFilterHash() noexcept; - /** - * xxHash Strategy. * - */ - XxHash XXHASH; - - _BloomFilterHash__isset __isset; - - void __set_XXHASH(const XxHash& val); - - bool operator==(const BloomFilterHash& rhs) const { - if (__isset.XXHASH != rhs.__isset.XXHASH) - return false; - else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH)) - return false; - return true; - } - bool operator!=(const BloomFilterHash& rhs) const { - return !(*this == rhs); - } - - bool operator<(const BloomFilterHash&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BloomFilterHash& a, BloomFilterHash& b); - -std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); - -/** - * The compression used in the Bloom filter. 
- * - */ -class Uncompressed : public virtual ::apache::thrift::TBase { - public: - Uncompressed(const Uncompressed&); - Uncompressed& operator=(const Uncompressed&); - Uncompressed() {} - - virtual ~Uncompressed() noexcept; - - bool operator==(const Uncompressed& /* rhs */) const { - return true; - } - bool operator!=(const Uncompressed& rhs) const { - return !(*this == rhs); - } - - bool operator<(const Uncompressed&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(Uncompressed& a, Uncompressed& b); - -std::ostream& operator<<(std::ostream& out, const Uncompressed& obj); - -typedef struct _BloomFilterCompression__isset { - _BloomFilterCompression__isset() : UNCOMPRESSED(false) {} - bool UNCOMPRESSED : 1; -} _BloomFilterCompression__isset; - -class BloomFilterCompression : public virtual ::apache::thrift::TBase { - public: - BloomFilterCompression(const BloomFilterCompression&); - BloomFilterCompression& operator=(const BloomFilterCompression&); - BloomFilterCompression() {} - - virtual ~BloomFilterCompression() noexcept; - Uncompressed UNCOMPRESSED; - - _BloomFilterCompression__isset __isset; - - void __set_UNCOMPRESSED(const Uncompressed& val); - - bool operator==(const BloomFilterCompression& rhs) const { - if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED) - return false; - else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == rhs.UNCOMPRESSED)) - return false; - return true; - } - bool operator!=(const BloomFilterCompression& rhs) const { - return !(*this == rhs); - } - - bool operator<(const BloomFilterCompression&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BloomFilterCompression& a, BloomFilterCompression& b); - -std::ostream& 
operator<<(std::ostream& out, const BloomFilterCompression& obj); - -/** - * Bloom filter header is stored at beginning of Bloom filter data of each - * column and followed by its bitset. - * - */ -class BloomFilterHeader : public virtual ::apache::thrift::TBase { - public: - BloomFilterHeader(const BloomFilterHeader&); - BloomFilterHeader& operator=(const BloomFilterHeader&); - BloomFilterHeader() : numBytes(0) {} - - virtual ~BloomFilterHeader() noexcept; - /** - * The size of bitset in bytes * - */ - int32_t numBytes; - /** - * The algorithm for setting bits. * - */ - BloomFilterAlgorithm algorithm; - /** - * The hash function used for Bloom filter. * - */ - BloomFilterHash hash; - /** - * The compression used in the Bloom filter * - */ - BloomFilterCompression compression; - - void __set_numBytes(const int32_t val); - - void __set_algorithm(const BloomFilterAlgorithm& val); - - void __set_hash(const BloomFilterHash& val); - - void __set_compression(const BloomFilterCompression& val); - - bool operator==(const BloomFilterHeader& rhs) const { - if (!(numBytes == rhs.numBytes)) - return false; - if (!(algorithm == rhs.algorithm)) - return false; - if (!(hash == rhs.hash)) - return false; - if (!(compression == rhs.compression)) - return false; - return true; - } - bool operator!=(const BloomFilterHeader& rhs) const { - return !(*this == rhs); - } - - bool operator<(const BloomFilterHeader&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BloomFilterHeader& a, BloomFilterHeader& b); - -std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj); - -typedef struct _PageHeader__isset { - _PageHeader__isset() - : crc(false), - data_page_header(false), - index_page_header(false), - dictionary_page_header(false), - data_page_header_v2(false) {} - bool crc : 1; - bool data_page_header : 1; - bool 
index_page_header : 1; - bool dictionary_page_header : 1; - bool data_page_header_v2 : 1; -} _PageHeader__isset; - -class PageHeader : public virtual ::apache::thrift::TBase { - public: - PageHeader(const PageHeader&); - PageHeader& operator=(const PageHeader&); - PageHeader() - : type((PageType::type)0), - uncompressed_page_size(0), - compressed_page_size(0), - crc(0) {} - - virtual ~PageHeader() noexcept; - /** - * the type of the page: indicates which of the *_header fields is set * - * - * @see PageType - */ - PageType::type type; - /** - * Uncompressed page size in bytes (not including this header) * - */ - int32_t uncompressed_page_size; - /** - * Compressed (and potentially encrypted) page size in bytes, not including - * this header * - */ - int32_t compressed_page_size; - /** - * The 32bit CRC for the page, to be be calculated as follows: - * - Using the standard CRC32 algorithm - * - On the data only, i.e. this header should not be included. 'Data' - * hereby refers to the concatenation of the repetition levels, the - * definition levels and the column value, in this exact order. - * - On the encoded versions of the repetition levels, definition levels and - * column values - * - On the compressed versions of the repetition levels, definition levels - * and column values where possible; - * - For v1 data pages, the repetition levels, definition levels and column - * values are always compressed together. If a compression scheme is - * specified, the CRC shall be calculated on the compressed version of - * this concatenation. If no compression scheme is specified, the CRC - * shall be calculated on the uncompressed version of this concatenation. - * - For v2 data pages, the repetition levels and definition levels are - * handled separately from the data and are never compressed (only - * encoded). 
If a compression scheme is specified, the CRC shall be - * calculated on the concatenation of the uncompressed repetition levels, - * uncompressed definition levels and the compressed column values. - * If no compression scheme is specified, the CRC shall be calculated on - * the uncompressed concatenation. - * - In encrypted columns, CRC is calculated after page encryption; the - * encryption itself is performed after page compression (if compressed) - * If enabled, this allows for disabling checksumming in HDFS if only a few - * pages need to be read. - * - */ - int32_t crc; - DataPageHeader data_page_header; - IndexPageHeader index_page_header; - DictionaryPageHeader dictionary_page_header; - DataPageHeaderV2 data_page_header_v2; - - _PageHeader__isset __isset; - - void __set_type(const PageType::type val); - - void __set_uncompressed_page_size(const int32_t val); - - void __set_compressed_page_size(const int32_t val); - - void __set_crc(const int32_t val); - - void __set_data_page_header(const DataPageHeader& val); - - void __set_index_page_header(const IndexPageHeader& val); - - void __set_dictionary_page_header(const DictionaryPageHeader& val); - - void __set_data_page_header_v2(const DataPageHeaderV2& val); - - bool operator==(const PageHeader& rhs) const { - if (!(type == rhs.type)) - return false; - if (!(uncompressed_page_size == rhs.uncompressed_page_size)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (__isset.crc != rhs.__isset.crc) - return false; - else if (__isset.crc && !(crc == rhs.crc)) - return false; - if (__isset.data_page_header != rhs.__isset.data_page_header) - return false; - else if ( - __isset.data_page_header && !(data_page_header == rhs.data_page_header)) - return false; - if (__isset.index_page_header != rhs.__isset.index_page_header) - return false; - else if ( - __isset.index_page_header && - !(index_page_header == rhs.index_page_header)) - return false; - if 
(__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) - return false; - else if ( - __isset.dictionary_page_header && - !(dictionary_page_header == rhs.dictionary_page_header)) - return false; - if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) - return false; - else if ( - __isset.data_page_header_v2 && - !(data_page_header_v2 == rhs.data_page_header_v2)) - return false; - return true; - } - bool operator!=(const PageHeader& rhs) const { - return !(*this == rhs); - } - - bool operator<(const PageHeader&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageHeader& a, PageHeader& b); - -std::ostream& operator<<(std::ostream& out, const PageHeader& obj); - -typedef struct _KeyValue__isset { - _KeyValue__isset() : value(false) {} - bool value : 1; -} _KeyValue__isset; - -/** - * Wrapper struct to store key values - */ -class KeyValue : public virtual ::apache::thrift::TBase { - public: - KeyValue(const KeyValue&); - KeyValue& operator=(const KeyValue&); - KeyValue() : key(), value() {} - - virtual ~KeyValue() noexcept; - std::string key; - std::string value; - - _KeyValue__isset __isset; - - void __set_key(const std::string& val); - - void __set_value(const std::string& val); - - bool operator==(const KeyValue& rhs) const { - if (!(key == rhs.key)) - return false; - if (__isset.value != rhs.__isset.value) - return false; - else if (__isset.value && !(value == rhs.value)) - return false; - return true; - } - bool operator!=(const KeyValue& rhs) const { - return !(*this == rhs); - } - - bool operator<(const KeyValue&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(KeyValue& a, KeyValue& b); - -std::ostream& 
operator<<(std::ostream& out, const KeyValue& obj); - -/** - * Wrapper struct to specify sort order - */ -class SortingColumn : public virtual ::apache::thrift::TBase { - public: - SortingColumn(const SortingColumn&); - SortingColumn& operator=(const SortingColumn&); - SortingColumn() : column_idx(0), descending(0), nulls_first(0) {} - - virtual ~SortingColumn() noexcept; - /** - * The column index (in this row group) * - */ - int32_t column_idx; - /** - * If true, indicates this column is sorted in descending order. * - */ - bool descending; - /** - * If true, nulls will come before non-null values, otherwise, - * nulls go at the end. - */ - bool nulls_first; - - void __set_column_idx(const int32_t val); - - void __set_descending(const bool val); - - void __set_nulls_first(const bool val); - - bool operator==(const SortingColumn& rhs) const { - if (!(column_idx == rhs.column_idx)) - return false; - if (!(descending == rhs.descending)) - return false; - if (!(nulls_first == rhs.nulls_first)) - return false; - return true; - } - bool operator!=(const SortingColumn& rhs) const { - return !(*this == rhs); - } - - bool operator<(const SortingColumn&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SortingColumn& a, SortingColumn& b); - -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); - -/** - * statistics of a given page type and encoding - */ -class PageEncodingStats : public virtual ::apache::thrift::TBase { - public: - PageEncodingStats(const PageEncodingStats&); - PageEncodingStats& operator=(const PageEncodingStats&); - PageEncodingStats() - : page_type((PageType::type)0), encoding((Encoding::type)0), count(0) {} - - virtual ~PageEncodingStats() noexcept; - /** - * the page type (data/dic/...) 
* - * - * @see PageType - */ - PageType::type page_type; - /** - * encoding of the page * - * - * @see Encoding - */ - Encoding::type encoding; - /** - * number of pages of this type with this encoding * - */ - int32_t count; - - void __set_page_type(const PageType::type val); - - void __set_encoding(const Encoding::type val); - - void __set_count(const int32_t val); - - bool operator==(const PageEncodingStats& rhs) const { - if (!(page_type == rhs.page_type)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(count == rhs.count)) - return false; - return true; - } - bool operator!=(const PageEncodingStats& rhs) const { - return !(*this == rhs); - } - - bool operator<(const PageEncodingStats&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageEncodingStats& a, PageEncodingStats& b); - -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); - -typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() - : key_value_metadata(false), - index_page_offset(false), - dictionary_page_offset(false), - statistics(false), - encoding_stats(false), - bloom_filter_offset(false) {} - bool key_value_metadata : 1; - bool index_page_offset : 1; - bool dictionary_page_offset : 1; - bool statistics : 1; - bool encoding_stats : 1; - bool bloom_filter_offset : 1; -} _ColumnMetaData__isset; - -/** - * Description for column metadata - */ -class ColumnMetaData : public virtual ::apache::thrift::TBase { - public: - ColumnMetaData(const ColumnMetaData&); - ColumnMetaData& operator=(const ColumnMetaData&); - ColumnMetaData() - : type((Type::type)0), - codec((CompressionCodec::type)0), - num_values(0), - total_uncompressed_size(0), - total_compressed_size(0), - data_page_offset(0), - index_page_offset(0), - dictionary_page_offset(0), - bloom_filter_offset(0) {} - - virtual 
~ColumnMetaData() noexcept; - /** - * Type of this column * - * - * @see Type - */ - Type::type type; - /** - * Set of all encodings used for this column. The purpose is to validate - * whether we can decode those pages. * - */ - std::vector encodings; - /** - * Path in schema * - */ - std::vector path_in_schema; - /** - * Compression codec * - * - * @see CompressionCodec - */ - CompressionCodec::type codec; - /** - * Number of values in this column * - */ - int64_t num_values; - /** - * total byte size of all uncompressed pages in this column chunk (including - * the headers) * - */ - int64_t total_uncompressed_size; - /** - * total byte size of all compressed, and potentially encrypted, pages - * in this column chunk (including the headers) * - */ - int64_t total_compressed_size; - /** - * Optional key/value metadata * - */ - std::vector key_value_metadata; - /** - * Byte offset from beginning of file to first data page * - */ - int64_t data_page_offset; - /** - * Byte offset from beginning of file to root index page * - */ - int64_t index_page_offset; - /** - * Byte offset from the beginning of file to first (only) dictionary page * - */ - int64_t dictionary_page_offset; - /** - * optional statistics for this column chunk - */ - Statistics statistics; - /** - * Set of all encodings used for pages in this column chunk. - * This information can be used to determine if all data pages are - * dictionary encoded for example * - */ - std::vector encoding_stats; - /** - * Byte offset from beginning of file to Bloom filter data. 
* - */ - int64_t bloom_filter_offset; - - _ColumnMetaData__isset __isset; - - void __set_type(const Type::type val); - - void __set_encodings(const std::vector& val); - - void __set_path_in_schema(const std::vector& val); - - void __set_codec(const CompressionCodec::type val); - - void __set_num_values(const int64_t val); - - void __set_total_uncompressed_size(const int64_t val); - - void __set_total_compressed_size(const int64_t val); - - void __set_key_value_metadata(const std::vector& val); - - void __set_data_page_offset(const int64_t val); - - void __set_index_page_offset(const int64_t val); - - void __set_dictionary_page_offset(const int64_t val); - - void __set_statistics(const Statistics& val); - - void __set_encoding_stats(const std::vector& val); - - void __set_bloom_filter_offset(const int64_t val); - - bool operator==(const ColumnMetaData& rhs) const { - if (!(type == rhs.type)) - return false; - if (!(encodings == rhs.encodings)) - return false; - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (!(codec == rhs.codec)) - return false; - if (!(num_values == rhs.num_values)) - return false; - if (!(total_uncompressed_size == rhs.total_uncompressed_size)) - return false; - if (!(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if ( - __isset.key_value_metadata && - !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (!(data_page_offset == rhs.data_page_offset)) - return false; - if (__isset.index_page_offset != rhs.__isset.index_page_offset) - return false; - else if ( - __isset.index_page_offset && - !(index_page_offset == rhs.index_page_offset)) - return false; - if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) - return false; - else if ( - __isset.dictionary_page_offset && - !(dictionary_page_offset == rhs.dictionary_page_offset)) - return false; - if (__isset.statistics != 
rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - if (__isset.encoding_stats != rhs.__isset.encoding_stats) - return false; - else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) - return false; - if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset) - return false; - else if ( - __isset.bloom_filter_offset && - !(bloom_filter_offset == rhs.bloom_filter_offset)) - return false; - return true; - } - bool operator!=(const ColumnMetaData& rhs) const { - return !(*this == rhs); - } - - bool operator<(const ColumnMetaData&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnMetaData& a, ColumnMetaData& b); - -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); - -class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { - public: - EncryptionWithFooterKey(const EncryptionWithFooterKey&); - EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&); - EncryptionWithFooterKey() {} - - virtual ~EncryptionWithFooterKey() noexcept; - - bool operator==(const EncryptionWithFooterKey& /* rhs */) const { - return true; - } - bool operator!=(const EncryptionWithFooterKey& rhs) const { - return !(*this == rhs); - } - - bool operator<(const EncryptionWithFooterKey&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionWithFooterKey& a, EncryptionWithFooterKey& b); - -std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj); - -typedef struct _EncryptionWithColumnKey__isset { - _EncryptionWithColumnKey__isset() : key_metadata(false) {} - bool key_metadata : 1; -} 
_EncryptionWithColumnKey__isset; - -class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { - public: - EncryptionWithColumnKey(const EncryptionWithColumnKey&); - EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&); - EncryptionWithColumnKey() : key_metadata() {} - - virtual ~EncryptionWithColumnKey() noexcept; - /** - * Column path in schema * - */ - std::vector path_in_schema; - /** - * Retrieval metadata of column encryption key * - */ - std::string key_metadata; - - _EncryptionWithColumnKey__isset __isset; - - void __set_path_in_schema(const std::vector& val); - - void __set_key_metadata(const std::string& val); - - bool operator==(const EncryptionWithColumnKey& rhs) const { - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } - bool operator!=(const EncryptionWithColumnKey& rhs) const { - return !(*this == rhs); - } - - bool operator<(const EncryptionWithColumnKey&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionWithColumnKey& a, EncryptionWithColumnKey& b); - -std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj); - -typedef struct _ColumnCryptoMetaData__isset { - _ColumnCryptoMetaData__isset() - : ENCRYPTION_WITH_FOOTER_KEY(false), ENCRYPTION_WITH_COLUMN_KEY(false) {} - bool ENCRYPTION_WITH_FOOTER_KEY : 1; - bool ENCRYPTION_WITH_COLUMN_KEY : 1; -} _ColumnCryptoMetaData__isset; - -class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { - public: - ColumnCryptoMetaData(const ColumnCryptoMetaData&); - ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&); - ColumnCryptoMetaData() {} - - virtual ~ColumnCryptoMetaData() noexcept; 
- EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY; - EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY; - - _ColumnCryptoMetaData__isset __isset; - - void __set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val); - - void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val); - - bool operator==(const ColumnCryptoMetaData& rhs) const { - if (__isset.ENCRYPTION_WITH_FOOTER_KEY != - rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) - return false; - else if ( - __isset.ENCRYPTION_WITH_FOOTER_KEY && - !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) - return false; - if (__isset.ENCRYPTION_WITH_COLUMN_KEY != - rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) - return false; - else if ( - __isset.ENCRYPTION_WITH_COLUMN_KEY && - !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) - return false; - return true; - } - bool operator!=(const ColumnCryptoMetaData& rhs) const { - return !(*this == rhs); - } - - bool operator<(const ColumnCryptoMetaData&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnCryptoMetaData& a, ColumnCryptoMetaData& b); - -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj); - -typedef struct _ColumnChunk__isset { - _ColumnChunk__isset() - : file_path(false), - meta_data(false), - offset_index_offset(false), - offset_index_length(false), - column_index_offset(false), - column_index_length(false), - crypto_metadata(false), - encrypted_column_metadata(false) {} - bool file_path : 1; - bool meta_data : 1; - bool offset_index_offset : 1; - bool offset_index_length : 1; - bool column_index_offset : 1; - bool column_index_length : 1; - bool crypto_metadata : 1; - bool encrypted_column_metadata : 1; -} _ColumnChunk__isset; - -class ColumnChunk : public virtual ::apache::thrift::TBase { - public: - ColumnChunk(const 
ColumnChunk&); - ColumnChunk& operator=(const ColumnChunk&); - ColumnChunk() - : file_path(), - file_offset(0), - offset_index_offset(0), - offset_index_length(0), - column_index_offset(0), - column_index_length(0), - encrypted_column_metadata() {} - - virtual ~ColumnChunk() noexcept; - /** - * File where column data is stored. If not set, assumed to be same file as - * metadata. This path is relative to the current file. - * - */ - std::string file_path; - /** - * Byte offset in file_path to the ColumnMetaData * - */ - int64_t file_offset; - /** - * Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. - * - */ - ColumnMetaData meta_data; - /** - * File offset of ColumnChunk's OffsetIndex * - */ - int64_t offset_index_offset; - /** - * Size of ColumnChunk's OffsetIndex, in bytes * - */ - int32_t offset_index_length; - /** - * File offset of ColumnChunk's ColumnIndex * - */ - int64_t column_index_offset; - /** - * Size of ColumnChunk's ColumnIndex, in bytes * - */ - int32_t column_index_length; - /** - * Crypto metadata of encrypted columns * - */ - ColumnCryptoMetaData crypto_metadata; - /** - * Encrypted column metadata for this chunk * - */ - std::string encrypted_column_metadata; - - _ColumnChunk__isset __isset; - - void __set_file_path(const std::string& val); - - void __set_file_offset(const int64_t val); - - void __set_meta_data(const ColumnMetaData& val); - - void __set_offset_index_offset(const int64_t val); - - void __set_offset_index_length(const int32_t val); - - void __set_column_index_offset(const int64_t val); - - void __set_column_index_length(const int32_t val); - - void __set_crypto_metadata(const ColumnCryptoMetaData& val); - - void __set_encrypted_column_metadata(const std::string& val); - - bool operator==(const ColumnChunk& rhs) const { - if (__isset.file_path != rhs.__isset.file_path) - return false; - else if (__isset.file_path && 
!(file_path == rhs.file_path)) - return false; - if (!(file_offset == rhs.file_offset)) - return false; - if (__isset.meta_data != rhs.__isset.meta_data) - return false; - else if (__isset.meta_data && !(meta_data == rhs.meta_data)) - return false; - if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) - return false; - else if ( - __isset.offset_index_offset && - !(offset_index_offset == rhs.offset_index_offset)) - return false; - if (__isset.offset_index_length != rhs.__isset.offset_index_length) - return false; - else if ( - __isset.offset_index_length && - !(offset_index_length == rhs.offset_index_length)) - return false; - if (__isset.column_index_offset != rhs.__isset.column_index_offset) - return false; - else if ( - __isset.column_index_offset && - !(column_index_offset == rhs.column_index_offset)) - return false; - if (__isset.column_index_length != rhs.__isset.column_index_length) - return false; - else if ( - __isset.column_index_length && - !(column_index_length == rhs.column_index_length)) - return false; - if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) - return false; - else if ( - __isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) - return false; - if (__isset.encrypted_column_metadata != - rhs.__isset.encrypted_column_metadata) - return false; - else if ( - __isset.encrypted_column_metadata && - !(encrypted_column_metadata == rhs.encrypted_column_metadata)) - return false; - return true; - } - bool operator!=(const ColumnChunk& rhs) const { - return !(*this == rhs); - } - - bool operator<(const ColumnChunk&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnChunk& a, ColumnChunk& b); - -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj); - -typedef struct _RowGroup__isset { - _RowGroup__isset() - : sorting_columns(false), - 
file_offset(false), - total_compressed_size(false), - ordinal(false) {} - bool sorting_columns : 1; - bool file_offset : 1; - bool total_compressed_size : 1; - bool ordinal : 1; -} _RowGroup__isset; - -class RowGroup : public virtual ::apache::thrift::TBase { - public: - RowGroup(const RowGroup&); - RowGroup& operator=(const RowGroup&); - RowGroup() - : total_byte_size(0), - num_rows(0), - file_offset(0), - total_compressed_size(0), - ordinal(0) {} - - virtual ~RowGroup() noexcept; - /** - * Metadata for each column chunk in this row group. - * This list must have the same order as the SchemaElement list in - * FileMetaData. - * - */ - std::vector columns; - /** - * Total byte size of all the uncompressed column data in this row group * - */ - int64_t total_byte_size; - /** - * Number of rows in this row group * - */ - int64_t num_rows; - /** - * If set, specifies a sort ordering of the rows in this RowGroup. - * The sorting columns can be a subset of all the columns. - */ - std::vector sorting_columns; - /** - * Byte offset from beginning of file to first page (data or dictionary) - * in this row group * - */ - int64_t file_offset; - /** - * Total byte size of all compressed (and potentially encrypted) column data - * in this row group * - */ - int64_t total_compressed_size; - /** - * Row group ordinal in the file * - */ - int16_t ordinal; - - _RowGroup__isset __isset; - - void __set_columns(const std::vector& val); - - void __set_total_byte_size(const int64_t val); - - void __set_num_rows(const int64_t val); - - void __set_sorting_columns(const std::vector& val); - - void __set_file_offset(const int64_t val); - - void __set_total_compressed_size(const int64_t val); - - void __set_ordinal(const int16_t val); - - bool operator==(const RowGroup& rhs) const { - if (!(columns == rhs.columns)) - return false; - if (!(total_byte_size == rhs.total_byte_size)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (__isset.sorting_columns != 
rhs.__isset.sorting_columns) - return false; - else if ( - __isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) - return false; - if (__isset.file_offset != rhs.__isset.file_offset) - return false; - else if (__isset.file_offset && !(file_offset == rhs.file_offset)) - return false; - if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) - return false; - else if ( - __isset.total_compressed_size && - !(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.ordinal != rhs.__isset.ordinal) - return false; - else if (__isset.ordinal && !(ordinal == rhs.ordinal)) - return false; - return true; - } - bool operator!=(const RowGroup& rhs) const { - return !(*this == rhs); - } - - bool operator<(const RowGroup&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(RowGroup& a, RowGroup& b); - -std::ostream& operator<<(std::ostream& out, const RowGroup& obj); - -/** - * Empty struct to signal the order defined by the physical or logical type - */ -class TypeDefinedOrder : public virtual ::apache::thrift::TBase { - public: - TypeDefinedOrder(const TypeDefinedOrder&); - TypeDefinedOrder& operator=(const TypeDefinedOrder&); - TypeDefinedOrder() {} - - virtual ~TypeDefinedOrder() noexcept; - - bool operator==(const TypeDefinedOrder& /* rhs */) const { - return true; - } - bool operator!=(const TypeDefinedOrder& rhs) const { - return !(*this == rhs); - } - - bool operator<(const TypeDefinedOrder&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TypeDefinedOrder& a, TypeDefinedOrder& b); - -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj); - -typedef struct _ColumnOrder__isset { - 
_ColumnOrder__isset() : TYPE_ORDER(false) {} - bool TYPE_ORDER : 1; -} _ColumnOrder__isset; - -/** - * Union to specify the order used for the min_value and max_value fields for a - * column. This union takes the role of an enhanced enum that allows rich - * elements (which will be needed for a collation-based ordering in the future). - * - * Possible values are: - * * TypeDefinedOrder - the column uses the order defined by its logical or - * physical type (if there is no logical type). - * - * If the reader does not support the value of this union, min and max stats - * for this column should be ignored. - */ -class ColumnOrder : public virtual ::apache::thrift::TBase { - public: - ColumnOrder(const ColumnOrder&); - ColumnOrder& operator=(const ColumnOrder&); - ColumnOrder() {} - - virtual ~ColumnOrder() noexcept; - /** - * The sort orders for logical types are: - * UTF8 - unsigned byte-wise comparison - * INT8 - signed comparison - * INT16 - signed comparison - * INT32 - signed comparison - * INT64 - signed comparison - * UINT8 - unsigned comparison - * UINT16 - unsigned comparison - * UINT32 - unsigned comparison - * UINT64 - unsigned comparison - * DECIMAL - signed comparison of the represented value - * DATE - signed comparison - * TIME_MILLIS - signed comparison - * TIME_MICROS - signed comparison - * TIMESTAMP_MILLIS - signed comparison - * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison - * JSON - unsigned byte-wise comparison - * BSON - unsigned byte-wise comparison - * ENUM - unsigned byte-wise comparison - * LIST - undefined - * MAP - undefined - * - * In the absence of logical types, the sort order is determined by the - * physical type: BOOLEAN - false, true INT32 - signed comparison INT64 - - * signed comparison INT96 (only used for legacy timestamps) - undefined FLOAT - * - signed comparison of the represented value (*) DOUBLE - signed comparison - * of the represented value (*) BYTE_ARRAY - unsigned byte-wise comparison - * 
FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison - * - * (*) Because the sorting order is not specified properly for floating - * point values (relations vs. total ordering) the following - * compatibility rules should be applied when reading statistics: - * - If the min is a NaN, it should be ignored. - * - If the max is a NaN, it should be ignored. - * - If the min is +0, the row group may contain -0 values as well. - * - If the max is -0, the row group may contain +0 values as well. - * - When looking for NaN values, min and max should be ignored. - */ - TypeDefinedOrder TYPE_ORDER; - - _ColumnOrder__isset __isset; - - void __set_TYPE_ORDER(const TypeDefinedOrder& val); - - bool operator==(const ColumnOrder& rhs) const { - if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) - return false; - else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) - return false; - return true; - } - bool operator!=(const ColumnOrder& rhs) const { - return !(*this == rhs); - } - - bool operator<(const ColumnOrder&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnOrder& a, ColumnOrder& b); - -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); - -class PageLocation : public virtual ::apache::thrift::TBase { - public: - PageLocation(const PageLocation&); - PageLocation& operator=(const PageLocation&); - PageLocation() : offset(0), compressed_page_size(0), first_row_index(0) {} - - virtual ~PageLocation() noexcept; - /** - * Offset of the page in the file * - */ - int64_t offset; - /** - * Size of the page, including header. Sum of compressed_page_size and header - * length - */ - int32_t compressed_page_size; - /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). 
- */ - int64_t first_row_index; - - void __set_offset(const int64_t val); - - void __set_compressed_page_size(const int32_t val); - - void __set_first_row_index(const int64_t val); - - bool operator==(const PageLocation& rhs) const { - if (!(offset == rhs.offset)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (!(first_row_index == rhs.first_row_index)) - return false; - return true; - } - bool operator!=(const PageLocation& rhs) const { - return !(*this == rhs); - } - - bool operator<(const PageLocation&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageLocation& a, PageLocation& b); - -std::ostream& operator<<(std::ostream& out, const PageLocation& obj); - -class OffsetIndex : public virtual ::apache::thrift::TBase { - public: - OffsetIndex(const OffsetIndex&); - OffsetIndex& operator=(const OffsetIndex&); - OffsetIndex() {} - - virtual ~OffsetIndex() noexcept; - /** - * PageLocations, ordered by increasing PageLocation.offset. It is required - * that page_locations[i].first_row_index < - * page_locations[i+1].first_row_index. 
- */ - std::vector page_locations; - - void __set_page_locations(const std::vector& val); - - bool operator==(const OffsetIndex& rhs) const { - if (!(page_locations == rhs.page_locations)) - return false; - return true; - } - bool operator!=(const OffsetIndex& rhs) const { - return !(*this == rhs); - } - - bool operator<(const OffsetIndex&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(OffsetIndex& a, OffsetIndex& b); - -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj); - -typedef struct _ColumnIndex__isset { - _ColumnIndex__isset() : null_counts(false) {} - bool null_counts : 1; -} _ColumnIndex__isset; - -/** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] - */ -class ColumnIndex : public virtual ::apache::thrift::TBase { - public: - ColumnIndex(const ColumnIndex&); - ColumnIndex& operator=(const ColumnIndex&); - ColumnIndex() : boundary_order((BoundaryOrder::type)0) {} - - virtual ~ColumnIndex() noexcept; - /** - * A list of Boolean values to determine the validity of the corresponding - * min and max values. If true, a page contains only null values, and writers - * have to set the corresponding entries in min_values and max_values to - * byte[0], so that all lists have the same length. If false, the - * corresponding entries in min_values and max_values must be valid. - */ - std::vector null_pages; - /** - * Two lists containing lower and upper bounds for the values of each page - * determined by the ColumnOrder of the column. These may be the actual - * minimum and maximum values found on a page, but can also be (more compact) - * values that do not exist on a page. For example, instead of storing ""Blart - * Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". 
- * Such more compact values must still be valid values within the column's - * logical type. Readers must make sure that list entries are populated before - * using them by inspecting null_pages. - */ - std::vector min_values; - std::vector max_values; - /** - * Stores whether both min_values and max_values are orderd and if so, in - * which direction. This allows readers to perform binary searches in both - * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even - * if the lists are ordered. - * - * @see BoundaryOrder - */ - BoundaryOrder::type boundary_order; - /** - * A list containing the number of null values for each page * - */ - std::vector null_counts; - - _ColumnIndex__isset __isset; - - void __set_null_pages(const std::vector& val); - - void __set_min_values(const std::vector& val); - - void __set_max_values(const std::vector& val); - - void __set_boundary_order(const BoundaryOrder::type val); - - void __set_null_counts(const std::vector& val); - - bool operator==(const ColumnIndex& rhs) const { - if (!(null_pages == rhs.null_pages)) - return false; - if (!(min_values == rhs.min_values)) - return false; - if (!(max_values == rhs.max_values)) - return false; - if (!(boundary_order == rhs.boundary_order)) - return false; - if (__isset.null_counts != rhs.__isset.null_counts) - return false; - else if (__isset.null_counts && !(null_counts == rhs.null_counts)) - return false; - return true; - } - bool operator!=(const ColumnIndex& rhs) const { - return !(*this == rhs); - } - - bool operator<(const ColumnIndex&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnIndex& a, ColumnIndex& b); - -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj); - -typedef struct _AesGcmV1__isset { - _AesGcmV1__isset() - : aad_prefix(false), aad_file_unique(false), 
supply_aad_prefix(false) {} - bool aad_prefix : 1; - bool aad_file_unique : 1; - bool supply_aad_prefix : 1; -} _AesGcmV1__isset; - -class AesGcmV1 : public virtual ::apache::thrift::TBase { - public: - AesGcmV1(const AesGcmV1&); - AesGcmV1& operator=(const AesGcmV1&); - AesGcmV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) {} - - virtual ~AesGcmV1() noexcept; - /** - * AAD prefix * - */ - std::string aad_prefix; - /** - * Unique file identifier part of AAD suffix * - */ - std::string aad_file_unique; - /** - * In files encrypted with AAD prefix without storing it, - * readers must supply the prefix * - */ - bool supply_aad_prefix; - - _AesGcmV1__isset __isset; - - void __set_aad_prefix(const std::string& val); - - void __set_aad_file_unique(const std::string& val); - - void __set_supply_aad_prefix(const bool val); - - bool operator==(const AesGcmV1& rhs) const { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if ( - __isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if ( - __isset.supply_aad_prefix && - !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } - bool operator!=(const AesGcmV1& rhs) const { - return !(*this == rhs); - } - - bool operator<(const AesGcmV1&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(AesGcmV1& a, AesGcmV1& b); - -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj); - -typedef struct _AesGcmCtrV1__isset { - _AesGcmCtrV1__isset() - : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {} - bool 
aad_prefix : 1; - bool aad_file_unique : 1; - bool supply_aad_prefix : 1; -} _AesGcmCtrV1__isset; - -class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { - public: - AesGcmCtrV1(const AesGcmCtrV1&); - AesGcmCtrV1& operator=(const AesGcmCtrV1&); - AesGcmCtrV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) {} - - virtual ~AesGcmCtrV1() noexcept; - /** - * AAD prefix * - */ - std::string aad_prefix; - /** - * Unique file identifier part of AAD suffix * - */ - std::string aad_file_unique; - /** - * In files encrypted with AAD prefix without storing it, - * readers must supply the prefix * - */ - bool supply_aad_prefix; - - _AesGcmCtrV1__isset __isset; - - void __set_aad_prefix(const std::string& val); - - void __set_aad_file_unique(const std::string& val); - - void __set_supply_aad_prefix(const bool val); - - bool operator==(const AesGcmCtrV1& rhs) const { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if ( - __isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if ( - __isset.supply_aad_prefix && - !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } - bool operator!=(const AesGcmCtrV1& rhs) const { - return !(*this == rhs); - } - - bool operator<(const AesGcmCtrV1&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(AesGcmCtrV1& a, AesGcmCtrV1& b); - -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj); - -typedef struct _EncryptionAlgorithm__isset { - _EncryptionAlgorithm__isset() : AES_GCM_V1(false), AES_GCM_CTR_V1(false) {} - bool AES_GCM_V1 
: 1; - bool AES_GCM_CTR_V1 : 1; -} _EncryptionAlgorithm__isset; - -class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { - public: - EncryptionAlgorithm(const EncryptionAlgorithm&); - EncryptionAlgorithm& operator=(const EncryptionAlgorithm&); - EncryptionAlgorithm() {} - - virtual ~EncryptionAlgorithm() noexcept; - AesGcmV1 AES_GCM_V1; - AesGcmCtrV1 AES_GCM_CTR_V1; - - _EncryptionAlgorithm__isset __isset; - - void __set_AES_GCM_V1(const AesGcmV1& val); - - void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val); - - bool operator==(const EncryptionAlgorithm& rhs) const { - if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) - return false; - else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) - return false; - if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) - return false; - else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) - return false; - return true; - } - bool operator!=(const EncryptionAlgorithm& rhs) const { - return !(*this == rhs); - } - - bool operator<(const EncryptionAlgorithm&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionAlgorithm& a, EncryptionAlgorithm& b); - -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj); - -typedef struct _FileMetaData__isset { - _FileMetaData__isset() - : key_value_metadata(false), - created_by(false), - column_orders(false), - encryption_algorithm(false), - footer_signing_key_metadata(false) {} - bool key_value_metadata : 1; - bool created_by : 1; - bool column_orders : 1; - bool encryption_algorithm : 1; - bool footer_signing_key_metadata : 1; -} _FileMetaData__isset; - -/** - * Description for file metadata - */ -class FileMetaData : public virtual ::apache::thrift::TBase { - public: - FileMetaData(const FileMetaData&); - FileMetaData& operator=(const FileMetaData&); - 
FileMetaData() - : version(0), num_rows(0), created_by(), footer_signing_key_metadata() {} - - virtual ~FileMetaData() noexcept; - /** - * Version of this file * - */ - int32_t version; - /** - * Parquet schema for this file. This schema contains metadata for all the - * columns. The schema is represented as a tree with a single root. The nodes - * of the tree are flattened to a list by doing a depth-first traversal. The - * column metadata contains the path in the schema for that column which can - * be used to map columns to nodes in the schema. The first element is the - * root * - */ - std::vector schema; - /** - * Number of rows in this file * - */ - int64_t num_rows; - /** - * Row groups in this file * - */ - std::vector row_groups; - /** - * Optional key/value metadata * - */ - std::vector key_value_metadata; - /** - * String for application that wrote this file. This should be in the format - * version (build ). - * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) - * - */ - std::string created_by; - /** - * Sort order used for the min_value and max_value fields in the Statistics - * objects and the min_values and max_values fields in the ColumnIndex - * objects of each column in this file. Sort orders are listed in the order - * matching the columns in the schema. The indexes are not necessary the same - * though, because only leaf nodes of the schema are represented in the list - * of sort orders. - * - * Without column_orders, the meaning of the min_value and max_value fields - * in the Statistics object and the ColumnIndex object is undefined. To ensure - * well-defined behaviour, if these fields are written to a Parquet file, - * column_orders must be written as well. - * - * The obsolete min and max fields in the Statistics object are always sorted - * by signed comparison regardless of column_orders. - */ - std::vector column_orders; - /** - * Encryption algorithm. 
This field is set only in encrypted files - * with plaintext footer. Files with encrypted footer store algorithm id - * in FileCryptoMetaData structure. - */ - EncryptionAlgorithm encryption_algorithm; - /** - * Retrieval metadata of key used for signing the footer. - * Used only in encrypted files with plaintext footer. - */ - std::string footer_signing_key_metadata; - - _FileMetaData__isset __isset; - - void __set_version(const int32_t val); - - void __set_schema(const std::vector& val); - - void __set_num_rows(const int64_t val); - - void __set_row_groups(const std::vector& val); - - void __set_key_value_metadata(const std::vector& val); - - void __set_created_by(const std::string& val); - - void __set_column_orders(const std::vector& val); - - void __set_encryption_algorithm(const EncryptionAlgorithm& val); - - void __set_footer_signing_key_metadata(const std::string& val); - - bool operator==(const FileMetaData& rhs) const { - if (!(version == rhs.version)) - return false; - if (!(schema == rhs.schema)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(row_groups == rhs.row_groups)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if ( - __isset.key_value_metadata && - !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (__isset.created_by != rhs.__isset.created_by) - return false; - else if (__isset.created_by && !(created_by == rhs.created_by)) - return false; - if (__isset.column_orders != rhs.__isset.column_orders) - return false; - else if (__isset.column_orders && !(column_orders == rhs.column_orders)) - return false; - if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) - return false; - else if ( - __isset.encryption_algorithm && - !(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.footer_signing_key_metadata != - rhs.__isset.footer_signing_key_metadata) - return false; - else if ( - 
__isset.footer_signing_key_metadata && - !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) - return false; - return true; - } - bool operator!=(const FileMetaData& rhs) const { - return !(*this == rhs); - } - - bool operator<(const FileMetaData&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(FileMetaData& a, FileMetaData& b); - -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj); - -typedef struct _FileCryptoMetaData__isset { - _FileCryptoMetaData__isset() : key_metadata(false) {} - bool key_metadata : 1; -} _FileCryptoMetaData__isset; - -/** - * Crypto metadata for files with encrypted footer * - */ -class FileCryptoMetaData : public virtual ::apache::thrift::TBase { - public: - FileCryptoMetaData(const FileCryptoMetaData&); - FileCryptoMetaData& operator=(const FileCryptoMetaData&); - FileCryptoMetaData() : key_metadata() {} - - virtual ~FileCryptoMetaData() noexcept; - /** - * Encryption algorithm. This field is only used for files - * with encrypted footer. Files with plaintext footer store algorithm id - * inside footer (FileMetaData structure). 
- */ - EncryptionAlgorithm encryption_algorithm; - /** - * Retrieval metadata of key used for encryption of footer, - * and (possibly) columns * - */ - std::string key_metadata; - - _FileCryptoMetaData__isset __isset; - - void __set_encryption_algorithm(const EncryptionAlgorithm& val); - - void __set_key_metadata(const std::string& val); - - bool operator==(const FileCryptoMetaData& rhs) const { - if (!(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } - bool operator!=(const FileCryptoMetaData& rhs) const { - return !(*this == rhs); - } - - bool operator<(const FileCryptoMetaData&) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(FileCryptoMetaData& a, FileCryptoMetaData& b); - -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); - -} // namespace facebook::velox::parquet::thrift - -template <> -struct fmt::formatter - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::Type::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; - -template <> -struct fmt::formatter - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::CompressionCodec::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; - -template <> -struct fmt::formatter - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::ConvertedType::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; - -template <> -struct fmt::formatter< - 
facebook::velox::parquet::thrift::FieldRepetitionType::type> - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::FieldRepetitionType::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; - -template <> -struct fmt::formatter - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::Encoding::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; - -template <> -struct fmt::formatter - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::PageType::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; - -template <> -struct fmt::formatter - : fmt::formatter { - auto format( - const facebook::velox::parquet::thrift::BoundaryOrder::type& s, - format_context& ctx) const { - return formatter::format( - facebook::velox::parquet::thrift::to_string(s), ctx); - } -}; diff --git a/velox/dwio/parquet/thrift/ThriftTransport.h b/velox/dwio/parquet/thrift/ThriftTransport.h deleted file mode 100644 index cee094ce443..00000000000 --- a/velox/dwio/parquet/thrift/ThriftTransport.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include "velox/dwio/common/BufferedInput.h" - -namespace facebook::velox::parquet::thrift { - -class ThriftTransport - : public apache::thrift::transport::TVirtualTransport { - public: - virtual uint32_t read(uint8_t* outputBuf, uint32_t len) = 0; - virtual ~ThriftTransport() = default; -}; - -class ThriftStreamingTransport : public ThriftTransport { - public: - ThriftStreamingTransport( - dwio::common::SeekableInputStream* inputStream, - const char*& bufferStart, - const char*& bufferEnd) - : inputStream_(inputStream), - bufferStart_(bufferStart), - bufferEnd_(bufferEnd) { - VELOX_CHECK_NOT_NULL(inputStream_); - VELOX_CHECK_NOT_NULL(bufferStart_); - VELOX_CHECK_NOT_NULL(bufferEnd_); - } - - uint32_t read(uint8_t* outputBuf, uint32_t len) { - uint32_t bytesToRead = len; - while (bytesToRead > 0) { - if (bufferEnd_ == bufferStart_) { - int32_t size; - if (!inputStream_->Next( - reinterpret_cast(&bufferStart_), &size)) { - VELOX_FAIL("Reading past the end of the stream"); - } - bufferEnd_ = bufferStart_ + size; - } - - uint32_t bytesToReadInBuffer = - std::min(bufferEnd_ - bufferStart_, bytesToRead); - memcpy(outputBuf, bufferStart_, bytesToReadInBuffer); - bufferStart_ += bytesToReadInBuffer; - bytesToRead -= bytesToReadInBuffer; - outputBuf += bytesToReadInBuffer; - } - - return len; - } - - private: - dwio::common::SeekableInputStream* inputStream_; - const char*& bufferStart_; - const char*& bufferEnd_; -}; - -class ThriftBufferedTransport : public ThriftTransport { - public: - ThriftBufferedTransport(const void* inputBuf, uint64_t len) - : ThriftTransport(), - inputBuf_(reinterpret_cast(inputBuf)), - size_(len), - offset_(0) {} - - uint32_t read(uint8_t* outputBuf, uint32_t len) { - DWIO_ENSURE(offset_ + len <= size_); - memcpy(outputBuf, inputBuf_ + offset_, len); - offset_ += len; - return len; - } - - private: - const uint8_t* inputBuf_; - const uint64_t size_; - uint64_t offset_; -}; - -} // namespace 
facebook::velox::parquet::thrift diff --git a/velox/dwio/parquet/thrift/parquet.thrift b/velox/dwio/parquet/thrift/parquet.thrift index c7c962a0c50..2d6d5aabb7f 100644 --- a/velox/dwio/parquet/thrift/parquet.thrift +++ b/velox/dwio/parquet/thrift/parquet.thrift @@ -37,7 +37,9 @@ * File format description for the parquet file format */ -namespace cpp parquet.format +include "thrift/annotation/thrift.thrift" + +namespace cpp2 facebook.velox.parquet.thrift namespace java org.apache.parquet.format /** @@ -579,8 +581,9 @@ struct DataPageHeaderV2 { Which means the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. - If missing it is considered compressed */ - 7: optional bool is_compressed = 1; + If missing it is considered compressed. + Note: Code must use is_compressed().value_or(true) to handle the default. */ + 7: optional bool is_compressed; /** optional statistics for the data in this page **/ 8: optional Statistics statistics; diff --git a/velox/dwio/parquet/writer/arrow/ArrowSchema.cpp b/velox/dwio/parquet/writer/arrow/ArrowSchema.cpp index a514c33a5da..4dfda06d24f 100644 --- a/velox/dwio/parquet/writer/arrow/ArrowSchema.cpp +++ b/velox/dwio/parquet/writer/arrow/ArrowSchema.cpp @@ -457,7 +457,7 @@ Status fieldToNode( type = precision <= 9 ? 
ParquetType::kInt32 : ParquetType::kInt64; } else { type = ParquetType::kFixedLenByteArray; - length = DecimalType::DecimalSize(precision); + length = ::arrow::DecimalType::DecimalSize(precision); } PARQUET_CATCH_NOT_OK( logicalType = LogicalType::decimal(precision, scale)); diff --git a/velox/dwio/parquet/writer/arrow/CMakeLists.txt b/velox/dwio/parquet/writer/arrow/CMakeLists.txt index d1685bc9d45..768d658b3a3 100644 --- a/velox/dwio/parquet/writer/arrow/CMakeLists.txt +++ b/velox/dwio/parquet/writer/arrow/CMakeLists.txt @@ -68,7 +68,6 @@ velox_link_libraries( velox_dwio_arrow_parquet_writer_lib velox_dwio_arrow_parquet_writer_util_lib velox_dwio_parquet_common - velox_dwio_parquet_thrift velox_dwio_common velox_arrow_bridge arrow diff --git a/velox/dwio/parquet/writer/arrow/ColumnWriter.cpp b/velox/dwio/parquet/writer/arrow/ColumnWriter.cpp index 1ba58979697..ff71fbeb548 100644 --- a/velox/dwio/parquet/writer/arrow/ColumnWriter.cpp +++ b/velox/dwio/parquet/writer/arrow/ColumnWriter.cpp @@ -328,10 +328,10 @@ class SerializedPageWriter : public PageWriter { compressedData = page.buffer(); } - facebook::velox::parquet::thrift::DictionaryPageHeader dictPageHeader; - dictPageHeader.__set_num_values(page.numValues()); - dictPageHeader.__set_encoding(toThrift(page.encoding())); - dictPageHeader.__set_is_sorted(page.isSorted()); + facebook::velox::parquet::thrift::DictionaryPageHeader dict_page_header; + dict_page_header.num_values() = page.numValues(); + dict_page_header.encoding() = toThrift(page.encoding()); + dict_page_header.is_sorted() = page.isSorted(); const uint8_t* outputDataBuffer = compressedData->data(); int32_t outputDataLen = static_cast(compressedData->size()); @@ -347,17 +347,17 @@ class SerializedPageWriter : public PageWriter { outputDataBuffer = encryptionBuffer_->data(); } - facebook::velox::parquet::thrift::PageHeader pageHeader; - pageHeader.__set_type( - facebook::velox::parquet::thrift::PageType::DICTIONARY_PAGE); - 
pageHeader.__set_uncompressed_page_size( - static_cast(uncompressedSize)); - pageHeader.__set_compressed_page_size(static_cast(outputDataLen)); - pageHeader.__set_dictionary_page_header(dictPageHeader); + facebook::velox::parquet::thrift::PageHeader page_header; + page_header.type() = + facebook::velox::parquet::thrift::PageType::DICTIONARY_PAGE; + page_header.uncompressed_page_size() = + static_cast(uncompressedSize); + page_header.compressed_page_size() = static_cast(outputDataLen); + page_header.dictionary_page_header() = dict_page_header; if (pageChecksumVerification_) { uint32_t crc32 = internal::crc32(/* prev */ 0, outputDataBuffer, outputDataLen); - pageHeader.__set_crc(static_cast(crc32)); + page_header.crc() = static_cast(crc32); } PARQUET_ASSIGN_OR_THROW(int64_t startPos, sink_->Tell()); @@ -369,7 +369,7 @@ class SerializedPageWriter : public PageWriter { updateEncryption(encryption::kDictionaryPageHeader); } const int64_t headerSize = - thriftSerializer_->serialize(&pageHeader, sink_.get(), metaEncryptor_); + thriftSerializer_->serialize(&page_header, sink_.get(), metaEncryptor_); PARQUET_THROW_NOT_OK(sink_->Write(outputDataBuffer, outputDataLen)); @@ -447,14 +447,14 @@ class SerializedPageWriter : public PageWriter { } facebook::velox::parquet::thrift::PageHeader pageHeader; - pageHeader.__set_uncompressed_page_size( - static_cast(uncompressedSize)); - pageHeader.__set_compressed_page_size(static_cast(outputDataLen)); + pageHeader.uncompressed_page_size() = + static_cast(uncompressedSize); + pageHeader.compressed_page_size() = static_cast(outputDataLen); if (pageChecksumVerification_) { uint32_t crc32 = internal::crc32(/* prev */ 0, outputDataBuffer, outputDataLen); - pageHeader.__set_crc(static_cast(crc32)); + pageHeader.crc() = static_cast(crc32); } if (page.type() == PageType::kDataPage) { @@ -512,47 +512,46 @@ class SerializedPageWriter : public PageWriter { facebook::velox::parquet::thrift::PageHeader& pageHeader, const DataPageV1& page) { 
facebook::velox::parquet::thrift::DataPageHeader dataPageHeader; - dataPageHeader.__set_num_values(page.numValues()); - dataPageHeader.__set_encoding(toThrift(page.encoding())); - dataPageHeader.__set_definition_level_encoding( - toThrift(page.definitionLevelEncoding())); - dataPageHeader.__set_repetition_level_encoding( - toThrift(page.repetitionLevelEncoding())); + dataPageHeader.num_values() = page.numValues(); + dataPageHeader.encoding() = toThrift(page.encoding()); + dataPageHeader.definition_level_encoding() = + toThrift(page.definitionLevelEncoding()); + dataPageHeader.repetition_level_encoding() = + toThrift(page.repetitionLevelEncoding()); // Write page statistics only when page index is not enabled. if (columnIndexBuilder_ == nullptr) { - dataPageHeader.__set_statistics(toThrift(page.statistics())); + dataPageHeader.statistics() = toThrift(page.statistics()); } - pageHeader.__set_type( - facebook::velox::parquet::thrift::PageType::DATA_PAGE); - pageHeader.__set_data_page_header(dataPageHeader); + pageHeader.type() = facebook::velox::parquet::thrift::PageType::DATA_PAGE; + pageHeader.data_page_header() = dataPageHeader; } void setDataPageV2Header( facebook::velox::parquet::thrift::PageHeader& pageHeader, const DataPageV2& page) { facebook::velox::parquet::thrift::DataPageHeaderV2 dataPageHeader; - dataPageHeader.__set_num_values(page.numValues()); - dataPageHeader.__set_num_nulls(page.numNulls()); - dataPageHeader.__set_num_rows(page.numRows()); - dataPageHeader.__set_encoding(toThrift(page.encoding())); + dataPageHeader.num_values() = page.numValues(); + dataPageHeader.num_nulls() = page.numNulls(); + dataPageHeader.num_rows() = page.numRows(); + dataPageHeader.encoding() = toThrift(page.encoding()); - dataPageHeader.__set_definition_levels_byte_length( - page.definitionLevelsByteLength()); - dataPageHeader.__set_repetition_levels_byte_length( - page.repetitionLevelsByteLength()); + dataPageHeader.definition_levels_byte_length() = + 
page.definitionLevelsByteLength(); + dataPageHeader.repetition_levels_byte_length() = + page.repetitionLevelsByteLength(); - dataPageHeader.__set_is_compressed(page.isCompressed()); + dataPageHeader.is_compressed() = page.isCompressed(); // Write page statistics only when page index is not enabled. if (columnIndexBuilder_ == nullptr) { - dataPageHeader.__set_statistics(toThrift(page.statistics())); + dataPageHeader.statistics() = toThrift(page.statistics()); } - pageHeader.__set_type( - facebook::velox::parquet::thrift::PageType::DATA_PAGE_V2); - pageHeader.__set_data_page_header_v2(dataPageHeader); + pageHeader.type() = + facebook::velox::parquet::thrift::PageType::DATA_PAGE_V2; + pageHeader.data_page_header_v2() = dataPageHeader; } /// \brief Finish page index builders and update the stream offset to adjust diff --git a/velox/dwio/parquet/writer/arrow/Metadata.cpp b/velox/dwio/parquet/writer/arrow/Metadata.cpp index b62d9ecea85..c0bd3c07fa6 100644 --- a/velox/dwio/parquet/writer/arrow/Metadata.cpp +++ b/velox/dwio/parquet/writer/arrow/Metadata.cpp @@ -18,6 +18,8 @@ #include "velox/dwio/parquet/writer/arrow/Metadata.h" +#include + #include #include #include @@ -93,40 +95,71 @@ static std::shared_ptr makeTypedColumnStats( const facebook::velox::parquet::thrift::ColumnMetaData& metadata, const ColumnDescriptor* descr) { // If ColumnOrder is defined, return max_value and min_value. 
- const auto& stats = metadata.statistics; if (descr->columnOrder().order() == ColumnOrder::kTypeDefinedOrder) { return makeStatistics( descr, - stats.min_value, - stats.max_value, - metadata.num_values - stats.null_count, - stats.null_count, - stats.distinct_count, - stats.__isset.max_value || stats.__isset.min_value, - stats.__isset.null_count, - stats.__isset.distinct_count, + apache::thrift::can_throw(metadata.statistics()) + ->min_value() + .value_or(""), + apache::thrift::can_throw(metadata.statistics()) + ->max_value() + .value_or(""), + apache::thrift::can_throw(*metadata.num_values()) - + apache::thrift::can_throw(metadata.statistics()) + ->null_count() + .value_or(0), + apache::thrift::can_throw(metadata.statistics()) + ->null_count() + .value_or(0), + apache::thrift::can_throw(metadata.statistics()) + ->distinct_count() + .value_or(0), + apache::thrift::can_throw(metadata.statistics()) + ->max_value() + .has_value() || + apache::thrift::can_throw(metadata.statistics()) + ->min_value() + .has_value(), + apache::thrift::can_throw(metadata.statistics()) + ->null_count() + .has_value(), + apache::thrift::can_throw(metadata.statistics()) + ->distinct_count() + .has_value(), false, 0); } - // Default behavior. 
+ // Default behavior return makeStatistics( descr, - stats.min, - stats.max, - metadata.num_values - stats.null_count, - stats.null_count, - stats.distinct_count, - stats.__isset.max || stats.__isset.min, - stats.__isset.null_count, - stats.__isset.distinct_count, + apache::thrift::can_throw(metadata.statistics())->min().value_or(""), + apache::thrift::can_throw(metadata.statistics())->max().value_or(""), + apache::thrift::can_throw(*metadata.num_values()) - + apache::thrift::can_throw(metadata.statistics()) + ->null_count() + .value_or(0), + apache::thrift::can_throw(metadata.statistics()) + ->null_count() + .value_or(0), + apache::thrift::can_throw(metadata.statistics()) + ->distinct_count() + .value_or(0), + apache::thrift::can_throw(metadata.statistics())->max().has_value() || + apache::thrift::can_throw(metadata.statistics())->min().has_value(), + apache::thrift::can_throw(metadata.statistics()) + ->null_count() + .has_value(), + apache::thrift::can_throw(metadata.statistics()) + ->distinct_count() + .has_value(), false, 0); } -std::shared_ptr makeColumnStats( +std::shared_ptr MakeColumnStats( const facebook::velox::parquet::thrift::ColumnMetaData& meta_data, const ColumnDescriptor* descr) { - switch (static_cast(meta_data.type)) { + switch (static_cast(*meta_data.type())) { case Type::kBoolean: return makeTypedColumnStats(meta_data, descr); case Type::kInt32: @@ -161,17 +194,24 @@ class ColumnCryptoMetaData::ColumnCryptoMetaDataImpl { : cryptoMetadata_(cryptoMetadata) {} bool encryptedWithFooterKey() const { - return cryptoMetadata_->__isset.ENCRYPTION_WITH_FOOTER_KEY; + return cryptoMetadata_->getType() == + facebook::velox::parquet::thrift::ColumnCryptoMetaData::Type:: + ENCRYPTION_WITH_FOOTER_KEY; } bool encryptedWithColumnKey() const { - return cryptoMetadata_->__isset.ENCRYPTION_WITH_COLUMN_KEY; + return cryptoMetadata_->getType() == + facebook::velox::parquet::thrift::ColumnCryptoMetaData::Type:: + ENCRYPTION_WITH_COLUMN_KEY; } std::shared_ptr 
pathInSchema() const { return std::make_shared( - cryptoMetadata_->ENCRYPTION_WITH_COLUMN_KEY.path_in_schema); + *cryptoMetadata_->get_ENCRYPTION_WITH_COLUMN_KEY().path_in_schema()); } const std::string& keyMetadata() const { - return cryptoMetadata_->ENCRYPTION_WITH_COLUMN_KEY.key_metadata; + return apache::thrift::can_throw( + cryptoMetadata_->get_ENCRYPTION_WITH_COLUMN_KEY() + .key_metadata() + .value()); } private: @@ -217,20 +257,22 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { descr_(descr), properties_(properties), writerVersion_(writerVersion) { - columnMetadata_ = &column->meta_data; - if (column->__isset.crypto_metadata) { // column metadata is encrypted + columnMetadata_ = &apache::thrift::can_throw(*column->meta_data()); + if (column->crypto_metadata()) { // column metadata is encrypted facebook::velox::parquet::thrift::ColumnCryptoMetaData ccmd = - column->crypto_metadata; + *column->crypto_metadata(); - if (ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY) { + if (ccmd.getType() == + facebook::velox::parquet::thrift::ColumnCryptoMetaData::Type:: + ENCRYPTION_WITH_COLUMN_KEY) { if (fileDecryptor != nullptr && fileDecryptor->properties() != nullptr) { // Should decrypt metadata. 
std::shared_ptr path = std::make_shared( - ccmd.ENCRYPTION_WITH_COLUMN_KEY.path_in_schema); - std::string key_metadata = - ccmd.ENCRYPTION_WITH_COLUMN_KEY.key_metadata; + *ccmd.get_ENCRYPTION_WITH_COLUMN_KEY().path_in_schema()); + std::string key_metadata = apache::thrift::can_throw( + ccmd.get_ENCRYPTION_WITH_COLUMN_KEY().key_metadata().value()); std::string aadColumnMetadata = encryption::createModuleAad( fileDecryptor->fileAad(), @@ -240,12 +282,15 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { static_cast(-1)); auto Decryptor = fileDecryptor->getColumnMetaDecryptor( path->toDotString(), key_metadata, aadColumnMetadata); - auto len = - static_cast(column->encrypted_column_metadata.size()); + auto len = static_cast( + apache::thrift::can_throw(*column->encrypted_column_metadata()) + .size()); ThriftDeserializer deserializer(properties_); deserializer.deserializeMessage( reinterpret_cast( - column->encrypted_column_metadata.c_str()), + apache::thrift::can_throw( + *column->encrypted_column_metadata()) + .c_str()), &len, &decryptedMetadata_, Decryptor); @@ -257,14 +302,17 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { } } } - for (const auto& encoding : columnMetadata_->encodings) { - encodings_.push_back(loadenumSafe(&encoding)); + for (const auto& encoding : *columnMetadata_->encodings()) { + encodings_.push_back(loadEnumSafe(&encoding)); } - for (const auto& encodingStats : columnMetadata_->encoding_stats) { - encodingStats_.push_back( - {loadenumSafe(&encodingStats.page_type), - loadenumSafe(&encodingStats.encoding), - encodingStats.count}); + if (columnMetadata_->encoding_stats()) { + for (const auto& encodingStatsEntry : + *columnMetadata_->encoding_stats()) { + encodingStats_.push_back( + {loadEnumSafe(&*encodingStatsEntry.page_type()), + loadEnumSafe(&*encodingStatsEntry.encoding()), + *encodingStatsEntry.count()}); + } } possibleStats_ = nullptr; } @@ -275,38 +323,43 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { // Column chunk. 
inline int64_t fileOffset() const { - return column_->file_offset; + return *column_->file_offset(); + } + + inline bool has_file_path() const { + return column_->file_path().has_value(); } + inline const std::string& filePath() const { - return column_->file_path; + return apache::thrift::can_throw(*column_->file_path()); } inline Type::type type() const { - return loadenumSafe(&columnMetadata_->type); + return loadEnumSafe(&*columnMetadata_->type()); } inline int64_t numValues() const { - return columnMetadata_->num_values; + return *columnMetadata_->num_values(); } std::shared_ptr pathInSchema() { return std::make_shared( - columnMetadata_->path_in_schema); + *columnMetadata_->path_in_schema()); } - // Check if statistics are set and are valid. - // 1) Must be set in the metadata. - // 2) Statistics must not be corrupted. + // Check if statistics are set and are valid + // 1) Must be set in the metadata + // 2) Statistics must not be corrupted inline bool isStatsSet() const { VELOX_DCHECK_NOT_NULL(writerVersion_); - // If the column statistics don't exist or column sort order is unknown, - // we cannot use the column stats. 
- if (!columnMetadata_->__isset.statistics || + // If the column statistics don't exist or column sort order is unknown + // we cannot use the column stats + if (!columnMetadata_->statistics() || descr_->sortOrder() == SortOrder::kUnknown) { return false; } if (possibleStats_ == nullptr) { - possibleStats_ = makeColumnStats(*columnMetadata_, descr_); + possibleStats_ = MakeColumnStats(*columnMetadata_, descr_); } EncodedStatistics encodedStats = possibleStats_->encode(); return writerVersion_->hasCorrectStatistics( @@ -319,7 +372,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { } inline Compression::type compression() const { - return loadenumSafe(&columnMetadata_->codec); + return loadEnumSafe(&*columnMetadata_->codec()); } const std::vector& encodings() const { @@ -331,63 +384,58 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { } inline std::optional bloomFilterOffset() const { - if (columnMetadata_->__isset.bloom_filter_offset) { - return columnMetadata_->bloom_filter_offset; - } - return std::nullopt; + return columnMetadata_->bloom_filter_offset().to_optional(); } inline bool hasDictionaryPage() const { - return columnMetadata_->__isset.dictionary_page_offset; + return columnMetadata_->dictionary_page_offset().has_value(); } inline int64_t dictionaryPageOffset() const { - return columnMetadata_->dictionary_page_offset; + return columnMetadata_->dictionary_page_offset().value_or(0); } inline int64_t dataPageOffset() const { - return columnMetadata_->data_page_offset; + return *columnMetadata_->data_page_offset(); } inline bool hasIndexPage() const { - return columnMetadata_->__isset.index_page_offset; + return columnMetadata_->index_page_offset().has_value(); } inline int64_t indexPageOffset() const { - return columnMetadata_->index_page_offset; + return apache::thrift::can_throw(*columnMetadata_->index_page_offset()); } inline int64_t totalCompressedSize() const { - return columnMetadata_->total_compressed_size; + return 
*columnMetadata_->total_compressed_size(); } inline int64_t totalUncompressedSize() const { - return columnMetadata_->total_uncompressed_size; + return *columnMetadata_->total_uncompressed_size(); } inline std::unique_ptr cryptoMetadata() const { - if (column_->__isset.crypto_metadata) { + if (column_->crypto_metadata()) { return ColumnCryptoMetaData::make( - reinterpret_cast(&column_->crypto_metadata)); + reinterpret_cast(&*column_->crypto_metadata())); } else { return nullptr; } } std::optional getColumnIndexLocation() const { - if (column_->__isset.column_index_offset && - column_->__isset.column_index_length) { + if (column_->column_index_offset() && column_->column_index_length()) { return IndexLocation{ - column_->column_index_offset, column_->column_index_length}; + *column_->column_index_offset(), *column_->column_index_length()}; } return std::nullopt; } std::optional getOffsetIndexLocation() const { - if (column_->__isset.offset_index_offset && - column_->__isset.offset_index_length) { + if (column_->offset_index_offset() && column_->offset_index_length()) { return IndexLocation{ - column_->offset_index_offset, column_->offset_index_length}; + *column_->offset_index_offset(), *column_->offset_index_length()}; } return std::nullopt; } @@ -469,6 +517,10 @@ int64_t ColumnChunkMetaData::fileOffset() const { return impl_->fileOffset(); } +bool ColumnChunkMetaData::has_file_path() const { + return impl_->has_file_path(); +} + const std::string& ColumnChunkMetaData::filePath() const { return impl_->filePath(); } @@ -580,10 +632,10 @@ class RowGroupMetaData::RowGroupMetaDataImpl { writerVersion_(writerVersion), fileDecryptor_(std::move(fileDecryptor)) { if (ARROW_PREDICT_FALSE( - rowGroup_->columns.size() > + rowGroup_->columns()->size() > static_cast(std::numeric_limits::max()))) { throw ParquetException( - "Row group had too many columns: ", rowGroup_->columns.size()); + "Row group had too many columns: ", rowGroup_->columns()->size()); } } @@ -592,23 +644,23 
@@ class RowGroupMetaData::RowGroupMetaDataImpl { } inline int numColumns() const { - return static_cast(rowGroup_->columns.size()); + return static_cast(rowGroup_->columns()->size()); } inline int64_t numRows() const { - return rowGroup_->num_rows; + return *rowGroup_->num_rows(); } inline int64_t totalByteSize() const { - return rowGroup_->total_byte_size; + return *rowGroup_->total_byte_size(); } inline int64_t totalCompressedSize() const { - return rowGroup_->total_compressed_size; + return apache::thrift::can_throw(*rowGroup_->total_compressed_size()); } inline int64_t fileOffset() const { - return rowGroup_->file_offset; + return apache::thrift::can_throw(*rowGroup_->file_offset()); } inline const SchemaDescriptor* schema() const { @@ -618,11 +670,11 @@ class RowGroupMetaData::RowGroupMetaDataImpl { std::unique_ptr columnChunk(int i) { if (i >= 0 && i < numColumns()) { return ColumnChunkMetaData::make( - &rowGroup_->columns[i], + &rowGroup_->columns()[i], schema_->column(i), properties_, writerVersion_, - rowGroup_->ordinal, + rowGroup_->ordinal().value_or(0), i, fileDecryptor_); } @@ -635,12 +687,12 @@ class RowGroupMetaData::RowGroupMetaDataImpl { std::vector sortingColumns() const { std::vector sortingColumns; - if (!rowGroup_->__isset.sorting_columns) { + if (!rowGroup_->sorting_columns()) { return sortingColumns; } - sortingColumns.resize(rowGroup_->sorting_columns.size()); + sortingColumns.resize(rowGroup_->sorting_columns()->size()); for (size_t i = 0; i < sortingColumns.size(); ++i) { - sortingColumns[i] = fromThrift(rowGroup_->sorting_columns[i]); + sortingColumns[i] = FromThrift((*rowGroup_->sorting_columns())[i]); } return sortingColumns; } @@ -766,8 +818,8 @@ class FileMetaData::FileMetaDataImpl { footerDecryptor); metadataLen_ = *metadataLen; - if (metadata_->__isset.created_by) { - writerVersion_ = ApplicationVersion(metadata_->created_by); + if (metadata_->created_by()) { + writerVersion_ = ApplicationVersion(*metadata_->created_by()); } else { 
writerVersion_ = ApplicationVersion("unknown 0.0.0"); } @@ -787,7 +839,7 @@ class FileMetaData::FileMetaDataImpl { uint8_t* serializedData; uint32_t serializedLen = metadataLen_; ThriftSerializer serializer; - serializer.serializeToBuffer( + serializer.SerializeToBuffer( metadata_.get(), &serializedLen, &serializedData); // Encrypt with nonce. @@ -838,29 +890,30 @@ class FileMetaData::FileMetaDataImpl { return schema_.numColumns(); } inline int64_t numRows() const { - return metadata_->num_rows; + return *metadata_->num_rows(); } inline int numRowGroups() const { - return static_cast(metadata_->row_groups.size()); + return static_cast(metadata_->row_groups()->size()); } inline int32_t version() const { - return metadata_->version; + return *metadata_->version(); } inline const std::string& createdBy() const { - return metadata_->created_by; + return apache::thrift::can_throw(*metadata_->created_by()); } inline int numSchemaElements() const { - return static_cast(metadata_->schema.size()); + return static_cast(metadata_->schema()->size()); } inline bool isEncryptionAlgorithmSet() const { - return metadata_->__isset.encryption_algorithm; + return metadata_->encryption_algorithm().has_value(); } - inline EncryptionAlgorithm encryptionAlgorithm() { - return fromThrift(metadata_->encryption_algorithm); + inline EncryptionAlgorithm encryption_algorithm() { + return fromThrift( + apache::thrift::can_throw(*metadata_->encryption_algorithm())); } inline const std::string& footerSigningKeyMetadata() { - return metadata_->footer_signing_key_metadata; + return apache::thrift::can_throw(*metadata_->footer_signing_key_metadata()); } const ApplicationVersion& writerVersion() const { @@ -876,7 +929,7 @@ class FileMetaData::FileMetaDataImpl { if (isEncryptionAlgorithmSet()) { uint8_t* serializedData; uint32_t serializedLen; - serializer.serializeToBuffer( + serializer.SerializeToBuffer( metadata_.get(), &serializedLen, &serializedData); // Encrypt the footer key. 
@@ -907,7 +960,7 @@ class FileMetaData::FileMetaDataImpl { throw ParquetException(ss.str()); } return RowGroupMetaData::make( - &metadata_->row_groups[i], + &(*metadata_->row_groups())[i], &schema_, properties_, &writerVersion_, @@ -928,10 +981,10 @@ class FileMetaData::FileMetaDataImpl { void setFilePath(const std::string& path) { for (facebook::velox::parquet::thrift::RowGroup& rowGroup : - metadata_->row_groups) { + *metadata_->row_groups()) { for (facebook::velox::parquet::thrift::ColumnChunk& chunk : - rowGroup.columns) { - chunk.__set_file_path(path); + *rowGroup.columns()) { + chunk.file_path() = path; } } } @@ -943,7 +996,7 @@ class FileMetaData::FileMetaDataImpl { << " row groups, requested metadata for row group: " << i; throw ParquetException(ss.str()); } - return metadata_->row_groups[i]; + return (*metadata_->row_groups())[i]; } void appendRowGroups(const std::unique_ptr& other) { @@ -960,11 +1013,12 @@ class FileMetaData::FileMetaDataImpl { // And incur O(n²) behavior on repeated calls to AppendRowGroups(). // (See https://en.cppreference.com/w/cpp/container/vector/reserve. // About inappropriate uses of reserve()). 
- const auto start = metadata_->row_groups.size(); - metadata_->row_groups.resize(start + n); + const auto start = metadata_->row_groups()->size(); + metadata_->row_groups()->resize(start + n); for (int i = 0; i < n; i++) { - metadata_->row_groups[start + i] = other->thriftRowGroup(i); - metadata_->num_rows += metadata_->row_groups[start + i].num_rows; + (*metadata_->row_groups())[start + i] = other->thriftRowGroup(i); + metadata_->num_rows() = *metadata_->num_rows() + + *((*metadata_->row_groups())[start + i].num_rows()); } } @@ -986,23 +1040,24 @@ class FileMetaData::FileMetaDataImpl { std::make_unique(); auto metadata = out->impl_->metadata_.get(); - metadata->version = metadata_->version; - metadata->schema = metadata_->schema; + metadata->version() = *metadata_->version(); + metadata->schema() = *metadata_->schema(); - metadata->row_groups.resize(rowGroups.size()); + metadata->row_groups()->resize(rowGroups.size()); int i = 0; for (int selectedIndex : rowGroups) { - metadata->num_rows += thriftRowGroup(selectedIndex).num_rows; - metadata->row_groups[i++] = thriftRowGroup(selectedIndex); + metadata->num_rows() = + *metadata->num_rows() + *thriftRowGroup(selectedIndex).num_rows(); + (*metadata->row_groups())[i++] = thriftRowGroup(selectedIndex); } - metadata->key_value_metadata = metadata_->key_value_metadata; - metadata->created_by = metadata_->created_by; - metadata->column_orders = metadata_->column_orders; - metadata->encryption_algorithm = metadata_->encryption_algorithm; - metadata->footer_signing_key_metadata = - metadata_->footer_signing_key_metadata; - metadata->__isset = metadata_->__isset; + metadata->key_value_metadata().copy_from(metadata_->key_value_metadata()); + metadata->created_by().copy_from(metadata_->created_by()); + metadata->column_orders().copy_from(metadata_->column_orders()); + metadata->encryption_algorithm().copy_from( + metadata_->encryption_algorithm()); + metadata->footer_signing_key_metadata().copy_from( + 
metadata_->footer_signing_key_metadata()); out->impl_->schema_ = schema_; out->impl_->writerVersion_ = writerVersion_; @@ -1046,21 +1101,23 @@ class FileMetaData::FileMetaDataImpl { std::unordered_map> fieldNanCounts_; void initSchema() { - if (metadata_->schema.empty()) { + if (metadata_->schema()->empty()) { throw ParquetException("Empty file schema (no root)"); } schema_.init( schema::unflatten( - &metadata_->schema[0], static_cast(metadata_->schema.size()))); + &(*metadata_->schema())[0], + static_cast(metadata_->schema()->size()))); } void initColumnOrders() { // Update ColumnOrder. std::vector columnOrders; - if (metadata_->__isset.column_orders) { - columnOrders.reserve(metadata_->column_orders.size()); - for (auto columnOrder : metadata_->column_orders) { - if (columnOrder.__isset.TYPE_ORDER) { + if (metadata_->column_orders()) { + columnOrders.reserve(metadata_->column_orders()->size()); + for (auto columnOrder : *metadata_->column_orders()) { + if (columnOrder.getType() == + facebook::velox::parquet::thrift::ColumnOrder::Type::TYPE_ORDER) { columnOrders.push_back(ColumnOrder::typeDefined_); } else { columnOrders.push_back(ColumnOrder::undefined_); @@ -1075,10 +1132,12 @@ class FileMetaData::FileMetaDataImpl { void initKeyValueMetadata() { std::shared_ptr metadata = nullptr; - if (metadata_->__isset.key_value_metadata) { + if (metadata_->key_value_metadata()) { metadata = std::make_shared(); - for (const auto& it : metadata_->key_value_metadata) { - metadata->Append(it.key, it.value); + for (const auto& it : *metadata_->key_value_metadata()) { + metadata->Append( + apache::thrift::can_throw(*it.key()), + apache::thrift::can_throw(*it.value())); } } keyValueMetadata_ = std::move(metadata); @@ -1161,7 +1220,7 @@ bool FileMetaData::isEncryptionAlgorithmSet() const { } EncryptionAlgorithm FileMetaData::encryptionAlgorithm() const { - return impl_->encryptionAlgorithm(); + return impl_->encryption_algorithm(); } const std::string& 
FileMetaData::footerSigningKeyMetadata() const { @@ -1243,12 +1302,13 @@ class FileCryptoMetaData::FileCryptoMetaDataImpl { metadataLen_ = *metadataLen; } - EncryptionAlgorithm encryptionAlgorithm() const { - return fromThrift(metadata_.encryption_algorithm); + EncryptionAlgorithm encryption_algorithm() const { + return fromThrift( + apache::thrift::can_throw(*metadata_.encryption_algorithm())); } const std::string& keyMetadata() const { - return metadata_.key_metadata; + return apache::thrift::can_throw(*metadata_.key_metadata()); } void writeTo(::arrow::io::OutputStream* dst) const { @@ -1263,7 +1323,7 @@ class FileCryptoMetaData::FileCryptoMetaDataImpl { }; EncryptionAlgorithm FileCryptoMetaData::encryptionAlgorithm() const { - return impl_->encryptionAlgorithm(); + return impl_->encryption_algorithm(); } const std::string& FileCryptoMetaData::keyMetadata() const { @@ -1727,12 +1787,13 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { // Column chunk. void setFilePath(const std::string& val) { - columnChunk_->__set_file_path(val); + columnChunk_->file_path() = val; } // Column metadata. void setStatistics(const EncodedStatistics& val) { - columnChunk_->meta_data.__set_statistics(toThrift(val)); + apache::thrift::can_throw(columnChunk_->meta_data())->statistics() = + toThrift(val); // Store NaN count separately since it's not written to the parquet file. 
if (val.hasNanCount) { nanCount_ = val.nanCount; @@ -1740,12 +1801,15 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { } } - int64_t nanCount() const { - return nanCount_; + // column chunk + void set_file_path(const std::string& val) { + columnChunk_->file_path() = val; } - bool hasNanCount() const { - return hasNanCount_; + // column metadata + void SetStatistics(const EncodedStatistics& val) { + apache::thrift::can_throw(*columnChunk_->meta_data()).statistics() = + toThrift(val); } void finish( @@ -1760,49 +1824,47 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { const std::map& dictEncodingStats, const std::map& dataEncodingStats, const std::shared_ptr& encryptor) { + columnChunk_->meta_data().ensure(); if (dictionary_page_offset > 0) { - columnChunk_->meta_data.__set_dictionary_page_offset( - dictionary_page_offset); - columnChunk_->__set_file_offset(dictionary_page_offset + compressedSize); + columnChunk_->meta_data()->dictionary_page_offset() = + dictionary_page_offset; + columnChunk_->file_offset() = dictionary_page_offset + compressedSize; } else { - columnChunk_->__set_file_offset(data_page_offset + compressedSize); + columnChunk_->file_offset() = data_page_offset + compressedSize; } - columnChunk_->__isset.meta_data = true; - columnChunk_->meta_data.__set_num_values(num_values); + columnChunk_->meta_data()->num_values() = num_values; if (index_page_offset >= 0) { - columnChunk_->meta_data.__set_index_page_offset(index_page_offset); + columnChunk_->meta_data()->index_page_offset() = index_page_offset; } - columnChunk_->meta_data.__set_data_page_offset(data_page_offset); - columnChunk_->meta_data.__set_total_uncompressed_size(uncompressedSize); - columnChunk_->meta_data.__set_total_compressed_size(compressedSize); + columnChunk_->meta_data()->data_page_offset() = data_page_offset; + columnChunk_->meta_data()->total_uncompressed_size() = uncompressedSize; + columnChunk_->meta_data()->total_compressed_size() = 
compressedSize; - std::vector - thriftEncodings; + std::vector thrift_encodings; std::vector - thriftEncodingStats; - auto addEncoding = - [&thriftEncodings]( - facebook::velox::parquet::thrift::Encoding::type value) { - auto it = std::find( - thriftEncodings.cbegin(), thriftEncodings.cend(), value); - if (it == thriftEncodings.cend()) { - thriftEncodings.push_back(value); - } - }; - // Add dictionary page encoding stats. + thrift_encoding_stats; + auto add_encoding = [&thrift_encodings]( + facebook::velox::parquet::thrift::Encoding value) { + auto it = + std::find(thrift_encodings.cbegin(), thrift_encodings.cend(), value); + if (it == thrift_encodings.cend()) { + thrift_encodings.push_back(value); + } + }; + // Add dictionary page encoding stats if (hasDictionary) { for (const auto& entry : dictEncodingStats) { - facebook::velox::parquet::thrift::PageEncodingStats dictEncStat; - dictEncStat.__set_page_type( - facebook::velox::parquet::thrift::PageType::DICTIONARY_PAGE); - // Dictionary encoding would be PLAIN_DICTIONARY in v1 and + facebook::velox::parquet::thrift::PageEncodingStats dict_enc_stat; + dict_enc_stat.page_type() = + facebook::velox::parquet::thrift::PageType::DICTIONARY_PAGE; + // Dictionary Encoding would be PLAIN_DICTIONARY in v1 and // PLAIN in v2. - facebook::velox::parquet::thrift::Encoding::type dictEncoding = + facebook::velox::parquet::thrift::Encoding dict_encoding = toThrift(entry.first); - dictEncStat.__set_encoding(dictEncoding); - dictEncStat.__set_count(entry.second); - thriftEncodingStats.push_back(dictEncStat); - addEncoding(dictEncoding); + dict_enc_stat.encoding() = dict_encoding; + dict_enc_stat.count() = entry.second; + thrift_encoding_stats.push_back(dict_enc_stat); + add_encoding(dict_encoding); } } // Always add encoding for RL/DL. @@ -1810,41 +1872,38 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { // In benchmark and testing. 
// And for now, we always add RLE even if there are no levels at all, // while parquet-mr is more fine-grained. - addEncoding(facebook::velox::parquet::thrift::Encoding::RLE); - // Add data page encoding stats. + add_encoding(facebook::velox::parquet::thrift::Encoding::RLE); + // Add data page encoding stats for (const auto& entry : dataEncodingStats) { - facebook::velox::parquet::thrift::PageEncodingStats dataEncStat; - dataEncStat.__set_page_type( - facebook::velox::parquet::thrift::PageType::DATA_PAGE); - facebook::velox::parquet::thrift::Encoding::type dataEncoding = + facebook::velox::parquet::thrift::PageEncodingStats data_enc_stat; + data_enc_stat.page_type() = + facebook::velox::parquet::thrift::PageType::DATA_PAGE; + facebook::velox::parquet::thrift::Encoding data_encoding = toThrift(entry.first); - dataEncStat.__set_encoding(dataEncoding); - dataEncStat.__set_count(entry.second); - thriftEncodingStats.push_back(dataEncStat); - addEncoding(dataEncoding); + data_enc_stat.encoding() = data_encoding; + data_enc_stat.count() = entry.second; + thrift_encoding_stats.push_back(data_enc_stat); + add_encoding(data_encoding); } - columnChunk_->meta_data.__set_encodings(thriftEncodings); - columnChunk_->meta_data.__set_encoding_stats(thriftEncodingStats); + columnChunk_->meta_data()->encodings() = thrift_encodings; + columnChunk_->meta_data()->encoding_stats() = thrift_encoding_stats; const auto& encryptMd = properties_->columnEncryptionProperties(column_->path()->toDotString()); - // Column is encrypted. + // column is encrypted if (encryptMd != nullptr && encryptMd->isEncrypted()) { - columnChunk_->__isset.crypto_metadata = true; facebook::velox::parquet::thrift::ColumnCryptoMetaData ccmd; if (encryptMd->isEncryptedWithFooterKey()) { // Encrypted with footer key. 
- ccmd.__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - ccmd.__set_ENCRYPTION_WITH_FOOTER_KEY( + ccmd.set_ENCRYPTION_WITH_FOOTER_KEY( facebook::velox::parquet::thrift::EncryptionWithFooterKey()); } else { // encrypted with column key facebook::velox::parquet::thrift::EncryptionWithColumnKey eck; - eck.__set_key_metadata(encryptMd->keyMetadata()); - eck.__set_path_in_schema(column_->path()->toDotVector()); - ccmd.__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - ccmd.__set_ENCRYPTION_WITH_COLUMN_KEY(eck); + eck.key_metadata() = encryptMd->keyMetadata(); + eck.path_in_schema() = column_->path()->toDotVector(); + ccmd.set_ENCRYPTION_WITH_COLUMN_KEY(eck); } - columnChunk_->__set_crypto_metadata(ccmd); + columnChunk_->crypto_metadata() = ccmd; bool encryptedFooter = properties_->fileEncryptionProperties()->encryptedFooter(); @@ -1859,8 +1918,8 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { uint8_t* serializedData; uint32_t serializedLen; - serializer.serializeToBuffer( - &columnChunk_->meta_data, &serializedLen, &serializedData); + serializer.SerializeToBuffer( + &*columnChunk_->meta_data(), &serializedLen, &serializedData); std::vector encryptedData( encryptor->ciphertextSizeDelta() + serializedLen); @@ -1870,16 +1929,17 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { const char* temp = const_cast( reinterpret_cast(encryptedData.data())); std::string encrypted_column_metadata(temp, encryptedLen); - columnChunk_->__set_encrypted_column_metadata( - encrypted_column_metadata); + columnChunk_->encrypted_column_metadata() = encrypted_column_metadata; if (encryptedFooter) { - columnChunk_->__isset.meta_data = false; + columnChunk_->meta_data().reset(); } else { - // Keep redacted metadata version for old readers. 
- columnChunk_->__isset.meta_data = true; - columnChunk_->meta_data.__isset.statistics = false; - columnChunk_->meta_data.__isset.encoding_stats = false; + // Keep redacted metadata version for old readers + if (!columnChunk_->meta_data()) { + columnChunk_->meta_data().ensure(); + } + columnChunk_->meta_data()->statistics().reset(); + columnChunk_->meta_data()->encoding_stats().reset(); } } } @@ -1893,19 +1953,33 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { const ColumnDescriptor* descr() const { return column_; } - int64_t totalCompressedSize() const { - return columnChunk_->meta_data.total_compressed_size; + int64_t total_compressed_size() const { + return apache::thrift::can_throw( + *apache::thrift::can_throw(*columnChunk_->meta_data()) + .total_compressed_size()); + } + + int64_t nanCount() const { + return nanCount_; + } + bool hasNanCount() const { + return hasNanCount_; + } + + int32_t fieldId() const { + return column_->schemaNode()->fieldId(); } private: void init(facebook::velox::parquet::thrift::ColumnChunk* columnChunk) { columnChunk_ = columnChunk; - columnChunk_->meta_data.__set_type(toThrift(column_->physicalType())); - columnChunk_->meta_data.__set_path_in_schema( - column_->path()->toDotVector()); - columnChunk_->meta_data.__set_codec( - toThrift(properties_->compression(column_->path()))); + columnChunk_->meta_data().ensure(); + columnChunk_->meta_data()->type() = toThrift(column_->physicalType()); + columnChunk_->meta_data()->path_in_schema() = + column_->path()->toDotVector(); + columnChunk_->meta_data()->codec() = + toThrift(properties_->compression(column_->path())); } facebook::velox::parquet::thrift::ColumnChunk* columnChunk_; @@ -2000,7 +2074,7 @@ void ColumnChunkMetaDataBuilder::setStatistics( } int64_t ColumnChunkMetaDataBuilder::totalCompressedSize() const { - return impl_->totalCompressedSize(); + return impl_->total_compressed_size(); } int64_t ColumnChunkMetaDataBuilder::nanCount() const { @@ -2011,6 +2085,10 
@@ bool ColumnChunkMetaDataBuilder::hasNanCount() const { return impl_->hasNanCount(); } +int32_t ColumnChunkMetaDataBuilder::fieldId() const { + return impl_->fieldId(); +} + class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { public: explicit RowGroupMetaDataBuilderImpl( @@ -2024,18 +2102,18 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { } ColumnChunkMetaDataBuilder* nextColumnChunk() { - if (!(nextColumn_ < numColumns())) { + if (!(nextColumn_ < num_columns())) { std::stringstream ss; - ss << "The schema only has " << numColumns() + ss << "The schema only has " << num_columns() << " columns, requested metadata for column: " << nextColumn_; throw ParquetException(ss.str()); } auto column = schema_->column(nextColumn_); - auto columnBuilder = ColumnChunkMetaDataBuilder::make( - properties_, column, &rowGroup_->columns[nextColumn_++]); - auto columnBuilderPtr = columnBuilder.get(); - columnBuilders_.push_back(std::move(columnBuilder)); - return columnBuilderPtr; + auto column_builder = ColumnChunkMetaDataBuilder::make( + properties_, column, &(*rowGroup_->columns())[nextColumn_++]); + auto column_builder_ptr = column_builder.get(); + columnBuilders_.push_back(std::move(column_builder)); + return column_builder_ptr; } int currentColumn() { @@ -2053,21 +2131,22 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { int64_t fileOffset = 0; int64_t total_compressed_size = 0; for (int i = 0; i < schema_->numColumns(); i++) { - if (!(rowGroup_->columns[i].file_offset >= 0)) { + if (!(apache::thrift::can_throw( + *(*rowGroup_->columns())[i].file_offset()) >= 0)) { std::stringstream ss; ss << "Column " << i << " is not complete."; throw ParquetException(ss.str()); } if (i == 0) { - const facebook::velox::parquet::thrift::ColumnMetaData& firstCol = - rowGroup_->columns[0].meta_data; + const facebook::velox::parquet::thrift::ColumnMetaData& first_col = + apache::thrift::can_throw(*(*rowGroup_->columns())[0].meta_data()); // As per spec, 
file_offset for the row group points to the first // dictionary or data page of the column. - if (firstCol.__isset.dictionary_page_offset && - firstCol.dictionary_page_offset > 0) { - fileOffset = firstCol.dictionary_page_offset; + if (first_col.dictionary_page_offset() && + *first_col.dictionary_page_offset() > 0) { + fileOffset = *first_col.dictionary_page_offset(); } else { - fileOffset = firstCol.data_page_offset; + fileOffset = *first_col.data_page_offset(); } } // Sometimes column metadata is encrypted and not available to read, @@ -2082,40 +2161,41 @@ class RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl { for (size_t i = 0; i < sortingColumns.size(); ++i) { thriftSortingColumns[i] = toThrift(sortingColumns[i]); } - rowGroup_->__set_sorting_columns(std::move(thriftSortingColumns)); + rowGroup_->sorting_columns() = std::move(thriftSortingColumns); } - rowGroup_->__set_file_offset(fileOffset); - rowGroup_->__set_total_compressed_size(total_compressed_size); - rowGroup_->__set_total_byte_size(totalBytesWritten); - rowGroup_->__set_ordinal(rowGroupOrdinal); + rowGroup_->file_offset() = fileOffset; + rowGroup_->total_compressed_size() = total_compressed_size; + rowGroup_->total_byte_size() = totalBytesWritten; + rowGroup_->ordinal() = rowGroupOrdinal; } - void setNumRows(int64_t numRows) { - rowGroup_->num_rows = numRows; + void set_num_rows(int64_t num_rows) { + rowGroup_->num_rows() = num_rows; } - int numColumns() { - return static_cast(rowGroup_->columns.size()); + int num_columns() { + return static_cast(rowGroup_->columns()->size()); } - int64_t numRows() { - return rowGroup_->num_rows; + int64_t num_rows() { + return *rowGroup_->num_rows(); } - // Returns a map of field_id -> (nan_count, has_nan_count). 
std::unordered_map> nanCounts() const { std::unordered_map> result; - for (const auto& builder : columnBuilders_) { - int32_t field_id = builder->descr()->schemaNode()->fieldId(); - result[field_id] = {builder->nanCount(), builder->hasNanCount()}; + for (int i = 0; i < static_cast(columnBuilders_.size()); ++i) { + if (columnBuilders_[i]->hasNanCount()) { + const auto fieldId = columnBuilders_[i]->fieldId(); + result[fieldId] = {columnBuilders_[i]->nanCount(), true}; + } } return result; } private: void initializeColumns(int ncols) { - rowGroup_->columns.resize(ncols); + rowGroup_->columns()->resize(ncols); } facebook::velox::parquet::thrift::RowGroup* rowGroup_; @@ -2153,15 +2233,15 @@ int RowGroupMetaDataBuilder::currentColumn() const { } int RowGroupMetaDataBuilder::numColumns() { - return impl_->numColumns(); + return impl_->num_columns(); } int64_t RowGroupMetaDataBuilder::numRows() { - return impl_->numRows(); + return impl_->num_rows(); } void RowGroupMetaDataBuilder::setNumRows(int64_t numRows) { - impl_->setNumRows(numRows); + impl_->set_num_rows(numRows); } void RowGroupMetaDataBuilder::finish( @@ -2194,8 +2274,6 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { } RowGroupMetaDataBuilder* appendRowGroup() { - // Accumulate NaN counts from the previous row group before creating a new - // one. 
accumulateNaNCountsFromCurrentRowGroup(); rowGroups_.emplace_back(); currentRowGroupBuilder_ = @@ -2204,68 +2282,73 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { } void setPageIndexLocation(const PageIndexLocation& location) { - auto setIndexLocation = [this]( - size_t rowGroupOrdinal, - const PageIndexLocation::FileIndexLocation& - fileIndexLocation, - bool columnIndex) { - auto& rowGroupMetadata = this->rowGroups_.at(rowGroupOrdinal); - auto iter = fileIndexLocation.find(rowGroupOrdinal); - if (iter != fileIndexLocation.cend()) { - const auto& rowGroupIndexLocation = iter->second; - for (size_t i = 0; i < rowGroupIndexLocation.size(); ++i) { - if (i >= rowGroupMetadata.columns.size()) { - throw ParquetException( - "Cannot find metadata for column ordinal ", i); - } - auto& columnMetadata = rowGroupMetadata.columns.at(i); - const auto& indexLocation = rowGroupIndexLocation.at(i); - if (indexLocation.has_value()) { - if (columnIndex) { - columnMetadata.__set_column_index_offset(indexLocation->offset); - columnMetadata.__set_column_index_length(indexLocation->length); - } else { - columnMetadata.__set_offset_index_offset(indexLocation->offset); - columnMetadata.__set_offset_index_length(indexLocation->length); + auto set_index_location = + [this]( + size_t row_group_ordinal, + const PageIndexLocation::FileIndexLocation& file_index_location, + bool column_index) { + auto& row_group_metadata = this->rowGroups_.at(row_group_ordinal); + auto iter = file_index_location.find(row_group_ordinal); + if (iter != file_index_location.cend()) { + const auto& row_group_index_location = iter->second; + for (size_t i = 0; i < row_group_index_location.size(); ++i) { + if (i >= row_group_metadata.columns()->size()) { + throw ParquetException( + "Cannot find metadata for column ordinal ", i); + } + auto& column_metadata = row_group_metadata.columns()->at(i); + const auto& index_location = row_group_index_location.at(i); + if (index_location.has_value()) { + if (column_index) 
{ + column_metadata.column_index_offset() = + index_location->offset; + column_metadata.column_index_length() = + index_location->length; + } else { + column_metadata.offset_index_offset() = + index_location->offset; + column_metadata.offset_index_length() = + index_location->length; + } + } } } - } - } - }; + }; for (size_t i = 0; i < rowGroups_.size(); ++i) { - setIndexLocation(i, location.columnIndexLocation, true); - setIndexLocation(i, location.offsetIndexLocation, false); + set_index_location(i, location.columnIndexLocation, true); + set_index_location(i, location.offsetIndexLocation, false); } } std::unique_ptr finish( - const std::shared_ptr& keyValueMetadata) { - // Accumulate NaN counts from the last row group. + const std::shared_ptr& key_value_metadata) { accumulateNaNCountsFromCurrentRowGroup(); - - int64_t totalRows = 0; - for (auto rowGroup : rowGroups_) { - totalRows += rowGroup.num_rows; + int64_t total_rows = 0; + for (auto row_group : rowGroups_) { + total_rows += *row_group.num_rows(); } - metadata_->__set_num_rows(totalRows); - metadata_->__set_row_groups(rowGroups_); + metadata_->num_rows() = total_rows; + metadata_->row_groups() = rowGroups_; - if (keyValueMetadata_ || keyValueMetadata) { + if (keyValueMetadata_ || key_value_metadata) { if (!keyValueMetadata_) { - keyValueMetadata_ = keyValueMetadata; - } else if (keyValueMetadata) { - keyValueMetadata_ = keyValueMetadata_->Merge(*keyValueMetadata); + keyValueMetadata_ = key_value_metadata; + } else if (key_value_metadata) { + keyValueMetadata_ = keyValueMetadata_->Merge(*key_value_metadata); } - metadata_->key_value_metadata.clear(); - metadata_->key_value_metadata.reserve(keyValueMetadata_->size()); + if (metadata_->key_value_metadata()) { + metadata_->key_value_metadata()->clear(); + } else { + metadata_->key_value_metadata().ensure(); + } + metadata_->key_value_metadata()->reserve(keyValueMetadata_->size()); for (int64_t i = 0; i < keyValueMetadata_->size(); ++i) { - 
facebook::velox::parquet::thrift::KeyValue kvPair; - kvPair.__set_key(keyValueMetadata_->key(i)); - kvPair.__set_value(keyValueMetadata_->value(i)); - metadata_->key_value_metadata.push_back(kvPair); + facebook::velox::parquet::thrift::KeyValue kv_pair; + kv_pair.key() = keyValueMetadata_->key(i); + kv_pair.value() = keyValueMetadata_->value(i); + metadata_->key_value_metadata()->push_back(kv_pair); } - metadata_->__isset.key_value_metadata = true; } int32_t fileVersion = 0; @@ -2277,20 +2360,19 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { fileVersion = 2; break; } - metadata_->__set_version(fileVersion); - metadata_->__set_created_by(properties_->createdBy()); - - // Users cannot set the `ColumnOrder` since we do not have user-defined - // sort order in the spec yet. We always default to `TYPE_DEFINED_ORDER`. - // We can expose it in the API once we have user-defined sort orders in the - // Parquet format. TypeDefinedOrder implies choose SortOrder based on - // convertedType/physicalType. - facebook::velox::parquet::thrift::TypeDefinedOrder typeDefinedOrder; - facebook::velox::parquet::thrift::ColumnOrder columnOrder; - columnOrder.__set_TYPE_ORDER(typeDefinedOrder); - columnOrder.__isset.TYPE_ORDER = true; - metadata_->column_orders.resize(schema_->numColumns(), columnOrder); - metadata_->__isset.column_orders = true; + metadata_->version() = fileVersion; + metadata_->created_by() = properties_->createdBy(); + + // Users cannot set the `ColumnOrder` since we do not have user defined sort + // order in the spec yet. We always default to `TYPE_DEFINED_ORDER`. We can + // expose it in the API once we have user defined sort orders in the Parquet + // format. 
TypeDefinedOrder implies choose SortOrder based on + // ConvertedType/PhysicalType + facebook::velox::parquet::thrift::TypeDefinedOrder type_defined_order; + facebook::velox::parquet::thrift::ColumnOrder column_order; + column_order.set_TYPE_ORDER(type_defined_order); + metadata_->column_orders().ensure(); + metadata_->column_orders()->resize(schema_->numColumns(), column_order); // If plaintext footer, set footer signing algorithm. auto fileEncryptionProperties = properties_->fileEncryptionProperties(); @@ -2305,17 +2387,17 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { } signingAlgorithm.algorithm = ParquetCipher::kAesGcmV1; - metadata_->__set_encryption_algorithm(toThrift(signingAlgorithm)); + metadata_->encryption_algorithm() = toThrift(signingAlgorithm); const std::string& footerSigningKeyMetadata = fileEncryptionProperties->footerKeyMetadata(); if (footerSigningKeyMetadata.size() > 0) { - metadata_->__set_footer_signing_key_metadata(footerSigningKeyMetadata); + metadata_->footer_signing_key_metadata() = footerSigningKeyMetadata; } } toParquet( static_cast(schema_->schemaRoot().get()), - &metadata_->schema); + &*metadata_->schema()); auto fileMetaData = std::unique_ptr(new FileMetaData()); fileMetaData->impl_->metadata_ = std::move(metadata_); fileMetaData->impl_->initSchema(); @@ -2332,12 +2414,12 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl { auto fileEncryptionProperties = properties_->fileEncryptionProperties(); - crypto_metadata_->__set_encryption_algorithm( - toThrift(fileEncryptionProperties->algorithm())); - std::string keyMetadata = fileEncryptionProperties->footerKeyMetadata(); + crypto_metadata_->encryption_algorithm() = + toThrift(fileEncryptionProperties->algorithm()); + std::string key_metadata = fileEncryptionProperties->footerKeyMetadata(); - if (!keyMetadata.empty()) { - crypto_metadata_->__set_key_metadata(keyMetadata); + if (!key_metadata.empty()) { + crypto_metadata_->key_metadata() = key_metadata; } std::unique_ptr 
fileCryptoMetadata( diff --git a/velox/dwio/parquet/writer/arrow/Metadata.h b/velox/dwio/parquet/writer/arrow/Metadata.h index c6104a2a794..9bbbc39badc 100644 --- a/velox/dwio/parquet/writer/arrow/Metadata.h +++ b/velox/dwio/parquet/writer/arrow/Metadata.h @@ -162,6 +162,8 @@ class PARQUET_EXPORT ColumnChunkMetaData { // Column chunk. int64_t fileOffset() const; + bool has_file_path() const; + // Parameter is only used when a dataset is spread across multiple files. const std::string& filePath() const; @@ -498,6 +500,8 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { bool hasNanCount() const; + int32_t fieldId() const; + // Commit the metadata. void finish( int64_t numValues, diff --git a/velox/dwio/parquet/writer/arrow/PageIndex.cpp b/velox/dwio/parquet/writer/arrow/PageIndex.cpp index e07538e83ba..c9218730647 100644 --- a/velox/dwio/parquet/writer/arrow/PageIndex.cpp +++ b/velox/dwio/parquet/writer/arrow/PageIndex.cpp @@ -18,6 +18,8 @@ #include "velox/dwio/parquet/writer/arrow/PageIndex.h" +#include + #include "velox/dwio/parquet/writer/arrow/Encoding.h" #include "velox/dwio/parquet/writer/arrow/Exception.h" #include "velox/dwio/parquet/writer/arrow/Metadata.h" @@ -109,18 +111,18 @@ class TypedColumnIndexImpl : public TypedColumnIndex { : columnIndex_(std::move(columnIndex)) { // Make sure the number of pages is valid and it does not overflow to // int32_t. 
- const size_t numPages = columnIndex_.null_pages.size(); + const size_t numPages = columnIndex_.null_pages()->size(); if (numPages >= static_cast(std::numeric_limits::max()) || - columnIndex_.min_values.size() != numPages || - columnIndex_.max_values.size() != numPages || - (columnIndex_.__isset.null_counts && - columnIndex_.null_counts.size() != numPages)) { + columnIndex_.min_values()->size() != numPages || + columnIndex_.max_values()->size() != numPages || + (columnIndex_.null_counts() && + columnIndex_.null_counts()->size() != numPages)) { throw ParquetException("Invalid column index"); } const size_t numNonNullPages = static_cast(std::accumulate( - columnIndex_.null_pages.cbegin(), - columnIndex_.null_pages.cend(), + columnIndex_.null_pages()->cbegin(), + columnIndex_.null_pages()->cend(), 0, [](int32_t numNonNullPages, bool nullPage) { return numNonNullPages + (nullPage ? 0 : 1); @@ -133,41 +135,43 @@ class TypedColumnIndexImpl : public TypedColumnIndex { nonNullPageIndices_.reserve(numNonNullPages); // Decode min and max values according to the physical type. - // Note that null pages are skipped. - auto plainDecoder = makeTypedDecoder(Encoding::kPlain, &descr); + // Note that null page are skipped. + auto plain_decoder = makeTypedDecoder(Encoding::kPlain, &descr); for (size_t i = 0; i < numPages; ++i) { - if (!columnIndex_.null_pages[i]) { + if (!(*columnIndex_.null_pages())[i]) { // The check on `numPages` has guaranteed the cast below is safe. 
nonNullPageIndices_.emplace_back(static_cast(i)); - decode(plainDecoder, columnIndex_.min_values[i], &minValues_, i); - decode(plainDecoder, columnIndex_.max_values[i], &maxValues_, i); + decode( + plain_decoder, (*columnIndex_.min_values())[i], &minValues_, i); + decode( + plain_decoder, (*columnIndex_.max_values())[i], &maxValues_, i); } } VELOX_DCHECK_EQ(numNonNullPages, nonNullPageIndices_.size()); } const std::vector& nullPages() const override { - return columnIndex_.null_pages; + return *columnIndex_.null_pages(); } const std::vector& encodedMinValues() const override { - return columnIndex_.min_values; + return *columnIndex_.min_values(); } const std::vector& encodedMaxValues() const override { - return columnIndex_.max_values; + return *columnIndex_.max_values(); } BoundaryOrder::type boundaryOrder() const override { - return loadenumSafe(&columnIndex_.boundary_order); + return loadEnumSafe(&*columnIndex_.boundary_order()); } bool hasNullCounts() const override { - return columnIndex_.__isset.null_counts; + return columnIndex_.null_counts().has_value(); } const std::vector& nullCounts() const override { - return columnIndex_.null_counts; + return apache::thrift::can_throw(*columnIndex_.null_counts()); } const std::vector& nonNullPageIndices() const override { @@ -195,14 +199,14 @@ class TypedColumnIndexImpl : public TypedColumnIndex { class OffsetIndexImpl : public OffsetIndex { public: explicit OffsetIndexImpl( - const facebook::velox::parquet::thrift::OffsetIndex& offsetIndex) { - pageLocations_.reserve(offsetIndex.page_locations.size()); - for (const auto& pageLocation : offsetIndex.page_locations) { + const facebook::velox::parquet::thrift::OffsetIndex& offset_index) { + pageLocations_.reserve(offset_index.page_locations()->size()); + for (const auto& page_location : *offset_index.page_locations()) { pageLocations_.emplace_back( PageLocation{ - pageLocation.offset, - pageLocation.compressed_page_size, - pageLocation.first_row_index}); + 
*page_location.offset(), + *page_location.compressed_page_size(), + *page_location.first_row_index()}); } } @@ -504,8 +508,8 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { /// Initialize the nullCounts vector as set. Invalid nullCounts vector /// from any page will invalidate the nullCounts vector of the column /// index. - columnIndex_.__isset.null_counts = true; - columnIndex_.boundary_order = + columnIndex_.null_counts() = {}; + columnIndex_.boundary_order() = facebook::velox::parquet::thrift::BoundaryOrder::UNORDERED; } @@ -520,15 +524,15 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { state_ = BuilderState::kStarted; if (stats.allNullValue) { - columnIndex_.null_pages.emplace_back(true); - columnIndex_.min_values.emplace_back(""); - columnIndex_.max_values.emplace_back(""); + columnIndex_.null_pages()->emplace_back(true); + columnIndex_.min_values()->emplace_back(""); + columnIndex_.max_values()->emplace_back(""); } else if (stats.hasMin && stats.hasMax) { - const size_t pageOrdinal = columnIndex_.null_pages.size(); + const size_t pageOrdinal = columnIndex_.null_pages()->size(); nonNullPageIndices_.emplace_back(pageOrdinal); - columnIndex_.min_values.emplace_back(stats.min()); - columnIndex_.max_values.emplace_back(stats.max()); - columnIndex_.null_pages.emplace_back(false); + columnIndex_.min_values()->emplace_back(stats.min()); + columnIndex_.max_values()->emplace_back(stats.max()); + columnIndex_.null_pages()->emplace_back(false); } else { /// This is a non-null page but it lacks of meaningful min/max values. /// Discard the column index. 
@@ -536,11 +540,10 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { return; } - if (columnIndex_.__isset.null_counts && stats.hasNullCount) { - columnIndex_.null_counts.emplace_back(stats.nullCount); + if (columnIndex_.null_counts() && stats.hasNullCount) { + columnIndex_.null_counts()->emplace_back(stats.nullCount); } else { - columnIndex_.__isset.null_counts = false; - columnIndex_.null_counts.clear(); + columnIndex_.null_counts().reset(); } } @@ -563,8 +566,8 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { state_ = BuilderState::kFinished; /// Clear null_counts vector because at least one page does not provide it. - if (!columnIndex_.__isset.null_counts) { - columnIndex_.null_counts.clear(); + if (!columnIndex_.null_counts()) { + columnIndex_.null_counts().reset(); } /// Decode min/max values according to the data type. @@ -576,14 +579,14 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { for (size_t i = 0; i < nonNullPageCount; ++i) { auto pageOrdinal = nonNullPageIndices_.at(i); decode( - decoder, columnIndex_.min_values.at(pageOrdinal), &minValues, i); + decoder, columnIndex_.min_values()->at(pageOrdinal), &minValues, i); decode( - decoder, columnIndex_.max_values.at(pageOrdinal), &maxValues, i); + decoder, columnIndex_.max_values()->at(pageOrdinal), &maxValues, i); } /// Decide the boundary order from decoded min/max values. 
auto boundaryOrder = determineBoundaryOrder(minValues, maxValues); - columnIndex_.__set_boundary_order(toThrift(boundaryOrder)); + columnIndex_.boundary_order() = toThrift(boundaryOrder); } void writeTo(::arrow::io::OutputStream* sink) const override { @@ -670,11 +673,11 @@ class OffsetIndexBuilderImpl final : public OffsetIndexBuilder { state_ = BuilderState::kStarted; - facebook::velox::parquet::thrift::PageLocation pageLocation; - pageLocation.__set_offset(offset); - pageLocation.__set_compressed_page_size(compressedPageSize); - pageLocation.__set_first_row_index(firstRowIndex); - offsetIndex_.page_locations.emplace_back(std::move(pageLocation)); + facebook::velox::parquet::thrift::PageLocation page_location; + page_location.offset() = offset; + page_location.compressed_page_size() = compressedPageSize; + page_location.first_row_index() = firstRowIndex; + offsetIndex_.page_locations()->emplace_back(std::move(page_location)); } void finish(int64_t finalPosition) override { @@ -685,10 +688,10 @@ class OffsetIndexBuilderImpl final : public OffsetIndexBuilder { break; } case BuilderState::kStarted: { - /// Adjust page offsets according to the final position. + /// Adjust page offsets according the final position. 
if (finalPosition > 0) { - for (auto& pageLocation : offsetIndex_.page_locations) { - pageLocation.__set_offset(pageLocation.offset + finalPosition); + for (auto& page_location : *offsetIndex_.page_locations()) { + page_location.offset() = *page_location.offset() + finalPosition; } } state_ = BuilderState::kFinished; diff --git a/velox/dwio/parquet/writer/arrow/Schema.cpp b/velox/dwio/parquet/writer/arrow/Schema.cpp index 3a431bb5c70..971da61676b 100644 --- a/velox/dwio/parquet/writer/arrow/Schema.cpp +++ b/velox/dwio/parquet/writer/arrow/Schema.cpp @@ -20,6 +20,8 @@ #include "velox/dwio/parquet/writer/arrow/SchemaInternal.h" #include "velox/dwio/parquet/writer/arrow/ThriftInternal.h" +#include + #include #include #include @@ -455,27 +457,28 @@ std::unique_ptr GroupNode::fromParquet( opaqueElement); int fieldId = -1; - if (element->__isset.field_id) { - fieldId = element->field_id; + if (element->field_id()) { + fieldId = *element->field_id(); } std::unique_ptr groupNode; - if (element->__isset.logicalType) { + if (element->logicalType()) { // Updated writer with logical type present. groupNode = std::unique_ptr(new GroupNode( - element->name, - loadenumSafe(&element->repetition_type), + apache::thrift::can_throw(*element->name()), + loadEnumSafe(&apache::thrift::can_throw(*element->repetition_type())), fields, - LogicalType::fromThrift(element->logicalType), + LogicalType::fromThrift( + apache::thrift::can_throw(*element->logicalType())), fieldId)); } else { groupNode = std::unique_ptr(new GroupNode( - element->name, - loadenumSafe(&element->repetition_type), + apache::thrift::can_throw(*element->name()), + loadEnumSafe(&apache::thrift::can_throw(*element->repetition_type())), fields, - (element->__isset.converted_type - ? loadenumSafe(&element->converted_type) - : ConvertedType::kNone), + (element->converted_type() ? 
loadEnumSafe(&apache::thrift::can_throw( + *element->converted_type())) + : ConvertedType::kNone), fieldId)); } @@ -488,39 +491,40 @@ std::unique_ptr PrimitiveNode::fromParquet(const void* opaqueElement) { opaqueElement); int fieldId = -1; - if (element->__isset.field_id) { - fieldId = element->field_id; + if (element->field_id()) { + fieldId = *element->field_id(); } std::unique_ptr primitiveNode; - if (element->__isset.logicalType) { + if (element->logicalType()) { // Updated writer with logical type present. primitiveNode = std::unique_ptr(new PrimitiveNode( - element->name, - loadenumSafe(&element->repetition_type), - LogicalType::fromThrift(element->logicalType), - loadenumSafe(&element->type), - element->type_length, + apache::thrift::can_throw(*element->name()), + loadEnumSafe(&apache::thrift::can_throw(*element->repetition_type())), + LogicalType::fromThrift( + apache::thrift::can_throw(*element->logicalType())), + loadEnumSafe(&apache::thrift::can_throw(*element->type())), + element->type_length().value_or(0), fieldId)); - } else if (element->__isset.converted_type) { + } else if (element->converted_type()) { // Legacy writer with converted type present. primitiveNode = std::unique_ptr(new PrimitiveNode( - element->name, - loadenumSafe(&element->repetition_type), - loadenumSafe(&element->type), - loadenumSafe(&element->converted_type), - element->type_length, - element->precision, - element->scale, + apache::thrift::can_throw(*element->name()), + loadEnumSafe(&apache::thrift::can_throw(*element->repetition_type())), + loadEnumSafe(&apache::thrift::can_throw(*element->type())), + loadEnumSafe(&apache::thrift::can_throw(*element->converted_type())), + element->type_length().value_or(0), + element->precision().value_or(0), + element->scale().value_or(0), fieldId)); } else { // Logical type not present. 
primitiveNode = std::unique_ptr(new PrimitiveNode( - element->name, - loadenumSafe(&element->repetition_type), + apache::thrift::can_throw(*element->name()), + loadEnumSafe(&apache::thrift::can_throw(*element->repetition_type())), NoLogicalType::make(), - loadenumSafe(&element->type), - element->type_length, + loadEnumSafe(&apache::thrift::can_throw(*element->type())), + element->type_length().value_or(0), fieldId)); } @@ -546,17 +550,17 @@ void GroupNode::toParquet(void* opaqueElement) const { facebook::velox::parquet::thrift::SchemaElement* element = static_cast( opaqueElement); - element->__set_name(name_); - element->__set_num_children(fieldCount()); - element->__set_repetition_type(toThrift(repetition_)); + element->name() = name_; + element->num_children() = fieldCount(); + element->repetition_type() = toThrift(repetition_); if (convertedType_ != ConvertedType::kNone) { - element->__set_converted_type(toThrift(convertedType_)); + element->converted_type() = toThrift(convertedType_); } if (fieldId_ >= 0) { - element->__set_field_id(fieldId_); + element->field_id() = fieldId_; } if (logicalType_ && logicalType_->isSerialized()) { - element->__set_logicalType(logicalType_->toThrift()); + element->logicalType() = logicalType_->toThrift(); } return; } @@ -565,11 +569,11 @@ void PrimitiveNode::toParquet(void* opaqueElement) const { facebook::velox::parquet::thrift::SchemaElement* element = static_cast( opaqueElement); - element->__set_name(name_); - element->__set_repetition_type(toThrift(repetition_)); + element->name() = name_; + element->repetition_type() = toThrift(repetition_); if (convertedType_ != ConvertedType::kNone) { if (convertedType_ != ConvertedType::kNa) { - element->__set_converted_type(toThrift(convertedType_)); + element->converted_type() = toThrift(convertedType_); } else { // ConvertedType::kNa is an unreleased, obsolete synonym for. // LogicalType::nullType. Never emit it (see PARQUET-1990 for discussion). 
@@ -580,21 +584,21 @@ void PrimitiveNode::toParquet(void* opaqueElement) const { } } if (fieldId_ >= 0) { - element->__set_field_id(fieldId_); + element->field_id() = fieldId_; } if (logicalType_ && logicalType_->isSerialized() && // TODO(tpboudreau): remove the following conjunct to enable // serialization. Of IntervalTypes after parquet.thrift recognizes them. !logicalType_->isInterval()) { - element->__set_logicalType(logicalType_->toThrift()); + element->logicalType() = logicalType_->toThrift(); } - element->__set_type(toThrift(physicalType_)); + element->type() = toThrift(physicalType_); if (physicalType_ == Type::kFixedLenByteArray) { - element->__set_type_length(typeLength_); + element->type_length() = typeLength_; } if (decimalMetadata_.isset) { - element->__set_precision(decimalMetadata_.precision); - element->__set_scale(decimalMetadata_.scale); + element->precision() = decimalMetadata_.precision; + element->scale() = decimalMetadata_.scale; } return; } @@ -605,7 +609,7 @@ void PrimitiveNode::toParquet(void* opaqueElement) const { std::unique_ptr unflatten( const facebook::velox::parquet::thrift::SchemaElement* elements, int length) { - if (elements[0].num_children == 0) { + if (elements[0].num_children().value_or(0) == 0) { if (length == 1) { // Degenerate case of Parquet file with no columns. return GroupNode::fromParquet(elements, {}); @@ -627,13 +631,13 @@ std::unique_ptr unflatten( const SchemaElement& element = elements[pos++]; const void* opaqueElement = static_cast(&element); - if (element.num_children == 0 && element.__isset.type) { + if (element.num_children().value_or(0) == 0 && element.type()) { // Leaf (primitive) node: always has a type. 
return PrimitiveNode::fromParquet(opaqueElement); } else { // Group node (may have 0 children, but cannot have a type) NodeVector fields; - for (int i = 0; i < element.num_children; ++i) { + for (int i = 0; i < element.num_children().value_or(0); ++i) { fields.emplace_back(nextNode()); } return GroupNode::fromParquet(opaqueElement, std::move(fields)); diff --git a/velox/dwio/parquet/writer/arrow/Statistics.cpp b/velox/dwio/parquet/writer/arrow/Statistics.cpp index 019157b8a6c..7718f375175 100644 --- a/velox/dwio/parquet/writer/arrow/Statistics.cpp +++ b/velox/dwio/parquet/writer/arrow/Statistics.cpp @@ -633,10 +633,14 @@ class TypedStatisticsImpl : public TypedStatistics { } if (!encodedMin.empty()) { - plainDecode(encodedMin, &min_); + T decoded_min; + plainDecode(encodedMin, &decoded_min); + copy(decoded_min, &min_, minBuffer_.get()); } if (!encodedMax.empty()) { - plainDecode(encodedMax, &max_); + T decoded_max; + plainDecode(encodedMax, &decoded_max); + copy(decoded_max, &max_, maxBuffer_.get()); } hasMinMax_ = hasMinMax; } diff --git a/velox/dwio/parquet/writer/arrow/ThriftInternal.h b/velox/dwio/parquet/writer/arrow/ThriftInternal.h index 2ce0e151741..fa3a005d01f 100644 --- a/velox/dwio/parquet/writer/arrow/ThriftInternal.h +++ b/velox/dwio/parquet/writer/arrow/ThriftInternal.h @@ -27,12 +27,8 @@ #include #include -// TCompactProtocol requires some #defines to work right. 
-#define SIGNED_RIGHT_SHIFT_IS 1 -#define ARITHMETIC_RIGHT_SHIFT 1 -#include -#include -#include +#include +#include #include "velox/common/base/Exceptions.h" #include "velox/dwio/parquet/writer/arrow/Exception.h" @@ -52,33 +48,33 @@ namespace facebook::velox::parquet::arrow { // Unsafe enum converters (input is not checked for validity) static inline Type::type fromThriftUnsafe( - facebook::velox::parquet::thrift::Type::type type) { + facebook::velox::parquet::thrift::Type type) { return static_cast(type); } static inline ConvertedType::type fromThriftUnsafe( - facebook::velox::parquet::thrift::ConvertedType::type type) { + facebook::velox::parquet::thrift::ConvertedType type) { // Item 0 is NONE. return static_cast(static_cast(type) + 1); } static inline Repetition::type fromThriftUnsafe( - facebook::velox::parquet::thrift::FieldRepetitionType::type type) { + facebook::velox::parquet::thrift::FieldRepetitionType type) { return static_cast(type); } static inline Encoding::type fromThriftUnsafe( - facebook::velox::parquet::thrift::Encoding::type type) { + facebook::velox::parquet::thrift::Encoding type) { return static_cast(type); } static inline PageType::type fromThriftUnsafe( - facebook::velox::parquet::thrift::PageType::type type) { + facebook::velox::parquet::thrift::PageType type) { return static_cast(type); } static inline Compression::type fromThriftUnsafe( - facebook::velox::parquet::thrift::CompressionCodec::type type) { + facebook::velox::parquet::thrift::CompressionCodec type) { switch (type) { case facebook::velox::parquet::thrift::CompressionCodec::UNCOMPRESSED: return Compression::UNCOMPRESSED; @@ -103,48 +99,44 @@ static inline Compression::type fromThriftUnsafe( } static inline BoundaryOrder::type fromThriftUnsafe( - facebook::velox::parquet::thrift::BoundaryOrder::type type) { + facebook::velox::parquet::thrift::BoundaryOrder type) { return static_cast(type); } namespace internal { template -struct ThriftenumTypeTraits {}; +struct 
ThriftEnumTypeTraits {}; template <> -struct ThriftenumTypeTraits<::facebook::velox::parquet::thrift::Type::type> { - using Parquetenum = Type; +struct ThriftEnumTypeTraits<::facebook::velox::parquet::thrift::Type> { + using ParquetEnum = Type; }; template <> -struct ThriftenumTypeTraits< - ::facebook::velox::parquet::thrift::ConvertedType::type> { - using Parquetenum = ConvertedType; +struct ThriftEnumTypeTraits<::facebook::velox::parquet::thrift::ConvertedType> { + using ParquetEnum = ConvertedType; }; template <> -struct ThriftenumTypeTraits< - ::facebook::velox::parquet::thrift::FieldRepetitionType::type> { - using Parquetenum = Repetition; +struct ThriftEnumTypeTraits< + ::facebook::velox::parquet::thrift::FieldRepetitionType> { + using ParquetEnum = Repetition; }; template <> -struct ThriftenumTypeTraits< - ::facebook::velox::parquet::thrift::Encoding::type> { - using Parquetenum = Encoding; +struct ThriftEnumTypeTraits<::facebook::velox::parquet::thrift::Encoding> { + using ParquetEnum = Encoding; }; template <> -struct ThriftenumTypeTraits< - ::facebook::velox::parquet::thrift::PageType::type> { - using Parquetenum = PageType; +struct ThriftEnumTypeTraits<::facebook::velox::parquet::thrift::PageType> { + using ParquetEnum = PageType; }; template <> -struct ThriftenumTypeTraits< - ::facebook::velox::parquet::thrift::BoundaryOrder::type> { - using Parquetenum = BoundaryOrder; +struct ThriftEnumTypeTraits<::facebook::velox::parquet::thrift::BoundaryOrder> { + using ParquetEnum = BoundaryOrder; }; // If the parquet file is corrupted it is possible the enum value decoded. @@ -155,7 +147,7 @@ struct ThriftenumTypeTraits< template < typename enumType, typename enumTypeRaw = typename std::underlying_type::type> -inline static enumTypeRaw loadenumRaw(const enumType* in) { +inline static enumTypeRaw loadEnumRaw(const enumType* in) { enumTypeRaw rawValue; // Use memcpy(), as a regular cast would be undefined behaviour on invalid // values. 
@@ -165,34 +157,34 @@ inline static enumTypeRaw loadenumRaw(const enumType* in) { template struct SafeLoader { - using ApiTypeenum = typename ApiType::type; - using ApiTypeRawenum = typename std::underlying_type::type; + using ApiTypeEnum = typename ApiType::type; + using ApiTypeRawEnum = typename std::underlying_type::type; template - inline static ApiTypeRawenum loadRaw(const ThriftType* in) { + inline static ApiTypeRawEnum loadRaw(const ThriftType* in) { static_assert( - sizeof(ApiTypeenum) == sizeof(ThriftType), + sizeof(ApiTypeEnum) == sizeof(ThriftType), "parquet type should always be the same size as thrift type"); - return static_cast(loadenumRaw(in)); + return static_cast(loadEnumRaw(in)); } template - inline static ApiTypeenum loadChecked( + inline static ApiTypeEnum loadChecked( const typename std::enable_if::type* in) { auto rawValue = loadRaw(in); if (ARROW_PREDICT_FALSE( - rawValue >= static_cast(ApiType::kUndefined))) { + rawValue >= static_cast(ApiType::kUndefined))) { return ApiType::kUndefined; } return fromThriftUnsafe(static_cast(rawValue)); } template - inline static ApiTypeenum loadChecked( + inline static ApiTypeEnum loadChecked( const typename std::enable_if::type* in) { auto rawValue = loadRaw(in); if (ARROW_PREDICT_FALSE( - rawValue >= static_cast(ApiType::kUndefined) || + rawValue >= static_cast(ApiType::kUndefined) || rawValue < 0)) { return ApiType::kUndefined; } @@ -200,8 +192,8 @@ struct SafeLoader { } template - inline static ApiTypeenum load(const ThriftType* in) { - return loadChecked::value>(in); + inline static ApiTypeEnum load(const ThriftType* in) { + return loadChecked::value>(in); } }; @@ -211,15 +203,15 @@ struct SafeLoader { template < typename ThriftType, - typename Parquetenum = - typename internal::ThriftenumTypeTraits::Parquetenum> -inline typename Parquetenum::type loadenumSafe(const ThriftType* in) { - return internal::SafeLoader::load(in); + typename ParquetEnum = + typename 
internal::ThriftEnumTypeTraits::ParquetEnum> +inline typename ParquetEnum::type loadEnumSafe(const ThriftType* in) { + return internal::SafeLoader::load(in); } -inline typename Compression::type loadenumSafe( - const facebook::velox::parquet::thrift::CompressionCodec::type* in) { - const auto rawValue = internal::loadenumRaw(in); +inline typename Compression::type loadEnumSafe( + const facebook::velox::parquet::thrift::CompressionCodec* in) { + const auto rawValue = internal::loadEnumRaw(in); // Check bounds manually, as Compression::type doesn't have the same values // as facebook::velox::parquet::thrift::CompressionCodec. const auto minValue = static_cast( @@ -237,50 +229,49 @@ inline typename Compression::type loadenumSafe( static inline AadMetadata fromThrift( facebook::velox::parquet::thrift::AesGcmV1 aesGcmV1) { return AadMetadata{ - aesGcmV1.aad_prefix, - aesGcmV1.aad_file_unique, - aesGcmV1.supply_aad_prefix}; + *aesGcmV1.aad_prefix(), + *aesGcmV1.aad_file_unique(), + *aesGcmV1.supply_aad_prefix()}; } static inline AadMetadata fromThrift( facebook::velox::parquet::thrift::AesGcmCtrV1 aesGcmCtrV1) { return AadMetadata{ - aesGcmCtrV1.aad_prefix, - aesGcmCtrV1.aad_file_unique, - aesGcmCtrV1.supply_aad_prefix}; + *aesGcmCtrV1.aad_prefix(), + *aesGcmCtrV1.aad_file_unique(), + *aesGcmCtrV1.supply_aad_prefix()}; } static inline EncryptionAlgorithm fromThrift( facebook::velox::parquet::thrift::EncryptionAlgorithm encryption) { + using Type = facebook::velox::parquet::thrift::EncryptionAlgorithm; EncryptionAlgorithm encryptionAlgorithm; - - if (encryption.__isset.AES_GCM_V1) { + if (encryption.getType() == Type::Type::AES_GCM_V1) { encryptionAlgorithm.algorithm = ParquetCipher::kAesGcmV1; - encryptionAlgorithm.aad = fromThrift(encryption.AES_GCM_V1); - } else if (encryption.__isset.AES_GCM_CTR_V1) { + encryptionAlgorithm.aad = fromThrift(encryption.get_AES_GCM_V1()); + } else if (encryption.getType() == Type::Type::AES_GCM_CTR_V1) { encryptionAlgorithm.algorithm = 
ParquetCipher::kAesGcmCtrV1; - encryptionAlgorithm.aad = fromThrift(encryption.AES_GCM_CTR_V1); + encryptionAlgorithm.aad = fromThrift(encryption.get_AES_GCM_CTR_V1()); } else { throw ParquetException("Unsupported algorithm"); } return encryptionAlgorithm; } -static inline SortingColumn fromThrift( - facebook::velox::parquet::thrift::SortingColumn thriftSortingColumn) { - SortingColumn sortingColumn; - sortingColumn.columnIdx = thriftSortingColumn.column_idx; - sortingColumn.nullsFirst = thriftSortingColumn.nulls_first; - sortingColumn.descending = thriftSortingColumn.descending; - return sortingColumn; +static inline SortingColumn FromThrift( + facebook::velox::parquet::thrift::SortingColumn thrift_sorting_column) { + SortingColumn sorting_column; + sorting_column.columnIdx = *thrift_sorting_column.column_idx(); + sorting_column.nullsFirst = *thrift_sorting_column.nulls_first(); + sorting_column.descending = *thrift_sorting_column.descending(); + return sorting_column; } // ----------------------------------------------------------------------. // Convert Thrift enums from Parquet enums. -static inline facebook::velox::parquet::thrift::Type::type toThrift( - Type::type type) { - return static_cast(type); +static inline facebook::velox::parquet::thrift::Type toThrift(Type::type type) { + return static_cast(type); } static fmt::underlying_t formatAs( @@ -288,7 +279,7 @@ static fmt::underlying_t formatAs( return fmt::underlying(type); } -static inline facebook::velox::parquet::thrift::ConvertedType::type toThrift( +static inline facebook::velox::parquet::thrift::ConvertedType toThrift( ConvertedType::type type) { // Item 0 is NONE. 
const int typeValue = static_cast(type); @@ -296,22 +287,22 @@ static inline facebook::velox::parquet::thrift::ConvertedType::type toThrift( // it is forbidden to emit "NA" (PARQUET-1990) VELOX_DCHECK_NE(typeValue, static_cast(ConvertedType::kNa)); VELOX_DCHECK_NE(typeValue, static_cast(ConvertedType::kUndefined)); - return static_cast( + return static_cast( typeValue - 1); } -static inline facebook::velox::parquet::thrift::FieldRepetitionType::type -toThrift(Repetition::type type) { - return static_cast< - facebook::velox::parquet::thrift::FieldRepetitionType::type>(type); +static inline facebook::velox::parquet::thrift::FieldRepetitionType toThrift( + Repetition::type type) { + return static_cast( + type); } -static inline facebook::velox::parquet::thrift::Encoding::type toThrift( +static inline facebook::velox::parquet::thrift::Encoding toThrift( Encoding::type type) { - return static_cast(type); + return static_cast(type); } -static inline facebook::velox::parquet::thrift::CompressionCodec::type toThrift( +static inline facebook::velox::parquet::thrift::CompressionCodec toThrift( Compression::type type) { switch (type) { case Compression::UNCOMPRESSED: @@ -337,14 +328,13 @@ static inline facebook::velox::parquet::thrift::CompressionCodec::type toThrift( } } -static inline facebook::velox::parquet::thrift::BoundaryOrder::type toThrift( +static inline facebook::velox::parquet::thrift::BoundaryOrder toThrift( BoundaryOrder::type type) { switch (type) { case BoundaryOrder::kUnordered: case BoundaryOrder::kAscending: case BoundaryOrder::kDescending: - return static_cast( - type); + return static_cast(type); default: VELOX_DCHECK(false, "Cannot reach here"); return facebook::velox::parquet::thrift::BoundaryOrder::UNORDERED; @@ -354,9 +344,9 @@ static inline facebook::velox::parquet::thrift::BoundaryOrder::type toThrift( static inline facebook::velox::parquet::thrift::SortingColumn toThrift( SortingColumn sortingColumn) { 
facebook::velox::parquet::thrift::SortingColumn thriftSortingColumn; - thriftSortingColumn.column_idx = sortingColumn.columnIdx; - thriftSortingColumn.descending = sortingColumn.descending; - thriftSortingColumn.nulls_first = sortingColumn.nullsFirst; + thriftSortingColumn.column_idx() = sortingColumn.columnIdx; + thriftSortingColumn.descending() = sortingColumn.descending; + thriftSortingColumn.nulls_first() = sortingColumn.nullsFirst; return thriftSortingColumn; } @@ -364,26 +354,26 @@ static inline facebook::velox::parquet::thrift::Statistics toThrift( const EncodedStatistics& stats) { facebook::velox::parquet::thrift::Statistics Statistics; if (stats.hasMin) { - Statistics.__set_min_value(stats.min()); + Statistics.min_value() = stats.min(); // If the order is SIGNED, then the old min value must be set too. // This for backward compatibility. if (stats.isSigned()) { - Statistics.__set_min(stats.min()); + Statistics.min() = stats.min(); } } if (stats.hasMax) { - Statistics.__set_max_value(stats.max()); + Statistics.max_value() = stats.max(); // If the order is SIGNED, then the old max value must be set too. // This for backward compatibility. if (stats.isSigned()) { - Statistics.__set_max(stats.max()); + Statistics.max() = stats.max(); } } if (stats.hasNullCount) { - Statistics.__set_null_count(stats.nullCount); + Statistics.null_count() = stats.nullCount; } if (stats.hasDistinctCount) { - Statistics.__set_distinct_count(stats.distinctCount); + Statistics.distinct_count() = stats.distinctCount; } return Statistics; @@ -392,11 +382,11 @@ static inline facebook::velox::parquet::thrift::Statistics toThrift( static inline facebook::velox::parquet::thrift::AesGcmV1 toAesGcmV1Thrift( AadMetadata aad) { facebook::velox::parquet::thrift::AesGcmV1 aesGcmV1; - // Aad_file_unique is always set. 
- aesGcmV1.__set_aad_file_unique(aad.aadFileUnique); - aesGcmV1.__set_supply_aad_prefix(aad.supplyAadPrefix); + // aad_file_unique is always set + aesGcmV1.aad_file_unique() = aad.aadFileUnique; + aesGcmV1.supply_aad_prefix() = aad.supplyAadPrefix; if (!aad.aadPrefix.empty()) { - aesGcmV1.__set_aad_prefix(aad.aadPrefix); + aesGcmV1.aad_prefix() = aad.aadPrefix; } return aesGcmV1; } @@ -404,11 +394,11 @@ static inline facebook::velox::parquet::thrift::AesGcmV1 toAesGcmV1Thrift( static inline facebook::velox::parquet::thrift::AesGcmCtrV1 toAesGcmCtrV1Thrift( AadMetadata aad) { facebook::velox::parquet::thrift::AesGcmCtrV1 aesGcmCtrV1; - // Aad_file_unique is always set. - aesGcmCtrV1.__set_aad_file_unique(aad.aadFileUnique); - aesGcmCtrV1.__set_supply_aad_prefix(aad.supplyAadPrefix); + // aad_file_unique is always set + aesGcmCtrV1.aad_file_unique() = aad.aadFileUnique; + aesGcmCtrV1.supply_aad_prefix() = aad.supplyAadPrefix; if (!aad.aadPrefix.empty()) { - aesGcmCtrV1.__set_aad_prefix(aad.aadPrefix); + aesGcmCtrV1.aad_prefix() = aad.aadPrefix; } return aesGcmCtrV1; } @@ -417,10 +407,9 @@ static inline facebook::velox::parquet::thrift::EncryptionAlgorithm toThrift( EncryptionAlgorithm encryption) { facebook::velox::parquet::thrift::EncryptionAlgorithm encryptionAlgorithm; if (encryption.algorithm == ParquetCipher::kAesGcmV1) { - encryptionAlgorithm.__set_AES_GCM_V1(toAesGcmV1Thrift(encryption.aad)); + encryptionAlgorithm.set_AES_GCM_V1(toAesGcmV1Thrift(encryption.aad)); } else { - encryptionAlgorithm.__set_AES_GCM_CTR_V1( - toAesGcmCtrV1Thrift(encryption.aad)); + encryptionAlgorithm.set_AES_GCM_CTR_V1(toAesGcmCtrV1Thrift(encryption.aad)); } return encryptionAlgorithm; } @@ -428,8 +417,6 @@ static inline facebook::velox::parquet::thrift::EncryptionAlgorithm toThrift( // ----------------------------------------------------------------------. // Thrift struct serialization / deserialization utilities. 
-using ThriftBuffer = apache::thrift::transport::TMemoryBuffer; - class ThriftDeserializer { public: explicit ThriftDeserializer(const ReaderProperties& properties) @@ -474,46 +461,24 @@ class ThriftDeserializer { } } - private: - // On Thrift 0.14.0+, we want to use TConfiguration to raise the max message - // size limit (ARROW-13655). If we wanted to protect against huge messages, - // we could do it ourselves since we know the message size up front. - std::shared_ptr createReadOnlyMemoryBuffer( - uint8_t* buf, - uint32_t len) { -#if PARQUET_THRIFT_VERSION_MAJOR > 0 || PARQUET_THRIFT_VERSION_MINOR >= 14 - auto conf = std::make_shared(); - conf->setMaxMessageSize(std::numeric_limits::max()); - return std::make_shared( - buf, len, ThriftBuffer::OBSERVE, conf); -#else - return std::make_shared(buf, len); -#endif - } - template void deserializeUnencryptedMessage( const uint8_t* buf, uint32_t* len, - T* deserializedMsg) { - // Deserialize msg bytes into c++ thrift msg using memory transport. - auto tmemTransport = - createReadOnlyMemoryBuffer(const_cast(buf), *len); - apache::thrift::protocol::TCompactProtocolFactoryT - tprotoFactory; - // Protect against CPU and memory bombs. - tprotoFactory.setStringSizeLimit(stringSizeLimit_); - tprotoFactory.setContainerSizeLimit(containerSizeLimit_); - auto tproto = tprotoFactory.getProtocol(tmemTransport); + T* deserialized_msg) { + // Protect against CPU and memory bombs + apache::thrift::CompactProtocolReader reader( + stringSizeLimit_, containerSizeLimit_); + // Deserialize msg bytes into C++ Thrift msg using memory transport. 
+ folly::IOBuf buffer(folly::IOBuf::WRAP_BUFFER, folly::ByteRange(buf, *len)); + reader.setInput(&buffer); try { - deserializedMsg->read(tproto.get()); + *len = deserialized_msg->read(&reader); } catch (std::exception& e) { std::stringstream ss; ss << "Couldn't deserialize thrift: " << e.what() << "\n"; throw ParquetException(ss.str()); } - uint32_t bytesLeft = tmemTransport->available_read(); - *len = *len - bytesLeft; } const int32_t stringSizeLimit_; @@ -526,25 +491,25 @@ class ThriftDeserializer { /// valid. To treat it as a string. class ThriftSerializer { public: - explicit ThriftSerializer(int initialBufferSize = 1024) - : memBuffer_(std::make_shared(initialBufferSize)) { - apache::thrift::protocol::TCompactProtocolFactoryT factory; - protocol_ = factory.getProtocol(memBuffer_); + explicit ThriftSerializer() : buffer_(), output_buffer_(), writer_() { + writer_.setOutput(&buffer_); } /// Serialize obj into a memory buffer. The result is returned in buffer/len. /// The memory returned is owned by this object and will be invalid when /// another object is serialized. template - void serializeToBuffer(const T* obj, uint32_t* len, uint8_t** buffer) { - serializeObject(obj); - memBuffer_->getBuffer(buffer, len); + void SerializeToBuffer(const T* obj, uint32_t* len, uint8_t** buffer) { + output_buffer_.clear(); + SerializeToString(obj, output_buffer_); + *buffer = reinterpret_cast(output_buffer_.data()); + *len = output_buffer_.size(); } template - void serializeToString(const T* obj, std::string* result) { + void SerializeToString(const T* obj, std::string& result) { serializeObject(obj); - *result = memBuffer_->getBufferAsString(); + buffer_.appendToString(result); } template @@ -554,7 +519,7 @@ class ThriftSerializer { const std::shared_ptr& Encryptor = NULLPTR) { uint8_t* outBuffer; uint32_t outLength; - serializeToBuffer(obj, &outLength, &outBuffer); + SerializeToBuffer(obj, &outLength, &outBuffer); // Obj is not encrypted. 
if (Encryptor == NULLPTR) { @@ -569,8 +534,8 @@ class ThriftSerializer { template void serializeObject(const T* obj) { try { - memBuffer_->resetBuffer(); - obj->write(protocol_.get()); + buffer_.clearAndTryReuseLargestBuffer(); + obj->write(&writer_); } catch (std::exception& e) { std::stringstream ss; ss << "Couldn't serialize thrift: " << e.what() << "\n"; @@ -595,8 +560,9 @@ class ThriftSerializer { return static_cast(cipherBufferLen); } - std::shared_ptr memBuffer_; - std::shared_ptr protocol_; + folly::IOBufQueue buffer_; + std::string output_buffer_; + apache::thrift::CompactProtocolWriter writer_; }; } // namespace facebook::velox::parquet::arrow diff --git a/velox/dwio/parquet/writer/arrow/Types.cpp b/velox/dwio/parquet/writer/arrow/Types.cpp index 4dc5f465883..4437574634a 100644 --- a/velox/dwio/parquet/writer/arrow/Types.cpp +++ b/velox/dwio/parquet/writer/arrow/Types.cpp @@ -399,60 +399,70 @@ std::shared_ptr LogicalType::fromConvertedType( std::shared_ptr LogicalType::fromThrift( const facebook::velox::parquet::thrift::LogicalType& type) { - if (type.__isset.STRING) { - return StringLogicalType::make(); - } else if (type.__isset.MAP) { - return MapLogicalType::make(); - } else if (type.__isset.LIST) { - return ListLogicalType::make(); - } else if (type.__isset.ENUM) { - return EnumLogicalType::make(); - } else if (type.__isset.DECIMAL) { - return DecimalLogicalType::make(type.DECIMAL.precision, type.DECIMAL.scale); - } else if (type.__isset.DATE) { - return DateLogicalType::make(); - } else if (type.__isset.TIME) { - LogicalType::TimeUnit::Unit unit; - if (type.TIME.unit.__isset.MILLIS) { - unit = LogicalType::TimeUnit::kMillis; - } else if (type.TIME.unit.__isset.MICROS) { - unit = LogicalType::TimeUnit::kMicros; - } else if (type.TIME.unit.__isset.NANOS) { - unit = LogicalType::TimeUnit::kNanos; - } else { - unit = LogicalType::TimeUnit::kUnknown; + using Type = facebook::velox::parquet::thrift::LogicalType::Type; + using TimeUnitType = 
facebook::velox::parquet::thrift::TimeUnit::Type; + switch (type.getType()) { + case Type::STRING: + return StringLogicalType::make(); + case Type::MAP: + return MapLogicalType::make(); + case Type::LIST: + return ListLogicalType::make(); + case Type::ENUM: + return EnumLogicalType::make(); + case Type::DECIMAL: + return DecimalLogicalType::make( + *type.get_DECIMAL().precision(), *type.get_DECIMAL().scale()); + case Type::DATE: + return DateLogicalType::make(); + case Type::TIME: { + const auto& thrift_unit = type.get_TIME().unit(); + LogicalType::TimeUnit::Unit unit; + if (thrift_unit->getType() == TimeUnitType::MILLIS) { + unit = LogicalType::TimeUnit::kMillis; + } else if (thrift_unit->getType() == TimeUnitType::MICROS) { + unit = LogicalType::TimeUnit::kMicros; + } else if (thrift_unit->getType() == TimeUnitType::NANOS) { + unit = LogicalType::TimeUnit::kNanos; + } else { + unit = LogicalType::TimeUnit::kUnknown; + } + return TimeLogicalType::make(*type.get_TIME().isAdjustedToUTC(), unit); } - return TimeLogicalType::make(type.TIME.isAdjustedToUTC, unit); - } else if (type.__isset.TIMESTAMP) { - LogicalType::TimeUnit::Unit unit; - if (type.TIMESTAMP.unit.__isset.MILLIS) { - unit = LogicalType::TimeUnit::kMillis; - } else if (type.TIMESTAMP.unit.__isset.MICROS) { - unit = LogicalType::TimeUnit::kMicros; - } else if (type.TIMESTAMP.unit.__isset.NANOS) { - unit = LogicalType::TimeUnit::kNanos; - } else { - unit = LogicalType::TimeUnit::kUnknown; + case Type::TIMESTAMP: { + const auto& thrift_unit = type.get_TIMESTAMP().unit(); + LogicalType::TimeUnit::Unit unit; + if (thrift_unit->getType() == TimeUnitType::MILLIS) { + unit = LogicalType::TimeUnit::kMillis; + } else if (thrift_unit->getType() == TimeUnitType::MICROS) { + unit = LogicalType::TimeUnit::kMicros; + } else if (thrift_unit->getType() == TimeUnitType::NANOS) { + unit = LogicalType::TimeUnit::kNanos; + } else { + unit = LogicalType::TimeUnit::kUnknown; + } + return TimestampLogicalType::make( + 
*type.get_TIMESTAMP().isAdjustedToUTC(), unit); } - return TimestampLogicalType::make(type.TIMESTAMP.isAdjustedToUTC, unit); // TODO(tpboudreau): activate the commented code after parquet.thrift - // recognizes IntervalType as a LogicalType. - // } else if (type.__isset.INTERVAL) { + // recognizes IntervalType as a LogicalType + // case Type::INTERVAL: // return IntervalLogicalType::make(); - } else if (type.__isset.INTEGER) { - return IntLogicalType::make( - static_cast(type.INTEGER.bitWidth), type.INTEGER.isSigned); - } else if (type.__isset.UNKNOWN) { - return NullLogicalType::make(); - } else if (type.__isset.JSON) { - return JsonLogicalType::make(); - } else if (type.__isset.BSON) { - return BsonLogicalType::make(); - } else if (type.__isset.UUID) { - return UuidLogicalType::make(); - } else { - throw ParquetException( - "Metadata contains Thrift LogicalType that is not recognized"); + case Type::INTEGER: + return IntLogicalType::make( + static_cast(*type.get_INTEGER().bitWidth()), + *type.get_INTEGER().isSigned()); + case Type::UNKNOWN: + return NullLogicalType::make(); + case Type::JSON: + return JsonLogicalType::make(); + case Type::BSON: + return BsonLogicalType::make(); + case Type::UUID: + return UuidLogicalType::make(); + default: + throw ParquetException( + "Metadata contains Thrift LogicalType that is not recognized"); } } @@ -898,7 +908,7 @@ class LogicalType::Impl::Inapplicable : public virtual LogicalType::Impl { facebook::velox::parquet::thrift::LogicalType toThrift() const override { \ facebook::velox::parquet::thrift::LogicalType type; \ facebook::velox::parquet::thrift::t___ subtype; \ - type.__set_##s___(subtype); \ + type.set_##s___(subtype); \ return type; \ } @@ -1093,9 +1103,9 @@ facebook::velox::parquet::thrift::LogicalType LogicalType::Impl::Decimal::toThrift() const { facebook::velox::parquet::thrift::LogicalType type; facebook::velox::parquet::thrift::DecimalType decimalType; - decimalType.__set_precision(precision_); - 
decimalType.__set_scale(scale_); - type.__set_DECIMAL(decimalType); + decimalType.precision() = precision_; + decimalType.scale() = scale_; + type.set_DECIMAL(decimalType); return type; } @@ -1257,17 +1267,17 @@ LogicalType::Impl::Time::toThrift() const { static_cast(LogicalType::TimeUnit::kUnknown)); if (unit_ == LogicalType::TimeUnit::kMillis) { facebook::velox::parquet::thrift::MilliSeconds millis; - timeUnit.__set_MILLIS(millis); + timeUnit.set_MILLIS(millis); } else if (unit_ == LogicalType::TimeUnit::kMicros) { facebook::velox::parquet::thrift::MicroSeconds micros; - timeUnit.__set_MICROS(micros); + timeUnit.set_MICROS(micros); } else if (unit_ == LogicalType::TimeUnit::kNanos) { facebook::velox::parquet::thrift::NanoSeconds nanos; - timeUnit.__set_NANOS(nanos); + timeUnit.set_NANOS(nanos); } - timeType.__set_isAdjustedToUTC(adjusted_); - timeType.__set_unit(timeUnit); - type.__set_TIME(timeType); + timeType.isAdjustedToUTC() = adjusted_; + timeType.unit() = timeUnit; + type.set_TIME(timeType); return type; } @@ -1426,17 +1436,17 @@ LogicalType::Impl::Timestamp::toThrift() const { static_cast(LogicalType::TimeUnit::kUnknown)); if (unit_ == LogicalType::TimeUnit::kMillis) { facebook::velox::parquet::thrift::MilliSeconds millis; - timeUnit.__set_MILLIS(millis); + timeUnit.set_MILLIS(millis); } else if (unit_ == LogicalType::TimeUnit::kMicros) { facebook::velox::parquet::thrift::MicroSeconds micros; - timeUnit.__set_MICROS(micros); + timeUnit.set_MICROS(micros); } else if (unit_ == LogicalType::TimeUnit::kNanos) { facebook::velox::parquet::thrift::NanoSeconds nanos; - timeUnit.__set_NANOS(nanos); + timeUnit.set_NANOS(nanos); } - timestampType.__set_isAdjustedToUTC(adjusted_); - timestampType.__set_unit(timeUnit); - type.__set_TIMESTAMP(timestampType); + timestampType.isAdjustedToUTC() = adjusted_; + timestampType.unit() = timeUnit; + type.set_TIMESTAMP(timestampType); return type; } @@ -1628,9 +1638,9 @@ facebook::velox::parquet::thrift::LogicalType 
LogicalType::Impl::Int::toThrift() facebook::velox::parquet::thrift::LogicalType type; facebook::velox::parquet::thrift::IntType intType; VELOX_DCHECK(width_ == 64 || width_ == 32 || width_ == 16 || width_ == 8); - intType.__set_bitWidth(static_cast(width_)); - intType.__set_isSigned(signed_); - type.__set_INTEGER(intType); + intType.bitWidth() = static_cast(width_); + intType.isSigned() = signed_; + type.set_INTEGER(intType); return type; } diff --git a/velox/dwio/parquet/writer/arrow/Types.h b/velox/dwio/parquet/writer/arrow/Types.h index 0bf8274a542..2116000d153 100644 --- a/velox/dwio/parquet/writer/arrow/Types.h +++ b/velox/dwio/parquet/writer/arrow/Types.h @@ -27,7 +27,7 @@ #include #include -#include "velox/dwio/parquet/thrift/ParquetThriftTypes.h" +#include "velox/dwio/parquet/thrift/ParquetThrift.h" #include "velox/dwio/parquet/writer/arrow/Platform.h" #include "velox/dwio/parquet/writer/arrow/util/Compression.h" diff --git a/velox/dwio/parquet/writer/arrow/tests/BloomFilter.cpp b/velox/dwio/parquet/writer/arrow/tests/BloomFilter.cpp index b60ef1f10ae..e9625cdd078 100644 --- a/velox/dwio/parquet/writer/arrow/tests/BloomFilter.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/BloomFilter.cpp @@ -77,26 +77,34 @@ static constexpr uint32_t kBloomFilterHeaderSizeGuess = 256; static ::arrow::Status validateBloomFilterHeader( const facebook::velox::parquet::thrift::BloomFilterHeader& header) { - if (!header.algorithm.__isset.BLOCK) { + if (header.algorithm()->getType() != + facebook::velox::parquet::thrift::BloomFilterAlgorithm::Type::BLOCK) { return ::arrow::Status::Invalid( - "Unsupported Bloom filter algorithm: ", header.algorithm, "."); + "Unsupported Bloom filter algorithm: ", + header.algorithm()->getType(), + "."); } - if (!header.hash.__isset.XXHASH) { + if (header.hash()->getType() != + facebook::velox::parquet::thrift::BloomFilterHash::Type::XXHASH) { return ::arrow::Status::Invalid( - "Unsupported Bloom filter hash: ", header.hash, "."); + "Unsupported 
Bloom filter hash: ", header.hash()->getType(), "."); } - if (!header.compression.__isset.UNCOMPRESSED) { + if (header.compression()->getType() != + facebook::velox::parquet::thrift::BloomFilterCompression::Type:: + UNCOMPRESSED) { return ::arrow::Status::Invalid( - "Unsupported Bloom filter compression: ", header.compression, "."); + "Unsupported Bloom filter compression: ", + header.compression()->getType(), + "."); } - if (header.numBytes <= 0 || - static_cast(header.numBytes) > + if (*header.numBytes() <= 0 || + static_cast(*header.numBytes()) > BloomFilter::kMaximumBloomFilterBytes) { std::stringstream ss; - ss << "Bloom filter size is incorrect: " << header.numBytes + ss << "Bloom filter size is incorrect: " << *header.numBytes() << ". Must be in range (" << 0 << ", " << BloomFilter::kMaximumBloomFilterBytes << "]."; return ::arrow::Status::Invalid(ss.str()); @@ -134,7 +142,7 @@ BlockSplitBloomFilter BlockSplitBloomFilter::deserialize( } PARQUET_THROW_NOT_OK(validateBloomFilterHeader(header)); - const int32_t bloomFilterSize = header.numBytes; + const int32_t bloomFilterSize = *header.numBytes(); if (bloomFilterSize + headerSize <= headerBuf->size()) { // The bloom filter data is entirely contained in the buffer we just read. // => Just return it. 
@@ -175,21 +183,21 @@ void BlockSplitBloomFilter::writeTo(ArrowOutputStream* sink) const { throw ParquetException( "BloomFilter does not support Algorithm other than BLOCK"); } - header.algorithm.__set_BLOCK( + header.algorithm()->set_BLOCK( facebook::velox::parquet::thrift::SplitBlockAlgorithm()); if (ARROW_PREDICT_FALSE(hashStrategy_ != HashStrategy::XXHASH)) { throw ParquetException( "BloomFilter does not support Hash other than XXHASH"); } - header.hash.__set_XXHASH(facebook::velox::parquet::thrift::XxHash()); + header.hash()->set_XXHASH(facebook::velox::parquet::thrift::XxHash()); if (ARROW_PREDICT_FALSE( compressionStrategy_ != CompressionStrategy::UNCOMPRESSED)) { throw ParquetException( "BloomFilter does not support Compression other than UNCOMPRESSED"); } - header.compression.__set_UNCOMPRESSED( + header.compression()->set_UNCOMPRESSED( facebook::velox::parquet::thrift::Uncompressed()); - header.__set_numBytes(numBytes_); + header.numBytes() = numBytes_; ThriftSerializer serializer; serializer.serialize(&header, sink); diff --git a/velox/dwio/parquet/writer/arrow/tests/ColumnReader.cpp b/velox/dwio/parquet/writer/arrow/tests/ColumnReader.cpp index 916f93b76e4..0a94484419a 100644 --- a/velox/dwio/parquet/writer/arrow/tests/ColumnReader.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/ColumnReader.cpp @@ -212,34 +212,35 @@ namespace { template EncodedStatistics extractStatsFromHeader(const H& header) { EncodedStatistics pageStatistics; - if (!header.__isset.statistics) { + if (!header.statistics()) { return pageStatistics; } - const facebook::velox::parquet::thrift::Statistics& stats = header.statistics; + const facebook::velox::parquet::thrift::Statistics& stats = + *header.statistics(); // Use the new V2 min-max statistics over the former one if it is filled. - if (stats.__isset.max_value || stats.__isset.min_value) { + if (stats.max_value() || stats.min_value()) { // TODO: check if the column_order is TYPE_DEFINED_ORDER. 
- if (stats.__isset.max_value) { - pageStatistics.setMax(stats.max_value); + if (stats.max_value()) { + pageStatistics.setMax(*stats.max_value()); } - if (stats.__isset.min_value) { - pageStatistics.setMin(stats.min_value); + if (stats.min_value()) { + pageStatistics.setMin(*stats.min_value()); } - } else if (stats.__isset.max || stats.__isset.min) { + } else if (stats.max() || stats.min()) { // TODO: check created_by to see if it is corrupted for some types. // TODO: check if the sort_order is SIGNED. - if (stats.__isset.max) { - pageStatistics.setMax(stats.max); + if (stats.max()) { + pageStatistics.setMax(*stats.max()); } - if (stats.__isset.min) { - pageStatistics.setMin(stats.min); + if (stats.min()) { + pageStatistics.setMin(*stats.min()); } } - if (stats.__isset.null_count) { - pageStatistics.setNullCount(stats.null_count); + if (stats.null_count()) { + pageStatistics.setNullCount(*stats.null_count()); } - if (stats.__isset.distinct_count) { - pageStatistics.setDistinctCount(stats.distinct_count); + if (stats.distinct_count()) { + pageStatistics.setDistinctCount(*stats.distinct_count()); } return pageStatistics; } @@ -399,49 +400,52 @@ void SerializedPageReader::updateDecryption( bool SerializedPageReader::shouldSkipPage( EncodedStatistics* dataPageStatistics) { - const PageType::type pageType = loadenumSafe(&currentPageHeader_.type); - if (pageType == PageType::kDataPage) { + const PageType::type page_type = + loadEnumSafe(&currentPageHeader_.type().value()); + if (page_type == PageType::kDataPage) { const facebook::velox::parquet::thrift::DataPageHeader& header = - currentPageHeader_.data_page_header; - checkNumValuesInHeader(header.num_values); + *currentPageHeader_.data_page_header(); + checkNumValuesInHeader(*header.num_values()); *dataPageStatistics = extractStatsFromHeader(header); - seenNumValues_ += header.num_values; + seenNumValues_ += *header.num_values(); if (dataPageFilter_) { const EncodedStatistics* filterStatistics = dataPageStatistics->isSet() ?
dataPageStatistics : nullptr; DataPageStats dataPageStats( - filterStatistics, header.num_values, std::nullopt); + filterStatistics, + *header.num_values(), + /*numRows=*/std::nullopt); if (dataPageFilter_(dataPageStats)) { return true; } } - } else if (pageType == PageType::kDataPageV2) { + } else if (page_type == PageType::kDataPageV2) { const facebook::velox::parquet::thrift::DataPageHeaderV2& header = - currentPageHeader_.data_page_header_v2; - checkNumValuesInHeader(header.num_values); - if (header.num_rows < 0) { + *currentPageHeader_.data_page_header_v2(); + checkNumValuesInHeader(*header.num_values()); + if (*header.num_rows() < 0) { throw ParquetException("Invalid page header (negative number of rows)"); } - if (header.definition_levels_byte_length < 0 || - header.repetition_levels_byte_length < 0) { + if (*header.definition_levels_byte_length() < 0 || + *header.repetition_levels_byte_length() < 0) { throw ParquetException( "Invalid page header (negative levels byte length)"); } *dataPageStatistics = extractStatsFromHeader(header); - seenNumValues_ += header.num_values; + seenNumValues_ += *header.num_values(); if (dataPageFilter_) { const EncodedStatistics* filterStatistics = dataPageStatistics->isSet() ? dataPageStatistics : nullptr; DataPageStats dataPageStats( - filterStatistics, header.num_values, header.num_rows); + filterStatistics, *header.num_values(), *header.num_rows()); if (dataPageFilter_(dataPageStats)) { return true; } } - } else if (pageType == PageType::kDictionaryPage) { - const facebook::velox::parquet::thrift::DictionaryPageHeader& dictHeader = - currentPageHeader_.dictionary_page_header; - checkNumValuesInHeader(dictHeader.num_values); + } else if (page_type == PageType::kDictionaryPage) { + const facebook::velox::parquet::thrift::DictionaryPageHeader& dict_header = + *currentPageHeader_.dictionary_page_header(); + checkNumValuesInHeader(*dict_header.num_values()); } else { // We don't know what this page type is. 
We're allowed to skip non-data. // Pages. @@ -500,8 +504,8 @@ std::shared_ptr SerializedPageReader::nextPage() { // Advance the stream offset. PARQUET_THROW_NOT_OK(stream_->Advance(headerSize)); - int compressedLen = currentPageHeader_.compressed_page_size; - int uncompressedLen = currentPageHeader_.uncompressed_page_size; + int compressedLen = *currentPageHeader_.compressed_page_size(); + int uncompressedLen = *currentPageHeader_.uncompressed_page_size(); if (compressedLen < 0 || uncompressedLen < 0) { throw ParquetException("Invalid page header"); } @@ -526,14 +530,15 @@ std::shared_ptr SerializedPageReader::nextPage() { ParquetException::eofException(ss.str()); } - const PageType::type pageType = loadenumSafe(&currentPageHeader_.type); + const PageType::type pageType = + loadEnumSafe(&currentPageHeader_.type().value()); - if (properties_.pageChecksumVerification() && - currentPageHeader_.__isset.crc && pageCanUseChecksum(pageType)) { + if (properties_.pageChecksumVerification() && currentPageHeader_.crc() && + pageCanUseChecksum(pageType)) { // Verify crc. uint32_t checksum = ::arrow::internal::crc32( /* prev */ 0, pageBuffer->data(), compressedLen); - if (static_cast(checksum) != currentPageHeader_.crc) { + if (static_cast(checksum) != *currentPageHeader_.crc()) { throw ParquetException( "could not verify page integrity, CRC checksum verification failed for " "page_ordinal " + @@ -554,49 +559,47 @@ std::shared_ptr SerializedPageReader::nextPage() { if (pageType == PageType::kDictionaryPage) { cryptoCtx_.startDecryptWithDictionaryPage = false; - const facebook::velox::parquet::thrift::DictionaryPageHeader& dictHeader = - currentPageHeader_.dictionary_page_header; - bool isSorted = - dictHeader.__isset.is_sorted ?
dictHeader.is_sorted : false; + const facebook::velox::parquet::thrift::DictionaryPageHeader& + dict_header = *currentPageHeader_.dictionary_page_header(); + bool isSorted = dict_header.is_sorted().value_or(false); pageBuffer = decompressIfNeeded( std::move(pageBuffer), compressedLen, uncompressedLen); return std::make_shared( pageBuffer, - dictHeader.num_values, - loadenumSafe(&dictHeader.encoding), + *dict_header.num_values(), + loadEnumSafe(&dict_header.encoding().value()), isSorted); } else if (pageType == PageType::kDataPage) { ++pageOrdinal_; const facebook::velox::parquet::thrift::DataPageHeader& header = - currentPageHeader_.data_page_header; + *currentPageHeader_.data_page_header(); pageBuffer = decompressIfNeeded( std::move(pageBuffer), compressedLen, uncompressedLen); return std::make_shared( pageBuffer, - header.num_values, - loadenumSafe(&header.encoding), - loadenumSafe(&header.definition_level_encoding), - loadenumSafe(&header.repetition_level_encoding), + *header.num_values(), + loadEnumSafe(&header.encoding().value()), + loadEnumSafe(&header.definition_level_encoding().value()), + loadEnumSafe(&header.repetition_level_encoding().value()), uncompressedLen, dataPageStatistics); } else if (pageType == PageType::kDataPageV2) { ++pageOrdinal_; const facebook::velox::parquet::thrift::DataPageHeaderV2& header = - currentPageHeader_.data_page_header_v2; + *currentPageHeader_.data_page_header_v2(); // Arrow prior to 3.0.0 set is_compressed to false but still compressed. bool isCompressed = - (header.__isset.is_compressed ? header.is_compressed : false) || - alwaysCompressed_; + header.is_compressed().value_or(false) || alwaysCompressed_; // Uncompress if needed. 
int levelsByteLen; if (AddWithOverflow( - header.definition_levels_byte_length, - header.repetition_levels_byte_length, + *header.definition_levels_byte_length(), + *header.repetition_levels_byte_length(), &levelsByteLen)) { throw ParquetException("Levels size too large (corrupt file?)"); } @@ -612,18 +615,18 @@ std::shared_ptr SerializedPageReader::nextPage() { return std::make_shared( pageBuffer, - header.num_values, - header.num_nulls, - header.num_rows, - loadenumSafe(&header.encoding), - header.definition_levels_byte_length, - header.repetition_levels_byte_length, + *header.num_values(), + *header.num_nulls(), + *header.num_rows(), + loadEnumSafe(&header.encoding().value()), + *header.definition_levels_byte_length(), + *header.repetition_levels_byte_length(), uncompressedLen, isCompressed, dataPageStatistics); } else { throw ParquetException( - "Internal error, we have already skipped non-data pages in ShouldSkipPage()"); + "Internal error, we have already skipped non-data pages in shouldSkipPage()"); } } return std::shared_ptr(nullptr); diff --git a/velox/dwio/parquet/writer/arrow/tests/FileDeserializeTest.cpp b/velox/dwio/parquet/writer/arrow/tests/FileDeserializeTest.cpp index c0e9f27263b..afc26a9e4b3 100644 --- a/velox/dwio/parquet/writer/arrow/tests/FileDeserializeTest.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/FileDeserializeTest.cpp @@ -51,37 +51,37 @@ using ::arrow::io::BufferReader; template static inline void addDummyStats(int statSize, H& header, bool fillAllStats = false) { + header.statistics().ensure(); + std::vector statBytes(statSize); // Some non-zero value. 
std::fill(statBytes.begin(), statBytes.end(), 1); - header.statistics.__set_max( - std::string(reinterpret_cast(statBytes.data()), statSize)); + header.statistics()->max() = + std::string(reinterpret_cast(statBytes.data()), statSize); if (fillAllStats) { - header.statistics.__set_min( - std::string(reinterpret_cast(statBytes.data()), statSize)); - header.statistics.__set_null_count(42); - header.statistics.__set_distinct_count(1); + header.statistics()->min() = + std::string(reinterpret_cast(statBytes.data()), statSize); + header.statistics()->null_count() = 42; + header.statistics()->distinct_count() = 1; } - - header.__isset.statistics = true; } template static inline void checkStatistics( const H& expected, const EncodedStatistics& actual) { - if (expected.statistics.__isset.max) { - ASSERT_EQ(expected.statistics.max, actual.max()); + if (expected.statistics()->max()) { + ASSERT_EQ(*expected.statistics()->max(), actual.max()); } - if (expected.statistics.__isset.min) { - ASSERT_EQ(expected.statistics.min, actual.min()); + if (expected.statistics()->min()) { + ASSERT_EQ(*expected.statistics()->min(), actual.min()); } - if (expected.statistics.__isset.null_count) { - ASSERT_EQ(expected.statistics.null_count, actual.nullCount); + if (expected.statistics()->null_count()) { + ASSERT_EQ(*expected.statistics()->null_count(), actual.nullCount); } - if (expected.statistics.__isset.distinct_count) { - ASSERT_EQ(expected.statistics.distinct_count, actual.distinctCount); + if (expected.statistics()->distinct_count()) { + ASSERT_EQ(*expected.statistics()->distinct_count(), actual.distinctCount); } } @@ -106,13 +106,15 @@ static std::vector getSupportedCodecTypes() { class TestPageSerde : public ::testing::Test { public: void SetUp() { - dataPageHeader_.encoding = + dataPageHeader_.encoding() = facebook::velox::parquet::thrift::Encoding::PLAIN; - dataPageHeader_.definition_level_encoding = + dataPageHeader_.definition_level_encoding() = 
facebook::velox::parquet::thrift::Encoding::RLE; - dataPageHeader_.repetition_level_encoding = + dataPageHeader_.repetition_level_encoding() = facebook::velox::parquet::thrift::Encoding::RLE; + dataPageHeaderV2_.is_compressed() = false; + resetStream(); } @@ -131,16 +133,16 @@ class TestPageSerde : public ::testing::Test { int32_t uncompressedSize = 0, int32_t compressedSize = 0, std::optional checksum = std::nullopt) { - // Simplifying writing serialized data page headers which may or may not. - // Have meaningful data associated with them. - - // Serialize the Page header. - pageHeader_.__set_data_page_header(dataPageHeader_); - pageHeader_.uncompressed_page_size = uncompressedSize; - pageHeader_.compressed_page_size = compressedSize; - pageHeader_.type = facebook::velox::parquet::thrift::PageType::DATA_PAGE; + // Simplifying writing serialized data page headers which may or may not + // have meaningful data associated with them + + // Serialize the Page header + pageHeader_.data_page_header() = dataPageHeader_; + pageHeader_.uncompressed_page_size() = uncompressedSize; + pageHeader_.compressed_page_size() = compressedSize; + pageHeader_.type() = facebook::velox::parquet::thrift::PageType::DATA_PAGE; if (checksum.has_value()) { - pageHeader_.__set_crc(checksum.value()); + pageHeader_.crc() = checksum.value(); } ThriftSerializer serializer; @@ -152,16 +154,17 @@ class TestPageSerde : public ::testing::Test { int32_t uncompressedSize = 0, int32_t compressedSize = 0, std::optional checksum = std::nullopt) { - // Simplifying writing serialized data page V2 headers which may or may not. - // Have meaningful data associated with them. - - // Serialize the Page header. 
- pageHeader_.__set_data_page_header_v2(dataPageHeaderV2_); - pageHeader_.uncompressed_page_size = uncompressedSize; - pageHeader_.compressed_page_size = compressedSize; - pageHeader_.type = facebook::velox::parquet::thrift::PageType::DATA_PAGE_V2; + // Simplifying writing serialized data page V2 headers which may or may not + // have meaningful data associated with them + + // Serialize the Page header + pageHeader_.data_page_header_v2() = dataPageHeaderV2_; + pageHeader_.uncompressed_page_size() = uncompressedSize; + pageHeader_.compressed_page_size() = compressedSize; + pageHeader_.type() = + facebook::velox::parquet::thrift::PageType::DATA_PAGE_V2; if (checksum.has_value()) { - pageHeader_.__set_crc(checksum.value()); + pageHeader_.crc() = checksum.value(); } ThriftSerializer serializer; @@ -172,13 +175,13 @@ class TestPageSerde : public ::testing::Test { int32_t uncompressedSize = 0, int32_t compressedSize = 0, std::optional checksum = std::nullopt) { - pageHeader_.__set_dictionary_page_header(dictionaryPageHeader_); - pageHeader_.uncompressed_page_size = uncompressedSize; - pageHeader_.compressed_page_size = compressedSize; - pageHeader_.type = + pageHeader_.dictionary_page_header() = dictionaryPageHeader_; + pageHeader_.uncompressed_page_size() = uncompressedSize; + pageHeader_.compressed_page_size() = compressedSize; + pageHeader_.type() = facebook::velox::parquet::thrift::PageType::DICTIONARY_PAGE; if (checksum.has_value()) { - pageHeader_.__set_crc(checksum.value()); + pageHeader_.crc() = checksum.value(); } ThriftSerializer serializer; @@ -188,10 +191,10 @@ class TestPageSerde : public ::testing::Test { void writeIndexPageHeader( int32_t uncompressedSize = 0, int32_t compressedSize = 0) { - pageHeader_.__set_index_page_header(indexPageHeader_); - pageHeader_.uncompressed_page_size = uncompressedSize; - pageHeader_.compressed_page_size = compressedSize; - pageHeader_.type = facebook::velox::parquet::thrift::PageType::INDEX_PAGE; + 
pageHeader_.index_page_header() = indexPageHeader_; + pageHeader_.uncompressed_page_size() = uncompressedSize; + pageHeader_.compressed_page_size() = compressedSize; + pageHeader_.type() = facebook::velox::parquet::thrift::PageType::INDEX_PAGE; ThriftSerializer serializer; ASSERT_NO_THROW(serializer.serialize(&pageHeader_, outStream_.get())); @@ -236,11 +239,11 @@ void TestPageSerde::testPageSerdeCrc( codecTypes.push_back(Compression::UNCOMPRESSED); const int32_t numRows = 32; // dummy value if (writeDataPageV2) { - dataPageHeaderV2_.num_values = numRows; + dataPageHeaderV2_.num_values() = numRows; } else { - dataPageHeader_.num_values = numRows; + dataPageHeader_.num_values() = numRows; } - dictionaryPageHeader_.num_values = numRows; + dictionaryPageHeader_.num_values() = numRows; const int numPages = 10; @@ -284,6 +287,7 @@ void TestPageSerde::testPageSerdeCrc( dataSize, static_cast(actualSize), checksumOpt)); } else { if (writeDataPageV2) { + dataPageHeaderV2_.is_compressed() = (Codec != nullptr); ASSERT_NO_FATAL_FAILURE(writeDataPageHeaderV2( 1024, dataSize, static_cast(actualSize), checksumOpt)); } else { @@ -336,14 +340,21 @@ void checkDataPageHeader( const Page* page) { ASSERT_EQ(PageType::kDataPage, page->type()); - const DataPageV1* dataPage = static_cast(page); - ASSERT_EQ(expected.num_values, dataPage->numValues()); - ASSERT_EQ(expected.encoding, dataPage->encoding()); + const DataPageV1* data_page = static_cast(page); + ASSERT_EQ(*expected.num_values(), data_page->numValues()); ASSERT_EQ( - expected.definition_level_encoding, dataPage->definitionLevelEncoding()); + *expected.encoding(), + static_cast( + data_page->encoding())); ASSERT_EQ( - expected.repetition_level_encoding, dataPage->repetitionLevelEncoding()); - checkStatistics(expected, dataPage->statistics()); + *expected.definition_level_encoding(), + static_cast( + data_page->definitionLevelEncoding())); + ASSERT_EQ( + *expected.repetition_level_encoding(), + static_cast( + 
data_page->repetitionLevelEncoding())); + checkStatistics(expected, data_page->statistics()); } // Overload for DataPageV2 tests. @@ -352,26 +363,29 @@ void checkDataPageHeader( const Page* page) { ASSERT_EQ(PageType::kDataPageV2, page->type()); - const DataPageV2* dataPage = static_cast(page); - ASSERT_EQ(expected.num_values, dataPage->numValues()); - ASSERT_EQ(expected.num_nulls, dataPage->numNulls()); - ASSERT_EQ(expected.num_rows, dataPage->numRows()); - ASSERT_EQ(expected.encoding, dataPage->encoding()); + const DataPageV2* data_page = static_cast(page); + ASSERT_EQ(*expected.num_values(), data_page->numValues()); + ASSERT_EQ(*expected.num_nulls(), data_page->numNulls()); + ASSERT_EQ(*expected.num_rows(), data_page->numRows()); + ASSERT_EQ( + *expected.encoding(), + static_cast( + data_page->encoding())); ASSERT_EQ( - expected.definition_levels_byte_length, - dataPage->definitionLevelsByteLength()); + *expected.definition_levels_byte_length(), + data_page->definitionLevelsByteLength()); ASSERT_EQ( - expected.repetition_levels_byte_length, - dataPage->repetitionLevelsByteLength()); - ASSERT_EQ(expected.is_compressed, dataPage->isCompressed()); - checkStatistics(expected, dataPage->statistics()); + *expected.repetition_levels_byte_length(), + data_page->repetitionLevelsByteLength()); + ASSERT_EQ(*expected.is_compressed(), data_page->isCompressed()); + checkStatistics(expected, data_page->statistics()); } TEST_F(TestPageSerde, DataPageV1) { int statsSize = 512; const int32_t numRows = 4444; - addDummyStats(statsSize, dataPageHeader_, /*fill_all_stats=*/true); - dataPageHeader_.num_values = numRows; + addDummyStats(statsSize, dataPageHeader_, /*fillAllStats=*/true); + dataPageHeader_.num_values() = numRows; ASSERT_NO_FATAL_FAILURE(writeDataPageHeader()); initSerializedPageReader(numRows); @@ -404,14 +418,14 @@ void PageFilterTest< int32_t numRows = i + 100; totalRows_ += numRows; int dataSize = i + 1024; - this->dataPageHeader_.__set_num_values(numRows); - 
this->dataPageHeader_.statistics.__set_min_value("A" + std::to_string(i)); - this->dataPageHeader_.statistics.__set_max_value("Z" + std::to_string(i)); - this->dataPageHeader_.statistics.__set_null_count(0); - this->dataPageHeader_.statistics.__set_distinct_count(numRows); - this->dataPageHeader_.__isset.statistics = true; - ASSERT_NO_FATAL_FAILURE( - this->writeDataPageHeader(1024, dataSize, dataSize)); + this->dataPageHeader_.num_values() = numRows; + this->dataPageHeader_.statistics().ensure(); + this->dataPageHeader_.statistics()->min_value() = "A" + std::to_string(i); + this->dataPageHeader_.statistics()->max_value() = "Z" + std::to_string(i); + this->dataPageHeader_.statistics()->null_count() = 0; + this->dataPageHeader_.statistics()->distinct_count() = numRows; + ASSERT_NO_FATAL_FAILURE(this->writeDataPageHeader( + /*maxSerializedLen=*/1024, dataSize, dataSize)); dataPageHeaders_.push_back(this->dataPageHeader_); // Also write data, to make sure we skip the data correctly. std::vector fauxData(dataSize); @@ -428,15 +442,15 @@ void PageFilterTest< int32_t numRows = i + 100; totalRows_ += numRows; int dataSize = i + 1024; - this->dataPageHeaderV2_.__set_num_values(numRows); - this->dataPageHeaderV2_.__set_num_rows(numRows); - this->dataPageHeaderV2_.statistics.__set_min_value("A" + std::to_string(i)); - this->dataPageHeaderV2_.statistics.__set_max_value("Z" + std::to_string(i)); - this->dataPageHeaderV2_.statistics.__set_null_count(0); - this->dataPageHeaderV2_.statistics.__set_distinct_count(numRows); - this->dataPageHeaderV2_.__isset.statistics = true; - ASSERT_NO_FATAL_FAILURE( - this->writeDataPageHeaderV2(1024, dataSize, dataSize)); + this->dataPageHeaderV2_.num_values() = numRows; + this->dataPageHeaderV2_.num_rows() = numRows; + this->dataPageHeaderV2_.statistics().ensure(); + this->dataPageHeaderV2_.statistics()->min_value() = "A" + std::to_string(i); + this->dataPageHeaderV2_.statistics()->max_value() = "Z" + std::to_string(i); + 
this->dataPageHeaderV2_.statistics()->null_count() = 0; + this->dataPageHeaderV2_.statistics()->distinct_count() = numRows; + ASSERT_NO_FATAL_FAILURE(this->writeDataPageHeaderV2( + /*maxSerializedLen=*/1024, dataSize, dataSize)); dataPageHeaders_.push_back(this->dataPageHeaderV2_); // Also write data, to make sure we skip the data correctly. std::vector fauxData(dataSize); @@ -451,8 +465,9 @@ void PageFilterTest< int32_t numRows = 100; totalRows_ += numRows; int dataSize = 1024; - this->dataPageHeader_.__set_num_values(numRows); - ASSERT_NO_FATAL_FAILURE(this->writeDataPageHeader(1024, dataSize, dataSize)); + this->dataPageHeader_.num_values() = numRows; + ASSERT_NO_FATAL_FAILURE(this->writeDataPageHeader( + /*maxSerializedLen=*/1024, dataSize, dataSize)); dataPageHeaders_.push_back(this->dataPageHeader_); std::vector fauxData(dataSize); ASSERT_OK(this->outStream_->Write(fauxData.data(), dataSize)); @@ -465,10 +480,10 @@ void PageFilterTest:: int32_t numRows = 100; totalRows_ += numRows; int dataSize = 1024; - this->dataPageHeaderV2_.__set_num_values(numRows); - this->dataPageHeaderV2_.__set_num_rows(numRows); - ASSERT_NO_FATAL_FAILURE( - this->writeDataPageHeaderV2(1024, dataSize, dataSize)); + this->dataPageHeaderV2_.num_values() = numRows; + this->dataPageHeaderV2_.num_rows() = numRows; + ASSERT_NO_FATAL_FAILURE(this->writeDataPageHeaderV2( + /*maxSerializedLen=*/1024, dataSize, dataSize)); dataPageHeaders_.push_back(this->dataPageHeaderV2_); std::vector fauxData(dataSize); ASSERT_OK(this->outStream_->Write(fauxData.data(), dataSize)); @@ -488,7 +503,7 @@ void PageFilterTest:: checkNumRows( std::optional numRows, const facebook::velox::parquet::thrift::DataPageHeaderV2& header) { - ASSERT_EQ(*numRows, header.num_rows); + ASSERT_EQ(*numRows, *header.num_rows()); } using DataPageHeaderTypes = ::testing::Types< @@ -555,7 +570,7 @@ TYPED_TEST(PageFilterTest, TestPageFilterCallback) { ASSERT_EQ(readStats[i].min(), EncodedStatistics.min()); 
ASSERT_EQ(readStats[i].nullCount, EncodedStatistics.nullCount); ASSERT_EQ(readStats[i].distinctCount, EncodedStatistics.distinctCount); - ASSERT_EQ(readNumValues[i], this->dataPageHeaders_[i].num_values); + ASSERT_EQ(readNumValues[i], *this->dataPageHeaders_[i].num_values()); this->checkNumRows(readNumRows[i], this->dataPageHeaders_[i]); } ASSERT_EQ(this->pageReader_->nextPage(), nullptr); @@ -689,8 +704,8 @@ TEST_F(TestPageSerde, SkipsNonDataPages) { TEST_F(TestPageSerde, DataPageV2) { int statsSize = 512; const int32_t numRows = 4444; - addDummyStats(statsSize, dataPageHeaderV2_, /*fill_all_stats=*/true); - dataPageHeaderV2_.num_values = numRows; + addDummyStats(statsSize, dataPageHeaderV2_, /*fillAllStats=*/true); + dataPageHeaderV2_.num_values() = numRows; ASSERT_NO_FATAL_FAILURE(writeDataPageHeaderV2()); initSerializedPageReader(numRows); @@ -705,7 +720,7 @@ TEST_F(TestPageSerde, TestLargePageHeaders) { // Any number to verify metadata roundtrip. const int32_t numRows = 4141; - dataPageHeader_.num_values = numRows; + dataPageHeader_.num_values() = numRows; int maxHeaderSize = 512 * 1024; // 512 KB ASSERT_NO_FATAL_FAILURE(writeDataPageHeader(maxHeaderSize)); @@ -750,7 +765,7 @@ void TestPageSerde::testPageCompressionRoundTrip( auto codecTypes = getSupportedCodecTypes(); const int32_t numRows = 32; // dummy value - dataPageHeader_.num_values = numRows; + dataPageHeader_.num_values() = numRows; std::vector> fauxData; int numPages = static_cast(pageSizes.size()); @@ -831,7 +846,7 @@ TEST_F(TestPageSerde, NoCrc) { int statsSize = 512; const int32_t numRows = 4444; addDummyStats(statsSize, dataPageHeader_, true); - dataPageHeader_.num_values = numRows; + dataPageHeader_.num_values() = numRows; ASSERT_NO_FATAL_FAILURE(writeDataPageHeader()); ReaderProperties ReaderProperties; @@ -845,7 +860,7 @@ TEST_F(TestPageSerde, NoCrc) { TEST_F(TestPageSerde, NoCrcDict) { const int32_t numRows = 4444; - dictionaryPageHeader_.num_values = numRows; + 
dictionaryPageHeader_.num_values() = numRows; ASSERT_NO_FATAL_FAILURE(writeDictionaryPageHeader()); ReaderProperties ReaderProperties; diff --git a/velox/dwio/parquet/writer/arrow/tests/MetadataTest.cpp b/velox/dwio/parquet/writer/arrow/tests/MetadataTest.cpp index 5a7ac085887..61eb989bd4b 100644 --- a/velox/dwio/parquet/writer/arrow/tests/MetadataTest.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/MetadataTest.cpp @@ -290,8 +290,8 @@ TEST(Metadata, TestBuildAccess) { ASSERT_EQ(2, rg2Column1->encodingStats().size()); ASSERT_EQ(3, rg2Column2->encodingStats().size()); - // Test FileMetaData::set_file_path. - ASSERT_TRUE(rg2Column1->filePath().empty()); + // Test FileMetaData::set_file_path + ASSERT_FALSE(rg2Column1->has_file_path()); fAccessors[loopIndex]->setFilePath("/foo/bar/bar.parquet"); ASSERT_EQ("/foo/bar/bar.parquet", rg2Column1->filePath()); } diff --git a/velox/dwio/parquet/writer/arrow/tests/PageIndexTest.cpp b/velox/dwio/parquet/writer/arrow/tests/PageIndexTest.cpp index 1d4797bb4dd..3ee2382a63f 100644 --- a/velox/dwio/parquet/writer/arrow/tests/PageIndexTest.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/PageIndexTest.cpp @@ -34,44 +34,45 @@ struct PageIndexRanges { using RowGroupRanges = std::vector; -/// Creates an FileMetaData object w/ single row group based on data in. -/// 'Row_group_ranges'. It sets the offsets and sizes of the column index and. -/// Offset index members of the row group. It doesn't set the member if the. -/// Input value is -1. -std::shared_ptr constructFakeMetaData( - const RowGroupRanges& rowGroupRanges) { - facebook::velox::parquet::thrift::RowGroup rowGroup; - for (auto& pageIndexRanges : rowGroupRanges) { - facebook::velox::parquet::thrift::ColumnChunk colChunk; - if (pageIndexRanges.columnIndexOffset != -1) { - colChunk.__set_column_index_offset(pageIndexRanges.columnIndexOffset); +/// Creates an FileMetaData object w/ single row group based on data in +/// 'row_group_ranges'. 
It sets the offsets and sizes of the column index and +/// offset index members of the row group. It doesn't set the member if the +/// input value is -1. +std::shared_ptr ConstructFakeMetaData( + const RowGroupRanges& row_group_ranges) { + facebook::velox::parquet::thrift::RowGroup row_group; + for (auto& page_index_ranges : row_group_ranges) { + facebook::velox::parquet::thrift::ColumnChunk col_chunk; + if (page_index_ranges.columnIndexOffset != -1) { + col_chunk.column_index_offset() = page_index_ranges.columnIndexOffset; } - if (pageIndexRanges.columnIndexLength != -1) { - colChunk.__set_column_index_length( - static_cast(pageIndexRanges.columnIndexLength)); + if (page_index_ranges.columnIndexLength != -1) { + col_chunk.column_index_length() = + static_cast(page_index_ranges.columnIndexLength); } - if (pageIndexRanges.offsetIndexOffset != -1) { - colChunk.__set_offset_index_offset(pageIndexRanges.offsetIndexOffset); + if (page_index_ranges.offsetIndexOffset != -1) { + col_chunk.offset_index_offset() = page_index_ranges.offsetIndexOffset; } - if (pageIndexRanges.offsetIndexLength != -1) { - colChunk.__set_offset_index_length( - static_cast(pageIndexRanges.offsetIndexLength)); + if (page_index_ranges.offsetIndexLength != -1) { + col_chunk.offset_index_length() = + static_cast(page_index_ranges.offsetIndexLength); } - rowGroup.columns.push_back(colChunk); + col_chunk.meta_data().ensure(); + row_group.columns()->push_back(col_chunk); } facebook::velox::parquet::thrift::FileMetaData metadata; - metadata.row_groups.push_back(rowGroup); + metadata.row_groups()->push_back(row_group); - metadata.schema.emplace_back(); + metadata.schema()->emplace_back(); schema::NodeVector fields; - for (size_t i = 0; i < rowGroupRanges.size(); ++i) { + for (size_t i = 0; i < row_group_ranges.size(); ++i) { fields.push_back(schema::int64(std::to_string(i))); - metadata.schema.emplace_back(); - fields.back()->toParquet(&metadata.schema.back()); + metadata.schema()->emplace_back(); + 
fields.back()->toParquet(&metadata.schema()->back()); } schema::GroupNode::make("schema", Repetition::kRepeated, fields) - ->toParquet(&metadata.schema.front()); + ->toParquet(&metadata.schema()->front()); auto sink = createOutputStream(); ThriftSerializer{}.serialize(&metadata, sink.get()); @@ -92,7 +93,7 @@ void validatePageIndexRange( int expectedCiSize, int expectedOiStart, int expectedOiSize) { - auto fileMetadata = constructFakeMetaData(rowGroupRanges); + auto fileMetadata = ConstructFakeMetaData(rowGroupRanges); auto readRange = PageIndexReader::determinePageIndexRangesInRowGroup( *fileMetadata->rowGroup(0), columnIndices); ASSERT_EQ(expectedHasColumnIndex, readRange.columnIndex.has_value()); diff --git a/velox/dwio/parquet/writer/arrow/tests/SchemaTest.cpp b/velox/dwio/parquet/writer/arrow/tests/SchemaTest.cpp index fb22fc3d681..5cdf77b12ec 100644 --- a/velox/dwio/parquet/writer/arrow/tests/SchemaTest.cpp +++ b/velox/dwio/parquet/writer/arrow/tests/SchemaTest.cpp @@ -44,32 +44,31 @@ namespace schema { static inline SchemaElement newPrimitive( const std::string& name, - FieldRepetitionType::type repetition, + FieldRepetitionType repetition, Type::type type, int fieldId = -1) { SchemaElement result; - result.__set_name(name); - result.__set_repetition_type(repetition); - result.__set_type( - static_cast(type)); + result.name() = name; + result.repetition_type() = repetition; + result.type() = static_cast(type); if (fieldId >= 0) { - result.__set_field_id(fieldId); + result.field_id() = fieldId; } return result; } static inline SchemaElement newGroup( const std::string& name, - FieldRepetitionType::type repetition, + FieldRepetitionType repetition, int numChildren, int fieldId = -1) { SchemaElement result; - result.__set_name(name); - result.__set_repetition_type(repetition); - result.__set_num_children(numChildren); + result.name() = name; + result.repetition_type() = repetition; + result.num_children() = numChildren; if (fieldId >= 0) { - 
result.__set_field_id(fieldId); + result.field_id() = fieldId; } return result; @@ -200,8 +199,7 @@ TEST_F(TestPrimitiveNode, fromParquet) { // Test a logical type. elt = newPrimitive( name_, FieldRepetitionType::REQUIRED, Type::kByteArray, fieldId_); - elt.__set_converted_type( - facebook::velox::parquet::thrift::ConvertedType::UTF8); + elt.converted_type() = facebook::velox::parquet::thrift::ConvertedType::UTF8; ASSERT_NO_FATAL_FAILURE(convert(&elt)); ASSERT_EQ(Repetition::kRequired, primNode_->repetition()); @@ -211,7 +209,7 @@ TEST_F(TestPrimitiveNode, fromParquet) { // FIXED_LEN_BYTE_ARRAY. elt = newPrimitive( name_, FieldRepetitionType::OPTIONAL, Type::kFixedLenByteArray, fieldId_); - elt.__set_type_length(16); + elt.type_length() = 16; ASSERT_NO_FATAL_FAILURE(convert(&elt)); ASSERT_EQ(name_, primNode_->name()); @@ -223,11 +221,11 @@ TEST_F(TestPrimitiveNode, fromParquet) { // Facebook::velox::parquet::thrift::ConvertedType::Decimal. elt = newPrimitive( name_, FieldRepetitionType::OPTIONAL, Type::kFixedLenByteArray, fieldId_); - elt.__set_converted_type( - facebook::velox::parquet::thrift::ConvertedType::DECIMAL); - elt.__set_type_length(6); - elt.__set_scale(2); - elt.__set_precision(12); + elt.converted_type() = + facebook::velox::parquet::thrift::ConvertedType::DECIMAL; + elt.type_length() = 6; + elt.scale() = 2; + elt.precision() = 12; ASSERT_NO_FATAL_FAILURE(convert(&elt)); ASSERT_EQ(Type::kFixedLenByteArray, primNode_->physicalType()); @@ -589,8 +587,7 @@ TEST_F(TestSchemaConverter, NestedExample) { // 3-Level list encoding, by hand. 
elt = newGroup("b", FieldRepetitionType::REPEATED, 1, 3); - elt.__set_converted_type( - facebook::velox::parquet::thrift::ConvertedType::LIST); + elt.converted_type() = facebook::velox::parquet::thrift::ConvertedType::LIST; elements.push_back(elt); elements.push_back( newPrimitive("item", FieldRepetitionType::OPTIONAL, Type::kInt64, 4)); @@ -691,9 +688,9 @@ TEST_F(TestSchemaFlatten, DecimalMetadata) { NodePtr group = GroupNode::make( "group", Repetition::kRepeated, {Node}, ConvertedType::kList); flatten(reinterpret_cast(group.get())); - ASSERT_EQ("decimal", elements_[1].name); - ASSERT_TRUE(elements_[1].__isset.precision); - ASSERT_TRUE(elements_[1].__isset.scale); + ASSERT_EQ("decimal", *elements_[1].name()); + ASSERT_TRUE(elements_[1].precision().has_value()); + ASSERT_TRUE(elements_[1].scale().has_value()); elements_.clear(); // ... Including those created with new logical types. @@ -706,18 +703,18 @@ TEST_F(TestSchemaFlatten, DecimalMetadata) { group = GroupNode::make( "group", Repetition::kRepeated, {Node}, ListLogicalType::make()); flatten(reinterpret_cast(group.get())); - ASSERT_EQ("decimal", elements_[1].name); - ASSERT_TRUE(elements_[1].__isset.precision); - ASSERT_TRUE(elements_[1].__isset.scale); + ASSERT_EQ("decimal", *elements_[1].name()); + ASSERT_TRUE(elements_[1].precision().has_value()); + ASSERT_TRUE(elements_[1].scale().has_value()); elements_.clear(); // Not for integers with no logical type. group = GroupNode::make( "group", Repetition::kRepeated, {int64("int64")}, ConvertedType::kList); flatten(reinterpret_cast(group.get())); - ASSERT_EQ("int64", elements_[1].name); - ASSERT_FALSE(elements_[0].__isset.precision); - ASSERT_FALSE(elements_[0].__isset.scale); + ASSERT_EQ("int64", *elements_[1].name()); + ASSERT_FALSE(elements_[0].precision().has_value()); + ASSERT_FALSE(elements_[0].scale().has_value()); } TEST_F(TestSchemaFlatten, NestedExample) { @@ -734,12 +731,11 @@ TEST_F(TestSchemaFlatten, NestedExample) { // 3-Level list encoding, by hand. 
elt = newGroup("b", FieldRepetitionType::REPEATED, 1, 3); - elt.__set_converted_type( - facebook::velox::parquet::thrift::ConvertedType::LIST); + elt.converted_type() = facebook::velox::parquet::thrift::ConvertedType::LIST; facebook::velox::parquet::thrift::ListType ls; facebook::velox::parquet::thrift::LogicalType lt; - lt.__set_LIST(ls); - elt.__set_logicalType(lt); + lt.set_LIST(ls); + elt.logicalType() = lt; elements.push_back(elt); elements.push_back( newPrimitive("item", FieldRepetitionType::OPTIONAL, Type::kInt64, 4)); @@ -1062,7 +1058,7 @@ TEST(TestSchemaPrinter, Examples) { Repetition::kRequired, DecimalLogicalType::make(10, 5), Type::kInt64, - -1, + /*primitiveLength=*/-1, 7)); NodePtr schema = @@ -2157,45 +2153,6 @@ TEST(TestSchemaNodeCreation, FactoryExceptions) { Node = GroupNode::make("items", Repetition::kRepeated, {}, empty)); ASSERT_TRUE(Node->logicalType()->isNone()); ASSERT_EQ(Node->convertedType(), ConvertedType::kNone); - - // Invalid ConvertedType in deserialized element ... - Node = PrimitiveNode::make( - "string", - Repetition::kRequired, - StringLogicalType::make(), - Type::kByteArray); - ASSERT_EQ(Node->logicalType()->type(), LogicalType::Type::kString); - ASSERT_TRUE(Node->logicalType()->isValid()); - ASSERT_TRUE(Node->logicalType()->isSerialized()); - facebook::velox::parquet::thrift::SchemaElement stringIntermediary; - Node->toParquet(&stringIntermediary); - // ... Corrupt the Thrift intermediary .... - stringIntermediary.logicalType.__isset.STRING = false; - ASSERT_ANY_THROW(Node = PrimitiveNode::fromParquet(&stringIntermediary)); - - // Invalid TimeUnit in deserialized TimeLogicalType ... - Node = PrimitiveNode::make( - "time", - Repetition::kRequired, - TimeLogicalType::make(true, LogicalType::TimeUnit::kNanos), - Type::kInt64); - facebook::velox::parquet::thrift::SchemaElement timeIntermediary; - Node->toParquet(&timeIntermediary); - // ... Corrupt the Thrift intermediary .... 
- timeIntermediary.logicalType.TIME.unit.__isset.NANOS = false; - ASSERT_ANY_THROW(PrimitiveNode::fromParquet(&timeIntermediary)); - - // Invalid TimeUnit in deserialized TimestampLogicalType ... - Node = PrimitiveNode::make( - "timestamp", - Repetition::kRequired, - TimestampLogicalType::make(true, LogicalType::TimeUnit::kNanos), - Type::kInt64); - facebook::velox::parquet::thrift::SchemaElement timestampIntermediary; - Node->toParquet(&timestampIntermediary); - // ... Corrupt the Thrift intermediary .... - timestampIntermediary.logicalType.TIMESTAMP.unit.__isset.NANOS = false; - ASSERT_ANY_THROW(PrimitiveNode::fromParquet(&timestampIntermediary)); } struct SchemaElementConstructionArguments { @@ -2282,26 +2239,26 @@ class TestSchemaElementConstruction : public ::testing::Test { } void inspect() { - ASSERT_EQ(element_->name, name_); + ASSERT_EQ(*element_->name(), name_); if (expectConvertedType_) { - ASSERT_TRUE(element_->__isset.converted_type) + ASSERT_TRUE(element_->converted_type().has_value()) << node_->logicalType()->toString() << " logical type unexpectedly failed to generate a converted type in the " "Thrift " "intermediate object"; - ASSERT_EQ(element_->converted_type, toThrift(convertedType_)) + ASSERT_EQ(*element_->converted_type(), toThrift(convertedType_)) << node_->logicalType()->toString() << " logical type unexpectedly failed to generate correct converted type in " "the " "Thrift intermediate object"; } else { - ASSERT_FALSE(element_->__isset.converted_type) + ASSERT_FALSE(element_->converted_type().has_value()) << node_->logicalType()->toString() << " logical type unexpectedly generated a converted type in the Thrift " "intermediate object"; } if (expectLogicaltype_) { - ASSERT_TRUE(element_->__isset.logicalType) + ASSERT_TRUE(element_->logicalType().has_value()) << node_->logicalType()->toString() << " logical type unexpectedly failed to genverate a logicalType in the Thrift " "intermediate object"; @@ -2310,7 +2267,7 @@ class TestSchemaElementConstruction 
: public ::testing::Test { << " logical type generated incorrect logicalType " "settings in the Thrift intermediate object"; } else { - ASSERT_FALSE(element_->__isset.logicalType) + ASSERT_FALSE(element_->logicalType().has_value()) << node_->logicalType()->toString() << " logical type unexpectedly generated a logicalType in the Thrift " "intermediate object"; @@ -2351,7 +2308,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { true, ConvertedType::kUtf8, true, - [this]() { return element_->logicalType.__isset.STRING; }}, + [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::STRING; + }}, {"enum", LogicalType::enumType(), Type::kByteArray, @@ -2359,7 +2319,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { true, ConvertedType::kEnum, true, - [this]() { return element_->logicalType.__isset.ENUM; }}, + [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::ENUM; + }}, {"date", LogicalType::date(), Type::kInt32, @@ -2367,7 +2330,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { true, ConvertedType::kDate, true, - [this]() { return element_->logicalType.__isset.DATE; }}, + [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::DATE; + }}, {"interval", LogicalType::interval(), Type::kFixedLenByteArray, @@ -2383,7 +2349,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { false, ConvertedType::kNa, true, - [this]() { return element_->logicalType.__isset.UNKNOWN; }}, + [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::UNKNOWN; + }}, {"json", LogicalType::json(), Type::kByteArray, @@ -2391,7 +2360,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { true, ConvertedType::kJson, true, - [this]() { return element_->logicalType.__isset.JSON; }}, + [this]() { + return element_->logicalType()->getType() == + 
facebook::velox::parquet::thrift::LogicalType::Type::JSON; + }}, {"bson", LogicalType::bson(), Type::kByteArray, @@ -2399,7 +2371,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { true, ConvertedType::kBson, true, - [this]() { return element_->logicalType.__isset.BSON; }}, + [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::BSON; + }}, {"uuid", LogicalType::uuid(), Type::kFixedLenByteArray, @@ -2407,7 +2382,10 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { false, ConvertedType::kNa, true, - [this]() { return element_->logicalType.__isset.UUID; }}, + [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::UUID; + }}, {"none", LogicalType::none(), Type::kInt64, @@ -2458,10 +2436,10 @@ class TestDecimalSchemaElementConstruction void inspect() { TestSchemaElementConstruction::inspect(); - ASSERT_EQ(element_->precision, precision_); - ASSERT_EQ(element_->scale, scale_); - ASSERT_EQ(element_->logicalType.DECIMAL.precision, precision_); - ASSERT_EQ(element_->logicalType.DECIMAL.scale, scale_); + ASSERT_EQ(*element_->precision(), precision_); + ASSERT_EQ(*element_->scale(), scale_); + ASSERT_EQ(*element_->logicalType()->get_DECIMAL().precision(), precision_); + ASSERT_EQ(*element_->logicalType()->get_DECIMAL().scale(), scale_); return; } @@ -2472,7 +2450,8 @@ class TestDecimalSchemaElementConstruction TEST_F(TestDecimalSchemaElementConstruction, DecimalCases) { auto checkDecimal = [this]() { - return element_->logicalType.__isset.DECIMAL; + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::DECIMAL; }; std::vector cases = { @@ -2583,16 +2562,23 @@ template <> void TestTemporalSchemaElementConstruction::inspect< facebook::velox::parquet::thrift::TimeType>() { TestSchemaElementConstruction::inspect(); - ASSERT_EQ(element_->logicalType.TIME.isAdjustedToUTC, adjusted_); + 
ASSERT_EQ(*element_->logicalType()->get_TIME().isAdjustedToUTC(), adjusted_); + using TimeUnitType = facebook::velox::parquet::thrift::TimeUnit::Type; switch (unit_) { case LogicalType::TimeUnit::kMillis: - ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.MILLIS); + ASSERT_TRUE( + element_->logicalType()->get_TIME().unit()->getType() == + TimeUnitType::MILLIS); break; case LogicalType::TimeUnit::kMicros: - ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.MICROS); + ASSERT_TRUE( + element_->logicalType()->get_TIME().unit()->getType() == + TimeUnitType::MICROS); break; case LogicalType::TimeUnit::kNanos: - ASSERT_TRUE(element_->logicalType.TIME.unit.__isset.NANOS); + ASSERT_TRUE( + element_->logicalType()->get_TIME().unit()->getType() == + TimeUnitType::NANOS); break; case LogicalType::TimeUnit::kUnknown: default: @@ -2605,16 +2591,24 @@ template <> void TestTemporalSchemaElementConstruction::inspect< facebook::velox::parquet::thrift::TimestampType>() { TestSchemaElementConstruction::inspect(); - ASSERT_EQ(element_->logicalType.TIMESTAMP.isAdjustedToUTC, adjusted_); + ASSERT_EQ( + *element_->logicalType()->get_TIMESTAMP().isAdjustedToUTC(), adjusted_); + using TimeUnitType = facebook::velox::parquet::thrift::TimeUnit::Type; switch (unit_) { case LogicalType::TimeUnit::kMillis: - ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.MILLIS); + ASSERT_TRUE( + element_->logicalType()->get_TIMESTAMP().unit()->getType() == + TimeUnitType::MILLIS); break; case LogicalType::TimeUnit::kMicros: - ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.MICROS); + ASSERT_TRUE( + element_->logicalType()->get_TIMESTAMP().unit()->getType() == + TimeUnitType::MICROS); break; case LogicalType::TimeUnit::kNanos: - ASSERT_TRUE(element_->logicalType.TIMESTAMP.unit.__isset.NANOS); + ASSERT_TRUE( + element_->logicalType()->get_TIMESTAMP().unit()->getType() == + TimeUnitType::NANOS); break; case LogicalType::TimeUnit::kUnknown: default: @@ -2624,7 +2618,10 @@ void 
TestTemporalSchemaElementConstruction::inspect< } TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) { - auto checkTime = [this]() { return element_->logicalType.__isset.TIME; }; + auto checkTime = [this]() { + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::TIME; + }; std::vector timeCases = { {"time_T_ms", @@ -2683,7 +2680,8 @@ TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) { } auto checkTimestamp = [this]() { - return element_->logicalType.__isset.TIMESTAMP; + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::TIMESTAMP; }; std::vector timestampCases = { @@ -2776,8 +2774,8 @@ class TestIntegerSchemaElementConstruction void inspect() { TestSchemaElementConstruction::inspect(); - ASSERT_EQ(element_->logicalType.INTEGER.bitWidth, width_); - ASSERT_EQ(element_->logicalType.INTEGER.isSigned, signed_); + ASSERT_EQ(*element_->logicalType()->get_INTEGER().bitWidth(), width_); + ASSERT_EQ(*element_->logicalType()->get_INTEGER().isSigned(), signed_); return; } @@ -2788,7 +2786,8 @@ class TestIntegerSchemaElementConstruction TEST_F(TestIntegerSchemaElementConstruction, IntegerCases) { auto checkInteger = [this]() { - return element_->logicalType.__isset.INTEGER; + return element_->logicalType()->getType() == + facebook::velox::parquet::thrift::LogicalType::Type::INTEGER; }; std::vector cases = { @@ -2911,28 +2910,37 @@ TEST(TestLogicalTypeSerialization, SchemaElementNestedCases) { ListLogicalType::make()); std::vector listElements; toParquet(reinterpret_cast(listNode.get()), &listElements); - ASSERT_EQ(listElements[0].name, "list"); - ASSERT_TRUE(listElements[0].__isset.converted_type); - ASSERT_TRUE(listElements[0].__isset.logicalType); - ASSERT_EQ(listElements[0].converted_type, toThrift(ConvertedType::kList)); - ASSERT_TRUE(listElements[0].logicalType.__isset.LIST); - ASSERT_TRUE(listElements[1].logicalType.__isset.STRING); - 
ASSERT_TRUE(listElements[2].logicalType.__isset.DATE); - ASSERT_TRUE(listElements[3].logicalType.__isset.JSON); - ASSERT_TRUE(listElements[4].logicalType.__isset.UUID); - ASSERT_TRUE(listElements[5].logicalType.__isset.TIMESTAMP); - ASSERT_TRUE(listElements[6].logicalType.__isset.INTEGER); - ASSERT_TRUE(listElements[7].logicalType.__isset.DECIMAL); + ASSERT_EQ(*listElements[0].name(), "list"); + ASSERT_TRUE(listElements[0].converted_type().has_value()); + ASSERT_TRUE(listElements[0].logicalType().has_value()); + ASSERT_EQ(*listElements[0].converted_type(), toThrift(ConvertedType::kList)); + using LogicalTypeType = facebook::velox::parquet::thrift::LogicalType::Type; + ASSERT_TRUE( + listElements[0].logicalType()->getType() == LogicalTypeType::LIST); + ASSERT_TRUE( + listElements[1].logicalType()->getType() == LogicalTypeType::STRING); + ASSERT_TRUE( + listElements[2].logicalType()->getType() == LogicalTypeType::DATE); + ASSERT_TRUE( + listElements[3].logicalType()->getType() == LogicalTypeType::JSON); + ASSERT_TRUE( + listElements[4].logicalType()->getType() == LogicalTypeType::UUID); + ASSERT_TRUE( + listElements[5].logicalType()->getType() == LogicalTypeType::TIMESTAMP); + ASSERT_TRUE( + listElements[6].logicalType()->getType() == LogicalTypeType::INTEGER); + ASSERT_TRUE( + listElements[7].logicalType()->getType() == LogicalTypeType::DECIMAL); NodePtr mapNode = GroupNode::make("map", Repetition::kRequired, {}, MapLogicalType::make()); std::vector mapElements; toParquet(reinterpret_cast(mapNode.get()), &mapElements); - ASSERT_EQ(mapElements[0].name, "map"); - ASSERT_TRUE(mapElements[0].__isset.converted_type); - ASSERT_TRUE(mapElements[0].__isset.logicalType); - ASSERT_EQ(mapElements[0].converted_type, toThrift(ConvertedType::kMap)); - ASSERT_TRUE(mapElements[0].logicalType.__isset.MAP); + ASSERT_EQ(*mapElements[0].name(), "map"); + ASSERT_TRUE(mapElements[0].converted_type().has_value()); + ASSERT_TRUE(mapElements[0].logicalType().has_value()); + 
ASSERT_EQ(*mapElements[0].converted_type(), toThrift(ConvertedType::kMap)); + ASSERT_TRUE(mapElements[0].logicalType()->getType() == LogicalTypeType::MAP); } TEST(TestLogicalTypeSerialization, Roundtrips) { diff --git a/velox/exec/benchmarks/CMakeLists.txt b/velox/exec/benchmarks/CMakeLists.txt index 7a721bf91a6..b252868db30 100644 --- a/velox/exec/benchmarks/CMakeLists.txt +++ b/velox/exec/benchmarks/CMakeLists.txt @@ -104,7 +104,6 @@ if(${VELOX_ENABLE_PARQUET}) velox_vector_test_lib Folly::follybenchmark arrow - thrift ) endif() diff --git a/velox/exec/fuzzer/CMakeLists.txt b/velox/exec/fuzzer/CMakeLists.txt index 73b0ccc9d05..8fc7f9b5f1d 100644 --- a/velox/exec/fuzzer/CMakeLists.txt +++ b/velox/exec/fuzzer/CMakeLists.txt @@ -279,7 +279,7 @@ target_link_libraries( velox_vector_fuzzer ) -# LocalRunnerService Library (requires FBThrift support) +# LocalRunnerService Library if(VELOX_ENABLE_REMOTE_FUNCTIONS) add_library(velox_local_runner_service_lib LocalRunnerService.cpp) velox_add_test_headers(velox_local_runner_service_lib LocalRunnerService.h)