Closed pull request: changes from all commits (23 commits)
ab8a43c  fix: Enable CI on IBM repo (zhouyuan, Aug 26, 2025)
f4c4cfa  [6020 ] Spark sql avg agg function support decimal (liujiayi771, Aug 7, 2023)
ed0c7d6  [oap ] Register merge extract companion agg functions without suffix (zhztheplayer, Dec 29, 2023)
35421e9  Stream input row to hash table when addInput for left semi and anti join (liujiayi771, Nov 24, 2024)
5582fc0  [11067] Support scan filter for decimal in ORC (rui-mo, Oct 17, 2025)
b8709a3  refactor: Move toValues from InPredicate.cpp to Filter.h (yingsu00, Mar 15, 2025)
cedfa45  feat(connector): Support reading Iceberg split with equality deletes (yingsu00, May 1, 2024)
645489f  Support insert data into iceberg table. (PingLiuPing, Oct 3, 2025)
5167bb4  Add iceberg partition transforms. (PingLiuPing, Jul 1, 2025)
c1d565b  Add NaN statistics to parquet writer. (PingLiuPing, Sep 4, 2025)
9ba32e4  Collect Iceberg data file statistics in dwio. (PingLiuPing, Jul 28, 2025)
bd44171  Fix incorrect min max stats when the column value are infinity or -in… (PingLiuPing, Aug 26, 2025)
0df46e9  Integrate Iceberg data file statistics and adding unit test. (PingLiuPing, Sep 1, 2025)
a118bf7  Support write field_id to parquet metadata SchemaElement. (PingLiuPing, Sep 5, 2025)
b403c2a  Implement iceberg sort order (PingLiuPing, May 30, 2025)
709cc56  Add clustered Iceberg writer mode. (PingLiuPing, Sep 1, 2025)
8c814c4  Support struct schema evolution matching by name (rui-mo, Mar 18, 2025)
95d5ae8  Allow reading integers into smaller-range types (rui-mo, Sep 18, 2025)
e742228  fix: Fix smj result mismatch issue in semi, anit and full outer join (zhouyuan, Sep 4, 2025)
effa889  adding daily tests (zhouyuan, Jul 3, 2025)
c049ded  fix: remove website folder to bypass the security issues (zhouyuan, Jul 9, 2025)
b6d2f4e  fix merge issue due to 2 PRs refactor (karthikeyann, Oct 31, 2025)
bd35936  Merge branch 'ci-fix-pr' into staging-ec1247316-pr (wanglinsong, Nov 3, 2025)
212 changes: 212 additions & 0 deletions .github/workflows/gluten.yml
@@ -0,0 +1,212 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Gluten Build

on:
pull_request:
paths:
- .github/workflows/gluten.yml
env:
MVN_CMD: mvn -ntp

jobs:

gluten-cpp-build:
name: gluten cpp build
# prevent errors when forks fast-forward their main branch
if: ${{ github.repository == 'IBM/velox' }}
runs-on: ubuntu-22.04
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache"
steps:
- uses: actions/checkout@v4
- name: Get Ccache
uses: actions/cache/restore@v4
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
restore-keys: |
ccache-centos7-release-default
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
BRANCH=$(echo ${GITHUB_REF#refs/heads/})
sed -i 's/oap-project/IBM/g' ep/build-velox/src/get-velox.sh
# sed -i 's/VELOX_BRANCH=2025.*/VELOX_BRANCH=${BRANCH}/g' ep/build-velox/src/get-velox.sh
- name: Build Gluten native libraries
run: |
docker pull apache/gluten:vcpkg-centos-7
docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
git config --global --add safe.directory /work
set -e
df -a
cd /work/gluten
export CCACHE_DIR=/work/.ccache
export CCACHE_SLOPPINESS=file_macro,locale,time_macros
mkdir -p /work/.ccache
ccache -sz
source /opt/rh/devtoolset-11/enable
source /opt/rh/rh-git227/enable
export NUM_THREADS=4
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
--build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --velox_home=/work
ccache -s
mkdir -p /work/.m2/repository/org/apache/arrow/
cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
"
- uses: actions/upload-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./gluten/cpp/build/releases/
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: .m2/repository/org/apache/arrow/
if-no-files-found: error

spark-test-spark32:
needs: gluten-cpp-build
runs-on: ubuntu-22.04
container: apache/gluten:centos-8-jdk8
steps:
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./gluten/cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Build package for Spark 3.2
run: |
cd $GITHUB_WORKSPACE/gluten
export SPARK_SCALA_VERSION=2.12
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean package -Pspark-3.2 -Pbackends-velox -Piceberg -Pdelta -Phudi -DskipTests

spark-test-spark34:
needs: gluten-cpp-build
runs-on: ubuntu-22.04
container: apache/gluten:centos-8-jdk8
steps:
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./gluten/cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare spark.test.home for Spark 3.4.4 (other tests)
run: |
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools==77.0.3 && \
pip3 install pyspark==3.4.4 cython && \
pip3 install pandas==2.2.3 pyarrow==20.0.0
- name: Build and Run unit test for Spark 3.4.4 (other tests)
run: |
cd $GITHUB_WORKSPACE/gluten
export SPARK_SCALA_VERSION=2.12
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l $SPARK_HOME
$MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Pspark-ut \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest \
-DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-test-log
path: |
**/target/*.log
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-golden-files
path: /tmp/tpch-approved-plan/**

spark-test-spark34-slow:
needs: gluten-cpp-build
runs-on: ubuntu-22.04
container: apache/gluten:centos-8-jdk8
steps:
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./gluten/cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Build and Run unit test for Spark 3.4.4 (slow tests)
run: |
cd $GITHUB_WORKSPACE/gluten
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l $SPARK_HOME
$MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Pspark-ut -Phudi \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
-DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-test-log
path: |
**/target/*.log
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*
75 changes: 75 additions & 0 deletions .github/workflows/gluten_daily.yml
@@ -0,0 +1,75 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Gluten Daily Build

on:
push:
branches:
- 'main'

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:

gluten-cpp-build:
name: gluten cpp build
# prevent errors when forks fast-forward their main branch
if: ${{ github.repository == 'IBM/velox' }}
runs-on: ubuntu-22.04
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache"
steps:
- uses: actions/checkout@v4
- name: Get Ccache
uses: actions/cache/restore@v4
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
restore-keys: |
ccache-centos7-release-default
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
BRANCH=$(echo ${GITHUB_REF#refs/heads/})
sed -i 's/oap-project/IBM/g' ep/build-velox/src/get-velox.sh
#sed -i 's/VELOX_BRANCH=2025.*/VELOX_BRANCH=main/g' ep/build-velox/src/get-velox.sh
- name: Build Gluten native libraries
run: |
docker pull apache/gluten:vcpkg-centos-7
docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
git config --global --add safe.directory /work
set -e
df -a
cd /work/gluten
export CCACHE_DIR=/work/.ccache
mkdir -p /work/.ccache
source /opt/rh/devtoolset-11/enable
source /opt/rh/rh-git227/enable
export NUM_THREADS=4
ccache -sz
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
--build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --velox_home=/work
ccache -s
mkdir -p /work/.m2/repository/org/apache/arrow/
cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
"
- name: Save ccache
uses: actions/cache/save@v4
id: ccache
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
15 changes: 0 additions & 15 deletions .github/workflows/linux-build.yml
@@ -15,21 +15,6 @@
name: Linux Build using GCC

on:
push:
branches:
- main
paths:
- velox/**
- '!velox/docs/**'
- CMakeLists.txt
- CMake/**
- scripts/setup-ubuntu.sh
- scripts/setup-common.sh
- scripts/setup-versions.sh
- scripts/setup-helper-functions.sh
- .github/workflows/linux-build.yml
- .github/workflows/linux-build-base.yml

pull_request:
paths:
- velox/**
2 changes: 1 addition & 1 deletion velox/common/memory/tests/SharedArbitratorTest.cpp
@@ -760,7 +760,7 @@ DEBUG_ONLY_TEST_P(
folly::EventCount taskPauseWait;
auto taskPauseWaitKey = taskPauseWait.prepareWait();

const auto fakeAllocationSize = kMemoryCapacity - (32L << 20);
const auto fakeAllocationSize = kMemoryCapacity - (2L << 20);

std::atomic<bool> injectAllocationOnce{true};
fakeOperatorFactory_->setAllocationCallback([&](Operator* op) {
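The only change in this hunk is the headroom the fake allocation leaves below kMemoryCapacity: since `N << 20` is N MiB, the slack shrinks from 32 MiB to 2 MiB. A minimal standalone check of that arithmetic (kMemoryCapacity itself is defined elsewhere in SharedArbitratorTest.cpp and is not reproduced here):

```cpp
#include <cstdint>
#include <iostream>

int main() {
  constexpr int64_t kMiB = 1L << 20;
  // Headroom below kMemoryCapacity before and after this change.
  std::cout << (32L << 20) / kMiB << " MiB\n"; // old: 32 MiB
  std::cout << (2L << 20) / kMiB << " MiB\n";  // new: 2 MiB
  return 0;
}
```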
6 changes: 3 additions & 3 deletions velox/connectors/hive/CMakeLists.txt
@@ -15,8 +15,6 @@
velox_add_library(velox_hive_config OBJECT HiveConfig.cpp)
velox_link_libraries(velox_hive_config velox_core velox_exception)

add_subdirectory(iceberg)

velox_add_library(
velox_hive_connector
OBJECT
@@ -33,9 +31,11 @@ velox_add_library(
TableHandle.cpp
)

add_subdirectory(iceberg)

velox_link_libraries(
velox_hive_connector
PUBLIC velox_hive_iceberg_splitreader
PRIVATE velox_hive_iceberg_splitreader
PRIVATE velox_common_io velox_connector velox_dwio_catalog_fbhive velox_hive_partition_function
)

5 changes: 5 additions & 0 deletions velox/connectors/hive/HiveConfig.cpp
@@ -273,4 +273,9 @@ std::string HiveConfig::schema(const config::ConfigBase* session) const {
kSchema, config_->get<std::string>(kSchema, ""));
}

bool HiveConfig::fanoutEnabled(const config::ConfigBase* session) const {
return session->get<bool>(
kFanoutEnabledSession, config_->get<bool>(kFanoutEnabled, true));
}

} // namespace facebook::velox::connector::hive
9 changes: 9 additions & 0 deletions velox/connectors/hive/HiveConfig.h
@@ -208,6 +208,12 @@ class HiveConfig {
static constexpr const char* kSource = "source";
static constexpr const char* kSchema = "schema";

/// Controls the writer mode: when true (the default), the fanout writer is
/// used; when false, the clustered writer mode is used.
/// Currently applies only to the Iceberg writer.
static constexpr const char* kFanoutEnabled = "fanout-enabled";
static constexpr const char* kFanoutEnabledSession = "fanout_enabled";

InsertExistingPartitionsBehavior insertExistingPartitionsBehavior(
const config::ConfigBase* session) const;

@@ -305,6 +311,9 @@ class HiveConfig {
/// Schema of the query. Used for storage logging.
std::string schema(const config::ConfigBase* session) const;

/// Returns true if the fanout writer mode is enabled.
bool fanoutEnabled(const config::ConfigBase* session) const;

HiveConfig(std::shared_ptr<const config::ConfigBase> config) {
VELOX_CHECK_NOT_NULL(
config, "Config is null for HiveConfig initialization");
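The new kFanoutEnabled / kFanoutEnabledSession pair above follows the usual HiveConfig pattern: the session property, when set, overrides the connector-level property, which itself defaults to true. A minimal usage sketch follows; the string-map construction of config::ConfigBase and building HiveConfig directly (rather than through a connector) are assumptions here, not part of this diff.

```cpp
#include "velox/common/config/Config.h"
#include "velox/connectors/hive/HiveConfig.h"

using namespace facebook::velox;
using connector::hive::HiveConfig;

int main() {
  // NOTE: ConfigBase's string-map constructor is assumed here.
  // Connector-level default: disable fanout, i.e. request the clustered
  // Iceberg writer mode.
  auto connectorConfig = std::make_shared<config::ConfigBase>(
      std::unordered_map<std::string, std::string>{
          {HiveConfig::kFanoutEnabled, "false"}});
  HiveConfig hiveConfig(connectorConfig);

  // A session-level override re-enables fanout for this query only.
  config::ConfigBase session(
      std::unordered_map<std::string, std::string>{
          {HiveConfig::kFanoutEnabledSession, "true"}});
  const bool fanout = hiveConfig.fanoutEnabled(&session); // true: session wins

  // With no session override, the connector-level value applies.
  config::ConfigBase emptySession(
      std::unordered_map<std::string, std::string>{});
  const bool fallback = hiveConfig.fanoutEnabled(&emptySession); // false
  return (fanout && !fallback) ? 0 : 1;
}
```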
9 changes: 9 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.cpp
@@ -24,6 +24,10 @@
#include "velox/expression/ExprConstants.h"
#include "velox/expression/ExprToSubfieldFilter.h"

#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

namespace facebook::velox::connector::hive {
namespace {

@@ -946,4 +950,9 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
}
return expr;
}

std::string makeUuid() {
return boost::lexical_cast<std::string>(boost::uuids::random_generator()());
}

} // namespace facebook::velox::connector::hive
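The new makeUuid() combines boost::uuids::random_generator with boost::lexical_cast, which is why the three Boost headers are added at the top of this file; uuid_io.hpp supplies the stream operator that lexical_cast relies on. A standalone sketch of the same pattern, outside of Velox:

```cpp
#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <iostream>
#include <string>

int main() {
  // random_generator() yields a random (version 4) UUID; lexical_cast formats
  // it via the operator<< declared in uuid_io.hpp.
  const std::string uuid =
      boost::lexical_cast<std::string>(boost::uuids::random_generator()());
  std::cout << uuid << "\n"; // e.g. 3f2504e0-4f89-41d3-9a0c-0305e82c3301
  return 0;
}
```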