# Recurring JMH Benchmarks #11

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
name: Recurring JMH Benchmarks
on:
  # Manual trigger.
  workflow_dispatch:
  schedule:
    # Timed trigger, expressed in POSIX cron syntax and evaluated in UTC
    # (handy reference: https://crontab.guru/).
    # Keep the expression quoted: `*` is a special character in YAML.
    # This one fires every Sunday at 00:00 UTC.
    - cron: "0 0 * * 0"
permissions:
  contents: read
jobs:
  # One matrix job per JMH benchmark class: each job runs the Gradle `jmh` task
  # restricted to that class and then uploads everything found under any
  # `benchmark/` directory.
  run-benchmark:
    # Only runs when the repository owner is `apache`.
    if: github.repository_owner == 'apache'
    runs-on: ubuntu-24.04
    strategy:
      # A failing benchmark does not cancel the other matrix jobs.
      fail-fast: false
      # Equal to the number of matrix combinations below (15 x 1 x 1).
      max-parallel: 15
      matrix:
        # TODO: "IcebergSortCompactionBenchmark" seems to run indefinitely
        benchmark:
          - SparkParquetReadersFlatDataBenchmark
          - SparkParquetReadersNestedDataBenchmark
          - SparkParquetWritersFlatDataBenchmark
          - SparkParquetWritersNestedDataBenchmark
          - IcebergSourceFlatParquetDataFilterBenchmark
          - IcebergSourceFlatParquetDataReadBenchmark
          - IcebergSourceFlatParquetDataWriteBenchmark
          - IcebergSourceNestedListParquetDataWriteBenchmark
          - IcebergSourceNestedParquetDataFilterBenchmark
          - IcebergSourceNestedParquetDataReadBenchmark
          - IcebergSourceNestedParquetDataWriteBenchmark
          - IcebergSourceParquetEqDeleteBenchmark
          - IcebergSourceParquetMultiDeleteFileBenchmark
          - IcebergSourceParquetPosDeleteBenchmark
          - IcebergSourceParquetWithUnrelatedDeleteBenchmark
        # Versions stay quoted so they are read as strings, not floats.
        spark:
          - "4.1"
        scala:
          - "2.13"
    env:
      SPARK_LOCAL_IP: localhost
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
      - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654  # v5.2.0
        with:
          distribution: zulu
          java-version: 17
      - uses: gradle/actions/setup-gradle@0723195856401067f7a2779048b490ace7a47d7c  # v5.0.2
        with:
          # Small job: reuse caches produced by other jobs when available, but
          # never write a cache from here.
          cache-read-only: true
      # Appends one line to /etc/hosts: the `inet` address of eth0, a tab, then
      # the fully qualified and the short host name.
      # NOTE(review): presumably needed so the runner's host name resolves
      # locally for Spark - confirm.
      # The folded scalar below joins back into a single command line.
      - run: >-
          echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)"
          | sudo tee -a /etc/hosts
      # Runs the `jmh` task of the Spark module selected by the matrix, limited
      # to the current benchmark class, writing text and JSON results under
      # `benchmark/`. Folded scalar: the lines are joined with single spaces.
      - name: Run Benchmark
        run: >-
          ./gradlew
          -DsparkVersions=${{ matrix.spark }}
          -DscalaVersion=${{ matrix.scala }}
          :iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala }}:jmh
          -PjmhIncludeRegex=${{ matrix.benchmark }}
          -PjmhOutputPath=benchmark/${{ matrix.benchmark }}.txt
          -PjmhJsonOutputPath=benchmark/${{ matrix.benchmark }}.json
      # `always()` keeps the upload running even when an earlier step failed.
      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
        if: ${{ always() }}
        with:
          name: benchmark-${{ matrix.benchmark }}
          path: |
            **/benchmark/*