Skip to content

Commit aa37736

Browse files
authored
build: fix OOM on standard GitHub runners for Spark SQL tests (#4285)
1 parent 53d65cb commit aa37736

12 files changed

Lines changed: 60 additions & 39 deletions

.github/workflows/codeql.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ permissions:
3636
jobs:
3737
analyze:
3838
name: Analyze Actions
39-
runs-on: ubuntu-latest
39+
runs-on: ubuntu-24.04
4040
permissions:
4141
contents: read
4242
security-events: write

.github/workflows/miri.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ on:
5151
jobs:
5252
miri:
5353
name: "Miri"
54-
runs-on: ubuntu-latest
54+
runs-on: ubuntu-24.04
5555
steps:
5656
- uses: actions/checkout@v6
5757
- name: Install Build Dependencies

.github/workflows/pr_benchmark_check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ env:
4646
jobs:
4747
benchmark-check:
4848
name: Benchmark Compile & Lint Check
49-
runs-on: ubuntu-latest
49+
runs-on: ubuntu-24.04
5050
container:
5151
image: amd64/rust
5252
steps:

.github/workflows/pr_build_linux.yml

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ jobs:
6363
# Fast lint check - gates all other jobs
6464
lint:
6565
name: Lint
66-
runs-on: ubuntu-latest
66+
runs-on: ubuntu-24.04
6767
container:
6868
image: amd64/rust
6969
steps:
@@ -77,7 +77,7 @@ jobs:
7777
lint-java:
7878
needs: lint
7979
name: Lint Java (${{ matrix.profile.name }})
80-
runs-on: ubuntu-latest
80+
runs-on: ubuntu-24.04
8181
container:
8282
image: amd64/rust
8383
env:
@@ -103,7 +103,6 @@ jobs:
103103
# spark-4.2 profiles.
104104
fail-fast: false
105105
steps:
106-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
107106
- uses: actions/checkout@v6
108107

109108
- name: Setup Rust & Java toolchain
@@ -154,7 +153,7 @@ jobs:
154153
build-spark-4-1:
155154
needs: lint
156155
name: Build Spark 4.1, JDK 17
157-
runs-on: ubuntu-latest
156+
runs-on: ubuntu-24.04
158157
container:
159158
image: amd64/rust
160159
steps:
@@ -183,11 +182,10 @@ jobs:
183182
build-native:
184183
needs: lint
185184
name: Build Native Library
186-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=8,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
185+
runs-on: ubuntu-24.04
187186
container:
188187
image: amd64/rust
189188
steps:
190-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
191189
- uses: actions/checkout@v6
192190
- name: Setup Rust toolchain
193191
uses: ./.github/actions/setup-builder
@@ -236,12 +234,10 @@ jobs:
236234
linux-test-rust:
237235
needs: lint
238236
name: ubuntu-latest/rust-test
239-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
237+
runs-on: ubuntu-24.04
240238
container:
241239
image: amd64/rust
242240
steps:
243-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
244-
245241
- uses: actions/checkout@v6
246242

247243
- name: Setup Rust & Java toolchain
@@ -292,11 +288,6 @@ jobs:
292288
maven_opts: "-Pspark-3.5 -Pscala-2.13"
293289
scan_impl: "native_iceberg_compat"
294290

295-
- name: "Spark 4.0, JDK 17"
296-
java_version: "17"
297-
maven_opts: "-Pspark-4.0"
298-
scan_impl: "auto"
299-
300291
- name: "Spark 4.0, JDK 21"
301292
java_version: "21"
302293
maven_opts: "-Pspark-4.0"
@@ -400,14 +391,13 @@ jobs:
400391
org.apache.spark.sql.CometCollationSuite
401392
fail-fast: false
402393
name: ${{ matrix.profile.name }}/${{ matrix.profile.scan_impl }} [${{ matrix.suite.name }}]
403-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
394+
runs-on: ubuntu-24.04
404395
container:
405396
image: amd64/rust
406397
env:
407398
JAVA_TOOL_OPTIONS: ${{ (matrix.profile.java_version == '17' || matrix.profile.java_version == '21') && '--add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED' || '' }}
408399

409400
steps:
410-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
411401
- uses: actions/checkout@v6
412402

413403
- name: Setup Rust & Java toolchain
@@ -448,14 +438,12 @@ jobs:
448438
verify-benchmark-results-tpch:
449439
needs: build-native
450440
name: Verify TPC-H Results
451-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
441+
runs-on: ubuntu-24.04
452442
container:
453443
image: amd64/rust
454444
env:
455445
JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED
456446
steps:
457-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
458-
459447
- uses: actions/checkout@v6
460448

461449
- name: Setup Rust & Java toolchain
@@ -504,7 +492,7 @@ jobs:
504492
verify-benchmark-results-tpcds:
505493
needs: build-native
506494
name: Verify TPC-DS Results (${{ matrix.join }})
507-
runs-on: ${{ github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest' }}
495+
runs-on: ubuntu-24.04
508496
container:
509497
image: amd64/rust
510498
env:
@@ -514,8 +502,6 @@ jobs:
514502
join: [sort_merge, broadcast, hash]
515503
fail-fast: false
516504
steps:
517-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
518-
519505
- uses: actions/checkout@v6
520506

521507
- name: Setup Rust & Java toolchain

.github/workflows/pr_markdown_format.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ on:
2828

2929
jobs:
3030
prettier-check:
31-
runs-on: ubuntu-latest
31+
runs-on: ubuntu-24.04
3232
steps:
3333
- uses: actions/checkout@v6
3434

.github/workflows/pr_missing_suites.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ on:
3030

3131
jobs:
3232
check-missing-suites:
33-
runs-on: ubuntu-latest
33+
runs-on: ubuntu-24.04
3434
steps:
3535
- uses: actions/checkout@v6
3636
- name: Check Missing Suites

.github/workflows/pr_title_check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ on:
2727

2828
jobs:
2929
check-pr-title:
30-
runs-on: ubuntu-latest
30+
runs-on: ubuntu-24.04
3131
steps:
3232
- uses: actions/checkout@v6
3333
- name: Check PR title

.github/workflows/spark_sql_test.yml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ jobs:
126126
matrix:
127127
module:
128128
- {name: "catalyst", args1: "catalyst/test", args2: ""}
129-
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
129+
- {name: "sql_core-1", args1: "", args2: "sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest"}
130130
- {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
131131
- {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
132132
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
@@ -137,19 +137,14 @@ jobs:
137137
config:
138138
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto'}
139139
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto'}
140-
- {spark-short: '4.0', spark-full: '4.0.2', java: 17, scan-impl: 'auto'}
141140
- {spark-short: '4.0', spark-full: '4.0.2', java: 21, scan-impl: 'auto'}
142141
- {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto'}
143142
fail-fast: false
144143
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }}
145-
# Hive tests stay on the standard GitHub-hosted runner: HiveSparkSubmitSuite
146-
# relies on an Ivy 'local-m2-cache' resolver that the runs-on.com
147-
# ubuntu24-full-x64 image does not provide, so spark-submit fails there.
148-
runs-on: ${{ startsWith(matrix.module.name, 'sql_hive') && 'ubuntu-24.04' || (github.repository_owner == 'apache' && format('runs-on={0},family=m8a+m7a+c8a,cpu=16,image=ubuntu24-full-x64,extras=s3-cache,disk=large,tag=datafusion-comet', github.run_id) || 'ubuntu-latest') }}
144+
runs-on: ubuntu-24.04
149145
container:
150146
image: amd64/rust
151147
steps:
152-
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
153148
- uses: actions/checkout@v6
154149
- name: Setup Rust & Java toolchain
155150
uses: ./.github/actions/setup-builder
@@ -172,12 +167,18 @@ jobs:
172167
cd apache-spark
173168
rm -rf /root/.m2/repository/org/apache/parquet # the cached Parquet artifacts must be cleared before building (root cause unknown)
174169
NOLINT_ON_COMPILE=true ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=${{ matrix.config.scan-impl }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
175-
build/sbt -Dsbt.log.noformat=true -mem 6144 ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
170+
build/sbt -Dsbt.log.noformat=true -mem $SBT_MEM ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
176171
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
177172
find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
178173
fi
179174
env:
180175
LC_ALL: "C.UTF-8"
176+
# Standard GitHub runners have 7 GB RAM; cap SBT heap so forked test
177+
# JVMs fit alongside it.
178+
SBT_MEM: "3072"
179+
# Disable parallel test execution to reduce peak memory usage —
180+
# mirrors what apache/spark does on GitHub Actions.
181+
SERIAL_SBT_TESTS: "1"
181182
# Mirror Spark's own JDK 21 / 25 CI workaround. apache/spark's
182183
# build_java21.yml and build_java25.yml set this same env var to
183184
# process-isolate the V1/V2 Parquet and Orc source suites because

.github/workflows/validate_workflows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ on:
3232

3333
jobs:
3434
validate:
35-
runs-on: ubuntu-latest
35+
runs-on: ubuntu-24.04
3636
steps:
3737
- uses: actions/checkout@v6
3838

dev/diffs/3.4.3.diff

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ index b5b34922694..a72403780c4 100644
918918
protected val baseResourcePath = {
919919
// use the same way as `SQLQueryTestSuite` to get the resource path
920920
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
921-
index 525d97e4998..8a3e7457618 100644
921+
index 525d97e4998..f600e162da3 100644
922922
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
923923
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
924924
@@ -1508,7 +1508,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
@@ -931,6 +931,16 @@ index 525d97e4998..8a3e7457618 100644
931931
AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
932932
sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
933933
}
934+
@@ -3730,7 +3731,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
935+
}
936+
}
937+
938+
- test("SPARK-33084: Add jar support Ivy URI in SQL") {
939+
+ test("SPARK-33084: Add jar support Ivy URI in SQL",
940+
+ IgnoreComet("Flaky: depends on external Maven Central download of legacy Hadoop/Hive jars")) {
941+
val sc = spark.sparkContext
942+
val hiveVersion = "2.3.9"
943+
// transitive=false, only download specified jar
934944
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
935945
index 48ad10992c5..51d1ee65422 100644
936946
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala

0 commit comments

Comments
 (0)