Skip to content

Commit 247f1cc

Browse files
authored
chore: Use MinIO instead of S3 on spiceai-dev-runners (#176)
* always run on spiceai-dev-runners, use minio for storage
* setup cc
* wip
* update
* build cache fix
1 parent 71d8439 commit 247f1cc

2 files changed

Lines changed: 41 additions & 49 deletions

File tree

.github/workflows/data_generation_run.yml

Lines changed: 28 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,12 @@ on:
3232
required: false
3333
default: '100'
3434
type: string
35-
runner_type:
36-
required: false
37-
default: 'ubuntu-latest'
38-
type: string
3935
secrets:
40-
AWS_ACCESS_KEY_ID:
36+
MINIO_ENDPOINT:
4137
required: true
42-
AWS_SECRET_ACCESS_KEY:
38+
MINIO_ACCESS_KEY_ID:
39+
required: true
40+
MINIO_SECRET_ACCESS_KEY:
4341
required: true
4442
workflow_dispatch:
4543
inputs:
@@ -48,11 +46,6 @@ on:
4846
required: true
4947
default: 'tpch'
5048
type: string
51-
runner_type:
52-
description: 'GitHub runner label to execute on'
53-
required: false
54-
default: 'spiceai-macos'
55-
type: string
5649
scale_factor:
5750
description: 'TPC-H scale factor'
5851
required: true
@@ -61,7 +54,7 @@ on:
6154
bucket:
6255
description: 'S3 bucket name'
6356
required: false
64-
default: 'spiceai-public-datasets'
57+
default: 'spicebench'
6558
type: string
6659
prefix:
6760
description: 'Base S3 key prefix for generated files (scenario is appended automatically)'
@@ -87,7 +80,7 @@ on:
8780
jobs:
8881
run-data-generation:
8982
name: Run data generation
90-
runs-on: ${{ inputs.runner_type || github.event.inputs.runner_type || 'ubuntu-latest' }}
83+
runs-on: spiceai-dev-runners
9184
timeout-minutes: 600
9285
steps:
9386
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
@@ -101,13 +94,26 @@ jobs:
10194
restore-keys: |
10295
data-generation-${{ runner.os }}-
10396
97+
- name: Cache checkpointer binary
98+
id: cache-checkpointer
99+
uses: actions/cache@v4
100+
with:
101+
path: ~/.spice/bin/checkpointer
102+
key: checkpointer-${{ runner.os }}-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml', '**/*.rs') }}
103+
restore-keys: |
104+
checkpointer-${{ runner.os }}-
105+
104106
- name: Setup Rust toolchain
105-
if: steps.cache-data-generation.outputs.cache-hit != 'true'
107+
if: steps.cache-data-generation.outputs.cache-hit != 'true' || steps.cache-checkpointer.outputs.cache-hit != 'true'
106108
uses: actions-rust-lang/setup-rust-toolchain@v1
107109
with:
108110
toolchain: 1.91
109111
cache: false
110112

113+
- name: Setup CC
114+
if: steps.cache-data-generation.outputs.cache-hit != 'true' || steps.cache-checkpointer.outputs.cache-hit != 'true'
115+
uses: ./.github/actions/setup-cc
116+
111117
- name: Build data-generation
112118
if: steps.cache-data-generation.outputs.cache-hit != 'true'
113119
run: |
@@ -123,8 +129,9 @@ jobs:
123129
PREFIX: ${{ inputs.prefix || github.event.inputs.prefix || 'data-gen' }}
124130
REGION: ${{ inputs.region || github.event.inputs.region || 'us-east-1' }}
125131
NUM_STEPS: ${{ inputs.num_steps || github.event.inputs.num_steps || '25' }}
126-
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
127-
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
132+
MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
133+
AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
134+
AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
128135
RUST_LOG: info
129136
run: |
130137
ARGS="--dataset ${SCENARIO}"
@@ -134,19 +141,11 @@ jobs:
134141
ARGS="${ARGS} --prefix ${PREFIX}"
135142
ARGS="${ARGS} --region ${REGION}"
136143
ARGS="${ARGS} --num-steps ${NUM_STEPS}"
144+
ARGS="${ARGS} --endpoint ${MINIO_ENDPOINT}"
137145
138146
echo "Running: data-generation run ${ARGS}"
139147
~/.spice/bin/data-generation run ${ARGS}
140148
141-
- name: Cache checkpointer binary
142-
id: cache-checkpointer
143-
uses: actions/cache@v4
144-
with:
145-
path: ~/.spice/bin/checkpointer
146-
key: checkpointer-${{ runner.os }}-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml', '**/*.rs') }}
147-
restore-keys: |
148-
checkpointer-${{ runner.os }}-
149-
150149
- name: Build checkpointer
151150
if: steps.cache-checkpointer.outputs.cache-hit != 'true'
152151
run: |
@@ -162,8 +161,9 @@ jobs:
162161
PREFIX: ${{ inputs.prefix || github.event.inputs.prefix || 'data-gen' }}
163162
REGION: ${{ inputs.region || github.event.inputs.region || 'us-east-1' }}
164163
CHECKPOINT_INTERVAL_STEPS: ${{ inputs.checkpoint_interval_steps || github.event.inputs.checkpoint_interval_steps || '100' }}
165-
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
166-
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
164+
MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
165+
AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
166+
AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
167167
RUST_LOG: info
168168
run: |
169169
VERSION=$(python3 - <<'PY'
@@ -191,6 +191,7 @@ jobs:
191191
fi
192192
193193
ARGS="${ARGS} --checkpoint-interval-steps ${CHECKPOINT_INTERVAL_STEPS}"
194+
ARGS="${ARGS} --endpoint ${MINIO_ENDPOINT}"
194195
195196
echo "Running: checkpointer ${ARGS}"
196197
~/.spice/bin/checkpointer ${ARGS}

.github/workflows/run_spicebench.yml

Lines changed: 13 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ on:
2121
etl_bucket:
2222
description: 'S3 bucket for ETL source and target data'
2323
required: true
24-
default: 'spiceai-public-datasets'
24+
default: 'spicebench'
2525
type: string
2626
etl_prefix:
2727
description: 'S3 key prefix (the {prefix} portion of {prefix}/{scenario}/{version}/)'
@@ -53,14 +53,6 @@ on:
5353
required: false
5454
default: false
5555
type: boolean
56-
runner:
57-
description: 'GitHub runner label to use for this workflow run'
58-
required: true
59-
default: ubuntu-latest
60-
type: choice
61-
options:
62-
- ubuntu-latest
63-
- spiceai-dev-runners
6456
scheduler_state_location:
6557
description: 'S3 URI for shared scheduler state (e.g. s3://bucket/scheduler-state/). If empty, scheduler state is not configured.'
6658
required: false
@@ -70,7 +62,7 @@ on:
7062
jobs:
7163
run-spicebench:
7264
name: Run spicebench
73-
runs-on: ${{ github.event.inputs.runner || 'ubuntu-latest' }}
65+
runs-on: spiceai-dev-runners
7466
timeout-minutes: 600
7567
steps:
7668
- uses: actions/checkout@v6
@@ -80,8 +72,8 @@ jobs:
8072
- uses: ./.github/actions/management-login
8173
if: ${{ github.event.inputs.system_under_test == 'spice_cloud' }}
8274
with:
83-
client-id: ${{ secrets.PEASEE_MANAGEMENT_CLIENT_ID }}
84-
client-secret: ${{ secrets.PEASEE_MANAGEMENT_CLIENT_SECRET }}
75+
client-id: ${{ secrets.SPICE_MANAGEMENT_CLIENT_ID }}
76+
client-secret: ${{ secrets.SPICE_MANAGEMENT_CLIENT_SECRET }}
8577

8678
- name: Log in to GHCR
8779
if: ${{ github.event.inputs.system_under_test == 'spice_cloud' }}
@@ -222,19 +214,15 @@ jobs:
222214
run: |
223215
set -euo pipefail
224216
curl -LsSf https://dbc.columnar.tech/install.sh | sh
225-
if [ "${{ github.event.inputs.runner || 'ubuntu-latest' }}" = "spiceai-dev-runners" ]; then
226-
source "$HOME/.local/bin/env"
227-
fi
217+
source "$HOME/.local/bin/env"
228218
dbc install postgresql
229219
230220
- name: Install ADBC FlightSQL driver
231221
if: ${{ !startsWith(github.event.inputs.system_under_test || 'spice_cloud', 'databricks-') }}
232222
run: |
233223
set -euo pipefail
234224
curl -LsSf https://dbc.columnar.tech/install.sh | sh
235-
if [ "${{ github.event.inputs.runner || 'ubuntu-latest' }}" = "spiceai-dev-runners" ]; then
236-
source "$HOME/.local/bin/env"
237-
fi
225+
source "$HOME/.local/bin/env"
238226
dbc install flightsql
239227
240228
- name: Run spicebench
@@ -261,8 +249,11 @@ jobs:
261249
VALIDATE_CHECKPOINT_RESULTS: ${{ github.event.inputs.validate_checkpoint_results || 'false' }}
262250
ENABLE_MODULE_DEBUG_LOGGING: ${{ github.event.inputs.enable_module_debug_logging || 'false' }}
263251
SPICEAI_BENCHMARK_METRICS_KEY: ${{ secrets.SPICEAI_BENCHMARK_METRICS_KEY }}
264-
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
265-
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
252+
MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
253+
AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
254+
AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
255+
S3_AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
256+
S3_AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
266257
LAKEBASE_PG_HOST: ${{ secrets.LAKEBASE_PG_HOST }}
267258
LAKEBASE_PG_USER: ${{ secrets.LAKEBASE_PG_USER }}
268259
LAKEBASE_PG_DB_NAME: ${{ secrets.LAKEBASE_PG_DB_NAME }}
@@ -280,7 +271,7 @@ jobs:
280271
281272
TABLE_FORMAT="parquet"
282273
EXECUTOR_INSTANCE_TYPE="github-hosted-ubuntu-latest"
283-
ETL_ENDPOINT=""
274+
ETL_ENDPOINT="${MINIO_ENDPOINT}"
284275
DATABRICKS_TABLE_FORMAT="${TABLE_FORMAT}"
285276
SYSTEM_UNDER_TEST_PREFIX="${SYSTEM_UNDER_TEST%%-*}"
286277
ETL_ARGS="--etl-bucket ${ETL_BUCKET} --scale-factor ${SCALE_FACTOR}"
@@ -327,7 +318,7 @@ jobs:
327318
fi
328319
else
329320
ADAPTER_CMD="docker"
330-
ADAPTER_ARGS="run -i -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e SPIDAPTER_ICEBERG_REGION -e SPIDAPTER_ICEBERG_CATALOG_FROM ghcr.io/spiceai/spidapter:latest stdio --verbose --channel nightly"
321+
ADAPTER_ARGS="run -i -e SPICEAI_API_KEY -e SPICE_CLOUD_API_URL -e AWS_ACCESS_KEY_ID=${S3_AWS_ACCESS_KEY_ID} -e AWS_SECRET_ACCESS_KEY=${S3_AWS_SECRET_ACCESS_KEY} -e SPIDAPTER_ICEBERG_REGION -e SPIDAPTER_ICEBERG_CATALOG_FROM ghcr.io/spiceai/spidapter:latest stdio --verbose --channel nightly"
331322
ADAPTER_ENVS=""
332323
fi
333324

0 commit comments

Comments
 (0)