# data-generation run - tpch - sf0.1 #95
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow name.
name: data-generation run

# Per-run title: "data-generation run - <scenario> - sf<scale factor>".
# Each placeholder tries the `inputs` context first, then
# `github.event.inputs`, and finally a literal fallback ('tpch' / '0.01').
# The folded scalar (>-) joins the lines below with single spaces.
run-name: >-
  data-generation run
  - ${{ inputs.scenario || github.event.inputs.scenario || 'tpch' }}
  - sf${{ inputs.scale_factor || github.event.inputs.scale_factor || '0.01' }}
# Triggers. The same nine string inputs are declared for both triggers:
#   - workflow_call:     invoked as a reusable workflow; also declares the
#                        three MINIO_* secrets the caller must provide.
#   - workflow_dispatch: started manually; every input carries a description.
# NOTE(review): the two triggers use different defaults for `bucket`
# ('spiceai-public-datasets' vs 'spicebench'), `num_steps` ('25' vs '20') and
# `checkpoint_interval_steps` ('100' vs '10') - confirm this is intentional.
on:
  workflow_call:
    inputs:
      scenario:
        type: string
        required: false
        default: 'tpch'
      scale_factor:
        type: string
        required: false
        default: '0.01'
      bucket:
        type: string
        required: false
        default: 'spiceai-public-datasets'
      prefix:
        type: string
        required: false
        default: 'data-gen'
      region:
        type: string
        required: false
        default: 'us-east-1'
      num_steps:
        type: string
        required: false
        default: '25'
      checkpoint_interval_steps:
        type: string
        required: false
        default: '100'
      update_ratio:
        type: string
        required: false
        default: '0'
      delete_ratio:
        type: string
        required: false
        default: '0'
    # Mapped onto MINIO_ENDPOINT / AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
    # in the env of the generate and checkpoint steps.
    secrets:
      MINIO_ENDPOINT:
        required: true
      MINIO_ACCESS_KEY_ID:
        required: true
      MINIO_SECRET_ACCESS_KEY:
        required: true

  workflow_dispatch:
    inputs:
      scenario:
        description: 'Dataset/scenario to generate (e.g. tpch)'
        type: string
        required: true
        default: 'tpch'
      scale_factor:
        description: 'TPC-H scale factor'
        type: string
        required: true
        default: '0.01'
      bucket:
        description: 'S3 bucket name'
        type: string
        required: false
        default: 'spicebench'
      prefix:
        description: 'Base S3 key prefix for generated files (scenario is appended automatically)'
        type: string
        required: false
        default: 'data-gen'
      region:
        description: 'AWS region'
        type: string
        required: true
        default: 'us-east-1'
      num_steps:
        description: 'Number of data generation steps (partitions for TPC-H dbgen)'
        type: string
        required: false
        default: '20'
      checkpoint_interval_steps:
        description: 'Every N steps to take a checkpoint'
        type: string
        required: false
        default: '10'
      update_ratio:
        description: 'Ratio of rows to update per step (0.0 to 1.0)'
        type: string
        required: false
        default: '0'
      delete_ratio:
        description: 'Ratio of rows to delete per step (0.0 to 1.0)'
        type: string
        required: false
        default: '0'
jobs:
  # Single job: restore or build the spicebench binary, then invoke
  # `spicebench generate` followed by `spicebench checkpoint` with arguments
  # derived from the workflow inputs and the MINIO_* secrets.
  run-data-generation:
    name: Run data generation
    runs-on: spiceai-dev-runners
    timeout-minutes: 600
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6

      # The cache key hashes every Cargo manifest, the lockfile and all Rust
      # sources. The three build-related steps below are skipped only when
      # this step reports cache-hit == 'true'.
      # NOTE(review): unlike actions/checkout above, this action and
      # setup-rust-toolchain are referenced by tag rather than by commit SHA.
      - name: Cache spicebench binary
        id: cache-spicebench
        uses: actions/cache@v4
        with:
          path: ~/.spice/bin/spicebench
          key: spicebench-duckdb-${{ runner.os }}-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml', '**/*.rs') }}
          restore-keys: |
            spicebench-duckdb-${{ runner.os }}-

      - name: Setup Rust toolchain
        if: steps.cache-spicebench.outputs.cache-hit != 'true'
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          # Quoted so YAML keeps the version as a string instead of a float
          # (an unquoted 1.90 would be read as 1.9).
          toolchain: '1.91'
          cache: false

      - name: Setup CC
        if: steps.cache-spicebench.outputs.cache-hit != 'true'
        uses: ./.github/actions/setup-cc

      - name: Build spicebench
        if: steps.cache-spicebench.outputs.cache-hit != 'true'
        run: |
          mkdir -p ~/.spice/bin
          cargo build --release --features duckdb -p spicebench
          install -m 755 target/release/spicebench ~/.spice/bin/spicebench

      - name: Run data generation
        # Explicit bash: the script below relies on bash arrays.
        shell: bash
        env:
          SCENARIO: ${{ inputs.scenario || github.event.inputs.scenario || 'tpch' }}
          SCALE_FACTOR: ${{ inputs.scale_factor || github.event.inputs.scale_factor || '0.01' }}
          BUCKET: ${{ inputs.bucket || github.event.inputs.bucket }}
          PREFIX: ${{ inputs.prefix || github.event.inputs.prefix || 'data-gen' }}
          REGION: ${{ inputs.region || github.event.inputs.region || 'us-east-1' }}
          NUM_STEPS: ${{ inputs.num_steps || github.event.inputs.num_steps || '25' }}
          MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
          AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
          UPDATE_RATIO: ${{ inputs.update_ratio || github.event.inputs.update_ratio }}
          DELETE_RATIO: ${{ inputs.delete_ratio || github.event.inputs.delete_ratio }}
          RUST_LOG: info
        run: |
          # BUCKET and MINIO_ENDPOINT have no literal fallback above; fail fast
          # with a clear message rather than passing a flag without a value.
          : "${BUCKET:?BUCKET is empty; provide the 'bucket' input}"
          : "${MINIO_ENDPOINT:?MINIO_ENDPOINT secret is not set}"

          # Build the argument list as an array so each value stays a single
          # argument even if it contains whitespace or glob characters.
          ARGS=(
            --dataset "${SCENARIO}"
            --scenario "${SCENARIO}"
            --scale-factor "${SCALE_FACTOR}"
            --bucket "${BUCKET}"
            --prefix "${PREFIX}"
            --region "${REGION}"
            --num-steps "${NUM_STEPS}"
            --endpoint "${MINIO_ENDPOINT}"
          )
          # The ratio flags are optional: only pass them when a value is set.
          if [ -n "${UPDATE_RATIO}" ]; then
            ARGS+=(--update-ratio "${UPDATE_RATIO}")
          fi
          if [ -n "${DELETE_RATIO}" ]; then
            ARGS+=(--delete-ratio "${DELETE_RATIO}")
          fi

          echo "Running: spicebench generate ${ARGS[*]}"
          ~/.spice/bin/spicebench generate "${ARGS[@]}"

      - name: Run checkpointer
        # Explicit bash: the script below relies on bash arrays.
        shell: bash
        env:
          SCENARIO: ${{ inputs.scenario || github.event.inputs.scenario || 'tpch' }}
          SCALE_FACTOR: ${{ inputs.scale_factor || github.event.inputs.scale_factor || '0.01' }}
          BUCKET: ${{ inputs.bucket || github.event.inputs.bucket }}
          PREFIX: ${{ inputs.prefix || github.event.inputs.prefix || 'data-gen' }}
          REGION: ${{ inputs.region || github.event.inputs.region || 'us-east-1' }}
          CHECKPOINT_INTERVAL_STEPS: ${{ inputs.checkpoint_interval_steps || github.event.inputs.checkpoint_interval_steps || '100' }}
          MINIO_ENDPOINT: ${{ secrets.MINIO_ENDPOINT }}
          AWS_ACCESS_KEY_ID: ${{ secrets.MINIO_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.MINIO_SECRET_ACCESS_KEY }}
          RUST_LOG: info
        run: |
          # BUCKET and MINIO_ENDPOINT have no literal fallback above; fail fast
          # with a clear message rather than passing a flag without a value.
          : "${BUCKET:?BUCKET is empty; provide the 'bucket' input}"
          : "${MINIO_ENDPOINT:?MINIO_ENDPOINT secret is not set}"

          # Render SCALE_FACTOR as a plain decimal string for --version:
          # up to 15 significant digits, never scientific notation, and always
          # containing a decimal point (e.g. "1" -> "1.0", "0.01" -> "0.01").
          # NOTE(review): presumably this must match the version string that
          # `spicebench generate` uses for the same scale factor - confirm.
          VERSION=$(python3 - <<'PY'
          import os
          sf = float(os.environ["SCALE_FACTOR"])
          s = format(sf, ".15g")
          if "e" in s or "E" in s:
              s = ("%.15f" % sf).rstrip("0").rstrip(".")
          if "." not in s:
              s = f"{s}.0"
          print(s)
          PY
          )

          # Array form keeps every value a single argument. REGION is passed
          # unconditionally because its expression always falls back to
          # 'us-east-1' and therefore can never be empty.
          ARGS=(
            --scenario "${SCENARIO}"
            --version "${VERSION}"
            --bucket "${BUCKET}"
            --prefix "${PREFIX}"
            --duckdb-path ./checkpointer.duckdb
            --checkpoint-dir ./checkpoints
            --region "${REGION}"
            --checkpoint-interval-steps "${CHECKPOINT_INTERVAL_STEPS}"
            --endpoint "${MINIO_ENDPOINT}"
          )

          echo "Running: spicebench checkpoint ${ARGS[*]}"
          ~/.spice/bin/spicebench checkpoint "${ARGS[@]}"