Skip to content

Commit f16abd3

Browse files
authored
Data Generation Implementation (#11)
* Data ingestion implementation * Fix * Update github workflow * Fix
1 parent daa45b8 commit f16abd3

13 files changed

Lines changed: 1711 additions & 126 deletions

File tree

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
# Workflow title as listed in the repository's Actions tab.
name: data-generation run
# Per-run title: embeds the requested scale factor and step count so that
# individual dispatches can be told apart in the run list.
run-name: >-
  data-generation run - sf${{ inputs.scale_factor }} - ${{ inputs.num_steps }} steps

# Manual trigger only: every parameter of the run is supplied at dispatch time.
on:
  workflow_dispatch:
    inputs:
      scale_factor:
        description: 'TPC-H scale factor'
        required: true
        default: '1.0'
        type: string
      num_steps:
        description: 'Number of data generation steps'
        required: false
        default: '100'
        type: string
      bucket:
        description: 'S3 bucket name'
        required: true
        type: string
      prefix:
        description: 'S3 key prefix for generated files'
        required: false
        default: 'data-gen/tpch'
        type: string
      max_concurrency:
        description: 'Maximum number of concurrent S3 writes'
        required: true
        default: '8'
        type: string
      region:
        description: 'AWS region'
        required: true
        default: 'us-east-1'
        type: string
      skip_initial:
        description: 'Skip data ingested by running --initial command'
        required: false
        default: false
        type: boolean
      # The job's `runs-on` reads this input. It was previously undeclared, so
      # the expression always evaluated to an empty string and no runner could
      # be selected for the job.
      runner_type:
        description: 'Runner label the job runs on'
        required: false
        default: 'ubuntu-latest'
        type: string

# Single job: check out the repository, build the data-generation binary in
# release mode, then run it with the parameters supplied at dispatch time.
jobs:
  run-data-generation:
    name: Run data generation
    # Fall back to a GitHub-hosted runner when runner_type is empty or not
    # provided; an empty `runs-on` leaves the job with no runner to start on.
    runs-on: ${{ github.event.inputs.runner_type || 'ubuntu-latest' }}
    # NOTE(review): GitHub-hosted runners cap a job at 6 hours, so this
    # 600-minute limit is only fully effective on self-hosted runners.
    timeout-minutes: 600
    # Least privilege: the job only needs to read the repository contents.
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Build data-generation
        run: cargo build --release -p data-generation

      - name: Run data generation
        # Arrays are a bash feature, so pin the shell instead of relying on
        # the runner's default.
        shell: bash
        env:
          # Dispatch inputs are handed to the script as environment variables
          # rather than being expanded into the script text, so a crafted
          # value cannot be interpreted as shell syntax.
          INPUT_SCALE_FACTOR: ${{ github.event.inputs.scale_factor }}
          INPUT_NUM_STEPS: ${{ github.event.inputs.num_steps }}
          INPUT_BUCKET: ${{ github.event.inputs.bucket }}
          INPUT_PREFIX: ${{ github.event.inputs.prefix }}
          INPUT_MAX_CONCURRENCY: ${{ github.event.inputs.max_concurrency }}
          INPUT_REGION: ${{ github.event.inputs.region }}
          INPUT_SKIP_INITIAL: ${{ github.event.inputs.skip_initial }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          RUST_LOG: info
        run: |
          # Build the argument list as an array so every value reaches the
          # binary as exactly one argument, even if it contains spaces or
          # glob characters.
          args=(
            --scale-factor "${INPUT_SCALE_FACTOR}"
            --num-steps "${INPUT_NUM_STEPS}"
            --bucket "${INPUT_BUCKET}"
            --prefix "${INPUT_PREFIX}"
            --max-concurrency "${INPUT_MAX_CONCURRENCY}"
            --region "${INPUT_REGION}"
          )

          if [ "${INPUT_SKIP_INITIAL}" = "true" ]; then
            args+=(--skip-initial)
          fi

          echo "Running: data-generation run ${args[*]}"
          ./target/release/data-generation run "${args[@]}"

0 commit comments

Comments
 (0)