Skip to content

Commit 73776d4

Browse files
authored
fix: Use unique ETL target prefixes (#53)
* fix: Use unique ETL target prefixes * fix
1 parent 383b3f1 commit 73776d4

6 files changed

Lines changed: 31 additions & 13 deletions

File tree

.github/workflows/run_spicebench.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ on:
2727
required: false
2828
default: 'data-gen/tpch_sf1'
2929
type: string
30-
etl_target_prefix:
31-
description: 'S3 key prefix for ETL target (rehydrated) data'
30+
etl_target_base_prefix:
31+
description: 'Base S3 key prefix for ETL target (rehydrated) data. A random suffix is appended per run.'
3232
required: false
33-
default: 'rehydrated/tpch_sf1'
33+
default: 'rehydrated'
3434
type: string
3535
etl_region:
3636
description: 'AWS region for the ETL S3 bucket'
@@ -197,7 +197,7 @@ jobs:
197197
SYSTEM_ADAPTER: ${{ github.event.inputs.system_adapter || 'spidapter' }}
198198
ETL_BUCKET: ${{ github.event.inputs.etl_bucket }}
199199
ETL_SOURCE_PREFIX: ${{ github.event.inputs.etl_source_prefix }}
200-
ETL_TARGET_PREFIX: ${{ github.event.inputs.etl_target_prefix }}
200+
ETL_TARGET_BASE_PREFIX: ${{ github.event.inputs.etl_target_base_prefix }}
201201
ETL_REGION: ${{ github.event.inputs.etl_region || 'us-east-1' }}
202202
ETL_ENDPOINT: ${{ github.event.inputs.etl_endpoint }}
203203
ETL_NUM_STEPS: ${{ github.event.inputs.etl_num_steps || '25' }}
@@ -209,8 +209,8 @@ jobs:
209209
if [ -n "${ETL_SOURCE_PREFIX}" ]; then
210210
ETL_ARGS="${ETL_ARGS} --etl-source-prefix ${ETL_SOURCE_PREFIX}"
211211
fi
212-
if [ -n "${ETL_TARGET_PREFIX}" ]; then
213-
ETL_ARGS="${ETL_ARGS} --etl-target-prefix ${ETL_TARGET_PREFIX}"
212+
if [ -n "${ETL_TARGET_BASE_PREFIX}" ]; then
213+
ETL_ARGS="${ETL_ARGS} --etl-target-base-prefix ${ETL_TARGET_BASE_PREFIX}"
214214
fi
215215
if [ -n "${ETL_REGION}" ]; then
216216
ETL_ARGS="${ETL_ARGS} --etl-region ${ETL_REGION}"

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/etl/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ tokio.workspace = true
2525
tokio-util.workspace = true
2626
tracing.workspace = true
2727
tracing-subscriber = { workspace = true, features = ["env-filter"] }
28+
uuid = { workspace = true, features = ["v4"] }

crates/etl/src/main.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,10 @@ struct Cli {
4646
#[arg(long, default_value = "")]
4747
source_prefix: String,
4848

49-
/// S3 key prefix for target (rehydrated) data
49+
/// Base S3 key prefix for target (rehydrated) data.
50+
/// A random suffix is appended automatically to create a unique destination per run.
5051
#[arg(long, default_value = "")]
51-
target_prefix: String,
52+
target_base_prefix: String,
5253

5354
/// AWS region
5455
#[arg(long)]
@@ -88,9 +89,15 @@ impl Cli {
8889
}
8990

9091
fn target_config(&self) -> TargetConfig {
92+
let run_suffix = uuid::Uuid::new_v4().to_string();
93+
let prefix = if self.target_base_prefix.is_empty() {
94+
run_suffix
95+
} else {
96+
format!("{}/{run_suffix}", self.target_base_prefix)
97+
};
9198
TargetConfig {
9299
bucket: self.bucket.clone(),
93-
prefix: self.target_prefix.clone(),
100+
prefix,
94101
region: self.region.clone(),
95102
endpoint: self.endpoint.clone(),
96103
}
@@ -117,7 +124,7 @@ async fn main() -> anyhow::Result<()> {
117124
dataset = %cli.dataset,
118125
bucket = %cli.bucket,
119126
source_prefix = %cli.source_prefix,
120-
target_prefix = %cli.target_prefix,
127+
target_base_prefix = %cli.target_base_prefix,
121128
scale_factor = cli.scale_factor,
122129
num_steps = cli.num_steps,
123130
"Starting ETL pipeline"

src/args/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,10 @@ pub struct CommonArgs {
8585
#[arg(long, default_value = "")]
8686
pub(crate) etl_source_prefix: String,
8787

88-
/// S3 key prefix for the ETL target (rehydrated) data
88+
/// Base S3 key prefix for the ETL target (rehydrated) data.
89+
/// A random suffix is appended automatically to create a unique destination per run.
8990
#[arg(long, default_value = "")]
90-
pub(crate) etl_target_prefix: String,
91+
pub(crate) etl_target_base_prefix: String,
9192

9293
/// AWS region for the ETL S3 bucket
9394
#[arg(long)]

src/main.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,17 @@ async fn main() -> anyhow::Result<()> {
8282
endpoint: cli.common.etl_endpoint.clone(),
8383
};
8484

85+
let run_suffix = Uuid::new_v4().to_string();
86+
let target_prefix = if cli.common.etl_target_base_prefix.is_empty() {
87+
run_suffix.clone()
88+
} else {
89+
format!("{}/{run_suffix}", cli.common.etl_target_base_prefix)
90+
};
91+
tracing::info!(target_prefix = %target_prefix, "Generated unique ETL target prefix");
92+
8593
let target_config = TargetConfig {
8694
bucket: cli.common.etl_bucket.clone(),
87-
prefix: cli.common.etl_target_prefix.clone(),
95+
prefix: target_prefix,
8896
region: cli.common.etl_region.clone(),
8997
endpoint: cli.common.etl_endpoint.clone(),
9098
};

0 commit comments

Comments
 (0)