Skip to content

Commit 54031d2

Browse files
fix: Move TableFormat into spicebench (#61)
* fix: Move TableFormat into spicebench
* chore: auto-fix cargo fmt + clippy

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 53f58f3 commit 54031d2

4 files changed

Lines changed: 31 additions & 54 deletions

File tree

crates/data-generation/src/config.rs

Lines changed: 1 addition & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
use clap::{Parser, Subcommand, ValueEnum};
17+
use clap::{Parser, Subcommand};
1818

1919
#[derive(Parser)]
2020
#[command(about = "Spice.ai data generation tool - generates Arrow data and writes to S3")]
@@ -63,14 +63,6 @@ pub struct CommonArgs {
6363
#[arg(long, default_value = "")]
6464
pub prefix: String,
6565

66-
/// Logical table format propagated to system adapters
67-
#[arg(long, value_enum, default_value = "parquet")]
68-
pub table_format: TableFormat,
69-
70-
/// Executor instance type label propagated to adapters for dashboarding
71-
#[arg(long, default_value = "unknown")]
72-
pub executor_instance_type: String,
73-
7466
/// AWS region
7567
#[arg(long)]
7668
pub region: Option<String>,
@@ -93,31 +85,10 @@ pub struct DatasetConfig {
9385
pub struct TargetConfig {
9486
pub bucket: String,
9587
pub prefix: String,
96-
pub table_format: TableFormat,
97-
pub executor_instance_type: String,
9888
pub region: Option<String>,
9989
pub endpoint: Option<String>,
10090
}
10191

102-
#[derive(Clone, Debug, ValueEnum)]
103-
#[value(rename_all = "lower")]
104-
pub enum TableFormat {
105-
Iceberg,
106-
Parquet,
107-
Delta,
108-
}
109-
110-
impl std::fmt::Display for TableFormat {
111-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112-
let value = match self {
113-
Self::Iceberg => "iceberg",
114-
Self::Parquet => "parquet",
115-
Self::Delta => "delta",
116-
};
117-
write!(f, "{value}")
118-
}
119-
}
120-
12192
pub struct IngestorConfig {
12293
pub max_concurrency: usize,
12394
}
@@ -135,8 +106,6 @@ impl CommonArgs {
135106
TargetConfig {
136107
bucket: self.bucket.clone(),
137108
prefix: self.prefix.clone(),
138-
table_format: self.table_format.clone(),
139-
executor_instance_type: self.executor_instance_type.clone(),
140109
region: self.region.clone(),
141110
endpoint: self.endpoint.clone(),
142111
}

crates/etl/src/main.rs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License.
1717
use std::sync::Arc;
1818

1919
use clap::Parser;
20-
use data_generation::config::{DatasetConfig, TableFormat, TargetConfig};
20+
use data_generation::config::{DatasetConfig, TargetConfig};
2121
use data_generation::storage::s3::S3Storage;
2222
use etl::{DatasetSource, ETLPipeline, PipelineState, StopReason};
2323
use tracing_subscriber::EnvFilter;
@@ -50,14 +50,6 @@ struct Cli {
5050
#[arg(long, default_value = "")]
5151
target_base_prefix: String,
5252

53-
/// Logical table format propagated to system adapters
54-
#[arg(long, value_enum, default_value = "parquet")]
55-
table_format: TableFormat,
56-
57-
/// Executor instance type label propagated to adapters for dashboarding
58-
#[arg(long, default_value = "unknown")]
59-
executor_instance_type: String,
60-
6153
/// AWS region
6254
#[arg(long)]
6355
region: Option<String>,
@@ -90,8 +82,6 @@ impl Cli {
9082
TargetConfig {
9183
bucket: self.bucket.clone(),
9284
prefix: self.source_prefix.clone(),
93-
table_format: self.table_format.clone(),
94-
executor_instance_type: self.executor_instance_type.clone(),
9585
region: self.region.clone(),
9686
endpoint: self.endpoint.clone(),
9787
}
@@ -107,8 +97,6 @@ impl Cli {
10797
TargetConfig {
10898
bucket: self.bucket.clone(),
10999
prefix,
110-
table_format: self.table_format.clone(),
111-
executor_instance_type: self.executor_instance_type.clone(),
112100
region: self.region.clone(),
113101
endpoint: self.endpoint.clone(),
114102
}

src/args/mod.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,29 @@ limitations under the License.
1515
*/
1616

1717
use clap::{ArgAction, Parser, ValueEnum};
18-
use data_generation::config::TableFormat;
1918

2019
mod dataset;
2120
use crate::scenario::Scenario;
2221

22+
#[derive(Clone, Debug, ValueEnum)]
23+
#[value(rename_all = "lower")]
24+
pub enum TableFormat {
25+
Iceberg,
26+
Parquet,
27+
Delta,
28+
}
29+
30+
impl std::fmt::Display for TableFormat {
31+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32+
let value = match self {
33+
Self::Iceberg => "iceberg",
34+
Self::Parquet => "parquet",
35+
Self::Delta => "delta",
36+
};
37+
write!(f, "{value}")
38+
}
39+
}
40+
2341
/// Arguments Common to all [`TestCommands`].
2442
#[derive(Parser, Debug, Clone)]
2543
pub struct CommonArgs {

src/main.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ async fn main() -> anyhow::Result<()> {
7979
prefix: cli.common.etl_source_prefix.clone(),
8080
region: cli.common.etl_region.clone(),
8181
endpoint: cli.common.etl_endpoint.clone(),
82-
table_format: cli.common.table_format.clone(),
83-
executor_instance_type: cli.common.executor_instance_type.clone(),
8482
};
8583

8684
let run_suffix = Uuid::new_v4().to_string();
@@ -96,8 +94,6 @@ async fn main() -> anyhow::Result<()> {
9694
prefix: target_prefix,
9795
region: cli.common.etl_region.clone(),
9896
endpoint: cli.common.etl_endpoint.clone(),
99-
table_format: cli.common.table_format.clone(),
100-
executor_instance_type: cli.common.executor_instance_type.clone(),
10197
};
10298

10399
let source = Arc::new(S3Storage::new(&source_config)?);
@@ -121,10 +117,16 @@ async fn main() -> anyhow::Result<()> {
121117
// --- Setup the system adapter (target already has initial data) ---
122118
let run_id = Uuid::new_v4();
123119
let datasets = pipeline.setup_request_datasets();
124-
let setup_metadata = std::collections::HashMap::from([(
125-
"executor_instance_type".to_string(),
126-
serde_json::Value::String(cli.common.executor_instance_type.clone()),
127-
)]);
120+
let setup_metadata = std::collections::HashMap::from([
121+
(
122+
"executor_instance_type".to_string(),
123+
serde_json::Value::String(cli.common.executor_instance_type.clone()),
124+
),
125+
(
126+
"table_format".to_string(),
127+
serde_json::Value::String(cli.common.table_format.to_string()),
128+
),
129+
]);
128130

129131
if let Err(e) = system_adapter_client
130132
.setup(run_id, datasets, setup_metadata)

0 commit comments

Comments
 (0)