Skip to content

Commit 887a63f

Browse files
authored
refactor: replace anyhow with custom StaticSchemaError (#1208)
fixes #1197
1 parent e732821 commit 887a63f

File tree

1 file changed

+72
-15
lines changed

1 file changed

+72
-15
lines changed

src/static_schema.rs

+72-15
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,15 @@
1818

1919
use crate::event::DEFAULT_TIMESTAMP_KEY;
2020
use crate::utils::arrow::get_field;
21-
use anyhow::{anyhow, Error as AnyError};
2221
use serde::{Deserialize, Serialize};
2322
use std::str;
2423

2524
use arrow_schema::{DataType, Field, Schema, TimeUnit};
26-
use std::{collections::HashMap, sync::Arc};
25+
use std::{
26+
collections::{HashMap, HashSet},
27+
sync::Arc,
28+
};
29+
2730
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
2831
pub struct StaticSchema {
2932
fields: Vec<SchemaFields>,
@@ -54,13 +57,12 @@ pub struct Fields {
5457
}
5558

5659
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
57-
5860
pub struct Metadata {}
5961
pub fn convert_static_schema_to_arrow_schema(
6062
static_schema: StaticSchema,
6163
time_partition: &str,
6264
custom_partition: Option<&String>,
63-
) -> Result<Arc<Schema>, AnyError> {
65+
) -> Result<Arc<Schema>, StaticSchemaError> {
6466
let mut parsed_schema = ParsedSchema {
6567
fields: Vec::new(),
6668
metadata: HashMap::new(),
@@ -83,11 +85,17 @@ pub fn convert_static_schema_to_arrow_schema(
8385

8486
for partition in &custom_partition_list {
8587
if !custom_partition_exists.contains_key(*partition) {
86-
return Err(anyhow!("custom partition field {partition} does not exist in the schema for the static schema logstream"));
88+
return Err(StaticSchemaError::MissingCustomPartition(
89+
partition.to_string(),
90+
));
8791
}
8892
}
8993
}
94+
95+
let mut existing_field_names: HashSet<String> = HashSet::new();
96+
9097
for mut field in static_schema.fields {
98+
validate_field_names(&field.name, &mut existing_field_names)?;
9199
if !time_partition.is_empty() && field.name == time_partition {
92100
time_partition_exists = true;
93101
field.data_type = "datetime".to_string();
@@ -127,29 +135,24 @@ pub fn convert_static_schema_to_arrow_schema(
127135
parsed_schema.fields.push(parsed_field);
128136
}
129137
if !time_partition.is_empty() && !time_partition_exists {
130-
return Err(anyhow! {
131-
format!(
132-
"time partition field {time_partition} does not exist in the schema for the static schema logstream"
133-
),
134-
});
138+
return Err(StaticSchemaError::MissingTimePartition(
139+
time_partition.to_string(),
140+
));
135141
}
136142
add_parseable_fields_to_static_schema(parsed_schema)
137143
}
138144

139145
fn add_parseable_fields_to_static_schema(
140146
parsed_schema: ParsedSchema,
141-
) -> Result<Arc<Schema>, AnyError> {
147+
) -> Result<Arc<Schema>, StaticSchemaError> {
142148
let mut schema: Vec<Arc<Field>> = Vec::new();
143149
for field in parsed_schema.fields.iter() {
144150
let field = Field::new(field.name.clone(), field.data_type.clone(), field.nullable);
145151
schema.push(Arc::new(field));
146152
}
147153

148154
if get_field(&schema, DEFAULT_TIMESTAMP_KEY).is_some() {
149-
return Err(anyhow!(
150-
"field {} is a reserved field",
151-
DEFAULT_TIMESTAMP_KEY
152-
));
155+
return Err(StaticSchemaError::ReservedKey(DEFAULT_TIMESTAMP_KEY));
153156
};
154157

155158
// add the p_timestamp field to the event schema to the 0th index
@@ -176,3 +179,57 @@ fn default_dict_id() -> i64 {
176179
/// Supplies the default "dictionary is ordered" flag, which is always `false`.
// NOTE(review): the name suggests this is a serde `default = "..."` provider
// for a `dict_is_ordered` field — confirm at the usage site.
fn default_dict_is_ordered() -> bool {
    const ORDERED_BY_DEFAULT: bool = false;
    ORDERED_BY_DEFAULT
}
182+
183+
fn validate_field_names(
184+
field_name: &str,
185+
existing_fields: &mut HashSet<String>,
186+
) -> Result<(), StaticSchemaError> {
187+
if field_name.is_empty() {
188+
return Err(StaticSchemaError::EmptyFieldName);
189+
}
190+
191+
if !existing_fields.insert(field_name.to_string()) {
192+
return Err(StaticSchemaError::DuplicateField(field_name.to_string()));
193+
}
194+
195+
Ok(())
196+
}
197+
198+
#[derive(Debug, thiserror::Error)]
199+
pub enum StaticSchemaError {
200+
#[error(
201+
"custom partition field {0} does not exist in the schema for the static schema logstream"
202+
)]
203+
MissingCustomPartition(String),
204+
205+
#[error(
206+
"time partition field {0} does not exist in the schema for the static schema logstream"
207+
)]
208+
MissingTimePartition(String),
209+
210+
#[error("field {0:?} is a reserved field")]
211+
ReservedKey(&'static str),
212+
213+
#[error("field name cannot be empty")]
214+
EmptyFieldName,
215+
216+
#[error("duplicate field name: {0}")]
217+
DuplicateField(String),
218+
}
219+
220+
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty name is rejected with `EmptyFieldName`, and the rejected name
    /// is not recorded in the set of seen names.
    #[test]
    fn empty_field_names() {
        let mut existing_field_names: HashSet<String> = HashSet::new();
        assert!(matches!(
            validate_field_names("", &mut existing_field_names),
            Err(StaticSchemaError::EmptyFieldName)
        ));
        assert!(existing_field_names.is_empty());
    }

    /// The first occurrence of a name is accepted; the second is rejected with
    /// `DuplicateField` carrying that name.
    #[test]
    fn duplicate_field_names() {
        let mut existing_field_names: HashSet<String> = HashSet::new();
        // Check the first call explicitly: if it failed, the second assertion
        // alone could pass for the wrong reason.
        assert!(validate_field_names("test_field", &mut existing_field_names).is_ok());
        assert!(matches!(
            validate_field_names("test_field", &mut existing_field_names),
            Err(StaticSchemaError::DuplicateField(name)) if name == "test_field"
        ));
    }
}

0 commit comments

Comments
 (0)