diff --git a/changelog.d/22926_enrichment_function_single_bounded_date_range_search.enhancement.md b/changelog.d/22926_enrichment_function_single_bounded_date_range_search.enhancement.md new file mode 100644 index 0000000000000..e20a27c160a15 --- /dev/null +++ b/changelog.d/22926_enrichment_function_single_bounded_date_range_search.enhancement.md @@ -0,0 +1,3 @@ +The [enrichment functions](https://vector.dev/docs/reference/vrl/functions/#enrichment-functions) now support single-bounded date range filtering: a date range condition may specify only `from` or only `to` instead of requiring both. There are no changes to the function signatures. + +authors: nzxwang diff --git a/lib/enrichment/src/lib.rs b/lib/enrichment/src/lib.rs index 52f2a547f50f1..91195aa77338b 100644 --- a/lib/enrichment/src/lib.rs +++ b/lib/enrichment/src/lib.rs @@ -26,6 +26,16 @@ pub enum Condition<'a> { from: chrono::DateTime, to: chrono::DateTime, }, + /// The date in the field is greater than or equal to `from`. + FromDate { + field: &'a str, + from: chrono::DateTime, + }, + /// The date in the field is less than or equal to `to`. 
+ ToDate { + field: &'a str, + to: chrono::DateTime, + }, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] diff --git a/lib/enrichment/src/vrl_util.rs b/lib/enrichment/src/vrl_util.rs index 61e7044c53c99..df40824cb17c6 100644 --- a/lib/enrichment/src/vrl_util.rs +++ b/lib/enrichment/src/vrl_util.rs @@ -55,6 +55,22 @@ pub(crate) fn evaluate_condition(key: &str, value: Value) -> ExpressionResult Condition::FromDate { + field: key, + from: *map + .get("from") + .expect("should contain from") + .as_timestamp() + .ok_or("from in condition must be a timestamp")?, + }, + Value::Object(map) if map.contains_key("to") => Condition::ToDate { + field: key, + to: *map + .get("to") + .expect("should contain to") + .as_timestamp() + .ok_or("to in condition must be a timestamp")?, + }, _ => Condition::Equals { field: key, value }, }) } @@ -71,7 +87,7 @@ pub(crate) fn add_index( .filter_map(|(field, value)| match value { expression::Expr::Container(expression::Container { variant: expression::Variant::Object(map), - }) if map.contains_key("from") && map.contains_key("to") => None, + }) if map.contains_key("from") || map.contains_key("to") => None, _ => Some(field.as_ref()), }) .collect::>(); diff --git a/src/enrichment_tables/file.rs b/src/enrichment_tables/file.rs index 32ec9318b88e0..20abb4fa8a4d1 100644 --- a/src/enrichment_tables/file.rs +++ b/src/enrichment_tables/file.rs @@ -317,6 +317,20 @@ impl File { _ => false, }, }, + Condition::FromDate { field, from } => match self.column_index(field) { + None => false, + Some(idx) => match row[idx] { + Value::Timestamp(date) => from <= &date, + _ => false, + }, + }, + Condition::ToDate { field, to } => match self.column_index(field) { + None => false, + Some(idx) => match row[idx] { + Value::Timestamp(date) => &date <= to, + _ => false, + }, + }, }) } @@ -1030,7 +1044,7 @@ mod tests { } #[test] - fn finds_row_with_dates() { + fn finds_row_between_dates() { let mut 
file = File::new( Default::default(), FileData { @@ -1096,6 +1110,132 @@ mod tests { ); } + #[test] + fn finds_row_from_date() { + let mut file = File::new( + Default::default(), + FileData { + modified: SystemTime::now(), + data: vec![ + vec![ + "zip".into(), + Value::Timestamp( + chrono::Utc + .with_ymd_and_hms(2015, 12, 7, 0, 0, 0) + .single() + .expect("invalid timestamp"), + ), + ], + vec![ + "zip".into(), + Value::Timestamp( + chrono::Utc + .with_ymd_and_hms(2016, 12, 7, 0, 0, 0) + .single() + .expect("invalid timestamp"), + ), + ], + ], + headers: vec!["field1".to_string(), "field2".to_string()], + }, + ); + + let handle = file.add_index(Case::Sensitive, &["field1"]).unwrap(); + + let conditions = [ + Condition::Equals { + field: "field1", + value: "zip".into(), + }, + Condition::FromDate { + field: "field2", + from: chrono::Utc + .with_ymd_and_hms(2016, 1, 1, 0, 0, 0) + .single() + .expect("invalid timestamp"), + }, + ]; + + assert_eq!( + Ok(ObjectMap::from([ + ("field1".into(), Value::from("zip")), + ( + "field2".into(), + Value::Timestamp( + chrono::Utc + .with_ymd_and_hms(2016, 12, 7, 0, 0, 0) + .single() + .expect("invalid timestamp") + ) + ) + ])), + file.find_table_row(Case::Sensitive, &conditions, None, Some(handle)) + ); + } + + #[test] + fn finds_row_to_date() { + let mut file = File::new( + Default::default(), + FileData { + modified: SystemTime::now(), + data: vec![ + vec![ + "zip".into(), + Value::Timestamp( + chrono::Utc + .with_ymd_and_hms(2015, 12, 7, 0, 0, 0) + .single() + .expect("invalid timestamp"), + ), + ], + vec![ + "zip".into(), + Value::Timestamp( + chrono::Utc + .with_ymd_and_hms(2016, 12, 7, 0, 0, 0) + .single() + .expect("invalid timestamp"), + ), + ], + ], + headers: vec!["field1".to_string(), "field2".to_string()], + }, + ); + + let handle = file.add_index(Case::Sensitive, &["field1"]).unwrap(); + + let conditions = [ + Condition::Equals { + field: "field1", + value: "zip".into(), + }, + Condition::ToDate { + field: "field2", + 
to: chrono::Utc + .with_ymd_and_hms(2016, 1, 1, 0, 0, 0) + .single() + .expect("invalid timestamp"), + }, + ]; + + assert_eq!( + Ok(ObjectMap::from([ + ("field1".into(), Value::from("zip")), + ( + "field2".into(), + Value::Timestamp( + chrono::Utc + .with_ymd_and_hms(2015, 12, 7, 0, 0, 0) + .single() + .expect("invalid timestamp") + ) + ) + ])), + file.find_table_row(Case::Sensitive, &conditions, None, Some(handle)) + ); + } + #[test] fn doesnt_find_row() { let file = File::new( diff --git a/website/cue/reference/remap/functions.cue b/website/cue/reference/remap/functions.cue index d739d474fec60..18ed9457a673c 100644 --- a/website/cue/reference/remap/functions.cue +++ b/website/cue/reference/remap/functions.cue @@ -74,7 +74,7 @@ remap: { performance perspective. 2. **Date range search**. The given field must be greater than or equal to the `from` date - and less than or equal to the `to` date. A date range search involves + and/or less than or equal to the `to` date. A date range search involves sequentially scanning through the rows that have been located using any exact match criteria. This can be an expensive operation if there are many rows returned by any exact match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment