From 70290f1a01e78ed30fc87e56aeb1cc45283e1b8e Mon Sep 17 00:00:00 2001 From: TCeason Date: Wed, 28 May 2025 17:52:46 +0800 Subject: [PATCH 1/2] feat(query): enhance datetime functions --- Cargo.lock | 1 + src/query/ast/src/parser/expr.rs | 10 ++- src/query/ast/src/parser/token.rs | 3 + src/query/ast/tests/it/parser.rs | 2 + .../ast/tests/it/testdata/expr-error.txt | 4 +- src/query/ast/tests/it/testdata/expr.txt | 86 +++++++++++++++++++ .../ast/tests/it/testdata/stmt-error.txt | 4 +- src/query/expression/src/function.rs | 4 + src/query/expression/src/utils/date_helper.rs | 4 +- .../src/scalars/timestamp/Cargo.toml | 1 + .../src/scalars/timestamp/src/datetime.rs | 74 ++++++++++++++-- src/query/service/src/sessions/query_ctx.rs | 4 + src/query/settings/src/settings_default.rs | 14 +++ .../settings/src/settings_getter_setter.rs | 8 ++ .../sql/src/planner/semantic/type_check.rs | 5 +- .../functions/02_0012_function_datetimes.test | 30 +++++++ 16 files changed, 239 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e8a29b614e70..ec9948c6e4408 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5008,6 +5008,7 @@ dependencies = [ "dtparse", "jiff 0.2.13", "num-traits", + "regex", ] [[package]] diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index eb5aa48d946d7..965b15dbacd5c 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1214,6 +1214,13 @@ pub fn expr_element(i: Input) -> IResult> { |(_, _, unit, _, date, _)| ExprElement::DateTrunc { unit, date }, ); + let trunc = map( + rule! { + TRUNC ~ "(" ~ #subexpr(0) ~ "," ~ #interval_kind ~ ")" + }, + |(_, _, date, _, unit, _)| ExprElement::DateTrunc { unit, date }, + ); + let last_day = map( rule! { LAST_DAY ~ "(" ~ #subexpr(0) ~ ("," ~ #interval_kind)? ~ ")" @@ -1326,7 +1333,8 @@ pub fn expr_element(i: Input) -> IResult> { | #date_diff : "`DATE_DIFF(..., ..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW))`" | #date_sub : "`DATE_SUB(..., ..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW))`" | #date_between : "`DATE_BETWEEN((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW), ..., ...,)`" - | #date_trunc : "`DATE_TRUNC((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND), ...)`" + | #date_trunc : "`DATE_TRUNC((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK), ...)`" + | #trunc : "`TRUNC(..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK))`" | #last_day : "`LAST_DAY(..., (YEAR | QUARTER | MONTH | WEEK)))`" | #previous_day : "`PREVIOUS_DAY(..., (Sunday | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday))`" | #next_day : "`NEXT_DAY(..., (Sunday | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday))`" diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 0a24944e297f1..cf0c6efeafe7d 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -517,6 +517,8 @@ pub enum TokenKind { DATESUB, #[token("DATE_TRUNC", ignore(ascii_case))] DATE_TRUNC, + #[token("TRUNC", ignore(ascii_case))] + TRUNC, #[token("DATETIME", ignore(ascii_case))] DATETIME, #[token("DAY", ignore(ascii_case))] @@ -1672,6 +1674,7 @@ impl TokenKind { | TokenKind::DATE_SUB | TokenKind::DATE_BETWEEN | TokenKind::DATE_TRUNC + | TokenKind::TRUNC | TokenKind::LAST_DAY | TokenKind::PREVIOUS_DAY | TokenKind::NEXT_DAY diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index 9ead0aad35702..16cf3931855e5 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -1264,6 +1264,8 @@ fn test_expr() { r#"extract(year from d)"#, r#"date_part(year, d)"#, r#"datepart(year, d)"#, + r#"date_trunc(week, to_timestamp(1630812366))"#, + r#"trunc(to_timestamp(1630812366), week)"#, r#"DATEDIFF(SECOND, to_timestamp('2024-01-01 21:01:35.423179'), to_timestamp('2023-12-31 09:38:18.165575'))"#, r#"last_day(to_date('2024-10-22'), week)"#, r#"last_day(to_date('2024-10-22'))"#, diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index f5a862dd3c974..4a68196ef4ad0 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -52,7 +52,7 @@ error: --> SQL:1:10 | 1 | CAST(col1) - | ---- ^ unexpected `)`, expecting `AS`, `,`, `(`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATE_ADD`, or 41 more ... + | ---- ^ unexpected `)`, expecting `AS`, `,`, `(`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATE_ADD`, or 42 more ... | | | while parsing `CAST(... AS ...)` | while parsing expression @@ -81,7 +81,7 @@ error: 1 | $ abc + 3 | ^ | | - | unexpected `$`, expecting `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATE_ADD`, `DATE_DIFF`, `DATEDIFF`, `DATESUB`, or 39 more ... + | unexpected `$`, expecting `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATE_ADD`, `DATE_DIFF`, `DATEDIFF`, `DATESUB`, or 40 more ... | while parsing expression diff --git a/src/query/ast/tests/it/testdata/expr.txt b/src/query/ast/tests/it/testdata/expr.txt index 4978b44314538..cac8a7acd49bd 100644 --- a/src/query/ast/tests/it/testdata/expr.txt +++ b/src/query/ast/tests/it/testdata/expr.txt @@ -1842,6 +1842,92 @@ DatePart { } +---------- Input ---------- +date_trunc(week, to_timestamp(1630812366)) +---------- Output --------- +DATE_TRUNC(WEEK, to_timestamp(1630812366)) +---------- AST ------------ +DateTrunc { + span: Some( + 0..42, + ), + unit: Week, + date: FunctionCall { + span: Some( + 17..41, + ), + func: FunctionCall { + distinct: false, + name: Identifier { + span: Some( + 17..29, + ), + name: "to_timestamp", + quote: None, + ident_type: None, + }, + args: [ + Literal { + span: Some( + 30..40, + ), + value: UInt64( + 1630812366, + ), + }, + ], + params: [], + order_by: [], + window: None, + lambda: None, + }, + }, +} + + +---------- Input ---------- +trunc(to_timestamp(1630812366), week) +---------- Output --------- +DATE_TRUNC(WEEK, to_timestamp(1630812366)) +---------- AST ------------ +DateTrunc { + span: Some( + 0..37, + ), + unit: Week, + date: FunctionCall { + span: Some( + 6..30, + ), + func: FunctionCall { + distinct: false, + name: Identifier { + span: Some( + 6..18, + ), + name: "to_timestamp", + quote: None, + ident_type: None, + }, + args: [ + Literal { + span: Some( + 19..29, + ), + value: UInt64( + 1630812366, + ), + }, + ], + params: [], + order_by: [], + window: None, + lambda: None, + }, + }, +} + + ---------- Input ---------- DATEDIFF(SECOND, to_timestamp('2024-01-01 21:01:35.423179'), to_timestamp('2023-12-31 09:38:18.165575')) ---------- Output --------- diff --git a/src/query/ast/tests/it/testdata/stmt-error.txt b/src/query/ast/tests/it/testdata/stmt-error.txt index fde2d3396b889..c641e43391438 100644 --- a/src/query/ast/tests/it/testdata/stmt-error.txt +++ b/src/query/ast/tests/it/testdata/stmt-error.txt @@ -560,7 +560,7 @@ error: --> SQL:1:41 | 1 | SELECT * FROM t GROUP BY GROUPING SETS () - | ------ ^ unexpected `)`, expecting `(`, `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATE_ADD`, `DATE_DIFF`, `DATEDIFF`, or 39 more ... + | ------ ^ unexpected `)`, expecting `(`, `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, `POSITION`, `IdentVariable`, `DATE_ADD`, `DATE_DIFF`, `DATEDIFF`, or 40 more ... | | | while parsing `SELECT ...` @@ -982,7 +982,7 @@ error: --> SQL:1:65 | 1 | CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p) - | ------ -- ---- ^ unexpected end of input, expecting `)`, `(`, `WITHIN`, `IGNORE`, `RESPECT`, `OVER`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, or 45 more ... + | ------ -- ---- ^ unexpected end of input, expecting `)`, `(`, `WITHIN`, `IGNORE`, `RESPECT`, `OVER`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, , , , , , `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, `#-`, , , , , , `CAST`, `TRY_CAST`, `::`, or 46 more ... | | | | | | | | | while parsing `( [, ...])` | | | while parsing expression diff --git a/src/query/expression/src/function.rs b/src/query/expression/src/function.rs index 58430d472a52b..0988582ca7a83 100755 --- a/src/query/expression/src/function.rs +++ b/src/query/expression/src/function.rs @@ -172,6 +172,8 @@ pub struct FunctionContext { pub parse_datetime_ignore_remainder: bool, pub enable_strict_datetime_parser: bool, pub random_function_seed: bool, + pub week_start: u8, + pub date_format_style: String, } impl Default for FunctionContext { @@ -192,6 +194,8 @@ impl Default for FunctionContext { parse_datetime_ignore_remainder: false, enable_strict_datetime_parser: true, random_function_seed: false, + week_start: 0, + date_format_style: "mysql".to_string(), } } } diff --git a/src/query/expression/src/utils/date_helper.rs b/src/query/expression/src/utils/date_helper.rs index 9411bb3b09dbf..fbee42903ce6a 100644 --- a/src/query/expression/src/utils/date_helper.rs +++ b/src/query/expression/src/utils/date_helper.rs @@ -1324,9 +1324,9 @@ impl PGDateTimeFormatter { format_map.push(("YYYY", |dt| dt.strftime("%Y").to_string())); format_map.push(("YY", |dt| dt.strftime("%y").to_string())); - format_map.push(("MM", |dt| dt.strftime("%m").to_string())); - format_map.push(("MON", |dt| dt.strftime("%b").to_string())); format_map.push(("MMMM", |dt| dt.strftime("%B").to_string())); + format_map.push(("MON", |dt| dt.strftime("%b").to_string())); + format_map.push(("MM", |dt| dt.strftime("%m").to_string())); format_map.push(("DD", |dt| dt.strftime("%d").to_string())); format_map.push(("DY", |dt| dt.strftime("%a").to_string())); format_map.push(("HH24", |dt| dt.strftime("%H").to_string())); diff --git a/src/query/functions/src/scalars/timestamp/Cargo.toml b/src/query/functions/src/scalars/timestamp/Cargo.toml index 39eaeb14123b9..a015fcb45f598 100644 --- a/src/query/functions/src/scalars/timestamp/Cargo.toml +++ b/src/query/functions/src/scalars/timestamp/Cargo.toml @@ -11,3 +11,4 @@ databend-common-expression = { workspace = true } dtparse = { workspace = true } jiff = { workspace = true } num-traits = { workspace = true } +regex = { workspace = true } diff --git a/src/query/functions/src/scalars/timestamp/src/datetime.rs b/src/query/functions/src/scalars/timestamp/src/datetime.rs index 9b492bd1a0ab4..1bdc748bf15ad 100644 --- a/src/query/functions/src/scalars/timestamp/src/datetime.rs +++ b/src/query/functions/src/scalars/timestamp/src/datetime.rs @@ -351,11 +351,17 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) { "to_date", |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::>( - |date, format, output, ctx| { + |date_string, format, output, ctx| { if format.is_empty() { output.push_null(); } else { - match NaiveDate::parse_from_str(date, format) { + let format = if ctx.func_ctx.date_format_style == *"mysql" { + format.to_string() + } else { + pg_format_to_strftime(format) + }; + println!("format is {}", format.clone()); + match NaiveDate::parse_from_str(date_string, &format) { Ok(res) => { output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE); } @@ -372,11 +378,16 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) { "try_to_date", |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::>( - |date, format, output, _| { + |date, format, output, ctx| { if format.is_empty() { output.push_null(); } else { - match NaiveDate::parse_from_str(date, format) { + let format = if ctx.func_ctx.date_format_style == *"mysql" { + format.to_string() + } else { + pg_format_to_strftime(format) + }; + match NaiveDate::parse_from_str(date, &format) { Ok(res) => { output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE); } @@ -400,7 +411,13 @@ fn string_to_format_datetime( return Ok((0, true)); } - let (mut tm, offset) = BrokenDownTime::parse_prefix(format, timestamp) + let format = if ctx.func_ctx.date_format_style == *"mysql" { + format.to_string() + } else { + pg_format_to_strftime(format) + }; + + let (mut tm, offset) = BrokenDownTime::parse_prefix(&format, timestamp) .map_err(|err| Box::new(ErrorCode::BadArguments(format!("{err}"))))?; if !ctx.func_ctx.parse_datetime_ignore_remainder && offset != timestamp.len() { @@ -705,7 +722,12 @@ fn register_to_string(registry: &mut FunctionRegistry) { vectorize_with_builder_2_arg::>( |micros, format, output, ctx| { let ts = micros.to_timestamp(ctx.func_ctx.tz.clone()); - let format = replace_time_format(format); + let format = if ctx.func_ctx.date_format_style == *"mysql" { + format.to_string() + } else { + pg_format_to_strftime(format) + }; + let format = replace_time_format(&format); let mut buf = String::new(); let mut formatter = fmt::Formatter::new(&mut buf, FormattingOptions::new()); if Display::fmt(&ts.strftime(format.as_ref()), &mut formatter).is_err() { @@ -2387,3 +2409,43 @@ where T: ToNumber { }), ); } + +#[inline] +pub fn pg_format_to_strftime(pg_format_string: &str) -> String { + let mut result = pg_format_string.to_string(); + + let mut mappings = vec![ + ("YYYY", "%Y"), + ("YY", "%y"), + ("MMMM", "%B"), + ("MON", "%b"), + ("MM", "%m"), + ("DD", "%d"), + ("DY", "%a"), + ("HH24", "%H"), + ("HH12", "%I"), + ("AM", "%p"), + ("PM", "%p"), // AM/PM both map to %p + ("MI", "%M"), + ("SS", "%S"), + ("FF", "%f"), + ("UUUU", "%G"), + ("TZH", "%z"), + ("TZM", "%z"), + ]; + mappings.sort_by(|a, b| b.0.len().cmp(&a.0.len())); + + for (pg_key, strftime_code) in mappings { + let pattern = if pg_key == "MON" { + // should keep "month". Only "MON" as a single string escape it. + format!(r"(?i)\b{}\b", regex::escape(pg_key)) + } else { + format!(r"(?i){}", regex::escape(pg_key)) + }; + let reg = regex::Regex::new(&pattern).expect("Failed to compile regex for format key"); + + // Use replace_all to substitute all occurrences of the PG key with the strftime code. + result = reg.replace_all(&result, strftime_code).to_string(); + } + result +} diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 4776407fa0ea5..c210d218cd185 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -966,6 +966,8 @@ impl TableContext for QueryContext { let geometry_output_format = settings.get_geometry_output_format()?; let parse_datetime_ignore_remainder = settings.get_parse_datetime_ignore_remainder()?; let enable_strict_datetime_parser = settings.get_enable_strict_datetime_parser()?; + let week_start = settings.get_week_start()? as u8; + let date_format_style = settings.get_date_format_style()?; let query_config = &GlobalConfig::instance().query; let random_function_seed = settings.get_random_function_seed()?; @@ -986,6 +988,8 @@ impl TableContext for QueryContext { parse_datetime_ignore_remainder, enable_strict_datetime_parser, random_function_seed, + week_start, + date_format_style, }) } diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 5c8fcf7680712..2a7b446e5ef1b 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -150,6 +150,13 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::Numeric(1..=u64::MAX)), }), + ("week_start", DefaultSettingValue { + value: UserSettingValue::UInt64(1), + desc: "Specifies the first day of the week.(Used by week-related date functions)", + mode: SettingMode::Both, + scope: SettingScope::Both, + range: Some(SettingRange::Numeric(0..=1)), + }), ("parquet_max_block_size", DefaultSettingValue { value: UserSettingValue::UInt64(8192), desc: "Max block size for parquet reader", @@ -324,6 +331,13 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::String(vec!["PostgreSQL".into(), "MySQL".into(), "Experimental".into(), "Hive".into(), "Prql".into()])), }), + ("date_format_style", DefaultSettingValue { + value: UserSettingValue::String("MySQL".to_owned()), + desc: "Sets the date format style(Used by datetime functions). Available values include \"MySQL\", \"Oracle\".", + mode: SettingMode::Both, + scope: SettingScope::Both, + range: Some(SettingRange::String(vec!["Oracle".into(), "MySQL".into()])), + }), ("query_tag", DefaultSettingValue { value: UserSettingValue::String("".to_owned()), desc: "Sets the query tag for this session.", diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index 20b9b1f560bd8..2fd13a4af4acf 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -403,6 +403,10 @@ impl Settings { } } + pub fn get_date_format_style(&self) -> Result { + Ok(self.try_get_string("date_format_style")?.to_lowercase()) + } + pub fn get_collation(&self) -> Result<&str> { match self.try_get_string("collation")?.to_lowercase().as_str() { "utf8" => Ok("utf8"), @@ -762,6 +766,10 @@ impl Settings { self.try_get_u64("cost_factor_aggregate_per_row") } + pub fn get_week_start(&self) -> Result { + self.try_get_u64("week_start") + } + pub fn get_cost_factor_network_per_row(&self) -> Result { self.try_get_u64("cost_factor_network_per_row") } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index d360da7beac5f..8ac38f857f853 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -3268,12 +3268,13 @@ impl<'a> TypeChecker<'a> { ) } ASTIntervalKind::Week => { + let week_start = self.func_ctx.week_start; self.resolve_function( span, "to_start_of_week", vec![], &[date, &Expr::Literal { span: None, - value: Literal::UInt64(1) + value: Literal::UInt64(week_start as u64) }], ) } @@ -3305,7 +3306,7 @@ impl<'a> TypeChecker<'a> { &[date], ) } - _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, quarter, month, day, hour, minute, second]".to_string()).set_span(span)), + _ => Err(ErrorCode::SemanticError("Only these interval types are currently supported: [year, quarter, month, day, hour, minute, second, week]".to_string()).set_span(span)), } } diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test index d98e0ddb21b46..dce3887e21b21 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test @@ -282,6 +282,11 @@ select date_trunc('week', '2025-02-05 00:01:00'); ---- 2025-02-03 +query T +select trunc('2025-02-05 00:01:00', week); +---- +2025-02-03 + query FF SELECT MONTHS_BETWEEN('2019-03-15'::DATE, @@ -1420,6 +1425,31 @@ select * from t order by b statement ok drop table t +query T +settings (date_format_style='Oracle') select to_string('2022-02-02', '精彩的YYYY年,美丽的MMmonth,激动のDDd'); +---- +精彩的2022年,美丽的02month,激动の02d + +query T +settings (date_format_style='Oracle') select to_string('2024-04-05T00:00:00'::TIMESTAMP, 'mon dd HH12AM:MI:SS, yy'); +---- +Apr 05 12AM:00:00, 24 + +query T +settings (date_format_style='Oracle') select str_to_date('精彩的2022年,美丽的02month,激动の02d', '精彩的YYYY年,美丽的MM month,激动のDDd'); +---- +2022-02-02 + +query T +settings (date_format_style='Oracle') select to_date('精彩的2022年,美丽的02month,激动の02d', '精彩的YYYY年,美丽的MM month,激动のDDd'); +---- +2022-02-02 + +query T +settings (date_format_style='Oracle') select str_to_timestamp('2022年02月04日,03时58分59秒', 'YYYY年MM月DD日,HH24时MI分SS秒'); +---- +2022-02-04 03:58:59.000000 + query T select to_string('2022-02-02', '精彩的%Y年,美丽的%mmonth,激动の%dd'); ---- From 6273f80ad8ba5e48d2067c92be020525ce4ae961 Mon Sep 17 00:00:00 2001 From: TCeason Date: Thu, 29 May 2025 12:08:38 +0800 Subject: [PATCH 2/2] 1. to_char as alias of to_string 2. delete TZM --- Cargo.lock | 1 - src/query/expression/src/utils/date_helper.rs | 82 ++++++++++--------- src/query/functions/src/scalars/other.rs | 22 +---- .../src/scalars/timestamp/Cargo.toml | 1 - .../src/scalars/timestamp/src/datetime.rs | 43 +--------- .../it/scalars/testdata/function_list.txt | 27 +++--- .../functions/02_0012_function_datetimes.test | 6 +- 7 files changed, 65 insertions(+), 117 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec9948c6e4408..1e8a29b614e70 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5008,7 +5008,6 @@ dependencies = [ "dtparse", "jiff 0.2.13", "num-traits", - "regex", ] [[package]] diff --git a/src/query/expression/src/utils/date_helper.rs b/src/query/expression/src/utils/date_helper.rs index fbee42903ce6a..c1a6a6c0a8168 100644 --- a/src/query/expression/src/utils/date_helper.rs +++ b/src/query/expression/src/utils/date_helper.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::LazyLock; + use databend_common_exception::Result; use jiff::civil::date; use jiff::civil::datetime; @@ -1315,43 +1317,47 @@ pub fn previous_or_next_day(dt: &Zoned, target: Weekday, is_previous: bool) -> i datetime_to_date_inner_number(dt) + dir * days_diff } -pub struct PGDateTimeFormatter; - -impl PGDateTimeFormatter { - pub fn format(dt: Zoned, format_string: &str) -> String { - let mut result = format_string.to_string(); - let mut format_map: Vec<(&str, fn(&Zoned) -> String)> = Vec::new(); - - format_map.push(("YYYY", |dt| dt.strftime("%Y").to_string())); - format_map.push(("YY", |dt| dt.strftime("%y").to_string())); - format_map.push(("MMMM", |dt| dt.strftime("%B").to_string())); - format_map.push(("MON", |dt| dt.strftime("%b").to_string())); - format_map.push(("MM", |dt| dt.strftime("%m").to_string())); - format_map.push(("DD", |dt| dt.strftime("%d").to_string())); - format_map.push(("DY", |dt| dt.strftime("%a").to_string())); - format_map.push(("HH24", |dt| dt.strftime("%H").to_string())); - format_map.push(("HH12", |dt| dt.strftime("%I").to_string())); - format_map.push(("AM", |dt| dt.strftime("%p").to_string())); - format_map.push(("PM", |dt| dt.strftime("%p").to_string())); - format_map.push(("MI", |dt| dt.strftime("%M").to_string())); - format_map.push(("SS", |dt| dt.strftime("%S").to_string())); - format_map.push(("FF", |dt| dt.strftime("%f").to_string())); - format_map.push(("TZH", |dt| { - dt.strftime("%z").to_string().chars().take(3).collect() - })); - format_map.push(("TZM", |dt| { - dt.strftime("%z") - .to_string() - .chars() - .skip(3) - .take(2) - .collect() - })); - format_map.push(("UUUU", |dt| dt.strftime("%G").to_string())); - for (key, func) in &format_map { - let reg = regex::Regex::new(&format!(r"(?i){}", key)).unwrap(); - result = reg.replace_all(&result, func(&dt)).to_string(); - } - result +static PG_STRFTIME_MAPPINGS: LazyLock> = LazyLock::new(|| { + let mut mappings = vec![ + ("YYYY", "%Y"), + ("YY", "%y"), + ("MMMM", "%B"), + ("MON", "%b"), + ("MM", "%m"), + ("DD", "%d"), + ("DY", "%a"), + ("HH24", "%H"), + ("HH12", "%I"), + ("AM", "%p"), + ("PM", "%p"), + ("MI", "%M"), + ("SS", "%S"), + ("FF", "%f"), + ("UUUU", "%G"), + ("TZHTZM", "%z"), + ("TZH:TZM", "%z"), + ("TZH", "%:::z"), + ]; + // Sort by key length in descending order to ensure + // longer patterns are matched first and to avoid short patterns replacing part of long patterns prematurely. + mappings.sort_by(|a, b| b.0.len().cmp(&a.0.len())); + mappings +}); + +#[inline] +pub fn pg_format_to_strftime(pg_format_string: &str) -> String { + let mut result = pg_format_string.to_string(); + for (pg_key, strftime_code) in PG_STRFTIME_MAPPINGS.iter() { + let pattern = if *pg_key == "MON" { + // should keep "month". Only "MON" as a single string escape it. + format!(r"(?i)\b{}\b", regex::escape(pg_key)) + } else { + format!(r"(?i){}", regex::escape(pg_key)) + }; + let reg = regex::Regex::new(&pattern).expect("Failed to compile regex for format key"); + + // Use replace_all to substitute all occurrences of the PG key with the strftime code. + result = reg.replace_all(&result, *strftime_code).to_string(); } + result } diff --git a/src/query/functions/src/scalars/other.rs b/src/query/functions/src/scalars/other.rs index c62ec12646544..81fa21d7c5bac 100644 --- a/src/query/functions/src/scalars/other.rs +++ b/src/query/functions/src/scalars/other.rs @@ -21,8 +21,6 @@ use databend_common_base::base::convert_byte_size; use databend_common_base::base::convert_number_size; use databend_common_base::base::uuid::Uuid; use databend_common_base::base::OrderedFloat; -use databend_common_expression::date_helper::DateConverter; -use databend_common_expression::date_helper::PGDateTimeFormatter; use databend_common_expression::error_to_null; use databend_common_expression::types::boolean::BooleanDomain; use databend_common_expression::types::nullable::NullableColumn; @@ -397,8 +395,9 @@ fn register_grouping(registry: &mut FunctionRegistry) { } fn register_num_to_char(registry: &mut FunctionRegistry) { + registry.register_aliases("to_string", &["to_char"]); registry.register_passthrough_nullable_2_arg::( - "to_char", + "to_string", |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |value, fmt, builder, ctx| { @@ -427,7 +426,7 @@ fn register_num_to_char(registry: &mut FunctionRegistry) { ); registry.register_passthrough_nullable_2_arg::( - "to_char", + "to_string", |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |value, fmt, builder, ctx| { @@ -457,7 +456,7 @@ fn register_num_to_char(registry: &mut FunctionRegistry) { ); registry.register_passthrough_nullable_2_arg::( - "to_char", + "to_string", |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |value, fmt, builder, ctx| { @@ -485,19 +484,6 @@ fn register_num_to_char(registry: &mut FunctionRegistry) { }, ), ); - - registry.register_passthrough_nullable_2_arg::( - "to_char", - |_, _, _| FunctionDomain::Full, - vectorize_with_builder_2_arg::( - |micros, format, output, ctx| { - let ts = micros.to_timestamp(ctx.func_ctx.tz.clone()); - let res = PGDateTimeFormatter::format(ts, format); - output.put_str(&res); - output.commit_row(); - }, - ), - ); } /// Compute `grouping` by `grouping_id` and `cols`. diff --git a/src/query/functions/src/scalars/timestamp/Cargo.toml b/src/query/functions/src/scalars/timestamp/Cargo.toml index a015fcb45f598..39eaeb14123b9 100644 --- a/src/query/functions/src/scalars/timestamp/Cargo.toml +++ b/src/query/functions/src/scalars/timestamp/Cargo.toml @@ -11,4 +11,3 @@ databend-common-expression = { workspace = true } dtparse = { workspace = true } jiff = { workspace = true } num-traits = { workspace = true } -regex = { workspace = true } diff --git a/src/query/functions/src/scalars/timestamp/src/datetime.rs b/src/query/functions/src/scalars/timestamp/src/datetime.rs index 1bdc748bf15ad..137d418c27a31 100644 --- a/src/query/functions/src/scalars/timestamp/src/datetime.rs +++ b/src/query/functions/src/scalars/timestamp/src/datetime.rs @@ -360,7 +360,6 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) { } else { pg_format_to_strftime(format) }; - println!("format is {}", format.clone()); match NaiveDate::parse_from_str(date_string, &format) { Ok(res) => { output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE); @@ -715,7 +714,7 @@ fn register_number_to_date(registry: &mut FunctionRegistry) { } fn register_to_string(registry: &mut FunctionRegistry) { - registry.register_aliases("to_string", &["date_format", "strftime"]); + registry.register_aliases("to_string", &["date_format", "strftime", "to_char"]); registry.register_combine_nullable_2_arg::( "to_string", |_, _, _| FunctionDomain::MayThrow, @@ -2409,43 +2408,3 @@ where T: ToNumber { }), ); } - -#[inline] -pub fn pg_format_to_strftime(pg_format_string: &str) -> String { - let mut result = pg_format_string.to_string(); - - let mut mappings = vec![ - ("YYYY", "%Y"), - ("YY", "%y"), - ("MMMM", "%B"), - ("MON", "%b"), - ("MM", "%m"), - ("DD", "%d"), - ("DY", "%a"), - ("HH24", "%H"), - ("HH12", "%I"), - ("AM", "%p"), - ("PM", "%p"), // AM/PM both map to %p - ("MI", "%M"), - ("SS", "%S"), - ("FF", "%f"), - ("UUUU", "%G"), - ("TZH", "%z"), - ("TZM", "%z"), - ]; - mappings.sort_by(|a, b| b.0.len().cmp(&a.0.len())); - - for (pg_key, strftime_code) in mappings { - let pattern = if pg_key == "MON" { - // should keep "month". Only "MON" as a single string escape it. - format!(r"(?i)\b{}\b", regex::escape(pg_key)) - } else { - format!(r"(?i){}", regex::escape(pg_key)) - }; - let reg = regex::Regex::new(&pattern).expect("Failed to compile regex for format key"); - - // Use replace_all to substitute all occurrences of the PG key with the strftime code. - result = reg.replace_all(&result, strftime_code).to_string(); - } - result -} diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 7da7edaa72281..5bf48cc295008 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -72,6 +72,7 @@ substr_utf8 -> substr substring -> substr substring_utf8 -> substr subtract -> minus +to_char -> to_string to_datetime -> to_timestamp to_text -> to_string to_varchar -> to_string @@ -3845,14 +3846,6 @@ Functions overloads: 23 to_boolean(Float64 NULL) :: Boolean NULL 0 to_centuries(Int64) :: Interval 1 to_centuries(Int64 NULL) :: Interval NULL -0 to_char(Int64, String) :: String -1 to_char(Int64 NULL, String NULL) :: String NULL -2 to_char(Float32, String) :: String -3 to_char(Float32 NULL, String NULL) :: String NULL -4 to_char(Float64, String) :: String -5 to_char(Float64 NULL, String NULL) :: String NULL -6 to_char(Timestamp, String) :: String -7 to_char(Timestamp NULL, String NULL) :: String NULL 0 to_date(Variant) :: Date NULL 1 to_date(Variant NULL) :: Date NULL 2 to_date(String, String) :: Date NULL @@ -4258,12 +4251,18 @@ Functions overloads: 30 to_string(Timestamp NULL) :: String NULL 31 to_string(Binary) :: String 32 to_string(Binary NULL) :: String NULL -33 to_string(Bitmap) :: String -34 to_string(Bitmap NULL) :: String NULL -35 to_string(Geometry) :: String -36 to_string(Geometry NULL) :: String NULL -37 to_string(Interval) :: String -38 to_string(Interval NULL) :: String NULL +33 to_string(Int64, String) :: String +34 to_string(Int64 NULL, String NULL) :: String NULL +35 to_string(Float32, String) :: String +36 to_string(Float32 NULL, String NULL) :: String NULL +37 to_string(Float64, String) :: String +38 to_string(Float64 NULL, String NULL) :: String NULL +39 to_string(Bitmap) :: String +40 to_string(Bitmap NULL) :: String NULL +41 to_string(Geometry) :: String +42 to_string(Geometry NULL) :: String NULL +43 to_string(Interval) :: String +44 to_string(Interval NULL) :: String NULL 0 to_timestamp(Variant) :: Timestamp NULL 1 to_timestamp(Variant NULL) :: Timestamp NULL 2 to_timestamp(String) :: Timestamp diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test index dce3887e21b21..dd087ba04c46c 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test @@ -1471,17 +1471,17 @@ select date_format('2022-02-04T03:58:59', '%x'), strftime('2022-02-04T03:58:59', 2022-02-04 03:58:59 2022-02-04 03:58:59 query TTT -select to_char('2024-04-05'::DATE, 'mon dd, yyyy'); +settings (date_format_style='Oracle') select to_char('2024-04-05'::DATE, 'mon dd, yyyy'); ---- Apr 05, 2024 query TTT -settings (timezone = 'Asia/Shanghai') select to_char('2024-04-05'::DATE, 'mon dd, yyyy TZH'); +settings (timezone = 'Asia/Shanghai', date_format_style='Oracle') select to_char('2024-04-05'::DATE, 'mon dd, yyyy TZH'); ---- Apr 05, 2024 +08 query TTT -select to_char('2024-04-05T00:00:00'::TIMESTAMP, 'mon dd HH12AM:MI:SS, yy'); +settings (date_format_style='Oracle') select to_char('2024-04-05T00:00:00'::TIMESTAMP, 'mon dd HH12AM:MI:SS, yy'); ---- Apr 05 12AM:00:00, 24