Skip to content

Commit 3fe7927

Browse files
dataflow-expression: Add missing datatypes for MySQL data types conversion
- Currently, for any non parametrized binary operations (column vs column) we always figure out some common datatype, the operands might be coerced to. Prior to the fix, the temporal datatypes, CHAR/VARCHAR, BOOL, and certain combinations of the numerical types were wrongly defaulted to type DOUBLE, what caused issues later on. The fix added support for the missing datatypes combinations. Note, the fix still does not handle DECIMAL vs DECIMAL correctly. Fixes: REA-4440 Change-Id: I1474a36536d9a70f01c2c3089095fa9848ef2437 Reviewed-on: https://gerrit.readyset.name/c/readyset/+/7584 Tested-by: Buildkite CI Reviewed-by: Marcelo Altmann <[email protected]>
1 parent d169608 commit 3fe7927

File tree

3 files changed

+180
-77
lines changed

3 files changed

+180
-77
lines changed

dataflow-expression/src/lower.rs

Lines changed: 134 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
use std::iter;
1+
use std::{cmp, iter};
22

33
use nom_sql::{
44
BinaryOperator as SqlBinaryOperator, Column, DialectDisplay, Expr as AstExpr, FunctionExpr,
55
InValue, Relation, UnaryOperator,
66
};
77
use readyset_data::dialect::SqlEngine;
8-
use readyset_data::{DfType, DfValue};
8+
use readyset_data::{Collation, DfType, DfValue};
99
use readyset_errors::{
1010
internal, internal_err, invalid_query, invalid_query_err, unsupported, ReadySetError,
1111
ReadySetResult,
@@ -459,72 +459,140 @@ impl BuiltinFunction {
459459
}
460460
}
461461

462+
fn mysql_temporal_types_cvt(left: &DfType, right: &DfType) -> Option<DfType> {
463+
let left_is_date_and_time = left.is_date_and_time();
464+
let right_is_date_and_time = right.is_date_and_time();
465+
466+
let left_is_temporal = left_is_date_and_time || left.is_any_temporal();
467+
let right_is_temporal = right_is_date_and_time || right.is_any_temporal();
468+
469+
if left_is_temporal && right_is_temporal {
470+
if left_is_date_and_time && right_is_date_and_time {
471+
if let DfType::DateTime { subsecond_digits } = left {
472+
Some(DfType::DateTime {
473+
subsecond_digits: cmp::max(*subsecond_digits, right.subsecond_digits()?),
474+
})
475+
} else if let DfType::DateTime { subsecond_digits } = right {
476+
Some(DfType::DateTime {
477+
subsecond_digits: cmp::max(*subsecond_digits, left.subsecond_digits()?),
478+
})
479+
} else if let DfType::TimestampTz { .. } = left
480+
&& let DfType::TimestampTz { .. } = right
481+
{
482+
Some(DfType::TimestampTz {
483+
subsecond_digits: cmp::max(left.subsecond_digits()?, right.subsecond_digits()?),
484+
})
485+
} else {
486+
Some(DfType::Timestamp {
487+
subsecond_digits: cmp::max(left.subsecond_digits()?, right.subsecond_digits()?),
488+
})
489+
}
490+
} else if let DfType::Time {
491+
subsecond_digits: left_subsec_digs,
492+
} = left
493+
&& let DfType::Time {
494+
subsecond_digits: right_subsec_digs,
495+
} = right
496+
{
497+
Some(DfType::Time {
498+
subsecond_digits: cmp::max(*left_subsec_digs, *right_subsec_digs),
499+
})
500+
} else if (matches!(left, DfType::Time { .. }) && right.is_any_int())
501+
|| (matches!(right, DfType::Time { .. }) && left.is_any_int())
502+
{
503+
Some(DfType::BigInt)
504+
} else if left_is_date_and_time {
505+
Some(left.clone())
506+
} else if right_is_date_and_time {
507+
Some(right.clone())
508+
} else {
509+
Some(DfType::Date)
510+
}
511+
} else if (left_is_date_and_time && right.is_any_int())
512+
|| (right_is_date_and_time && left.is_any_int())
513+
{
514+
Some(DfType::BigInt)
515+
} else if left_is_temporal && right.is_any_text() {
516+
Some(left.clone())
517+
} else if right_is_temporal && left.is_any_text() {
518+
Some(right.clone())
519+
} else {
520+
None
521+
}
522+
}
523+
524+
fn get_text_type_max_length(ty: &DfType) -> Option<u16> {
525+
match ty {
526+
DfType::Text(..) => Some(65535),
527+
DfType::VarChar(ln, _) => Some(*ln),
528+
DfType::Char(ln, _) => Some(*ln),
529+
_ => None,
530+
}
531+
}
532+
533+
fn mysql_text_type_cvt(left: &DfType, right: &DfType) -> Option<DfType> {
534+
if left.is_any_text() && right.is_any_text() {
535+
if matches!(left, DfType::Text(..)) || matches!(right, DfType::Text(..)) {
536+
Some(DfType::DEFAULT_TEXT)
537+
} else {
538+
let left_len = get_text_type_max_length(left)?;
539+
let right_len = get_text_type_max_length(right)?;
540+
if let DfType::Char(..) = left
541+
&& let DfType::Char(..) = right
542+
{
543+
Some(DfType::Char(
544+
cmp::max(left_len, right_len),
545+
Collation::default(),
546+
))
547+
} else {
548+
Some(DfType::VarChar(
549+
cmp::max(left_len, right_len),
550+
Collation::default(),
551+
))
552+
}
553+
}
554+
} else {
555+
None
556+
}
557+
}
558+
559+
fn mysql_numerical_type_cvt(left: &DfType, right: &DfType) -> Option<DfType> {
560+
if left.is_any_float() || right.is_any_float() {
561+
Some(DfType::Double)
562+
} else if left.is_any_int() && right.is_any_int() {
563+
if left.is_any_unsigned_int() && right.is_any_unsigned_int() {
564+
Some(DfType::UnsignedBigInt)
565+
} else {
566+
Some(DfType::BigInt)
567+
}
568+
} else {
569+
let left_is_decimal = left.is_numeric();
570+
let right_is_decimal = right.is_numeric();
571+
if left_is_decimal && right_is_decimal {
572+
// TODO: should return decimal, capable of storing max pres and scale
573+
Some(left.clone())
574+
} else if left_is_decimal && right.is_any_exact_number() {
575+
Some(left.clone())
576+
} else if left.is_any_exact_number() && right_is_decimal {
577+
Some(right.clone())
578+
} else {
579+
None
580+
}
581+
}
582+
}
583+
462584
/// <https://dev.mysql.com/doc/refman/8.0/en/type-conversion.html>
463585
fn mysql_type_conversion(left_ty: &DfType, right_ty: &DfType) -> DfType {
464-
match (left_ty, right_ty) {
465-
// If both arguments in a comparison operation are strings, they are compared as strings.
466-
(DfType::Text(_), DfType::Text(_)) => DfType::DEFAULT_TEXT,
467-
468-
// If both arguments are integers, they are compared as integers.
469-
(
470-
DfType::TinyInt
471-
| DfType::UnsignedTinyInt
472-
| DfType::SmallInt
473-
| DfType::UnsignedSmallInt
474-
| DfType::MediumInt
475-
| DfType::UnsignedMediumInt
476-
| DfType::Int
477-
| DfType::UnsignedInt
478-
| DfType::BigInt
479-
| DfType::UnsignedBigInt,
480-
DfType::TinyInt
481-
| DfType::UnsignedTinyInt
482-
| DfType::SmallInt
483-
| DfType::UnsignedSmallInt
484-
| DfType::MediumInt
485-
| DfType::UnsignedMediumInt
486-
| DfType::Int
487-
| DfType::UnsignedInt
488-
| DfType::BigInt
489-
| DfType::UnsignedBigInt,
490-
) => DfType::BigInt,
491-
492-
// > Hexadecimal values are treated as binary strings if not compared to a number.
493-
// TODO
494-
495-
// > If one of the arguments is a TIMESTAMP or DATETIME column and the other argument is a
496-
// > constant, the constant is converted to a timestamp before the comparison is performed.
497-
// > This is done to be more ODBC-friendly. This is not done for the arguments to IN()
498-
// TODO
499-
500-
// > If one of the arguments is a decimal value, comparison depends on the other argument.
501-
// > The arguments are compared as decimal values if the other argument is a decimal or
502-
// > integer value...
503-
(
504-
decimal @ DfType::Numeric { .. },
505-
DfType::Numeric { .. }
506-
| DfType::TinyInt
507-
| DfType::UnsignedTinyInt
508-
| DfType::SmallInt
509-
| DfType::UnsignedSmallInt
510-
| DfType::MediumInt
511-
| DfType::UnsignedMediumInt
512-
| DfType::Int
513-
| DfType::UnsignedInt
514-
| DfType::BigInt
515-
| DfType::UnsignedBigInt,
516-
) => decimal.clone(),
517-
518-
// > or as floating-point values if the other argument is a floating-point value.
519-
(DfType::Numeric { .. }, DfType::Float | DfType::Double)
520-
| (DfType::Float | DfType::Double, DfType::Numeric { .. }) => DfType::Double,
521-
(DfType::DateTime { subsecond_digits }, DfType::Text(_)) => DfType::DateTime {
522-
subsecond_digits: *subsecond_digits,
523-
},
524-
(DfType::Bool, DfType::Bool) => DfType::Bool,
525-
// > In all other cases, the arguments are compared as floating-point (double-precision)
526-
// > numbers.
527-
_ => DfType::Double,
586+
if left_ty.is_bool() && right_ty.is_bool() {
587+
DfType::Bool
588+
} else if let Some(ty) = mysql_text_type_cvt(left_ty, right_ty) {
589+
ty
590+
} else if let Some(ty) = mysql_temporal_types_cvt(left_ty, right_ty) {
591+
ty
592+
} else if let Some(ty) = mysql_numerical_type_cvt(left_ty, right_ty) {
593+
ty
594+
} else {
595+
DfType::Double
528596
}
529597
}
530598

dataflow-expression/tests/mysql_oracle.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ async fn example_exprs_eval_same_as_mysql() {
116116
"'7' + 5",
117117
"5 < '5.0'",
118118
"'5.0' > 5",
119+
"'2004-01-01 12:00:00' = convert_tz('2004-01-01 12:00:00','GMT','MET')",
120+
"'asdfadsf' = 'asdfadsf'",
121+
"'12345' > '00012345'",
122+
"timediff('2004-01-01 12:30:00', '2004-01-01 12:00:00') = '00:30:00'",
119123
"convert_tz('2004-01-01 12:00:00','GMT','MET')",
120124
"convert_tz('asdfadsf','asdf','MET')",
121125
"convert_tz('asdfadsf','asdf',null)",

readyset-data/src/type.rs

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -467,17 +467,7 @@ impl DfType {
467467
/// Returns `true` if this is any `*int` type.
468468
#[inline]
469469
pub fn is_any_int(&self) -> bool {
470-
matches!(
471-
*self,
472-
Self::TinyInt
473-
| Self::UnsignedTinyInt
474-
| Self::SmallInt
475-
| Self::UnsignedSmallInt
476-
| Self::Int
477-
| Self::UnsignedInt
478-
| Self::BigInt
479-
| Self::UnsignedBigInt
480-
)
470+
self.is_any_normal_int() || self.is_any_bigint()
481471
}
482472

483473
/// Returns `true` if this is any large integer type (bigint/int8).
@@ -502,6 +492,19 @@ impl DfType {
502492
)
503493
}
504494

495+
/// Returns `true` if this is any unsigned integer type.
496+
#[inline]
497+
pub fn is_any_unsigned_int(&self) -> bool {
498+
matches!(
499+
*self,
500+
Self::UnsignedTinyInt
501+
| Self::UnsignedSmallInt
502+
| Self::UnsignedInt
503+
| Self::UnsignedMediumInt
504+
| Self::UnsignedBigInt
505+
)
506+
}
507+
505508
/// Returns `true` if this is any IEEE 754 floating-point type.
506509
#[inline]
507510
pub fn is_any_float(&self) -> bool {
@@ -526,6 +529,34 @@ impl DfType {
526529
matches!(*self, Self::Numeric { .. })
527530
}
528531

532+
/// Returns `true` if this is DATE/TIMESTAMP/TIMESTAMPTZ/DATETIME type.
533+
#[inline]
534+
pub fn is_any_temporal(&self) -> bool {
535+
matches!(
536+
*self,
537+
Self::DateTime { .. }
538+
| Self::Date
539+
| Self::Timestamp { .. }
540+
| Self::TimestampTz { .. }
541+
| Self::Time { .. }
542+
)
543+
}
544+
545+
/// Returns `true` if this is any exact number type (INTEGER(s), DECIMAL).
546+
#[inline]
547+
pub fn is_any_exact_number(&self) -> bool {
548+
self.is_any_int() || self.is_numeric()
549+
}
550+
551+
/// Returns `true` if this is DATETIME/TIMESTAMP/TIMESTAMPTZ.
552+
#[inline]
553+
pub fn is_date_and_time(&self) -> bool {
554+
matches!(
555+
*self,
556+
Self::DateTime { .. } | Self::Timestamp { .. } | Self::TimestampTz { .. }
557+
)
558+
}
559+
529560
/// Returns `true` if this is any PostgreSQL array type.
530561
#[inline]
531562
pub fn is_array(&self) -> bool {

0 commit comments

Comments
 (0)