diff --git a/extensions/functions_string.yaml b/extensions/functions_string.yaml index 399fb3797..7899929c9 100644 --- a/extensions/functions_string.yaml +++ b/extensions/functions_string.yaml @@ -1558,6 +1558,60 @@ scalar_functions: dotall: values: [ DOTALL_DISABLED, DOTALL_ENABLED ] return: "List" + - + name: split_part + description: >- + Split a string using a delimiter and return the `field`-th substring (starting at 1). If `field` + is larger than the number of substrings, an empty string is returned. + impls: + - args: + - value: "varchar" + name: "input" + - value: "varchar" + name: "delimiter" + - value: i32 + name: "field" + return: "varchar" + - args: + - value: "string" + name: "input" + - value: "string" + name: "delimiter" + - value: i32 + name: "field" + return: "string" + - + name: chr + description: >- + Return a single character whose codepoint is the specified integer. Behaviour is undefined if + the `codepoint` does not correspond to a valid Unicode scalar value. + impls: + - args: + - value: i64 + name: "codepoint" + return: "string" + - + name: translate + description: >- + Replace each occurrence of characters from `from` with the corresponding character in `to`. + If `to` is shorter than `from`, extra characters are removed from the result. Similar to the Unix `tr` command. 
+ impls: + - args: + - value: "varchar" + name: "input" + - value: "varchar" + name: "from" + - value: "varchar" + name: "to" + return: "varchar" + - args: + - value: "string" + name: "input" + - value: "string" + name: "from" + - value: "string" + name: "to" + return: "string" aggregate_functions: diff --git a/tests/baseline.json b/tests/baseline.json index bde8b2093..596e31ed3 100644 --- a/tests/baseline.json +++ b/tests/baseline.json @@ -1,16 +1,16 @@ { - "registry": { - "dependency_count": 13, - "extension_count": 13, - "function_count": 165, - "num_aggregate_functions": 29, - "num_scalar_functions": 158, - "num_window_functions": 11, - "num_function_overloads": 517 - }, - "coverage": { - "total_test_count": 1086, - "num_function_variants": 517, - "num_covered_function_variants": 229 - } + "registry": { + "dependency_count": 13, + "extension_count": 13, + "function_count": 165, + "num_aggregate_functions": 29, + "num_scalar_functions": 158, + "num_window_functions": 11, + "num_function_overloads": 517 + }, + "coverage": { + "total_test_count": 1164, + "num_function_variants": 532, + "num_covered_function_variants": 242 + } } diff --git a/tests/cases/string/chr.test b/tests/cases/string/chr.test new file mode 100644 index 000000000..d22bbc34f --- /dev/null +++ b/tests/cases/string/chr.test @@ -0,0 +1,12 @@ +### SUBSTRAIT_SCALAR_TEST: v1.0 +### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' + +# basic: Basic examples without any special cases +chr(65::i64) = 'A'::str +chr(97::i64) = 'a'::str +chr(48::i64) = '0'::str +chr(8364::i64) = '€'::str +chr(128512::i64) = '😀'::str + +# null_input: Examples with null as input +chr(null::i64) = null::str diff --git a/tests/cases/string/split_part.test b/tests/cases/string/split_part.test new file mode 100644 index 000000000..ddf4083bf --- /dev/null +++ b/tests/cases/string/split_part.test @@ -0,0 +1,31 @@ +### SUBSTRAIT_SCALAR_TEST: v1.0 +### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' + +# basic: Basic 
examples, no special cases +split_part('abc,def,ghi'::str, ','::str, 1::i32) = 'abc'::str +split_part('abc,def,ghi'::str, ','::str, 2::i32) = 'def'::str +split_part('abc,def,ghi'::str, ','::str, 3::i32) = 'ghi'::str +split_part('abc,def,ghi'::str, ','::str, 4::i32) = ''::str +split_part('a|b|c|d'::str, '|'::str, 1::i32) = 'a'::str +split_part('a|b|c|d'::str, '|'::str, 2::i32) = 'b'::str +split_part('a|b|c|d'::str, '|'::str, 3::i32) = 'c'::str +split_part('a|b|c|d'::str, '|'::str, 4::i32) = 'd'::str +split_part('a|b|c|d'::str, '|'::str, 5::i32) = ''::str +split_part('hello world test'::str, ' '::str, 1::i32) = 'hello'::str +split_part('hello world test'::str, ' '::str, 2::i32) = 'world'::str +split_part('hello world test'::str, ' '::str, 3::i32) = 'test'::str + +# basic_delimiters: Basic examples without any special cases, multi-delimiters +split_part('abc~@~def~@~ghi'::str, '~@~'::str, 1::i32) = 'abc'::str +split_part('abc~@~def~@~ghi'::str, '~@~'::str, 2::i32) = 'def'::str +split_part('abc~@~def~@~ghi'::str, '~@~'::str, 3::i32) = 'ghi'::str +split_part('abc~@~def~@~ghi'::str, '~@~'::str, 4::i32) = ''::str + +# missing_delimiter: Examples where delimiter not present +split_part('abc'::str, ','::str, 1::i32) = 'abc'::str +split_part('abc'::str, ','::str, 2::i32) = ''::str + +# null_input: Examples with null as input +split_part(null::str, ','::str, 1::i32) = null::str +split_part('abc,def'::str, null::str, 1::i32) = null::str +split_part('abc,def'::str, ','::str, null::i32) = null::str diff --git a/tests/cases/string/translate.test b/tests/cases/string/translate.test new file mode 100644 index 000000000..27390f0cf --- /dev/null +++ b/tests/cases/string/translate.test @@ -0,0 +1,18 @@ +### SUBSTRAIT_SCALAR_TEST: v1.0 +### SUBSTRAIT_INCLUDE: '/extensions/functions_string.yaml' + +# basic: Basic examples without any special cases +translate('banana'::str, 'an'::str, 'oy'::str) = 'boyoyo'::str +translate('Hello World!'::str, ' !'::str, 'x'::str) = 'HelloxWorld'::str + 
+# removal: Examples where replacement string shorter than source, resulting in removal +translate('hello'::str, 'aeiou'::str, ''::str) = 'hll'::str +translate('aabbcc'::str, 'abc'::str, 'a'::str) = 'aa'::str + +# null_input: Examples with null as input +translate(null::str, 'a'::str, 'b'::str) = null::str +translate('hello'::str, null::str, 'b'::str) = null::str +translate('hello'::str, 'l'::str, null::str) = null::str + +# unicode: Examples with unicode characters +translate('àéà'::str, 'à'::str, 'a'::str) = 'aéa'::str diff --git a/tests/coverage/test_coverage.py b/tests/coverage/test_coverage.py index 5aae0eb73..f209a4e85 100644 --- a/tests/coverage/test_coverage.py +++ b/tests/coverage/test_coverage.py @@ -426,6 +426,15 @@ def test_parse_errors_with_bad_aggregate_testcases(input_func_test, expected_mes "bitwise_and(-31766::dec<5, 0>, 900::dec<3, 0>) = 896::dec<5, 0>", "or(true::bool, true::bool) = true::bool", "between(5::i8, 0::i8, 127::i8) = true::bool", + "split_part('a,b,c'::str, ','::str, 2::i32) = 'b'::str", + "split_part('hello world'::varchar<20>, ' '::varchar<5>, 1::i32) = 'hello'::varchar<20>", + "split_part('one|two|three'::string, '|'::string, 3::i32) = 'three'::string", + "chr(65::i64) = 'A'::string", + "chr(8364::i64) = '€'::string", + "chr(128512::i64) = '😀'::string", + "translate('hello'::str, 'el'::str, 'XY'::str) = 'hXYYo'::str", + "translate('abcdef'::varchar<10>, 'ace'::varchar<5>, 'XYZ'::varchar<5>) = 'XbYdZf'::varchar<10>", + "translate('test'::string, 'ts'::string, 'XY'::string) = 'XeYX'::string", ], ) def test_parse_various_scalar_func_argument_types(input_func_test):