Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

branch-3.0: [fix](Nereids) string constant folding mistakenly treats the delimiter as a regex #48783 #48822

Open
wants to merge 1 commit into
base: branch-3.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral;
import org.apache.doris.nereids.types.ArrayType;

import com.google.common.collect.ImmutableList;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
Expand All @@ -50,6 +53,7 @@
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
* executable functions:
Expand Down Expand Up @@ -658,14 +662,18 @@ public static Expression space(IntegerLiteral first) {
}

/**
* Executable arithmetic functions split_by_char
* Executable arithmetic functions split_by_string
*/
@ExecFunction(name = "split_by_char")
public static Expression splitByChar(StringLikeLiteral first, StringLikeLiteral second) {
String[] result = first.getValue().split(second.getValue(), -1);
@ExecFunction(name = "split_by_string")
public static Expression splitByString(StringLikeLiteral first, StringLikeLiteral second) {
if (first.getValue().isEmpty()) {
return new ArrayLiteral(ImmutableList.of(), ArrayType.of(first.getDataType()));
}
int limit = second.getValue().isEmpty() ? 0 : -1;
String[] result = first.getValue().split(Pattern.quote(second.getValue()), limit);
List<Literal> items = new ArrayList<>();
for (int i = 1; i < result.length; i++) {
items.add((Literal) castStringLikeLiteral(first, result[i]));
for (String s : result) {
items.add((Literal) castStringLikeLiteral(first, s));
}
return new ArrayLiteral(items);
}
Expand All @@ -675,35 +683,34 @@ public static Expression splitByChar(StringLikeLiteral first, StringLikeLiteral
*/
@ExecFunction(name = "split_part")
public static Expression splitPart(StringLikeLiteral first, StringLikeLiteral chr, IntegerLiteral number) {
if (number.getValue() == 0) {
return new NullLiteral(first.getDataType());
}
if (chr.getValue().isEmpty()) {
return castStringLikeLiteral(first, "");
}
if (first.getValue().isEmpty()) {
return new NullLiteral(first.getDataType());
}
if (first.getValue().equals(chr.getValue())) {
if (Math.abs(number.getValue()) == 1 || Math.abs(number.getValue()) == 2) {
return castStringLikeLiteral(first, "");
} else {
return new NullLiteral(first.getDataType());
}
}
String separator = chr.getValue();
String[] parts = null;
String[] parts;
if (number.getValue() < 0) {
StringBuilder sb = new StringBuilder(first.getValue());
StringBuilder seperatorBuilder = new StringBuilder(separator);
separator = seperatorBuilder.reverse().toString();
if (".$|()[{^?*+\\".contains(separator) || separator.startsWith("\\")) {
separator = "\\" + separator;
}
parts = sb.reverse().toString().split(separator, -1);
StringBuilder separatorBuilder = new StringBuilder(separator);
separator = separatorBuilder.reverse().toString();
parts = sb.reverse().toString().split(Pattern.quote(separator), -1);
} else {
if (".$|()[{^?*+\\".contains(separator) || separator.startsWith("\\")) {
separator = "\\" + separator;
}
parts = first.getValue().split(separator, -1);
parts = first.getValue().split(Pattern.quote(separator), -1);
}

if (parts.length < Math.abs(number.getValue()) || number.getValue() == 0) {
if (parts.length == Math.abs(number.getValue())) {
if (number.getValue() < 0 && first.getValue().startsWith(chr.getValue())
|| number.getValue() > 0 && first.getValue().endsWith(chr.getValue())) {
return castStringLikeLiteral(first, "");
}
}
if (parts.length < Math.abs(number.getValue())) {
return new NullLiteral(first.getDataType());
} else if (number.getValue() < 0) {
StringBuilder result = new StringBuilder(parts[Math.abs(number.getValue()) - 1]);
Expand All @@ -721,7 +728,7 @@ public static Expression substringIndex(StringLikeLiteral first, StringLikeLiter
if (chr.getValue().isEmpty()) {
return chr;
}
String[] parts = first.getValue().split(chr.getValue(), -1);
String[] parts = first.getValue().split(Pattern.quote(chr.getValue()), -1);
if (Math.abs(number.getValue()) >= parts.length) {
return first;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -461,18 +461,80 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select split_by_string(cast('abc' as string), cast('::' as string))")
testFoldConst("select split_by_string('上海天津北京杭州', '北')")
testFoldConst("select split_by_string('abccccc', 'c')")
testFoldConst("select split_by_string('abcde','')")
testFoldConst("select split_by_string('你a好b世c界','')")
testFoldConst("select split_by_string('12553','')")
testFoldConst("select split_by_string('','')")
testFoldConst("select split_by_string('',',')")
testFoldConst("select split_by_string('','a')")
testFoldConst("select split_by_string('','abc')")
testFoldConst("select split_by_string('abc','')")
testFoldConst("select split_by_string('a1b1c1d','1')")
testFoldConst("select split_by_string(',,,',',')")
testFoldConst("select split_by_string('a,b,c,abcde',',')")
testFoldConst("select split_by_string(',,a,b,c,',',')")
testFoldConst("select split_by_string('null',',')")
testFoldConst("select split_by_string('1,,2,3,,4,5,,abcde', ',,')")
testFoldConst("select split_by_string('abcde','')")
testFoldConst("select split_by_string('1,,2,3,,,,,,4,5, abcde', ',,')")
testFoldConst("select split_by_string(',,,,',',,')")
testFoldConst("select split_by_string('a,,b,,c',',,')")
testFoldConst("select split_by_string('a,,b,,c,,',',,')")
testFoldConst("select split_by_string(',,a,,b,,c,,',',,')")
// split_by_string with two-character separators built from regex metacharacters:
// . $ | ( ) [ { ^ ? * + and backslash. One case per metacharacter, all against the
// same input string, so a separator that is interpreted as a regular expression
// instead of literal text would split at the wrong places.
// Escaping note: inside these Groovy double-quoted strings "\$" is a literal '$'
// and "\\\\" is two backslash characters; how the SQL layer then unescapes the
// backslashes is not visible here -- TODO confirm.
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++')")
testFoldConst("SELECT split_by_string('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\')")

// split_part
testFoldConst("select split_part('a,b,c', ',', -1)")
testFoldConst("select split_part('abc##123###xyz', '##', 0)")
testFoldConst("select split_part('a,b,c', '', -2)")
testFoldConst("select split_part('a,b,c', '', -1)")
testFoldConst("select split_part('a,b,c', '', 0)")
testFoldConst("select split_part('a,b,c', '', 1)")
testFoldConst("select split_part('a,b,c', '', 2)")
testFoldConst("select split_part('', '', -2)")
testFoldConst("select split_part('', '', -1)")
testFoldConst("select split_part('', '', 0)")
testFoldConst("select split_part('', '', 1)")
testFoldConst("select split_part('', '', 2)")
testFoldConst("select split_part('', 'abc', -2)")
testFoldConst("select split_part('', 'abc', -1)")
testFoldConst("select split_part('', 'abc', 0)")
testFoldConst("select split_part('', 'abc', 1)")
testFoldConst("select split_part('', 'abc', 2)")
testFoldConst("select split_part('abc##123###xyz', '##', -10)")
testFoldConst("select split_part('abc##123###xyz', '##', -4)")
testFoldConst("select split_part('abc##123###xyz', '##', -3)")
testFoldConst("select split_part('abc##123###xyz', '##', -2)")
testFoldConst("select split_part('abc##123###xyz', '##', -1)")
testFoldConst("select split_part('abc##123###xyz', '##', 0)")
testFoldConst("select split_part('abc##123###xyz', '##', 1)")
testFoldConst("select split_part('abc##123###xyz', '##', -2)")
testFoldConst("select split_part('abc##123###xyz', '##', 2)")
testFoldConst("select split_part('abc##123###xyz', '##', 3)")
testFoldConst("select split_part('abc##123###xyz', '##', -4)")
testFoldConst("select split_part('abc##123###xyz', '##', 5)")
testFoldConst("select split_part('abc##123###xyz', '##', 4)")
testFoldConst("select split_part('abc##123###xyz', '##', 10)")
testFoldConst("select split_part('a,b,c', ',', -100)")
testFoldConst("select split_part('a,b,c', ',', -5)")
testFoldConst("select split_part('a,b,c', ',', -4)")
testFoldConst("select split_part('a,b,c', ',', -3)")
testFoldConst("select split_part('a,b,c', ',', -2)")
testFoldConst("select split_part('a,b,c', ',', -1)")
testFoldConst("select split_part('a,b,c', ',', -0)")
testFoldConst("select split_part('a,b,c', ',', 0)")
testFoldConst("select split_part('a,b,c', ',', 1)")
testFoldConst("select split_part('a,b,c', ',', 2)")
testFoldConst("select split_part('a,b,c', ',', 3)")
testFoldConst("select split_part('a,b,c', ',', 4)")
testFoldConst("select split_part('a,b,c', ',', 5)")
testFoldConst("select split_part('a,b,c', ',', 100)")
testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as string), -1)")
testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as string), 2)")
testFoldConst("select split_part(cast('a,b,c' as string), cast(',' as string), 5)")
Expand All @@ -485,6 +547,7 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("select split_part('hello world', ' ', -2)")
testFoldConst("select split_part('hello world', ' ', 2)")
testFoldConst("select split_part('hello world', ' ', -3)")
testFoldConst("select split_part('hello world', ' ', -3)")
testFoldConst("SELECT split_part('哈哈哈AAA','A', -5)")
testFoldConst("SELECT split_part('哈哈哈AAA','A', -4)")
testFoldConst("SELECT split_part('哈哈哈AAA','A', -3)")
Expand All @@ -505,7 +568,31 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("SELECT split_part('哈哈哈AA+','A', 2)")
testFoldConst("SELECT split_part('哈哈哈AA+','A', 3)")
testFoldConst("SELECT split_part('哈哈哈AA+','A', 4)")

// split_part with two-character separators built from regex metacharacters
// (. $ | ( ) [ { ^ ? * + and backslash). Each separator occurs exactly once in the
// input, so part 1 and part 2 are the text before and after it when the
// separator is matched literally.
// Escaping note: inside these Groovy double-quoted strings "\$" is a literal '$'
// and "\\\\" is two backslash characters; SQL-level unescaping is not visible
// here -- TODO confirm.

// Part index 1.
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 1)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 1)")
// Part index 2, same separators.
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 2)")
testFoldConst("SELECT split_part('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 2)")

// starts_with
testFoldConst("select starts_with('hello world','hello')")
testFoldConst("select starts_with('hello world',null)")
Expand Down Expand Up @@ -650,6 +737,30 @@ suite("fold_constant_string_arithmatic") {
testFoldConst("SELECT substring_index('哈哈哈AA+','A', 2)")
testFoldConst("SELECT substring_index('哈哈哈AA+','A', 3)")
testFoldConst("SELECT substring_index('哈哈哈AA+','A', 4)")
// substring_index with two-character delimiters built from regex metacharacters
// (. $ | ( ) [ { ^ ? * + and backslash), one case per metacharacter against the
// same input string. These guard against the delimiter being interpreted as a
// regular expression rather than literal text.
// Escaping note: inside these Groovy double-quoted strings "\$" is a literal '$'
// and "\\\\" is two backslash characters; SQL-level unescaping is not visible
// here -- TODO confirm.

// Count 1.
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 1)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 1)")
// Count 2, same delimiters.
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','..', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\$\$', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','||', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','((', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','))', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','[[', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','{{', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','^^', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','??', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','**', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','++', 2)")
testFoldConst("SELECT substring_index('a..b\$\$c||d((e))f[[g{{h^^i??j**k++l\\\\m','\\\\', 2)")

// trim
testFoldConst("select trim('11111', 11)")
Expand Down