Skip to content

Commit b3586ef

Browse files
authored
Add support for like() function (#130)
* add like support * Update README.md * Update SplToCatalystTest.scala
1 parent 14789b6 commit b3586ef

File tree

3 files changed

+58
-1
lines changed

3 files changed

+58
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ There's basic support for the most used commands like `addtotals`, `bin`, `colle
1919
`streamstats`, `table`, `where`.
2020

2121
There's also basic support for functions like `auto()`, `cidr_match()`, `coalesce()`, `count()`,
22-
`ctime()`, `earliest()`, `if()`, `isnotnull()`, `latest()`, `len()`, `lower()`, `max()`,
22+
`ctime()`, `earliest()`, `if()`, `isnotnull()`, `latest()`, `len()`, `like()`, `lower()`, `max()`,
2323
`memk()`, `min()`, `mvappend()`, `mvcount()`, `mvfilter()`, `mvindex()`, `none()`,
2424
`null()`, `num()`, `replace()`, `rmcomma()`, `rmunit()`, `round()`, `strftime()`,
2525
`substr()`, `sum()`, `term()`, `values()`.

src/main/scala/com/databricks/labs/transpiler/spl/catalyst/SplToCatalyst.scala

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,30 @@ object SplToCatalyst extends Logging {
173173
determineMax(ctx, call)
174174
case "len" =>
175175
Length(attrOrExpr(ctx, call.args.head))
176+
case "like" =>
  // Translates SPL `like(<field>, <pattern>)` into a Catalyst predicate.
  // The first argument is the value being tested, the second the LIKE pattern.
  val field = attrOrExpr(ctx, call.args.head)
  val pattern = attrOrExpr(ctx, call.args(1))
  pattern match {
    case Literal(patternLiteral: UTF8String, StringType) =>
      val patternString = patternLiteral.toString
      // A pattern of the form %text% can be rewritten as a CONTAINS expression, but only
      // when `text` is purely literal. Three characters carry meaning inside a LIKE pattern:
      //   %  matches any run of characters,
      //   _  matches exactly one character,
      //   \  is the escape character handed to Like below.
      // If any of them occurs between the two outer wildcards, a plain substring search
      // would not be equivalent (e.g. %foo_bar% must also match "fooXbar", and %a\_b% must
      // look for "a_b" rather than "a\_b"), so the pattern is left as a LIKE. This is
      // deliberately conservative: escaped sequences such as %foo\%bar% could be unescaped
      // and converted, but are not.
      val likeSpecialChars = Set('%', '_', '\\')
      val isSimpleContains =
        patternString.length > 2 &&
          patternString.startsWith("%") &&
          patternString.endsWith("%") &&
          !patternString.substring(1, patternString.length - 1).exists(likeSpecialChars)
      if (isSimpleContains) {
        Contains(field,
          Literal(patternString.substring(1, patternString.length - 1)))
      } else {
        Like(field, pattern, '\\')
      }
    // Non-literal patterns (e.g. another field) cannot be inspected at translation time.
    case _ => Like(field, pattern, '\\')
  }
176200
case "substr" =>
177201
val str = attrOrExpr(ctx, call.args.head)
178202
val pos = expression(ctx, call.args(1))

src/test/scala/com/databricks/labs/transpiler/spl/catalyst/SplToCatalystTest.scala

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,6 +1096,39 @@ class SplToCatalystTest extends AnyFunSuite with PlanTestBase {
10961096
)
10971097
}
10981098

1099+
test("simple LIKE converted to CONTAINS") {
  // A pattern wrapped in exactly two `%` wildcards is expected to be emitted as a
  // substring test on the text between them.
  val likeCall = ast.Call("like", Seq(ast.Field("a"), ast.StrValue("%foo%")))
  check(ast.SearchCommand(likeCall),
    (_, tree) =>
      Filter(Contains(UnresolvedAttribute("a"), Literal.create("foo")), tree)
  )
}
1110+
1111+
test("complex LIKE not converted to CONTAINS") {
  // Patterns with a wildcard in the middle, or whose trailing `%` is escaped, are expected
  // to be passed through unchanged as a LIKE with `\` as the escape character.
  Seq("%foo%bar%", "%foo\\%").foreach { likePattern =>
    val likeCall = ast.Call("like", Seq(ast.Field("a"), ast.StrValue(likePattern)))
    check(ast.SearchCommand(likeCall),
      (_, tree) =>
        Filter(Like(UnresolvedAttribute("a"), Literal.create(likePattern), '\\'), tree)
    )
  }
}
1131+
10991132
test("eventstats max(colA) AS maxA by colC") {
11001133
check(ast.EventStatsCommand(
11011134
allNum = false,

0 commit comments

Comments
 (0)