Skip to content

Commit cb67319

Browse files
committed
Fix regexp replacement error assertions
Signed-off-by: Allen Xu <allxu@nvidia.com>
1 parent: bef5bad · commit: cb67319

1 file changed

Lines changed: 11 additions & 4 deletions

File tree

integration_tests/src/main/python/regexp_test.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import re
16+
1517
import pytest
1618

1719
from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, \
@@ -32,6 +34,10 @@
3234
def mk_str_gen(pattern):
3335
return StringGen(pattern).with_special_case('').with_special_pattern('.{0,10}')
3436

37+
def _regexp_replace_error_message(java_message):
38+
return re.compile(
39+
re.escape(java_message) + r'|\[INVALID_REGEXP_REPLACE\] Could not perform regexp_replace')
40+
3541
def test_split_re_negative_limit():
3642
data_gen = mk_str_gen('([bf]o{0,2}:){1,7}') \
3743
.with_special_case('boo:and:foo')
@@ -469,7 +475,7 @@ def test_regexp_replace_subbug2_trailing_backslash_throws_14742():
469475
lambda spark: spark.createDataFrame([("a",)], ["a"]).select(
470476
regexp_replace(col("a"), "a", "\\")).collect(),
471477
conf=_regexp_conf,
472-
error_message="character to be escaped is missing")
478+
error_message=_regexp_replace_error_message("character to be escaped is missing"))
473479

474480

475481
@allow_non_gpu('ProjectExec', 'RegExpReplace')
@@ -479,7 +485,7 @@ def test_regexp_replace_subbug3_dollar_non_digit_throws_14742():
479485
lambda spark: spark.createDataFrame([("a",)], ["a"]).select(
480486
regexp_replace(col("a"), "a", "$x")).collect(),
481487
conf=_regexp_conf,
482-
error_message="Illegal group reference")
488+
error_message=_regexp_replace_error_message("Illegal group reference"))
483489

484490

485491
@allow_non_gpu('ProjectExec', 'RegExpReplace')
@@ -489,7 +495,8 @@ def test_regexp_replace_subbug4_digit_leading_named_group_throws_14742():
489495
lambda spark: spark.createDataFrame([("a",)], ["a"]).select(
490496
regexp_replace(col("a"), "(a)", "${1}")).collect(),
491497
conf=_regexp_conf,
492-
error_message="capturing group name {1} starts with digit character")
498+
error_message=_regexp_replace_error_message(
499+
"capturing group name {1} starts with digit character"))
493500

494501

495502
@allow_non_gpu('ProjectExec', 'RegExpReplace')
@@ -499,7 +506,7 @@ def test_regexp_replace_subbug5_unknown_named_group_throws_14742():
499506
lambda spark: spark.createDataFrame([("a",)], ["a"]).select(
500507
regexp_replace(col("a"), "(a)", "${name}")).collect(),
501508
conf=_regexp_conf,
502-
error_message="No group with name")
509+
error_message=_regexp_replace_error_message("No group with name"))
503510

504511

505512
@pytest.mark.skipif(is_before_spark_320(), reason='regexp is synonym for RLike starting in Spark 3.2.0')

0 commit comments

Comments
 (0)