1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15+ import re
16+
1517import pytest
1618
1719from asserts import assert_gpu_and_cpu_are_equal_collect , assert_gpu_fallback_collect , \
def mk_str_gen(pattern):
    """Build a StringGen for ``pattern`` with this file's standard special cases.

    The generator is created from ``pattern``, then given the empty string as
    a special case and ``'.{0,10}'`` as a special pattern, in that order.
    """
    base_gen = StringGen(pattern)
    with_empty = base_gen.with_special_case('')
    return with_empty.with_special_pattern('.{0,10}')
3436
def _regexp_replace_error_message(java_message):
    """Return a compiled regex accepting either form of a regexp_replace error.

    The pattern matches the literal text of ``java_message`` (escaped, so any
    regex metacharacters in it are taken literally) or the text
    ``[INVALID_REGEXP_REPLACE] Could not perform regexp_replace``.
    """
    literal_java = re.escape(java_message)
    invalid_replace = r'\[INVALID_REGEXP_REPLACE\] Could not perform regexp_replace'
    # Join with '|' so that either alternative on its own satisfies the match.
    return re.compile('|'.join((literal_java, invalid_replace)))
40+
3541def test_split_re_negative_limit ():
3642 data_gen = mk_str_gen ('([bf]o{0,2}:){1,7}' ) \
3743 .with_special_case ('boo:and:foo' )
@@ -469,7 +475,7 @@ def test_regexp_replace_subbug2_trailing_backslash_throws_14742():
469475 lambda spark : spark .createDataFrame ([("a" ,)], ["a" ]).select (
470476 regexp_replace (col ("a" ), "a" , "\\ " )).collect (),
471477 conf = _regexp_conf ,
472- error_message = "character to be escaped is missing" )
478+ error_message = _regexp_replace_error_message ( "character to be escaped is missing" ) )
473479
474480
475481@allow_non_gpu ('ProjectExec' , 'RegExpReplace' )
@@ -479,7 +485,7 @@ def test_regexp_replace_subbug3_dollar_non_digit_throws_14742():
479485 lambda spark : spark .createDataFrame ([("a" ,)], ["a" ]).select (
480486 regexp_replace (col ("a" ), "a" , "$x" )).collect (),
481487 conf = _regexp_conf ,
482- error_message = "Illegal group reference" )
488+ error_message = _regexp_replace_error_message ( "Illegal group reference" ) )
483489
484490
485491@allow_non_gpu ('ProjectExec' , 'RegExpReplace' )
@@ -489,7 +495,8 @@ def test_regexp_replace_subbug4_digit_leading_named_group_throws_14742():
489495 lambda spark : spark .createDataFrame ([("a" ,)], ["a" ]).select (
490496 regexp_replace (col ("a" ), "(a)" , "${1}" )).collect (),
491497 conf = _regexp_conf ,
492- error_message = "capturing group name {1} starts with digit character" )
498+ error_message = _regexp_replace_error_message (
499+ "capturing group name {1} starts with digit character" ))
493500
494501
495502@allow_non_gpu ('ProjectExec' , 'RegExpReplace' )
@@ -499,7 +506,7 @@ def test_regexp_replace_subbug5_unknown_named_group_throws_14742():
499506 lambda spark : spark .createDataFrame ([("a" ,)], ["a" ]).select (
500507 regexp_replace (col ("a" ), "(a)" , "${name}" )).collect (),
501508 conf = _regexp_conf ,
502- error_message = "No group with name" )
509+ error_message = _regexp_replace_error_message ( "No group with name" ) )
503510
504511
505512@pytest .mark .skipif (is_before_spark_320 (), reason = 'regexp is synonym for RLike starting in Spark 3.2.0' )
0 commit comments