Skip to content

Commit 2f64aab

Browse files
avecasey, Abby VeCasey, mroeschke, pre-commit-ci[bot]
authored and committed
ENH: Added isascii() string method fixing issue #59091 (#60532)
* first * second * Update object_array.py * third * ascii * ascii2 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * ascii3 * style * style * style * style * docs * reset * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update doc/source/whatsnew/v3.0.0.rst --------- Co-authored-by: Abby VeCasey <[email protected]> Co-authored-by: Matthew Roeschke <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f1b71d8 commit 2f64aab

File tree

8 files changed

+61
-1
lines changed

8 files changed

+61
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Other enhancements
6161
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
6262
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
6363
- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
64+
- Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
6465
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
6566
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
6667
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)

pandas/core/arrays/_arrow_string_mixins.py

+4
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ def _str_isalpha(self):
253253
result = pc.utf8_is_alpha(self._pa_array)
254254
return self._convert_bool_result(result)
255255

256+
def _str_isascii(self):
257+
result = pc.string_is_ascii(self._pa_array)
258+
return self._convert_bool_result(result)
259+
256260
def _str_isdecimal(self):
257261
result = pc.utf8_is_decimal(self._pa_array)
258262
return self._convert_bool_result(result)

pandas/core/strings/accessor.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -3415,7 +3415,8 @@ def len(self):
34153415
# cases:
34163416
# upper, lower, title, capitalize, swapcase, casefold
34173417
# boolean:
3418-
# isalpha, isnumeric isalnum isdigit isdecimal isspace islower isupper istitle
3418+
# isalpha, isnumeric isalnum isdigit isdecimal isspace islower
3419+
# isupper istitle isascii
34193420
# _doc_args holds dict of strings to use in substituting casemethod docs
34203421
_doc_args: dict[str, dict[str, str]] = {}
34213422
_doc_args["lower"] = {"type": "lowercase", "method": "lower", "version": ""}
@@ -3495,6 +3496,7 @@ def casefold(self):
34953496
Series.str.isdecimal : Check whether all characters are decimal.
34963497
Series.str.isspace : Check whether all characters are whitespace.
34973498
Series.str.islower : Check whether all characters are lowercase.
3499+
Series.str.isascii : Check whether all characters are ASCII.
34983500
Series.str.isupper : Check whether all characters are uppercase.
34993501
Series.str.istitle : Check whether all characters are titlecase.
35003502
@@ -3518,6 +3520,7 @@ def casefold(self):
35183520
Series.str.isdecimal : Check whether all characters are decimal.
35193521
Series.str.isspace : Check whether all characters are whitespace.
35203522
Series.str.islower : Check whether all characters are lowercase.
3523+
Series.str.isascii : Check whether all characters are ASCII.
35213524
Series.str.isupper : Check whether all characters are uppercase.
35223525
Series.str.istitle : Check whether all characters are titlecase.
35233526
@@ -3544,6 +3547,7 @@ def casefold(self):
35443547
Series.str.isdecimal : Check whether all characters are decimal.
35453548
Series.str.isspace : Check whether all characters are whitespace.
35463549
Series.str.islower : Check whether all characters are lowercase.
3550+
Series.str.isascii : Check whether all characters are ASCII.
35473551
Series.str.isupper : Check whether all characters are uppercase.
35483552
Series.str.istitle : Check whether all characters are titlecase.
35493553
@@ -3576,6 +3580,7 @@ def casefold(self):
35763580
Series.str.isdigit : Check whether all characters are digits.
35773581
Series.str.isspace : Check whether all characters are whitespace.
35783582
Series.str.islower : Check whether all characters are lowercase.
3583+
Series.str.isascii : Check whether all characters are ASCII.
35793584
Series.str.isupper : Check whether all characters are uppercase.
35803585
Series.str.istitle : Check whether all characters are titlecase.
35813586
@@ -3601,6 +3606,7 @@ def casefold(self):
36013606
Series.str.isdecimal : Check whether all characters are decimal.
36023607
Series.str.isspace : Check whether all characters are whitespace.
36033608
Series.str.islower : Check whether all characters are lowercase.
3609+
Series.str.isascii : Check whether all characters are ASCII.
36043610
Series.str.isupper : Check whether all characters are uppercase.
36053611
Series.str.istitle : Check whether all characters are titlecase.
36063612
@@ -3627,6 +3633,7 @@ def casefold(self):
36273633
Series.str.isdigit : Check whether all characters are digits.
36283634
Series.str.isdecimal : Check whether all characters are decimal.
36293635
Series.str.islower : Check whether all characters are lowercase.
3636+
Series.str.isascii : Check whether all characters are ASCII.
36303637
Series.str.isupper : Check whether all characters are uppercase.
36313638
Series.str.istitle : Check whether all characters are titlecase.
36323639
@@ -3649,6 +3656,7 @@ def casefold(self):
36493656
Series.str.isdigit : Check whether all characters are digits.
36503657
Series.str.isdecimal : Check whether all characters are decimal.
36513658
Series.str.isspace : Check whether all characters are whitespace.
3659+
Series.str.isascii : Check whether all characters are ASCII.
36523660
Series.str.isupper : Check whether all characters are uppercase.
36533661
Series.str.istitle : Check whether all characters are titlecase.
36543662
@@ -3674,6 +3682,7 @@ def casefold(self):
36743682
Series.str.isdecimal : Check whether all characters are decimal.
36753683
Series.str.isspace : Check whether all characters are whitespace.
36763684
Series.str.islower : Check whether all characters are lowercase.
3685+
Series.str.isascii : Check whether all characters are ASCII.
36773686
Series.str.istitle : Check whether all characters are titlecase.
36783687
36793688
Examples
@@ -3697,6 +3706,7 @@ def casefold(self):
36973706
Series.str.isdecimal : Check whether all characters are decimal.
36983707
Series.str.isspace : Check whether all characters are whitespace.
36993708
Series.str.islower : Check whether all characters are lowercase.
3709+
Series.str.isascii : Check whether all characters are ASCII.
37003710
Series.str.isupper : Check whether all characters are uppercase.
37013711
37023712
Examples
@@ -3714,11 +3724,40 @@ def casefold(self):
37143724
3 False
37153725
dtype: bool
37163726
"""
3727+
_shared_docs["isascii"] = """
3728+
See Also
3729+
--------
3730+
Series.str.isalpha : Check whether all characters are alphabetic.
3731+
Series.str.isnumeric : Check whether all characters are numeric.
3732+
Series.str.isalnum : Check whether all characters are alphanumeric.
3733+
Series.str.isdigit : Check whether all characters are digits.
3734+
Series.str.isdecimal : Check whether all characters are decimal.
3735+
Series.str.isspace : Check whether all characters are whitespace.
3736+
Series.str.islower : Check whether all characters are lowercase.
3737+
Series.str.istitle : Check whether all characters are titlecase.
3738+
Series.str.isupper : Check whether all characters are uppercase.
3739+
3740+
Examples
3741+
--------
3742+
The ``s5.str.isascii`` method checks whether all characters are ASCII
3743+
characters, which include the digits 0-9, the letters A-Z and a-z,
3744+
punctuation, whitespace and control characters (code points 0-127).
3745+
3746+
>>> s5 = pd.Series(['ö', 'see123', 'hello world', ''])
3747+
>>> s5.str.isascii()
3748+
0 False
3749+
1 True
3750+
2 True
3751+
3 True
3752+
dtype: bool
3753+
"""
3754+
37173755
_doc_args["isalnum"] = {"type": "alphanumeric", "method": "isalnum"}
37183756
_doc_args["isalpha"] = {"type": "alphabetic", "method": "isalpha"}
37193757
_doc_args["isdigit"] = {"type": "digits", "method": "isdigit"}
37203758
_doc_args["isspace"] = {"type": "whitespace", "method": "isspace"}
37213759
_doc_args["islower"] = {"type": "lowercase", "method": "islower"}
3760+
_doc_args["isascii"] = {"type": "ascii", "method": "isascii"}
37223761
_doc_args["isupper"] = {"type": "uppercase", "method": "isupper"}
37233762
_doc_args["istitle"] = {"type": "titlecase", "method": "istitle"}
37243763
_doc_args["isnumeric"] = {"type": "numeric", "method": "isnumeric"}
@@ -3750,6 +3789,11 @@ def casefold(self):
37503789
docstring=_shared_docs["ismethods"] % _doc_args["islower"]
37513790
+ _shared_docs["islower"],
37523791
)
3792+
isascii = _map_and_wrap(
3793+
"isascii",
3794+
docstring=_shared_docs["ismethods"] % _doc_args["isascii"]
3795+
+ _shared_docs["isascii"],
3796+
)
37533797
isupper = _map_and_wrap(
37543798
"isupper",
37553799
docstring=_shared_docs["ismethods"] % _doc_args["isupper"]

pandas/core/strings/base.py

+4
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,10 @@ def _str_isalnum(self):
179179
def _str_isalpha(self):
180180
pass
181181

182+
@abc.abstractmethod
183+
def _str_isascii(self):
184+
pass
185+
182186
@abc.abstractmethod
183187
def _str_isdecimal(self):
184188
pass

pandas/core/strings/object_array.py

+3
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,9 @@ def _str_isalnum(self):
455455
def _str_isalpha(self):
456456
return self._str_map(str.isalpha, dtype="bool")
457457

458+
def _str_isascii(self):
459+
return self._str_map(str.isascii, dtype="bool")
460+
458461
def _str_isdecimal(self):
459462
return self._str_map(str.isdecimal, dtype="bool")
460463

pandas/tests/strings/conftest.py

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
"get_dummies",
6969
"isalnum",
7070
"isalpha",
71+
"isascii",
7172
"isdecimal",
7273
"isdigit",
7374
"islower",

pandas/tests/strings/test_string_array.py

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def test_string_array_numeric_integer_array(nullable_string_dtype, method, expec
8383
[
8484
("isdigit", [False, None, True]),
8585
("isalpha", [True, None, False]),
86+
("isascii", [True, None, True]),
8687
("isalnum", [True, None, True]),
8788
("isnumeric", [False, None, True]),
8889
],

pandas/tests/strings/test_strings.py

+2
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def test_empty_str_methods(any_string_dtype):
159159
# ismethods should always return boolean (GH 29624)
160160
tm.assert_series_equal(empty_bool, empty.str.isalnum())
161161
tm.assert_series_equal(empty_bool, empty.str.isalpha())
162+
tm.assert_series_equal(empty_bool, empty.str.isascii())
162163
tm.assert_series_equal(empty_bool, empty.str.isdigit())
163164
tm.assert_series_equal(empty_bool, empty.str.isspace())
164165
tm.assert_series_equal(empty_bool, empty.str.islower())
@@ -177,6 +178,7 @@ def test_empty_str_methods(any_string_dtype):
177178
@pytest.mark.parametrize(
178179
"method, expected",
179180
[
181+
("isascii", [True, True, True, True, True, True, True, True, True, True]),
180182
("isalnum", [True, True, True, True, True, False, True, True, False, False]),
181183
("isalpha", [True, True, True, False, False, False, True, False, False, False]),
182184
(

0 commit comments

Comments
 (0)