1
1
import numpy as np
2
2
import pytest
3
3
4
- from pandas ._config import using_string_dtype
5
-
6
4
from pandas .compat import HAS_PYARROW
7
5
8
6
import pandas as pd
@@ -716,14 +714,18 @@ def test_head_tail(method):
716
714
tm .assert_frame_equal (df , df_orig )
717
715
718
716
719
- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
720
- def test_infer_objects ():
721
- df = DataFrame ({"a" : [1 , 2 ], "b" : "c" , "c" : 1 , "d" : "x" })
717
+ def test_infer_objects (using_infer_string ):
718
+ df = DataFrame (
719
+ {"a" : [1 , 2 ], "b" : Series (["x" , "y" ], dtype = object ), "c" : 1 , "d" : "x" }
720
+ )
722
721
df_orig = df .copy ()
723
722
df2 = df .infer_objects ()
724
723
725
724
assert np .shares_memory (get_array (df2 , "a" ), get_array (df , "a" ))
726
- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
725
+ if using_infer_string and HAS_PYARROW :
726
+ assert not tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
727
+ else :
728
+ assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
727
729
728
730
df2 .iloc [0 , 0 ] = 0
729
731
df2 .iloc [0 , 1 ] = "d"
@@ -732,19 +734,16 @@ def test_infer_objects():
732
734
tm .assert_frame_equal (df , df_orig )
733
735
734
736
735
- @pytest .mark .xfail (
736
- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)"
737
- )
738
- def test_infer_objects_no_reference ():
737
+ def test_infer_objects_no_reference (using_infer_string ):
739
738
df = DataFrame (
740
739
{
741
740
"a" : [1 , 2 ],
742
- "b" : "c" ,
741
+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
743
742
"c" : 1 ,
744
743
"d" : Series (
745
744
[Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
746
745
),
747
- "e" : "b" ,
746
+ "e" : Series ([ "z" , "w" ], dtype = object ) ,
748
747
}
749
748
)
750
749
df = df .infer_objects ()
@@ -757,16 +756,22 @@ def test_infer_objects_no_reference():
757
756
df .iloc [0 , 1 ] = "d"
758
757
df .iloc [0 , 3 ] = Timestamp ("2018-12-31" )
759
758
assert np .shares_memory (arr_a , get_array (df , "a" ))
760
- # TODO(CoW): Block splitting causes references here
761
- assert not np .shares_memory (arr_b , get_array (df , "b" ))
759
+ if using_infer_string and HAS_PYARROW :
760
+ # note that the underlying memory of arr_b has been copied anyway
761
+ # because of the assignment, but the EA is updated inplace so still
762
+ # appears to share memory
763
+ assert tm .shares_memory (arr_b , get_array (df , "b" ))
764
+ else :
765
+ # TODO(CoW): Block splitting causes references here
766
+ assert not np .shares_memory (arr_b , get_array (df , "b" ))
762
767
assert np .shares_memory (arr_d , get_array (df , "d" ))
763
768
764
769
765
770
def test_infer_objects_reference ():
766
771
df = DataFrame (
767
772
{
768
773
"a" : [1 , 2 ],
769
- "b" : "c" ,
774
+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
770
775
"c" : 1 ,
771
776
"d" : Series (
772
777
[Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
@@ -904,14 +909,13 @@ def test_sort_values_inplace(obj, kwargs):
904
909
tm .assert_equal (view , obj_orig )
905
910
906
911
907
- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
908
912
@pytest .mark .parametrize ("decimals" , [- 1 , 0 , 1 ])
909
913
def test_round (decimals ):
910
914
df = DataFrame ({"a" : [1 , 2 ], "b" : "c" })
911
915
df_orig = df .copy ()
912
916
df2 = df .round (decimals = decimals )
913
917
914
- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
918
+ assert tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
915
919
# TODO: Make inplace by using out parameter of ndarray.round?
916
920
if decimals >= 0 :
917
921
# Ensure lazy copy if no-op
0 commit comments