25
25
ensure_clean_store ,
26
26
)
27
27
28
- pytestmark = [
29
- pytest .mark .single_cpu ,
30
- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
31
- ]
28
+ pytestmark = [pytest .mark .single_cpu ]
32
29
33
30
tables = pytest .importorskip ("tables" )
34
31
@@ -40,7 +37,7 @@ def test_append(setup_path):
40
37
# tables.NaturalNameWarning):
41
38
df = DataFrame (
42
39
np .random .default_rng (2 ).standard_normal ((20 , 4 )),
43
- columns = Index (list ("ABCD" ), dtype = object ),
40
+ columns = Index (list ("ABCD" )),
44
41
index = date_range ("2000-01-01" , periods = 20 , freq = "B" ),
45
42
)
46
43
_maybe_remove (store , "df1" )
@@ -201,7 +198,7 @@ def test_append_some_nans(setup_path):
201
198
tm .assert_frame_equal (store ["df3" ], df3 , check_index_type = True )
202
199
203
200
204
- def test_append_all_nans (setup_path ):
201
+ def test_append_all_nans (setup_path , using_infer_string ):
205
202
with ensure_clean_store (setup_path ) as store :
206
203
df = DataFrame (
207
204
{
@@ -253,7 +250,13 @@ def test_append_all_nans(setup_path):
253
250
_maybe_remove (store , "df" )
254
251
store .append ("df" , df [:10 ], dropna = True )
255
252
store .append ("df" , df [10 :], dropna = True )
256
- tm .assert_frame_equal (store ["df" ], df , check_index_type = True )
253
+ result = store ["df" ]
254
+ expected = df
255
+ if using_infer_string :
256
+ # TODO: Test is incorrect when not using_infer_string.
257
+ # Should take the last 4 rows unconditionally.
258
+ expected = expected [- 4 :]
259
+ tm .assert_frame_equal (result , expected , check_index_type = True )
257
260
258
261
_maybe_remove (store , "df2" )
259
262
store .append ("df2" , df [:10 ], dropna = False )
@@ -292,7 +295,7 @@ def test_append_frame_column_oriented(setup_path):
292
295
# column oriented
293
296
df = DataFrame (
294
297
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
295
- columns = Index (list ("ABCD" ), dtype = object ),
298
+ columns = Index (list ("ABCD" )),
296
299
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
297
300
)
298
301
df .index = df .index ._with_freq (None ) # freq doesn't round-trip
@@ -417,7 +420,7 @@ def check_col(key, name, size):
417
420
{
418
421
"A" : [0.0 , 1.0 , 2.0 , 3.0 , 4.0 ],
419
422
"B" : [0.0 , 1.0 , 0.0 , 1.0 , 0.0 ],
420
- "C" : Index (["foo1" , "foo2" , "foo3" , "foo4" , "foo5" ], dtype = object ),
423
+ "C" : Index (["foo1" , "foo2" , "foo3" , "foo4" , "foo5" ]),
421
424
"D" : date_range ("20130101" , periods = 5 ),
422
425
}
423
426
).set_index ("C" )
@@ -444,7 +447,7 @@ def check_col(key, name, size):
444
447
_maybe_remove (store , "df" )
445
448
df = DataFrame (
446
449
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
447
- columns = Index (list ("ABCD" ), dtype = object ),
450
+ columns = Index (list ("ABCD" )),
448
451
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
449
452
)
450
453
df ["string" ] = "foo"
@@ -504,11 +507,12 @@ def test_append_with_empty_string(setup_path):
504
507
tm .assert_frame_equal (store .select ("df" ), df )
505
508
506
509
510
+ @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
507
511
def test_append_with_data_columns (setup_path ):
508
512
with ensure_clean_store (setup_path ) as store :
509
513
df = DataFrame (
510
514
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
511
- columns = Index (list ("ABCD" ), dtype = object ),
515
+ columns = Index (list ("ABCD" )),
512
516
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
513
517
)
514
518
df .iloc [0 , df .columns .get_loc ("B" )] = 1.0
@@ -684,8 +688,8 @@ def test_append_misc(setup_path):
684
688
with ensure_clean_store (setup_path ) as store :
685
689
df = DataFrame (
686
690
1.1 * np .arange (120 ).reshape ((30 , 4 )),
687
- columns = Index (list ("ABCD" ), dtype = object ),
688
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
691
+ columns = Index (list ("ABCD" )),
692
+ index = Index ([f"i-{ i } " for i in range (30 )]),
689
693
)
690
694
store .append ("df" , df , chunksize = 1 )
691
695
result = store .select ("df" )
@@ -701,8 +705,8 @@ def test_append_misc_chunksize(setup_path, chunksize):
701
705
# more chunksize in append tests
702
706
df = DataFrame (
703
707
1.1 * np .arange (120 ).reshape ((30 , 4 )),
704
- columns = Index (list ("ABCD" ), dtype = object ),
705
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
708
+ columns = Index (list ("ABCD" )),
709
+ index = Index ([f"i-{ i } " for i in range (30 )]),
706
710
)
707
711
df ["string" ] = "foo"
708
712
df ["float322" ] = 1.0
@@ -742,15 +746,15 @@ def test_append_misc_empty_frame(setup_path):
742
746
# the conversion from AM->BM converts the invalid object dtype column into
743
747
# a datetime64 column no longer raising an error
744
748
@td .skip_array_manager_not_yet_implemented
745
- def test_append_raise (setup_path ):
749
+ def test_append_raise (setup_path , using_infer_string ):
746
750
with ensure_clean_store (setup_path ) as store :
747
751
# test append with invalid input to get good error messages
748
752
749
753
# list in column
750
754
df = DataFrame (
751
755
1.1 * np .arange (120 ).reshape ((30 , 4 )),
752
- columns = Index (list ("ABCD" ), dtype = object ),
753
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
756
+ columns = Index (list ("ABCD" )),
757
+ index = Index ([f"i-{ i } " for i in range (30 )]),
754
758
)
755
759
df ["invalid" ] = [["a" ]] * len (df )
756
760
assert df .dtypes ["invalid" ] == np .object_
@@ -770,8 +774,8 @@ def test_append_raise(setup_path):
770
774
# datetime with embedded nans as object
771
775
df = DataFrame (
772
776
1.1 * np .arange (120 ).reshape ((30 , 4 )),
773
- columns = Index (list ("ABCD" ), dtype = object ),
774
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
777
+ columns = Index (list ("ABCD" )),
778
+ index = Index ([f"i-{ i } " for i in range (30 )]),
775
779
)
776
780
s = Series (datetime .datetime (2001 , 1 , 2 ), index = df .index )
777
781
s = s .astype (object )
@@ -798,8 +802,8 @@ def test_append_raise(setup_path):
798
802
# appending an incompatible table
799
803
df = DataFrame (
800
804
1.1 * np .arange (120 ).reshape ((30 , 4 )),
801
- columns = Index (list ("ABCD" ), dtype = object ),
802
- index = Index ([f"i-{ i } " for i in range (30 )], dtype = object ),
805
+ columns = Index (list ("ABCD" )),
806
+ index = Index ([f"i-{ i } " for i in range (30 )]),
803
807
)
804
808
store .append ("df" , df )
805
809
@@ -876,7 +880,7 @@ def test_append_with_timedelta(setup_path):
876
880
def test_append_to_multiple (setup_path ):
877
881
df1 = DataFrame (
878
882
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
879
- columns = Index (list ("ABCD" ), dtype = object ),
883
+ columns = Index (list ("ABCD" )),
880
884
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
881
885
)
882
886
df2 = df1 .copy ().rename (columns = "{}_2" .format )
@@ -913,12 +917,12 @@ def test_append_to_multiple(setup_path):
913
917
def test_append_to_multiple_dropna (setup_path ):
914
918
df1 = DataFrame (
915
919
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
916
- columns = Index (list ("ABCD" ), dtype = object ),
920
+ columns = Index (list ("ABCD" )),
917
921
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
918
922
)
919
923
df2 = DataFrame (
920
924
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
921
- columns = Index (list ("ABCD" ), dtype = object ),
925
+ columns = Index (list ("ABCD" )),
922
926
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
923
927
).rename (columns = "{}_2" .format )
924
928
df1 .iloc [1 , df1 .columns .get_indexer (["A" , "B" ])] = np .nan
@@ -938,7 +942,7 @@ def test_append_to_multiple_dropna(setup_path):
938
942
def test_append_to_multiple_dropna_false (setup_path ):
939
943
df1 = DataFrame (
940
944
np .random .default_rng (2 ).standard_normal ((10 , 4 )),
941
- columns = Index (list ("ABCD" ), dtype = object ),
945
+ columns = Index (list ("ABCD" )),
942
946
index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
943
947
)
944
948
df2 = df1 .copy ().rename (columns = "{}_2" .format )
0 commit comments