11"""Test that the code in all the test notebooks work, including README.md"""
22
3- import itertools
4-
53import pandas as pd
64import pytest
75
8- from itables .downsample import downsample
6+ from itables .downsample import downsample , shrink_towards_target_aspect_ratio
97
108
def large_tables(N=1000, M=1000):
    """Return sample DataFrames with ``N`` rows and ``M`` columns.

    Three frames are returned, each filled with a single constant value:
    an integer, a float and a string. Both the index and the columns are
    ``range`` based.

    Parameters
    ----------
    N : int
        Number of rows of each table.
    M : int
        Number of columns of each table.

    Returns
    -------
    list of pandas.DataFrame
        The integer, float and string tables, in that order.
    """
    return [
        pd.DataFrame(fill_value, columns=range(M), index=range(N))
        for fill_value in (5, 3.14159, "abcdefg")
    ]
1715
1816
@pytest.mark.parametrize("df", large_tables())
@pytest.mark.parametrize("max_rows", [99, 100])
def test_max_rows(df, max_rows):
    """Check that ``max_rows`` limits the rows and leaves the columns alone."""
    dn = downsample(df, max_rows=max_rows)
    # The sample tables have more rows than max_rows, so the limit is reached
    assert len(dn.index) == max_rows
    pd.testing.assert_index_equal(dn.columns, df.columns)
2423
2524
@pytest.mark.parametrize("df", large_tables())
@pytest.mark.parametrize("max_columns", [99, 100])
def test_max_columns(df, max_columns):
    """Check that ``max_columns`` limits the columns and leaves the rows alone."""
    dn = downsample(df, max_columns=max_columns)
    pd.testing.assert_index_equal(dn.index, df.index)
    # The sample tables have more columns than max_columns, so the limit is reached
    assert len(dn.columns) == max_columns
3131
3232
33- @pytest .mark .parametrize (
34- "df,max_bytes" , itertools .product (large_tables (), [10 , 1e2 , 1e3 , 1e4 , 1e5 ])
35- )
33+ @pytest .mark .parametrize ("df" , large_tables ())
34+ @pytest .mark .parametrize ("max_bytes" , [10 , 1e2 , 1e3 , 1e4 , 1e5 ])
3635def test_max_bytes (df , max_bytes ):
3736 dn = downsample (df , max_bytes = max_bytes )
3837 assert dn .values .nbytes <= max_bytes
@@ -44,3 +43,43 @@ def test_max_one_byte(df, max_bytes=1):
4443 dn = downsample (df , max_bytes = max_bytes )
4544 assert len (dn .columns ) == len (dn .index ) == 1
4645 assert dn .iloc [0 , 0 ] == "..."
46+
47+
def test_shrink_towards_target_aspect_ratio():
    """Check the (rows, columns) pairs returned by the shrink helper.

    NOTE(review): the positional arguments are presumably
    (rows, columns, shrink factor, target aspect ratio) - confirm against
    ``itables.downsample.shrink_towards_target_aspect_ratio``.
    """
    # Shrink on rows only
    assert shrink_towards_target_aspect_ratio(100, 10, 0.1, 1.0) == (10, 10)
    assert shrink_towards_target_aspect_ratio(200, 10, 0.1, 1.0) == (20, 10)

    # Shrink on columns only
    assert shrink_towards_target_aspect_ratio(10, 100, 0.1, 1.0) == (10, 10)
    assert shrink_towards_target_aspect_ratio(10, 200, 0.1, 1.0) == (10, 20)

    # Shrink on rows and columns and achieve target aspect ratio
    assert shrink_towards_target_aspect_ratio(100, 10, 0.1 / 4, 1.0) == (5, 5)
    assert shrink_towards_target_aspect_ratio(200, 10, 0.1 / 8, 1.0) == (5, 5)

    # Aspect ratio not one
    assert shrink_towards_target_aspect_ratio(100, 10, 0.1 / 2, 2.0) == (10, 5)
    assert shrink_towards_target_aspect_ratio(200, 10, 0.1 / 4, 2.0) == (10, 5)
64+
65+
@pytest.mark.parametrize("df", large_tables(N=10000, M=100))
@pytest.mark.parametrize("max_bytes", [1e3, 1e4, 1e5])
def test_df_with_many_rows_is_downsampled_preferentially_on_rows(df, max_bytes):
    """Check how a tall table (10000 x 100) is downsampled under ``max_bytes``.

    For the largest budget only the rows are expected to be reduced. For the
    smaller budgets the result is expected to be roughly square.
    """
    dn = downsample(df, max_bytes=max_bytes)
    if max_bytes == 1e5:
        # Separate asserts so that a failure tells which dimension is wrong
        assert len(dn.index) < len(df.index)
        assert len(dn.columns) == len(df.columns)
    else:
        # aspect ratio is close to 1
        assert 0.5 < len(dn.index) / len(dn.columns) < 2
75+
76+
@pytest.mark.parametrize("df", large_tables(N=100, M=10000))
@pytest.mark.parametrize("max_bytes", [1e3, 1e4, 1e5])
def test_df_with_many_columns_is_downsampled_preferentially_on_columns(df, max_bytes):
    """Check how a wide table (100 x 10000) is downsampled under ``max_bytes``.

    For the largest budget only the columns are expected to be reduced. For
    the smaller budgets the result is expected to be roughly square.
    """
    dn = downsample(df, max_bytes=max_bytes)
    if max_bytes == 1e5:
        # Separate asserts so that a failure tells which dimension is wrong
        assert len(dn.index) == len(df.index)
        assert len(dn.columns) < len(df.columns)
    else:
        # aspect ratio is close to 1
        assert 0.5 < len(dn.index) / len(dn.columns) < 2
0 commit comments