 logger = logging.getLogger("Memory_tests")

 ## IMPORTANT !!!
-##
+##
 ## All memory tests MUST be done with fixtures that return Library object
-## and not NativeVersionStore. Reason is that the last is thick wrapper which
-## is hiding some possible problems, therefore all tests have to be done with what
+## and not NativeVersionStore. Reason is that the last is thick wrapper which
+## is hiding some possible problems, therefore all tests have to be done with what
 ## customer works on

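The rule above is what the `lmdb_library` fixture further down in this diff implements. As a minimal standalone sketch of the same idea, returning the customer-facing `Library` rather than `NativeVersionStore` (the LMDB URI and library name here are invented for illustration and are not taken from this diff):

import pytest
from arcticdb import Arctic
from arcticdb.version_store.library import Library


@pytest.fixture
def memtest_library(tmp_path) -> Library:
    # Hand the test the customer-facing Library object, not NativeVersionStore.
    ac = Arctic(f"lmdb://{tmp_path}")
    lib = ac.create_library("memtest_lib")
    yield lib
    ac.delete_library("memtest_lib")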
@@ -362,7 +362,7 @@ def proc_to_examine():
     run the test from command line again to assure it runs ok before commit

     """
-    max_mem_bytes = 295_623_040
+    max_mem_bytes = 340_623_040

     check_process_memory_leaks(proc_to_examine, 20, max_mem_bytes, 80.0)

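`check_process_memory_leaks` is the test suite's own helper, and the exact meaning of its `20` and `80.0` arguments is not visible in this diff; the hunk above only raises the absolute allowance from 295,623,040 to 340,623,040 bytes. The general idea such a check implements (run a workload in a child process repeatedly and fail if resident memory exceeds a byte budget like `max_mem_bytes` or a share of system memory) can be sketched roughly as follows with `psutil`; this is an illustration, not the project's implementation:

import multiprocessing as mp

import psutil


def _run_and_report_rss(workload, result_queue):
    workload()
    # Resident set size of this child process after the workload finished.
    result_queue.put(psutil.Process().memory_info().rss)


def check_rss_budget(workload, repetitions, max_mem_bytes, max_percent_of_total):
    total = psutil.virtual_memory().total
    for _ in range(repetitions):
        queue = mp.Queue()
        child = mp.Process(target=_run_and_report_rss, args=(workload, queue))
        child.start()
        rss = queue.get()
        child.join()
        assert rss <= max_mem_bytes, f"child used {rss} bytes, budget is {max_mem_bytes}"
        assert 100.0 * rss / total <= max_percent_of_total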
@@ -609,7 +609,7 @@ def is_relevant(stack: Stack) -> bool:

         if "folly::CPUThreadPoolExecutor::CPUTask" in frame_info_str:
             logger.warning(f"Frame excluded : {frame_info_str}")
-            logger.warning(f''' Explanation : These are on purpose, and they come from the interaction of
+            logger.warning(f""" Explanation : These are on purpose, and they come from the interaction of
                 multi-threading and forking. When Python forks, the task-scheduler has a linked-list
                 of tasks to execute, but there is a global lock held that protects the thread-local state.
                 We can't free the list without accessing the global thread-local storage singleton,
@@ -626,9 +626,9 @@ def is_relevant(stack: Stack) -> bool:
                 find something better

                 Great that it is catching this, as it's the one case in the whole project where I know
-                for certain that it does leak memory (and only because there's no alternative''')
+                for certain that it does leak memory (and only because there's no alternative""")
             return False
-
+
             pass
     return True

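The warning above documents why frames containing `folly::CPUThreadPoolExecutor::CPUTask` are excluded by `is_relevant`. As a hedged sketch of how such a pytest-memray `filter_fn` is typically structured (the `Stack`/frame attribute names follow pytest-memray's documented types; the exclusion list is illustrative, not the project's full list):

from pytest_memray import Stack

# Allocation sites documented as acceptable leaks (see the explanation above).
KNOWN_BENIGN_MARKERS = ("folly::CPUThreadPoolExecutor::CPUTask",)


def is_relevant_sketch(stack: Stack) -> bool:
    for frame in stack.frames:
        frame_info_str = f"{frame.filename}:{frame.function}:[{frame.lineno}]"
        if any(marker in frame_info_str for marker in KNOWN_BENIGN_MARKERS):
            # Do not count this allocation towards the leak limit.
            return False
    return True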
@@ -776,7 +776,7 @@ def test_mem_leak_read_all_arctic_lib_memray(library_with_big_symbol_):
 def lmdb_library(lmdb_storage, lib_name, request) -> Generator[Library, None, None]:
     """
     Allows passing library creation parameters as parameters of the test or other fixture.
-    Example:
+    Example:


         @pytest.mark.parametrize("lmdb_library_any", [
@@ -785,12 +785,11 @@ def lmdb_library(lmdb_storage, lib_name, request) -> Generator[Library, None, No
         def test_my_test(lmdb_library_any):
             .....
     """
-    params = request.param if hasattr(request, 'param') else {}
+    params = request.param if hasattr(request, "param") else {}
     yield lmdb_storage.create_arctic().create_library(name=lib_name, **params)

-
 @pytest.fixture
-def prepare_head_tails_symbol(lmdb_library):
+def prepare_head_tails_symbol(lmdb_library):
     """
     This fixture is part of test `test_mem_leak_head_tail_memray`

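The `lmdb_library` docstring above relies on pytest's indirect parametrization: with `indirect=True`, each parametrize value is delivered to the fixture as `request.param`, which is why the fixture falls back to `{}` when no parametrization is given. A self-contained sketch of that mechanism (fixture and test names here are invented for illustration):

import pytest


@pytest.fixture
def configured_thing(request):
    # With indirect=True, the parametrize value below arrives here as request.param.
    options = request.param if hasattr(request, "param") else {}
    return {"defaults": True, **options}


@pytest.mark.parametrize("configured_thing", [{"rows_per_segment": 233}], indirect=True)
def test_uses_configured_thing(configured_thing):
    assert configured_thing["rows_per_segment"] == 233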
@@ -801,17 +800,18 @@ def prepare_head_tails_symbol(lmdb_library):
     Should not be reused
     """
     lib: Library = lmdb_library
-    opts = lib.options()
-
+    opts = lib.options()
+
     total_number_columns = 1002
     symbol = "asdf12345"
-    num_rows_list = [279,199,1, 350,999,0, 1001]
+    num_rows_list = [279, 199, 1, 350, 999, 0, 1001]
     snapshot_names = []
     for rows in num_rows_list:
         st = time.time()
-        df = DFGenerator.generate_wide_dataframe(num_rows=rows, num_cols=total_number_columns, num_string_cols=25,
-                                                 start_time=pd.Timestamp(0),seed=64578)
-        lib.write(symbol,df)
+        df = DFGenerator.generate_wide_dataframe(
+            num_rows=rows, num_cols=total_number_columns, num_string_cols=25, start_time=pd.Timestamp(0), seed=64578
+        )
+        lib.write(symbol, df)
         snap = f"{symbol}_{rows}"
         lib.snapshot(snap)
         snapshot_names.append(snap)
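`DFGenerator.generate_wide_dataframe` is the project's test-data helper; its real output is not shown in this diff. Purely to illustrate the shape of data the fixture writes (a wide frame with mostly numeric plus some string columns over a datetime index starting at `pd.Timestamp(0)`), an equivalent frame could be built with plain pandas/numpy along these lines; column names and dtypes are assumptions:

import numpy as np
import pandas as pd


def generate_wide_dataframe_sketch(num_rows, num_cols, num_string_cols, start_time, seed):
    rng = np.random.default_rng(seed)
    data = {}
    for i in range(num_cols - num_string_cols):
        data[f"float_{i}"] = rng.random(num_rows)  # numeric columns
    for i in range(num_string_cols):
        data[f"str_{i}"] = [f"val_{n}" for n in rng.integers(0, 100, num_rows)]  # string columns
    index = pd.date_range(start=start_time, periods=num_rows, freq="s")
    return pd.DataFrame(data, index=index)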
@@ -821,20 +821,37 @@ def prepare_head_tails_symbol(lmdb_library):
         # characteristic
         total_number_columns += 20
         logger.info(f"Total number of columns increased to {total_number_columns}")
-
+
     all_columns = df.columns.to_list()
     yield (lib, symbol, num_rows_list, snapshot_names, all_columns)
     lib.delete(symbol=symbol)

-
 @MEMRAY_TESTS_MARK
 @SLOW_TESTS_MARK
 ## Linux is having quite huge location there will be separate issue to investigate why
 @pytest.mark.limit_leaks(location_limit="1000 KB" if LINUX else "52 KB", filter_fn=is_relevant)
-@pytest.mark.parametrize("lmdb_library", [
-    {'library_options': LibraryOptions(rows_per_segment=233, columns_per_segment=197, dynamic_schema=True, encoding_version=EncodingVersion.V2)},
-    {'library_options': LibraryOptions(rows_per_segment=99, columns_per_segment=99, dynamic_schema=False, encoding_version=EncodingVersion.V1)}
-    ], indirect=True)
+@pytest.mark.parametrize(
+    "lmdb_library",
+    [
+        {
+            "library_options": LibraryOptions(
+                rows_per_segment=233,
+                columns_per_segment=197,
+                dynamic_schema=True,
+                encoding_version=EncodingVersion.V2,
+            )
+        },
+        {
+            "library_options": LibraryOptions(
+                rows_per_segment=99,
+                columns_per_segment=99,
+                dynamic_schema=False,
+                encoding_version=EncodingVersion.V1,
+            )
+        },
+    ],
+    indirect=True,
+)
 def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
     """
     This test aims to test `head` and `tail` functions if they do leak memory.
@@ -845,33 +862,35 @@ def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
     symbol: str
     num_rows_list: List[int]
     store: NativeVersionStore = None
-    snapshot_names: List[str]
+    snapshot_names: List[str]
     all_columns: List[str]
     (store, symbol, num_rows_list, snapshot_names, all_columns) = prepare_head_tails_symbol
-
+
     start_test: float = time.time()
-    max_rows:int = max(num_rows_list)
+    max_rows: int = max(num_rows_list)

     np.random.seed(959034)
     # constructing a list of head and tail rows to be selected
     num_rows_to_select = []
-    important_values = [0, 1, 0-1, 2, -2, max_rows, -max_rows] # some boundary cases
+    important_values = [0, 1, 0 - 1, 2, -2, max_rows, -max_rows]  # some boundary cases
     num_rows_to_select.extend(important_values)
-    num_rows_to_select.extend(np.random.randint(low=5, high=99, size=7)) # add 7 more random values
+    num_rows_to_select.extend(np.random.randint(low=5, high=99, size=7))  # add 7 more random values
     # number of iterations will be the list length/size
     iterations = len(num_rows_to_select)
     # constructing a random list of values for snapshot names for each iteration
-    snapshots_list: List[str] = np.random.choice(snapshot_names, iterations)
+    snapshots_list: List[str] = np.random.choice(snapshot_names, iterations)
     # constructing a random list of values for versions names for each iteration
-    versions_list: List[int] = np.random.randint(0, len(num_rows_list) - 1, iterations)
+    versions_list: List[int] = np.random.randint(0, len(num_rows_list) - 1, iterations)
     # constructing a random list of number of columns to be selected
-    number_columns_for_selection_list: List[int] = np.random.randint(0, len(all_columns)-1, iterations)
+    number_columns_for_selection_list: List[int] = np.random.randint(0, len(all_columns) - 1, iterations)

     count: int = 0
     # We will execute several time all head/tail operations with specific number of columns.
     # the number of columns consist of random columns and boundary cases see definition above
     for rows in num_rows_to_select:
-        selected_columns:List[str] = np.random.choice(all_columns, number_columns_for_selection_list[count], replace=False).tolist()
+        selected_columns: List[str] = np.random.choice(
+            all_columns, number_columns_for_selection_list[count], replace=False
+        ).tolist()
         snap: str = snapshots_list[count]
         ver: str = int(versions_list[count])
         logger.info(f"rows {rows} / snapshot {snap}")
@@ -887,11 +906,9 @@ def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
         logger.info(f"Iteration {count} / {iterations} completed")
         count += 1
         del selected_columns, df1, df2, df3, df4
-
+
     del store, symbol, num_rows_list, snapshot_names, all_columns
     del num_rows_to_select, important_values, snapshots_list, versions_list, number_columns_for_selection_list
     gc.collect()
-    time.sleep(10) # collection is not immediate
-    logger.info(f"Test completed in {time.time() - start_test}")
-
-
+    time.sleep(10)  # collection is not immediate
+    logger.info(f"Test completed in {time.time() - start_test}")
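The head/tail reads that produce `df1`–`df4` lie outside the hunks shown here. As a hedged sketch of what one loop iteration of such reads could look like against the public `Library` API (the exact calls and parameters in the real test may differ):

# Assumes lib, symbol, rows, snap, ver and selected_columns as prepared in the test above.
df1 = lib.head(symbol, n=rows, columns=selected_columns).data
df2 = lib.tail(symbol, n=rows, columns=selected_columns).data
df3 = lib.head(symbol, n=rows, as_of=snap, columns=selected_columns).data  # read as of a snapshot
df4 = lib.tail(symbol, n=rows, as_of=ver, columns=selected_columns).data   # read as of a version number
del df1, df2, df3, df4  # drop references so only genuine leaks count against the memray limit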