Commit ae1abf6

Increase the memory threshold for test_mem_leak_read_all_arctic_lib

1 parent 79df70a

The functional change is the `max_mem_bytes` bump in the second hunk below; the rest of the diff is formatting-only cleanup (quote style, spacing, line wrapping) in the same file.
1 file changed: +54 −37

python/tests/stress/arcticdb/version_store/test_mem_leaks.py
@@ -38,10 +38,10 @@
 logger = logging.getLogger("Memory_tests")

 ## IMPORTANT !!!
-##
+##
 ## All memory tests MUST be done with fixtures that return Library object
-## and not NativeVersionStore. Reason is that the last is thick wrapper which
-## is hiding some possible problems, therefore all tests have to be done with what
+## and not NativeVersionStore. Reason is that the last is thick wrapper which
+## is hiding some possible problems, therefore all tests have to be done with what
 ## customer works on

@@ -362,7 +362,7 @@ def proc_to_examine():
     run the test from command line again to assure it runs ok before commit

     """
-    max_mem_bytes = 295_623_040
+    max_mem_bytes = 340_623_040

     check_process_memory_leaks(proc_to_examine, 20, max_mem_bytes, 80.0)

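For context on what this threshold guards: `check_process_memory_leaks` runs the target callable in fresh processes and fails when memory use exceeds `max_mem_bytes`. Below is a rough sketch of how such a process-level check can be implemented; it is an illustration under assumptions (psutil-based RSS sampling, and the final `80.0` argument read as a soft warning threshold in percent), not ArcticDB's actual helper.

```python
import multiprocessing
import time

import psutil  # assumption: psutil is available for sampling process RSS


def check_process_memory_leaks(proc_body, iterations, max_mem_bytes, warn_percent):
    """Hypothetical reimplementation, for illustration only: run proc_body in a
    fresh process `iterations` times and assert peak RSS stays under max_mem_bytes."""
    for i in range(iterations):
        child = multiprocessing.Process(target=proc_body)
        child.start()
        handle = psutil.Process(child.pid)
        peak_rss = 0
        while child.is_alive():
            try:
                peak_rss = max(peak_rss, handle.memory_info().rss)
            except psutil.NoSuchProcess:
                break  # process exited between the liveness check and the sample
            time.sleep(0.05)  # sample at roughly 20 Hz
        child.join()
        if peak_rss > max_mem_bytes * warn_percent / 100.0:
            print(f"iteration {i}: peak RSS {peak_rss} is close to the {max_mem_bytes} limit")
        assert peak_rss <= max_mem_bytes, f"iteration {i}: peak RSS {peak_rss} exceeds limit"
```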
@@ -609,7 +609,7 @@ def is_relevant(stack: Stack) -> bool:

         if "folly::CPUThreadPoolExecutor::CPUTask" in frame_info_str:
             logger.warning(f"Frame excluded : {frame_info_str}")
-            logger.warning(f'''Explanation : These are on purpose, and they come from the interaction of
+            logger.warning(f"""Explanation : These are on purpose, and they come from the interaction of
                 multi-threading and forking. When Python forks, the task-scheduler has a linked-list
                 of tasks to execute, but there is a global lock held that protects the thread-local state.
                 We can't free the list without accessing the global thread-local storage singleton,
@@ -626,9 +626,9 @@ def is_relevant(stack: Stack) -> bool:
                 find something better

                 Great that it is catching this, as it's the one case in the whole project where I know
-                for certain that it does leak memory (and only because there's no alternative''')
+                for certain that it does leak memory (and only because there's no alternative""")
             return False
-
+
         pass
     return True

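For reference, `is_relevant` is the `filter_fn` passed to `@pytest.mark.limit_leaks` further down: pytest-memray calls it for every leaked allocation stack, and returning False excludes that stack from the leak limit. A condensed sketch of the pattern (the frame attributes follow the pytest-memray `Stack`/`StackFrame` API; the noisy-frame list here is illustrative):

```python
from pytest_memray import Stack

# Illustrative list of allocation sites whose leaks are known and accepted.
KNOWN_NOISY_FRAMES = ("folly::CPUThreadPoolExecutor::CPUTask",)


def is_relevant(stack: Stack) -> bool:
    """Return False when an allocation stack matches a known, accepted leak."""
    for frame in stack.frames:
        frame_info_str = f"{frame.function}:{frame.filename}:{frame.lineno}"
        if any(noisy in frame_info_str for noisy in KNOWN_NOISY_FRAMES):
            return False  # excluded from the limit_leaks accounting
    return True
```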
@@ -776,7 +776,7 @@ def test_mem_leak_read_all_arctic_lib_memray(library_with_big_symbol_):
 def lmdb_library(lmdb_storage, lib_name, request) -> Generator[Library, None, None]:
     """
     Allows passing library creation parameters as parameters of the test or other fixture.
-    Example:
+    Example:


         @pytest.mark.parametrize("lmdb_library_any", [
@@ -785,12 +785,11 @@ def lmdb_library(lmdb_storage, lib_name, request) -> Generator[Library, None, None]:
         def test_my_test(lmdb_library_any):
             .....
     """
-    params = request.param if hasattr(request, 'param') else {}
+    params = request.param if hasattr(request, "param") else {}
    yield lmdb_storage.create_arctic().create_library(name=lib_name, **params)

-
 @pytest.fixture
-def prepare_head_tails_symbol(lmdb_library):
+def prepare_head_tails_symbol(lmdb_library):
     """
     This fixture is part of test `test_mem_leak_head_tail_memray`

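To make the fixture's docstring concrete: indirect parametrization routes each dict into the fixture's `request.param`, which is then unpacked into `create_library`. A minimal sketch (the `LibraryOptions` values are illustrative, but the pattern matches the parametrized test further down):

```python
@pytest.mark.parametrize(
    "lmdb_library",
    [{"library_options": LibraryOptions(rows_per_segment=100, dynamic_schema=True)}],
    indirect=True,  # deliver each dict to the fixture as request.param
)
def test_my_test(lmdb_library):
    # The fixture has already created the library with the options above.
    assert lmdb_library.options().dynamic_schema
```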
@@ -801,17 +800,18 @@ def prepare_head_tails_symbol(lmdb_library):
     Should not be reused
     """
     lib: Library = lmdb_library
-    opts = lib.options()
-
+    opts = lib.options()
+
     total_number_columns = 1002
     symbol = "asdf12345"
-    num_rows_list = [279,199,1,350,999,0,1001]
+    num_rows_list = [279, 199, 1, 350, 999, 0, 1001]
     snapshot_names = []
     for rows in num_rows_list:
         st = time.time()
-        df = DFGenerator.generate_wide_dataframe(num_rows=rows, num_cols=total_number_columns, num_string_cols=25,
-                                                 start_time=pd.Timestamp(0),seed=64578)
-        lib.write(symbol,df)
+        df = DFGenerator.generate_wide_dataframe(
+            num_rows=rows, num_cols=total_number_columns, num_string_cols=25, start_time=pd.Timestamp(0), seed=64578
+        )
+        lib.write(symbol, df)
         snap = f"{symbol}_{rows}"
         lib.snapshot(snap)
         snapshot_names.append(snap)
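Each `lib.write` above creates a new version of the symbol, and `lib.snapshot` pins it under a name derived from the row count, so the test below can read any of the written shapes back. A read against a pinned state looks roughly like this (ArcticDB's `Library.head`/`tail` accept `as_of` as a snapshot name or version number; the values here are illustrative):

```python
snap = "asdf12345_279"      # one of the snapshot names built above
cols = ["col_0", "col_1"]   # illustrative column selection
df_head = lib.head(symbol, n=5, as_of=snap, columns=cols).data
df_tail = lib.tail(symbol, n=5, as_of=snap, columns=cols).data
```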
@@ -821,20 +821,37 @@ def prepare_head_tails_symbol(lmdb_library):
         # characteristic
         total_number_columns += 20
         logger.info(f"Total number of columns increased to {total_number_columns}")
-
+
     all_columns = df.columns.to_list()
     yield (lib, symbol, num_rows_list, snapshot_names, all_columns)
     lib.delete(symbol=symbol)

-
 @MEMRAY_TESTS_MARK
 @SLOW_TESTS_MARK
 ## Linux is having quite huge location there will be separate issue to investigate why
 @pytest.mark.limit_leaks(location_limit="1000 KB" if LINUX else "52 KB", filter_fn=is_relevant)
-@pytest.mark.parametrize("lmdb_library", [
-    {'library_options': LibraryOptions(rows_per_segment=233, columns_per_segment=197, dynamic_schema=True, encoding_version=EncodingVersion.V2)},
-    {'library_options': LibraryOptions(rows_per_segment=99, columns_per_segment=99, dynamic_schema=False, encoding_version=EncodingVersion.V1)}
-], indirect=True)
+@pytest.mark.parametrize(
+    "lmdb_library",
+    [
+        {
+            "library_options": LibraryOptions(
+                rows_per_segment=233,
+                columns_per_segment=197,
+                dynamic_schema=True,
+                encoding_version=EncodingVersion.V2,
+            )
+        },
+        {
+            "library_options": LibraryOptions(
+                rows_per_segment=99,
+                columns_per_segment=99,
+                dynamic_schema=False,
+                encoding_version=EncodingVersion.V1,
+            )
+        },
+    ],
+    indirect=True,
+)
 def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
     """
     This test aims to test `head` and `tail` functions if they do leak memory.
@@ -845,33 +862,35 @@ def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
     symbol: str
     num_rows_list: List[int]
     store: NativeVersionStore = None
-    snapshot_names: List[str]
+    snapshot_names: List[str]
     all_columns: List[str]
     (store, symbol, num_rows_list, snapshot_names, all_columns) = prepare_head_tails_symbol
-
+
     start_test: float = time.time()
-    max_rows:int = max(num_rows_list)
+    max_rows: int = max(num_rows_list)

     np.random.seed(959034)
     # constructing a list of head and tail rows to be selected
     num_rows_to_select = []
-    important_values = [0, 1, 0 -1, 2, -2, max_rows, -max_rows ] # some boundary cases
+    important_values = [0, 1, 0 - 1, 2, -2, max_rows, -max_rows]  # some boundary cases
     num_rows_to_select.extend(important_values)
-    num_rows_to_select.extend(np.random.randint(low=5, high=99, size=7)) # add 7 more random values
+    num_rows_to_select.extend(np.random.randint(low=5, high=99, size=7))  # add 7 more random values
     # number of iterations will be the list length/size
     iterations = len(num_rows_to_select)
     # constructing a random list of values for snapshot names for each iteration
-    snapshots_list: List[str] = np.random.choice(snapshot_names, iterations)
+    snapshots_list: List[str] = np.random.choice(snapshot_names, iterations)
     # constructing a random list of values for versions names for each iteration
-    versions_list: List[int] = np.random.randint(0, len(num_rows_list) - 1, iterations)
+    versions_list: List[int] = np.random.randint(0, len(num_rows_list) - 1, iterations)
     # constructing a random list of number of columns to be selected
-    number_columns_for_selection_list: List[int] = np.random.randint(0, len(all_columns)-1, iterations)
+    number_columns_for_selection_list: List[int] = np.random.randint(0, len(all_columns) - 1, iterations)

     count: int = 0
     # We will execute several time all head/tail operations with specific number of columns.
     # the number of columns consist of random columns and boundary cases see definition above
     for rows in num_rows_to_select:
-        selected_columns:List[str] = np.random.choice(all_columns, number_columns_for_selection_list[count], replace=False).tolist()
+        selected_columns: List[str] = np.random.choice(
+            all_columns, number_columns_for_selection_list[count], replace=False
+        ).tolist()
         snap: str = snapshots_list[count]
         ver: str = int(versions_list[count])
         logger.info(f"rows {rows} / snapshot {snap}")
@@ -887,11 +906,9 @@ def test_mem_leak_head_tail_memray(prepare_head_tails_symbol):
         logger.info(f"Iteration {count} / {iterations} completed")
         count += 1
         del selected_columns, df1, df2, df3, df4
-
+
     del store, symbol, num_rows_list, snapshot_names, all_columns
     del num_rows_to_select, important_values, snapshots_list, versions_list, number_columns_for_selection_list
     gc.collect()
-    time.sleep(10) # collection is not immediate
-    logger.info(f"Test completed in {time.time() - start_test}")
-
-
+    time.sleep(10)  # collection is not immediate
+    logger.info(f"Test completed in {time.time() - start_test}")
