"""
Copyright 2024 Man Group Operations Limited

Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.

As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
"""

import time

from arcticdb.util.environment_setup import TestLibraryManager, LibraryPopulationPolicy, LibraryType, Storage, populate_library
from arcticdb.util.utils import DataRangeUtils, DFGenerator
from benchmarks.common import AsvBase
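
# asv conventions used in this module: methods prefixed with time_* are timed
# benchmarks and methods prefixed with peakmem_* measure peak memory; each value
# in `params` is passed as the parameter (num_rows / num_symbols) to setup() and
# to every benchmark method.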


class AzureReadWrite(AsvBase):
    """
    This class is for general read/write benchmarks on Azure Blob Storage.
    """
    rounds = 1
    number = 3  # the benchmark function is invoked 3 times between each setup/teardown
    repeat = 1  # number of setup/teardown (measurement) cycles
    min_run_count = 1
    warmup_time = 0

    timeout = 1200

    library_manager = TestLibraryManager(storage=Storage.AZURE, name_benchmark="READ_WRITE")
    library_type = LibraryType.PERSISTENT

    param_names = ["num_rows"]
    params = [10_000_000]  # 10M rows

    number_columns = 100  # 100 columns

    def get_library_manager(self) -> TestLibraryManager:
        return AzureReadWrite.library_manager

    def get_population_policy(self) -> LibraryPopulationPolicy:
        lpp = LibraryPopulationPolicy(self.get_logger())
        lpp.set_parameters(AzureReadWrite.params, [AzureReadWrite.number_columns])
        return lpp

    def setup_cache(self):
        # setup_cache runs only once; asv does not carry instance attributes set here
        # over to the benchmark processes, so per-run state (symbol name, dataframe,
        # date range) is created in setup() instead.
        self.setup_library()

    def setup(self, num_rows):
        self.setup_library()
        self.lib = self.get_library_manager().get_library(AzureReadWrite.library_type, 1)
        self.symbol = "test_symbol"
        # Keep the population policy around: get_last_x_percent_date_range() reads the
        # generator's initial timestamp and frequency from it.
        self.population_policy = self.get_population_policy()

        # Generate test data with mixed types, including strings
        df_generator = DFGenerator(num_rows, [AzureReadWrite.number_columns])
        self.to_write_df = df_generator.generate_dataframe()

        # Add some string columns
        string_cols = [f"string_{i}" for i in range(10)]  # 10 string columns
        for col in string_cols:
            self.to_write_df[col] = [f"string_value_{i}" for i in range(num_rows)]

        # Write the data
        self.lib.write(self.symbol, self.to_write_df)

        # Calculate the date range covering the last 20% of rows
        self.last_20 = self.get_last_x_percent_date_range(num_rows, 20)

    def time_read(self, num_rows):
        self.lib.read(self.symbol)

    def peakmem_read(self, num_rows):
        self.lib.read(self.symbol)

    def time_write(self, num_rows):
        self.lib.write(self.symbol, self.to_write_df)

    def peakmem_write(self, num_rows):
        self.lib.write(self.symbol, self.to_write_df)

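    # The next four benchmarks read with a column projection: passing columns=
    # restricts which columns are decoded and returned and, depending on the
    # library's column-slicing configuration, can also reduce how much data is
    # fetched from Azure compared with reading all ~110 columns written in setup().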
    def time_read_with_column_float(self, num_rows):
        COLS = ["float2"]
        self.lib.read(symbol=self.symbol, columns=COLS).data

    def peakmem_read_with_column_float(self, num_rows):
        COLS = ["float2"]
        self.lib.read(symbol=self.symbol, columns=COLS).data

    def time_read_with_columns_all_types(self, num_rows):
        COLS = ["float2", "string_0", "bool", "int64", "uint64"]
        self.lib.read(symbol=self.symbol, columns=COLS).data

    def peakmem_read_with_columns_all_types(self, num_rows):
        COLS = ["float2", "string_0", "bool", "int64", "uint64"]
        self.lib.read(symbol=self.symbol, columns=COLS).data

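    # Staged writes: write(..., staged=True) lands the data as incomplete (staged)
    # segments that only become readable once they are compacted into a new version.
    # compact_incomplete() on the internal NativeVersionStore handle performs that
    # finalisation; the two positional flags are assumed here to be append and
    # convert_int_to_float, matching how the internal API is called in this file.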
    def time_write_staged(self, num_rows):
        lib = self.lib
        lib.write(self.symbol, self.to_write_df, staged=True)
        lib._nvs.compact_incomplete(self.symbol, False, False)

    def peakmem_write_staged(self, num_rows):
        lib = self.lib
        lib.write(self.symbol, self.to_write_df, staged=True)
        lib._nvs.compact_incomplete(self.symbol, False, False)

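    # Date-range reads: self.last_20 is the (start, end) timestamp pair computed in
    # setup() for the final 20% of the index, so these benchmarks only read the data
    # overlapping that window rather than the whole symbol.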
    def time_read_with_date_ranges_last20_percent_rows(self, num_rows):
        self.lib.read(symbol=self.symbol, date_range=self.last_20).data

    def peakmem_read_with_date_ranges_last20_percent_rows(self, num_rows):
        self.lib.read(symbol=self.symbol, date_range=self.last_20).data

    def get_last_x_percent_date_range(self, num_rows, percents):
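        """
        Return the date range (start, end) covering the last `percents` percent of
        `num_rows` rows, based on the initial timestamp and row frequency of the
        population policy's DFGenerator.
        """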
        df_generator = self.population_policy.df_generator
        freq = df_generator.freq
        return DataRangeUtils.get_last_x_percent_date_range(
            initial_timestamp=df_generator.initial_timestamp,
            freq=freq,
            num_rows=num_rows,
            percents=percents
        )


class AzureListVersions(AsvBase):
    """
    This class is for testing list_versions() performance on Azure Blob Storage.
    """
    rounds = 1
    number = 1  # the assertions below require exactly one invocation per setup/teardown
    repeat = 1
    min_run_count = 1
    warmup_time = 0

    timeout = 1200

    library_manager = TestLibraryManager(storage=Storage.AZURE, name_benchmark="LIST_VERSIONS")
    library_type = LibraryType.PERSISTENT

    param_names = ["num_symbols"]
    params = [10_000]  # 10k symbols
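
    # list_versions() has to enumerate version information for every symbol in the
    # library, so the cost of this benchmark grows with the 10k symbols populated below.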

    def get_library_manager(self) -> TestLibraryManager:
        return AzureListVersions.library_manager

    def get_population_policy(self) -> LibraryPopulationPolicy:
        lpp = LibraryPopulationPolicy(self.get_logger())
        lpp.set_parameters([1000] * AzureListVersions.params[0], [10])  # 1000 rows per symbol, 10 columns
        return lpp

    def setup_cache(self):
        # Runs once; the invocation counter and other per-run state are (re)initialised
        # in setup(), since asv does not propagate instance attributes set in setup_cache.
        self.setup_library()

    def setup(self, num_symbols):
        self.setup_library()
        self.lib = self.get_library_manager().get_library(AzureListVersions.library_type, num_symbols)
        self.test_counter = 1

        # Generate and write test data
        start = time.time()
        policy = self.get_population_policy()
        policy.set_parameters([1000] * num_symbols, [10])
        if not self.library_manager.has_library(AzureListVersions.library_type, num_symbols):
            populate_library(self.library_manager, policy, AzureListVersions.library_type, num_symbols)
            self.get_logger().info(f"Generated {num_symbols} symbols with 1000 rows each in {time.time() - start:.2f}s")
        else:
            self.get_logger().info("Library already exists, population skipped")

        # Clear the cached symbol list keys so the benchmark measures actual storage performance
        self.lib._nvs.version_store._clear_symbol_list_keys()

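    # The counter below guards against asv invoking a benchmark more than once per
    # setup/teardown cycle; a second call would hit warm caches and skew the result.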
    def time_list_versions(self, num_symbols):
        assert self.test_counter == 1, "Benchmark must run exactly once per setup/teardown cycle"
        self.lib.list_versions()
        self.test_counter += 1

    def peakmem_list_versions(self, num_symbols):
        assert self.test_counter == 1, "Benchmark must run exactly once per setup/teardown cycle"
        self.lib.list_versions()
        self.test_counter += 1
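

if __name__ == "__main__":
    # Minimal manual-run sketch, not part of the asv flow: it assumes AsvBase's
    # setup_library()/get_logger() helpers behave as they are used above and that
    # Azure credentials for TestLibraryManager are configured. asv never executes
    # this block; it only exists for ad-hoc smoke runs of one benchmark cycle.
    bench = AzureReadWrite()
    num_rows = AzureReadWrite.params[0]
    bench.setup(num_rows)
    start = time.time()
    bench.time_read(num_rows)
    print(f"Single read of {num_rows:,} rows took {time.time() - start:.2f}s")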