Skip to content

Commit ed0c54a

Browse files
committed
Add asv benchmarks for Azure Storage
Signed-off-by: Julien Jerphanion <[email protected]>
1 parent 4c596f2 commit ed0c54a

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
"""
2+
Copyright 2024 Man Group Operations Limited
3+
4+
Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
5+
6+
As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
7+
"""
8+
9+
import time
10+
from arcticdb.util.utils import DFGenerator
11+
from arcticdb.util.environment_setup import TestLibraryManager, LibraryPopulationPolicy, LibraryType, Storage, populate_library
12+
from arcticdb.util.utils import DataRangeUtils
13+
from benchmarks.common import AsvBase
14+
15+
16+
class AzureReadWrite(AsvBase):
    """
    General read/write benchmarks against Azure Blob Storage.

    Every ``setup`` generates a dataframe with ``num_rows`` rows, appends ten
    string columns, and writes it under ``symbol``; the ``time_*`` and
    ``peakmem_*`` methods then read or re-write that symbol.
    """

    rounds = 1
    number = 3  # invokes 3 times the test runs between each setup-teardown
    repeat = 1  # defines the number of times the measurements will invoke setup-teardown
    min_run_count = 1
    warmup_time = 0

    timeout = 1200

    library_manager = TestLibraryManager(storage=Storage.AZURE, name_benchmark="READ_WRITE")
    library_type = LibraryType.PERSISTENT

    param_names = ["num_rows"]
    params = [10_000_000]  # 10M rows

    number_columns = 100  # 100 columns

    # Kept at class level rather than assigned in ``setup_cache``: asv runs
    # ``setup_cache`` in a separate process and only forwards its (pickled)
    # return value, so instance attributes set there never reach ``setup``
    # or the benchmark methods.
    symbol = "test_symbol"
    to_write_df = None
    last_20 = None

    def get_library_manager(self) -> TestLibraryManager:
        """Return the library manager shared by all benchmarks of this class."""
        return AzureReadWrite.library_manager

    def get_population_policy(self) -> LibraryPopulationPolicy:
        """Build a population policy from this class's ``params`` and ``number_columns``."""
        lpp = LibraryPopulationPolicy(self.get_logger())
        lpp.set_parameters(AzureReadWrite.params, [AzureReadWrite.number_columns])
        return lpp

    def setup_cache(self):
        """One-off preparation hook; delegates to ``setup_library`` of the base class."""
        self.setup_library()

    def setup(self, num_rows):
        """
        Generate the test dataframe, write it to the library and compute the
        date range used by the date-range benchmarks.

        :param num_rows: number of rows of the generated dataframe
        """
        self.setup_library()
        self.lib = self.get_library_manager().get_library(AzureReadWrite.library_type, 1)

        # Generate test data with mixed types including strings
        df_generator = DFGenerator(num_rows, [AzureReadWrite.number_columns])
        frame = df_generator.generate_dataframe()

        # Add 10 string columns. All of them hold the same values, so the
        # (potentially very large) list is built once instead of once per column.
        string_values = [f"string_value_{i}" for i in range(num_rows)]
        for col_index in range(10):
            frame[f"string_{col_index}"] = string_values
        self.to_write_df = frame

        # Write the data
        self.lib.write(self.symbol, self.to_write_df)

        # Calculate date range for last 20% of rows
        self.last_20 = self.get_last_x_percent_date_range(num_rows, 20)

    def time_read(self, num_rows):
        """Time a read of the whole symbol."""
        self.lib.read(self.symbol)

    def peakmem_read(self, num_rows):
        """Peak memory of a read of the whole symbol."""
        self.lib.read(self.symbol)

    def time_write(self, num_rows):
        """Time a write of the full dataframe."""
        self.lib.write(self.symbol, self.to_write_df)

    def peakmem_write(self, num_rows):
        """Peak memory of a write of the full dataframe."""
        self.lib.write(self.symbol, self.to_write_df)

    def time_read_with_column_float(self, num_rows):
        """Time a read restricted to the ``float2`` column."""
        columns = ["float2"]
        self.lib.read(symbol=self.symbol, columns=columns).data

    def peakmem_read_with_column_float(self, num_rows):
        """Peak memory of a read restricted to the ``float2`` column."""
        columns = ["float2"]
        self.lib.read(symbol=self.symbol, columns=columns).data

    def time_read_with_columns_all_types(self, num_rows):
        """Time a read restricted to one column of each listed type."""
        columns = ["float2", "string_0", "bool", "int64", "uint64"]
        self.lib.read(symbol=self.symbol, columns=columns).data

    def peakmem_read_with_columns_all_types(self, num_rows):
        """Peak memory of a read restricted to one column of each listed type."""
        columns = ["float2", "string_0", "bool", "int64", "uint64"]
        self.lib.read(symbol=self.symbol, columns=columns).data

    def time_write_staged(self, num_rows):
        """Time a staged write followed by ``compact_incomplete``."""
        lib = self.lib
        lib.write(self.symbol, self.to_write_df, staged=True)
        lib._nvs.compact_incomplete(self.symbol, False, False)

    def peakmem_write_staged(self, num_rows):
        """Peak memory of a staged write followed by ``compact_incomplete``."""
        lib = self.lib
        lib.write(self.symbol, self.to_write_df, staged=True)
        lib._nvs.compact_incomplete(self.symbol, False, False)

    def time_read_with_date_ranges_last20_percent_rows(self, num_rows):
        """Time a read limited to the date range stored in ``last_20``."""
        self.lib.read(symbol=self.symbol, date_range=self.last_20).data

    def peakmem_read_with_date_ranges_last20_percent_rows(self, num_rows):
        """Peak memory of a read limited to the date range stored in ``last_20``."""
        self.lib.read(symbol=self.symbol, date_range=self.last_20).data

    def get_last_x_percent_date_range(self, num_rows, percents):
        """
        Return the date range covering the last ``percents`` percent of
        ``num_rows`` rows, as computed by ``DataRangeUtils``.

        The initial timestamp and frequency come from the generator of
        ``self.population_policy``.
        NOTE(review): ``population_policy`` is not assigned in this class; it is
        presumably set by ``AsvBase.setup_library`` - confirm. Also confirm that
        this generator matches the ``DFGenerator`` created in ``setup``.
        """
        df_generator = self.population_policy.df_generator
        return DataRangeUtils.get_last_x_percent_date_range(
            initial_timestamp=df_generator.initial_timestamp,
            freq=df_generator.freq,
            num_rows=num_rows,
            percents=percents,
        )
122+
123+
124+
class AzureListVersions(AsvBase):
    """
    Benchmarks for ``list_versions`` performance on Azure Blob Storage.

    Each benchmark must run exactly once per setup/teardown cycle because
    ``setup`` clears the symbol list keys right before the measured call.
    """

    rounds = 1
    # Must stay 1: the benchmarks assert that they are invoked only once
    # between setup and teardown, so any larger value fails on the second call.
    number = 1
    repeat = 1
    min_run_count = 1
    warmup_time = 0

    timeout = 1200

    library_manager = TestLibraryManager(storage=Storage.AZURE, name_benchmark="LIST_VERSIONS")
    library_type = LibraryType.PERSISTENT

    param_names = ["num_symbols"]
    params = [10_000]  # 10k symbols

    # Values handed to ``LibraryPopulationPolicy.set_parameters`` for every symbol.
    _ROWS_PER_SYMBOL = 1000
    _COLUMNS_PER_SYMBOL = 10

    def get_library_manager(self) -> TestLibraryManager:
        """Return the library manager shared by all benchmarks of this class."""
        return AzureListVersions.library_manager

    def get_population_policy(self) -> LibraryPopulationPolicy:
        """Build a population policy sized for the first entry of ``params``."""
        lpp = LibraryPopulationPolicy(self.get_logger())
        # 1000 rows per symbol, 10 columns
        lpp.set_parameters(
            [AzureListVersions._ROWS_PER_SYMBOL] * AzureListVersions.params[0],
            [AzureListVersions._COLUMNS_PER_SYMBOL],
        )
        return lpp

    def setup_cache(self):
        """One-off preparation hook; delegates to ``setup_library`` of the base class."""
        # No per-run state is stored here: asv runs ``setup_cache`` in a separate
        # process, so instance attributes assigned in it are not visible to
        # ``setup`` or to the benchmark methods.
        self.setup_library()

    def setup(self, num_symbols):
        """
        Obtain the library, populate it when missing, reset the run counter and
        clear the symbol list keys.

        :param num_symbols: number of symbols the library should hold
        """
        self.setup_library()
        manager = self.get_library_manager()
        # NOTE(review): the library is fetched before ``has_library`` is checked;
        # if ``get_library`` creates a missing library, population below would
        # always be skipped - confirm against TestLibraryManager.
        self.lib = manager.get_library(AzureListVersions.library_type, num_symbols)

        # Reset on every setup so the once-per-cycle assertion in the benchmarks holds.
        self.test_counter = 1

        # Generate and write test data
        rows = AzureListVersions._ROWS_PER_SYMBOL
        start = time.perf_counter()  # monotonic clock for the elapsed-time log line
        policy = self.get_population_policy()
        policy.set_parameters([rows] * num_symbols, [AzureListVersions._COLUMNS_PER_SYMBOL])
        if not manager.has_library(AzureListVersions.library_type, num_symbols):
            populate_library(manager, policy, AzureListVersions.library_type, num_symbols)
            elapsed = time.perf_counter() - start
            self.get_logger().info(
                f"Generated {num_symbols} symbols with {rows} rows each in {elapsed:.2f}s"
            )
        else:
            self.get_logger().info("Library already exists, population skipped")

        # Clear cache to ensure we're testing actual storage performance
        self.lib._nvs.version_store._clear_symbol_list_keys()

    def time_list_versions(self, num_symbols):
        """Time a single ``list_versions`` call."""
        assert self.test_counter == 1, "Test executed only once in setup-teardown cycle"
        self.lib.list_versions()
        self.test_counter += 1

    def peakmem_list_versions(self, num_symbols):
        """Peak memory of a single ``list_versions`` call."""
        assert self.test_counter == 1, "Test executed only once in setup-teardown cycle"
        self.lib.list_versions()
        self.test_counter += 1

0 commit comments

Comments
 (0)