@@ -81,6 +81,12 @@ class ParallelBench:
8181 results: List of results from all runs.
8282 utilization_samples: List of (timestamp, utilization) tuples. Used to
8383 generate utilization over time plots.
84+ target_ratios: List of target ratios for each benchmark. This is used to
85+ calculate the probability of each benchmark being selected, and is
86+ determined by the benchmark weights.
87+ first_run: Boolean indicating if this is the first run. We use this to
88+ determine if we can randomly select benchmarks or if we need to run all
89+ benchmarks at least once.
8490 """
8591
8692 def __init__ (
@@ -108,12 +114,15 @@ def __init__(
108114 self .runtimes : dict [str , list [BenchmarkMetrics ]] = {}
109115 self .workers : dict [int , worker .Worker ] = {}
110116 self .utilization_samples : list [tuple [pd .Timestamp , float ]] = []
117+ self .target_ratios : list [float ] = []
118+ self .first_run = True
111119
112120 def SetWeights (
113121 self ,
114122 benchmark_target : str ,
115123 benchmark_filter : list [str ] | None ,
116124 workload_filter : list [str ] | None ,
125+ scheduling_strategy : weights .SchedulingStrategy ,
117126 custom_benchmark_weights : list [str ] | None ,
118127 ) -> None :
119128 """Sets the benchmark weights."""
@@ -135,7 +144,7 @@ def SetWeights(
135144 )
136145 # Gets the number of workloads and num of benchmark for each workload
137146 self .benchmark_weights = weights .GetBenchmarkWeights (
138- self .benchmarks , custom_benchmark_weights
147+ self .benchmarks , scheduling_strategy , custom_benchmark_weights
139148 )
140149
141150 def _PreRun (
@@ -157,19 +166,23 @@ def _PreRun(
157166 for benchmark in self .benchmarks .values ():
158167 benchmark .AddCommandFlags (benchmark_flags )
159168
160- # Initialize the runtimes with a fake wall time of 1. This causes all
161- # benchmarks to be equally likely at first.
162- self .runtimes = {
163- benchmark : [
164- BenchmarkMetrics (
165- total_duration = 1.0 ,
166- per_iteration_wall_time = 0.0 ,
167- per_iteration_cpu_time = 0.0 ,
168- per_bm_run_iteration = 0 ,
169- )
170- ]
171- for benchmark in self .benchmarks .keys ()
172- }
169+ # Initialize the runtimes with a fake wall time. Based on empirically
170+ # observed runtimes, TCMalloc takes 4x longer to run than others.
171+ for benchmark in self .benchmarks .keys ():
172+ total_duration = 4 if "TCMALLOC" in benchmark else 1
173+ self .runtimes [benchmark ] = [
174+ BenchmarkMetrics (
175+ total_duration = total_duration ,
176+ per_iteration_wall_time = 0.0 ,
177+ per_iteration_cpu_time = 0.0 ,
178+ per_bm_run_iteration = 0 ,
179+ )
180+ ]
181+
182+ self .target_ratios = [
183+ self .benchmark_weights [instance .BenchmarkName ()]
184+ for instance in self .benchmarks .values ()
185+ ]
173186
174187 # Create a worker thread for each CPU.
175188 self .workers = {
@@ -188,39 +201,109 @@ def _PreRun(
188201 logging .debug ("CPU activity: %s" , self .cpus )
189202 os .sched_setaffinity (os .getpid (), [self .controller_cpu ])
190203
204+ def _AdjustProbabilities (
205+ self , target_ratios : list [float ], current_runtime : list [float ]
206+ ) -> np .ndarray :
207+ """Calculates benchmark probabilities.
208+
209+ We want to run benchmarks based on their expected runtime so that the
210+ actual runtimes align with desired target ratios.
211+
212+ Probability_i = Current RT_0 * Target Ratio_i / (Target Ratio_0 * Current
213+ RT_i)
214+
215+ Args:
216+ target_ratios: List of target ratios.
217+ current_runtime: List of current runtimes.
218+
219+ Returns:
220+ List of normalized probabilities.
221+ """
222+
223+ num_benchmarks = len (target_ratios )
224+ if num_benchmarks != len (current_runtime ):
225+ raise ValueError (
226+ "Target and current ratio lists must have the same length."
227+ )
228+
229+ # Set the first BM to be the reference with probability 1.0.
230+ probabilities = [1.0 ]
231+
232+ # Calculate probabilities for other benchmarks
233+ for j in range (1 , num_benchmarks ):
234+ probability = (current_runtime [0 ] * target_ratios [j ]) / (
235+ current_runtime [j ] * target_ratios [0 ]
236+ )
237+ probabilities .append (probability )
238+
239+ # Normalize the values
240+ return np .array (probabilities ) / np .sum (probabilities )
241+
242+ def _AdjustRuntime (self ) -> np .ndarray :
243+ """Adjusts the runtime of each benchmark to account for variance."""
244+ valid_data = False
245+ valid_index = - 1
246+ current_runtimes = np .empty (len (self .runtimes .keys ()))
247+ need_adjustment = []
248+
249+ for i , (times ) in enumerate (self .runtimes .values ()):
250+ # Skip the first run, which is a fake run.
251+ last_10_wall_times = np .array ([t .total_duration for t in times [1 :][- 10 :]])
252+
253+ if last_10_wall_times .size :
254+ current_runtimes [i ] = last_10_wall_times .mean ()
255+ valid_data = True
256+ valid_index = i
257+ else :
258+ current_runtimes [i ] = times [0 ].total_duration
259+ # We need to adjust this runtime as it's not yet a valid value.
260+ need_adjustment .append (i )
261+
262+ if valid_data :
263+ for index in need_adjustment :
264+ current_runtimes [index ] = (
265+ current_runtimes [index ] * current_runtimes [valid_index ]
266+ )
267+ return current_runtimes
268+
191269 def _ComputeBenchmarkWeights (self ) -> np .ndarray :
192270 """Probability is inversely based on expected runtime."""
193271
194- inverse_weights = np . empty ( len ( self .runtimes . keys ()) )
272+ current_runtimes = self ._AdjustRuntime ( )
195273
196- # Use the last 10 runtimes to estimate expected runtime.
197- for i , (benchmark_name , times ) in enumerate (self .runtimes .items ()):
198- last_10_wall_times = np .array ([t .total_duration for t in times [- 10 :]])
199- base_weight = 1 / last_10_wall_times .mean ()
274+ if not np .all (current_runtimes == 1.0 ):
275+ current_rt_ratios = current_runtimes / current_runtimes .sum ()
200276
201- # If we're using adaptive benchmark selection, adjust the weight based on
202- # the benchmark's performance relative to the fleet.
203- if self .benchmark_weights :
204- for keyword , weight in self .benchmark_weights .items ():
205- if keyword in benchmark_name .upper ():
206- base_weight *= weight
207- break
208- inverse_weights [i ] = base_weight
209- return inverse_weights / inverse_weights .sum ()
277+ probabilities = self ._AdjustProbabilities (
278+ self .target_ratios , current_rt_ratios
279+ )
280+ else :
281+ probabilities = np .array (self .target_ratios ) / np .sum (self .target_ratios )
282+
283+ return probabilities
210284
211285 def _SelectNextBenchmarks (self , count : int ) -> list [bm .Benchmark ]:
212286 """Randomly choose some benchmarks to run."""
213287
214288 if count <= 0 :
215289 return []
290+
291+ # We try to run all benchmarks at least once.
292+ benchmarks = []
293+ if self .first_run :
294+ if count > len (self .benchmarks ):
295+ benchmarks = list (self .benchmarks .values ())
296+ count = count - len (benchmarks )
297+ self .first_run = False
298+
216299 # Probabilities based on the expected runtime.
217300 probabilities = self ._ComputeBenchmarkWeights ()
218301 # self.runtimes is a dict of benchmark name -> list of runtimes.
219302 benchmark_names = list (self .runtimes .keys ())
220303 selected_names = np .random .choice (
221304 benchmark_names , p = probabilities , size = count , replace = True
222305 )
223- return [self .benchmarks [name ] for name in selected_names ]
306+ return benchmarks + [self .benchmarks [name ] for name in selected_names ]
224307
225308 def _RunSchedulingLoop (self ) -> None :
226309 """Check CPU utilization and pick the next job to schedule."""
0 commit comments