
Commit 06a8573

Add constant generation setting
Parent: 0602e21

6 files changed (+104 −3 lines)

loadgen/bindings/python_api.cc (+2)

@@ -310,6 +310,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
                      &TestSettings::server_max_async_queries)
       .def_readwrite("server_num_issue_query_threads",
                      &TestSettings::server_num_issue_query_threads)
+      .def_readwrite("server_constant_gen",
+                     &TestSettings::server_constant_gen)
       .def_readwrite("offline_expected_qps",
                      &TestSettings::offline_expected_qps)
       .def_readwrite("min_duration_ms", &TestSettings::min_duration_ms)

loadgen/demos/py_demo_constant_gen.py (new file, +75)

@@ -0,0 +1,75 @@
+# Copyright 2019 The MLPerf Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""Python demo showing how to use the MLPerf Inference LoadGen bindings
+with constant query generation in the Server scenario."""
+
+from __future__ import print_function
+
+import threading
+import time
+
+from absl import app
+import mlperf_loadgen
+
+
+def load_samples_to_ram(query_samples):
+    del query_samples
+    return
+
+
+def unload_samples_from_ram(query_samples):
+    del query_samples
+    return
+
+
+def process_query_async(query_samples):
+    time.sleep(0.001)
+    responses = []
+    for s in query_samples:
+        responses.append(mlperf_loadgen.QuerySampleResponse(s.id, 0, 0))
+    mlperf_loadgen.QuerySamplesComplete(responses)
+
+
+def issue_query(query_samples):
+    threading.Thread(target=process_query_async, args=[query_samples]).start()
+
+
+def flush_queries():
+    pass
+
+
+def main(argv):
+    del argv
+    settings = mlperf_loadgen.TestSettings()
+    settings.scenario = mlperf_loadgen.TestScenario.Server
+    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
+    settings.server_target_qps = 100
+    settings.server_target_latency_ns = 100000000
+    settings.min_query_count = 100
+    settings.min_duration_ms = 10000
+    settings.server_constant_gen = True  # constant gaps instead of Poisson arrivals
+
+    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
+    qsl = mlperf_loadgen.ConstructQSL(
+        1024, 128, load_samples_to_ram, unload_samples_from_ram
+    )
+    mlperf_loadgen.StartTest(sut, qsl, settings)
+    mlperf_loadgen.DestroyQSL(qsl)
+    mlperf_loadgen.DestroySUT(sut)
+
+
+if __name__ == "__main__":
+    app.run(main)

loadgen/loadgen.cc (+15 −3)

@@ -207,6 +207,13 @@ auto ScheduleDistribution<TestScenario::Server>(double qps) {
   };
 }
 
+auto ScheduleConstantDistribution(double qps) {
+  return [dist = std::uniform_real_distribution<>(1.0 / qps, 1.0 / qps)](auto& gen) mutable {
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(
+        std::chrono::duration<double>(dist(gen)));
+  };
+}
+
 /// \brief Selects samples for the accuracy mode.
 template <TestMode mode>
 auto SampleDistribution(size_t sample_count, size_t stride, std::mt19937* rng) {
@@ -310,8 +317,9 @@ std::vector<QueryMetadata> GenerateQueries(
   auto sample_distribution_equal_issue = SampleDistributionEqualIssue(
       min_queries, loaded_samples.size(), &sample_rng);
 
-  auto schedule_distribution =
-      ScheduleDistribution<scenario>(settings.target_qps);
+  auto schedule_distribution =
+      ScheduleDistribution<scenario>(settings.target_qps);
+  auto schedule_constant_distribution = ScheduleConstantDistribution(settings.target_qps);
 
   // When sample_concatenate_permutation is turned on, pad to a multiple of the
   // complete dataset to ensure fairness.
@@ -397,7 +405,11 @@ std::vector<QueryMetadata> GenerateQueries(
     }
     queries.emplace_back(samples, timestamp, response_delegate, sequence_gen);
     prev_timestamp = timestamp;
-    timestamp += schedule_distribution(schedule_rng);
+    if (settings.server_constant_gen && scenario == TestScenario::Server) {
+      timestamp += schedule_constant_distribution(schedule_rng);
+    } else {
+      timestamp += schedule_distribution(schedule_rng);
+    }
     // In equal_issue mode, the min_queries will be bumped up by a multiple of
     // the dataset size if the test time has not met the threshold.
     if (enable_equal_issue && (queries.size() >= min_queries) &&
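Note: the default Server schedule draws inter-arrival gaps from an exponential distribution (Poisson arrivals), while the new constant mode issues every query exactly 1 / target_qps apart; both schedules have the same mean gap, only the variance differs. A minimal standalone sketch contrasting the two (illustrative only, not part of this commit):

#include <chrono>
#include <iostream>
#include <random>

int main() {
  const double qps = 100.0;  // same value as server_target_qps in the demo
  std::mt19937 gen(0);

  // Default Server schedule: exponential gaps with mean 1 / qps (10 ms here).
  std::exponential_distribution<> exponential_gap(qps);
  // server_constant_gen schedule: every gap is exactly 1 / qps.
  const std::chrono::duration<double> constant_gap(1.0 / qps);

  for (int i = 0; i < 3; ++i) {
    std::cout << "exponential: " << exponential_gap(gen) * 1e3 << " ms, "
              << "constant: " << constant_gap.count() * 1e3 << " ms\n";
  }
  return 0;
}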

loadgen/test_settings.h (+4)

@@ -169,6 +169,10 @@ struct TestSettings {
   /// StartTest() will be used to call IssueQuery(). See also
   /// mlperf::RegisterIssueQueryThread().
   uint64_t server_num_issue_query_threads = 0;
+  /// \brief If true, the time between samples generated by LoadGen in the
+  /// Server scenario is constant. Otherwise, the time between samples
+  /// follows an exponential distribution.
+  bool server_constant_gen = false;
   /**@}*/
 
   // ==================================

loadgen/test_settings_internal.cc (+7)

@@ -53,6 +53,7 @@ TestSettingsInternal::TestSettingsInternal(
       use_token_latencies(requested.use_token_latencies),
       server_ttft_latency(requested.server_ttft_latency),
       server_tpot_latency(requested.server_tpot_latency),
+      server_constant_gen(requested.server_constant_gen),
       infer_token_latencies(requested.infer_token_latencies),
       token_latency_scaling_factor(requested.token_latency_scaling_factor) {
   // Target QPS, target latency, and max_async_queries.
@@ -305,6 +306,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
                  s.server_max_async_queries);
       MLPERF_LOG(detail, "requested_server_num_issue_query_threads",
                  s.server_num_issue_query_threads);
+      MLPERF_LOG(detail, "requested_server_constant_gen",
+                 s.server_constant_gen);
       break;
     case TestScenario::Offline:
       MLPERF_LOG(detail, "requested_offline_expected_qps",
@@ -452,6 +455,8 @@ void TestSettingsInternal::LogEffectiveSettings() const {
              s.performance_sample_count);
   MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
              s.sample_concatenate_permutation);
+  MLPERF_LOG(detail, "effective_server_constant_gen",
+             s.server_constant_gen);
 #else
   detail("");
   detail("Effective Settings:");
@@ -772,6 +777,8 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
     server_coalesce_queries = (val == 0) ? false : true;
   if (lookupkv(model, "Server", "max_async_queries", &val, nullptr))
     server_max_async_queries = int(val);
+  if (lookupkv(model, "Server", "constant_gen", &val, nullptr))
+    server_constant_gen = (val == 0) ? false : true;
 
   lookupkv(model, scenario, "min_duration", &min_duration_ms, nullptr);
   lookupkv(model, scenario, "max_duration", &max_duration_ms, nullptr);

loadgen/test_settings_internal.h (+1)

@@ -85,6 +85,7 @@ struct TestSettingsInternal {
   bool use_token_latencies = false;
   int64_t server_ttft_latency;
   int64_t server_tpot_latency;
+  bool server_constant_gen;
 
   bool infer_token_latencies = false;
   int64_t token_latency_scaling_factor;
