benchmark.py
# -----------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# -----------------------------------------------------------------------------
from argparse import ArgumentParser
from copy import deepcopy

from olive.cli.base import (
    BaseOliveCLICommand,
    add_input_model_options,
    add_logging_options,
    add_save_config_file_options,
    add_shared_cache_options,
    add_telemetry_options,
    get_input_model_config,
    update_shared_cache_options,
)
from olive.common.utils import set_nested_dict_value
from olive.telemetry import action


class BenchmarkCommand(BaseOliveCLICommand):
    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        sub_parser = parser.add_parser("benchmark", help="Evaluate the model using lm-eval.")

        # model options
        add_input_model_options(
            sub_parser, enable_hf=True, enable_hf_adapter=True, enable_pt=True, default_output_path="onnx-model"
        )

        # lm-eval options
        lmeval_group = sub_parser.add_argument_group("lm-eval evaluator options")
        lmeval_group.add_argument(
            "--tasks",
            type=str,
            required=True,
            nargs="*",
            help="List of tasks to evaluate on.",
        )
        lmeval_group.add_argument(
            "--device",
            type=str,
            default="cpu",
            choices=["cpu", "gpu"],
            help="Target device for evaluation.",
        )
        lmeval_group.add_argument(
            "--batch_size",
            type=int,
            default=1,
            help="Batch size.",
        )
        lmeval_group.add_argument(
            "--max_length",
            type=int,
            default=1024,
            help="Maximum length of input + output.",
        )
        lmeval_group.add_argument(
            "--limit",
            type=float,
            default=1,
            help="Number (or percentage of dataset) of samples to use for evaluation.",
        )
        lmeval_group.add_argument(
            "--backend",
            type=str,
            default="auto",
            choices=["auto", "ort", "ortgenai"],
            help="Backend for ONNX model evaluation. Use 'auto' to infer backend from model type.",
        )
        lmeval_group.add_argument(
            "--confirm_run_unsafe_code",
            action="store_true",
            default=False,
            help="Allow running tasks that execute model-generated code (e.g., MBPP, HumanEval).",
        )

        add_logging_options(sub_parser)
        add_save_config_file_options(sub_parser)
        add_shared_cache_options(sub_parser)
        add_telemetry_options(sub_parser)
        sub_parser.set_defaults(func=BenchmarkCommand)

    @action
    def run(self):
        return self._run_workflow()

    def _get_run_config(self, tempdir: str) -> dict:
        # start from the run-config template and overlay the CLI arguments
        config = deepcopy(TEMPLATE)

        input_model_config = get_input_model_config(self.args)
        assert input_model_config["type"].lower() in {
            "hfmodel",
            "pytorchmodel",
            "onnxmodel",
        }, "Only HfModel, PyTorchModel and OnnxModel are supported in benchmark command."

        if self.args.backend != "auto" and input_model_config["type"].lower() != "onnxmodel":
            raise ValueError("--backend is only supported for ONNX input models.")

        to_replace = [
            ("input_model", input_model_config),
            ("output_dir", self.args.output_path),
            ("log_severity_level", self.args.log_level),
            (("systems", "local_system", "accelerators", 0, "device"), self.args.device),
            (
                ("systems", "local_system", "accelerators", 0, "execution_providers"),
                ["CUDAExecutionProvider" if self.args.device == "gpu" else "CPUExecutionProvider"],
            ),
            (("evaluators", "evaluator", "tasks"), self.args.tasks),
            (("evaluators", "evaluator", "device"), self.args.device),
            (("evaluators", "evaluator", "batch_size"), self.args.batch_size),
            (("evaluators", "evaluator", "max_length"), self.args.max_length),
            (("evaluators", "evaluator", "limit"), self.args.limit),
            (
                ("evaluators", "evaluator", "model_class"),
                None if self.args.backend == "auto" else self.args.backend,
            ),
            (
                ("evaluators", "evaluator", "confirm_run_unsafe_code"),
                self.args.confirm_run_unsafe_code or None,
            ),
        ]
        # entries whose value is None are skipped so the template defaults are kept
        for keys, value in to_replace:
            if value is not None:
                set_nested_dict_value(config, keys, value)

        update_shared_cache_options(config, self.args)

        return config
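
# A rough sketch of how `_get_run_config` patches TEMPLATE below (assumption: the
# `set_nested_dict_value` helper walks nested dicts/lists by the keys/indices in a
# tuple path and assigns the final value in place):
#
#     cfg = {"evaluators": {"evaluator": {"batch_size": 16}}}
#     set_nested_dict_value(cfg, ("evaluators", "evaluator", "batch_size"), 4)
#     # cfg["evaluators"]["evaluator"]["batch_size"] is now 4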


TEMPLATE = {
    "systems": {
        "local_system": {
            "type": "LocalSystem",
            "accelerators": [{"device": "cpu", "execution_providers": ["CPUExecutionProvider"]}],
        }
    },
    "evaluators": {
        "evaluator": {
            "type": "LMEvaluator",
            "tasks": [],
            "batch_size": 16,
            "max_length": 1024,
            "device": "cpu",
            "limit": 64,
        }
    },
    "evaluator": "evaluator",
    "host": "local_system",
    "target": "local_system",
    "no_artifacts": True,
}
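
# Example invocation sketch (assumptions: the subcommand is exposed through the standard
# `olive` CLI entry point, and `-m/--model_name_or_path` is the model option registered
# by `add_input_model_options`; exact flag spellings may differ):
#
#     olive benchmark -m microsoft/Phi-3.5-mini-instruct \
#         --tasks hellaswag arc_easy --device cpu --batch_size 8 --limit 0.1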