Skip to content

Commit 51c9263

Browse files
authored
chore: allow setting the tokenizer in the nemo-run script (#884)
This change allows the script to be used for log-probs benchmarks. I tested it with `piqa` and got the expected accuracy. Signed-off-by: Marta Stepniewska-Dziubinska <martas@nvidia.com>
1 parent f1fa073 commit 51c9263

1 file changed

Lines changed: 17 additions & 0 deletions

File tree

scripts/evaluation_with_nemo_run.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,18 @@ def get_parser():
230230
default=8,
231231
help="Number of parallel requests to send to server. Default: use default for the task.",
232232
)
233+
parser.add_argument(
234+
"--tokenizer_path",
235+
type=str,
236+
default=None,
237+
help="Path to the tokenizer. Default: None",
238+
)
239+
parser.add_argument(
240+
"--tokenizer_backend",
241+
type=str,
242+
default="huggingface",
243+
help="Backend to use for the tokenizer. Default: huggingface",
244+
)
233245
parser.add_argument(
234246
"--request_timeout",
235247
type=int,
@@ -484,11 +496,16 @@ def main():
484496
model_id="megatron_model",
485497
)
486498
eval_target = run.Config(EvaluationTarget, api_endpoint=api_endpoint)
499+
extra = {}
500+
if args.tokenizer_path:
501+
extra["tokenizer"] = args.tokenizer_path
502+
extra["tokenizer_backend"] = args.tokenizer_backend
487503
eval_params = run.Config(
488504
ConfigParams,
489505
limit_samples=args.limit,
490506
parallelism=args.parallel_requests,
491507
request_timeout=args.request_timeout,
508+
extra=extra,
492509
)
493510
eval_config = run.Config(
494511
EvaluationConfig,

0 commit comments

Comments
 (0)