
Commit ef5f32a

richardhuo-nv authored and yeswanthk-26 committed

fix tokenizer

fix

1 parent ceb5a4c commit ef5f32a

2 files changed: 45 additions & 7 deletions

src/srtctl/benchmarks/scripts/sa-bench/backend_request_func.py

Lines changed: 45 additions & 6 deletions
@@ -511,6 +511,47 @@ def get_model(pretrained_model_name_or_path: str) -> str:
     return pretrained_model_name_or_path


+def _load_glm_moe_dsa_tokenizer(pretrained_model_name_or_path: str) -> "PreTrainedTokenizerFast":
+    """Load GLM-Moe-Dsa / GLM-5 tokenizer directly from tokenizer.json.
+
+    Works around incompatibilities when the checkpoint was saved with
+    transformers 5.x (TokenizersBackend / list-style extra_special_tokens).
+    """
+    import json
+    from pathlib import Path
+
+    from tokenizers import Tokenizer as RustTokenizer
+    from transformers import PreTrainedTokenizerFast
+
+    _SAFE_CONFIG_KEYS = (
+        "pad_token", "pad_token_id", "eos_token", "eos_token_id",
+        "bos_token", "bos_token_id", "unk_token", "unk_token_id",
+        "model_max_length", "padding_side", "truncation_side",
+    )
+
+    path = Path(pretrained_model_name_or_path)
+    tokenizer_json = path / "tokenizer.json"
+    if not tokenizer_json.exists():
+        raise FileNotFoundError(
+            f"Expected tokenizer.json at {tokenizer_json}. "
+            "GlmMoeDsaTokenizer loads from tokenizer.json only."
+        )
+
+    rust_tok = RustTokenizer.from_file(str(tokenizer_json))
+    init_kwargs = {}
+    config_path = path / "tokenizer_config.json"
+    if config_path.exists():
+        with open(config_path, encoding="utf-8") as f:
+            config = json.load(f)
+        for key in _SAFE_CONFIG_KEYS:
+            if key in config:
+                init_kwargs[key] = config[key]
+        if "extra_special_tokens" in config:
+            init_kwargs["additional_special_tokens"] = config["extra_special_tokens"]
+
+    return PreTrainedTokenizerFast(tokenizer_object=rust_tok, **init_kwargs)
+
+
 def get_tokenizer(
     pretrained_model_name_or_path: str,
     tokenizer_mode: str = "auto",
@@ -535,13 +576,11 @@ def get_tokenizer(
         ) from e
         return MistralTokenizer.from_pretrained(str(pretrained_model_name_or_path))
     if custom_tokenizer:
-        from tensorrt_llm.llmapi.llm_args import TOKENIZER_ALIASES
-
-        tokenizer_path = TOKENIZER_ALIASES.get(custom_tokenizer,
-                                               custom_tokenizer)
+        if custom_tokenizer == "glm_moe_dsa":
+            return _load_glm_moe_dsa_tokenizer(pretrained_model_name_or_path)
         from importlib import import_module
         try:
-            module_path, class_name = tokenizer_path.rsplit('.', 1)
+            module_path, class_name = custom_tokenizer.rsplit('.', 1)
             module = import_module(module_path)
             tokenizer_class = getattr(module, class_name)
             return tokenizer_class.from_pretrained(
@@ -552,7 +591,7 @@ def get_tokenizer(
         except (ValueError, ImportError, AttributeError) as e:
             raise ValueError(
                 f"Failed to load custom_tokenizer '{custom_tokenizer}'. "
-                "Expected alias or 'module.path.ClassName'.") from e
+                "Expected 'glm_moe_dsa' or 'module.path.ClassName'.") from e
     else:
         return AutoTokenizer.from_pretrained(
             pretrained_model_name_or_path,
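
For reference, a minimal usage sketch of the two dispatch paths get_tokenizer now supports. This is not part of the commit: the import path and model directories are hypothetical, and it assumes custom_tokenizer is a keyword parameter of get_tokenizer, as the function body suggests.

    # Usage sketch (hypothetical paths; assumes this module is importable
    # as backend_request_func).
    from backend_request_func import get_tokenizer

    # New shortcut: builds a PreTrainedTokenizerFast straight from
    # tokenizer.json, bypassing AutoTokenizer entirely.
    tok = get_tokenizer("/models/GLM-5", custom_tokenizer="glm_moe_dsa")

    # The dotted-path form is unchanged: the module is imported, the class
    # resolved with getattr, and its from_pretrained() is called.
    tok2 = get_tokenizer("/models/other-model",
                         custom_tokenizer="transformers.AutoTokenizer")

    # Quick round-trip check on the loaded tokenizer.
    ids = tok.encode("hello world")
    print(tok.decode(ids))

Note how narrow the workaround is by design: _load_glm_moe_dsa_tokenizer copies only the whitelisted _SAFE_CONFIG_KEYS from tokenizer_config.json, so any transformers-5.x-only fields are dropped rather than forwarded to PreTrainedTokenizerFast, and a list-style extra_special_tokens entry is remapped to additional_special_tokens.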

src/srtctl/cli/mixins/benchmark_stage.py

Lines changed: 0 additions & 1 deletion
@@ -167,7 +167,6 @@ def _run_benchmark_script(
             container_image=str(self.runtime.container_image),
             container_mounts=self.runtime.container_mounts,
             env_to_set=env_to_set,
-            mpi="pmix",
         )

         # Wait for benchmark to complete
