Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions nemo_automodel/_transformers/model_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
import torch
from huggingface_hub import snapshot_download
from transformers import AutoConfig, PretrainedConfig

try:
from huggingface_hub.errors import StrictDataclassClassValidationError
except ImportError:
StrictDataclassClassValidationError = ValueError
from transformers.modeling_utils import PreTrainedModel

# For models that still accesses config.pad_token_id after v5 removes it in PretrainedConfig
Expand Down Expand Up @@ -205,7 +210,7 @@ def get_hf_config(pretrained_model_name_or_path, attn_implementation, **kwargs):
trust_remote_code=trust_remote_code,
attn_implementation=attn_implementation,
)
except ValueError as e:
except (ValueError, StrictDataclassClassValidationError) as e:
err = str(e)
if "does not recognize this architecture" in err:
raise ValueError(
Expand All @@ -220,7 +225,9 @@ def get_hf_config(pretrained_model_name_or_path, attn_implementation, **kwargs):
) from e
# Some upstream configs (e.g. stepfun-ai/Step-3.5-Flash) ship
# layer_types longer than num_hidden_layers, which newer transformers
# versions reject during config instantiation. Fix the raw dict and retry.
# versions reject during config instantiation. huggingface_hub wraps
# the validator's ValueError in StrictDataclassClassValidationError
# (not a ValueError subclass), so both exception types must be caught.
if "num_hidden_layers" in err and ("layer_types" in err or "layer types" in err):
hf_config = _load_config_with_layer_types_fix(
pretrained_model_name_or_path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
from transformers import AutoTokenizer
from transformers.tokenization_utils_base import BatchEncoding

try:
from huggingface_hub.errors import StrictDataclassClassValidationError
except ImportError:
StrictDataclassClassValidationError = ValueError

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -326,7 +331,21 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, add_bos_token=Tru
add_bos_token: Whether to add BOS token (default: True)
add_eos_token: Whether to add EOS token (default: True)
"""
tokenizer = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
try:
tokenizer = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
except (ValueError, StrictDataclassClassValidationError) as e:
# AutoTokenizer.from_pretrained internally calls AutoConfig.from_pretrained,
# so configs whose layer_types length differs from num_hidden_layers (e.g.
# stepfun-ai/Step-3.5-Flash) trip validate_layer_type before the tokenizer
# is built. The tokenizer itself doesn't depend on layer_types, so relax
# the validator globally and retry.
err = str(e)
if "num_hidden_layers" not in err or ("layer_types" not in err and "layer types" not in err):
raise
from nemo_automodel._transformers.v4_patches.layer_types import relax_layer_types_validator

relax_layer_types_validator()
tokenizer = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)

# Convert TikToken-based tokenizers to fast (Rust-backed) tokenizers so that
# char_to_token() works natively for {% generation %} mask computation.
Expand Down
83 changes: 83 additions & 0 deletions nemo_automodel/_transformers/v4_patches/layer_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,91 @@ def install_layer_types_patch_hook() -> bool:
return True


# Module-level latch: set True once relax_layer_types_validator() has run (or
# determined there is nothing to patch), so the relaxation happens at most once.
_VALIDATOR_RELAXED: bool = False


def _noop_validate_layer_type(self):  # noqa: ARG001
    """No-op replacement for ``PretrainedConfig.validate_layer_type``.

    Takes ``self`` (unused) so it is call-compatible with the validator it
    replaces, and always returns ``None`` instead of validating anything.
    """
    return None


# relax_layer_types_validator() identifies validator entries to swap by
# __name__, so the no-op must advertise the original validator's name.
_noop_validate_layer_type.__name__ = "validate_layer_type"


def relax_layer_types_validator() -> bool:
    """Neutralize ``PretrainedConfig.validate_layer_type`` everywhere it is registered.

    Certain published configs (e.g. ``stepfun-ai/Step-3.5-Flash``) carry a
    ``layer_types`` list whose length disagrees with ``num_hidden_layers``;
    recent transformers releases refuse to instantiate such configs, and
    ``huggingface_hub`` surfaces the failure as
    ``StrictDataclassClassValidationError``. Turning the validator into a
    no-op lets the config load as-is.

    Because the ``huggingface_hub`` ``@strict`` decorator snapshots validator
    references into each class's ``__class_validators__`` at decoration time,
    rebinding ``PretrainedConfig.validate_layer_type`` alone is not enough:
    every subclass decorated earlier still holds the old function. This
    helper therefore rewrites both the class attribute and the matching
    entries in ``__class_validators__`` across the entire live subclass
    tree; classes decorated afterwards inherit the no-op automatically via
    ``getattr(cls, "validate_layer_type")``.

    Idempotent and best-effort: any failure (attribute absent, transformers
    unavailable) is logged and swallowed rather than propagated.

    Returns:
        ``True`` if the patch was applied on this call, ``False`` otherwise.
    """
    global _VALIDATOR_RELAXED
    if _VALIDATOR_RELAXED:
        return False

    try:
        from transformers import PretrainedConfig
    except ImportError:
        logger.debug("[v4_patches.layer_types] transformers not importable; skipping relax.")
        return False
    except Exception as exc:
        logger.warning("[v4_patches.layer_types] transformers import failed: %s", exc)
        return False

    if not hasattr(PretrainedConfig, "validate_layer_type"):
        logger.debug("[v4_patches.layer_types] validate_layer_type missing; nothing to relax.")
        _VALIDATOR_RELAXED = True
        return False

    try:
        PretrainedConfig.validate_layer_type = _noop_validate_layer_type
    except Exception as exc:
        logger.warning("[v4_patches.layer_types] failed to rebind validate_layer_type: %s", exc)
        return False

    def _swap_registered_validators(klass) -> None:
        # Mutate the class's own __class_validators__ list in place so every
        # holder of that list object sees the no-op.
        registered = klass.__dict__.get("__class_validators__")
        if isinstance(registered, list):
            registered[:] = [
                _noop_validate_layer_type
                if getattr(entry, "__name__", None) == "validate_layer_type"
                else entry
                for entry in registered
            ]

    try:
        # Iterative walk over the live subclass tree (base included).
        pending = [PretrainedConfig]
        while pending:
            klass = pending.pop()
            _swap_registered_validators(klass)
            try:
                pending.extend(klass.__subclasses__())
            except Exception:
                # A misbehaving class keeps its subtree; siblings still proceed.
                continue
    except Exception as exc:
        logger.warning("[v4_patches.layer_types] subclass walk failed: %s", exc)
        return False

    _VALIDATOR_RELAXED = True
    logger.info("[v4_patches.layer_types] relaxed validate_layer_type to no-op")
    return True


# Public surface of this patch module; relax_layer_types_validator is the
# hook callers invoke to retry config loads that fail layer_types validation.
__all__ = [
    "DEFAULT_EXTRA_LAYER_TYPES",
    "install_layer_types_patch_hook",
    "patch_allowed_layer_types",
    "relax_layer_types_validator",
]
25 changes: 22 additions & 3 deletions nemo_automodel/recipes/vlm/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,28 @@ def build_dataloader(
try:
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path, **processor_kwargs)
except Exception as e:
# Some models do not provide an AutoProcessor
processor = None
logging.warning(f"AutoProcessor not available for {pretrained_model_name_or_path} ({e}). ")
# AutoProcessor.from_pretrained internally loads AutoConfig. Configs
# whose layer_types length differs from num_hidden_layers trip
# validate_layer_type. The processor itself doesn't depend on
# layer_types, so relax the validator and retry once before giving up.
err = str(e)
if "num_hidden_layers" in err and ("layer_types" in err or "layer types" in err):
from nemo_automodel._transformers.v4_patches.layer_types import (
relax_layer_types_validator,
)

relax_layer_types_validator()
try:
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path, **processor_kwargs)
except Exception as retry_exc:
processor = None
logging.warning(
f"AutoProcessor not available for {pretrained_model_name_or_path} ({retry_exc}). "
)
else:
# Some models do not provide an AutoProcessor
processor = None
logging.warning(f"AutoProcessor not available for {pretrained_model_name_or_path} ({e}). ")

chat_template_raw = cfg_ds.__dict__.pop("chat_template", None)
# Update chat_template if chat_template is given
Expand Down
45 changes: 42 additions & 3 deletions tests/unit_tests/_transformers/test_auto_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,47 @@ def test_cls_sep_pattern_preserved_when_tokens_present(self):
tok = NeMoAutoTokenizer.from_pretrained("dummy/model")
assert tok.special_tokens_pattern == "cls_sep"

def test_retry_on_layer_types_mismatch(self):
"""When AutoTokenizer fails because the underlying config has
layer_types longer than num_hidden_layers (e.g. stepfun-ai/Step-3.5-Flash),
the wrapper should relax the validator globally and retry."""
from huggingface_hub.errors import StrictDataclassClassValidationError

stub = _StubHFTokenizer()
calls = {"n": 0}

def fake_from_pretrained(pretrained_model_name_or_path, *args, **kwargs):
calls["n"] += 1
if calls["n"] == 1:
cause = ValueError("`num_hidden_layers` (45) must be equal to the number of layer types (48).")
raise StrictDataclassClassValidationError(validator="validate_layer_type", cause=cause)
return stub

with (
patch("transformers.AutoTokenizer.from_pretrained", side_effect=fake_from_pretrained),
patch(
"nemo_automodel._transformers.v4_patches.layer_types.relax_layer_types_validator",
return_value=True,
) as mock_relax,
):
tok = NeMoAutoTokenizer.from_pretrained("stepfun-ai/Step-3.5-Flash", trust_remote_code=True)
assert tok is not None
assert calls["n"] == 2
mock_relax.assert_called_once()

def test_unrelated_value_error_is_not_retried(self):
"""Unrelated ValueErrors should propagate without triggering the fix path."""
with (
patch(
"transformers.AutoTokenizer.from_pretrained",
side_effect=ValueError("totally unrelated tokenizer failure"),
),
patch("nemo_automodel._transformers.v4_patches.layer_types.relax_layer_types_validator") as mock_relax,
):
with pytest.raises(ValueError, match="totally unrelated"):
NeMoAutoTokenizer.from_pretrained("dummy/model")
mock_relax.assert_not_called()

def test_force_hf_passthrough(self):
stub = _StubHFTokenizer()
with patch("transformers.AutoTokenizer.from_pretrained", return_value=stub):
Expand Down Expand Up @@ -841,9 +882,7 @@ def test_converts_tiktoken_tokenizer(self):
mock_fast_tokenizer.is_fast = True

with (
patch(
"transformers.convert_slow_tokenizer.TikTokenConverter"
) as mock_converter_cls,
patch("transformers.convert_slow_tokenizer.TikTokenConverter") as mock_converter_cls,
patch(
"transformers.tokenization_utils_tokenizers.TokenizersBackend",
return_value=mock_fast_tokenizer,
Expand Down
19 changes: 19 additions & 0 deletions tests/unit_tests/_transformers/test_model_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,25 @@ def test_retry_on_layer_types_mismatch(self, mock_from_pretrained, _mock_trust,
call_kwargs = mock_fix.call_args[1]
assert call_kwargs["trust_remote_code"] is True

@patch("nemo_automodel._transformers.model_init._load_config_with_layer_types_fix")
@patch("nemo_automodel._transformers.model_init.resolve_trust_remote_code", return_value=True)
@patch("nemo_automodel._transformers.model_init.AutoConfig.from_pretrained")
def test_retry_on_strict_dataclass_validation_error(self, mock_from_pretrained, _mock_trust, mock_fix):
"""huggingface_hub wraps the validator ValueError in a non-ValueError error type."""
from huggingface_hub.errors import StrictDataclassClassValidationError

cause = ValueError("`num_hidden_layers` (45) must be equal to the number of layer types (48).")
mock_from_pretrained.side_effect = StrictDataclassClassValidationError(
validator="validate_layer_type", cause=cause
)
fixed_cfg = MagicMock()
mock_fix.return_value = fixed_cfg

result = get_hf_config("stepfun-ai/Step-3.5-Flash", "sdpa")

assert result is fixed_cfg
mock_fix.assert_called_once()

@patch("nemo_automodel._transformers.model_init._load_config_with_layer_types_fix")
@patch("nemo_automodel._transformers.model_init.resolve_trust_remote_code", return_value=False)
@patch("nemo_automodel._transformers.model_init.AutoConfig.from_pretrained")
Expand Down
59 changes: 59 additions & 0 deletions tests/unit_tests/_transformers/v4_patches/test_layer_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ def isolated_layer_types_state():
"""Reset module globals and meta-path / sys.modules mutations after each test."""
saved_patched = lt_mod._PATCHED
saved_hook_installed = lt_mod._HOOK_INSTALLED
saved_validator_relaxed = lt_mod._VALIDATOR_RELAXED
saved_meta_path = list(sys.meta_path)
saved_transformers = sys.modules.get("transformers")
saved_cu = sys.modules.get(lt_mod._TARGET_MODULE)

lt_mod._PATCHED = False
lt_mod._HOOK_INSTALLED = False
lt_mod._VALIDATOR_RELAXED = False
# The package installs a finder at import time; strip it so each test can
# reason about the finders it inserts in isolation. The original list is
# restored verbatim on teardown.
Expand All @@ -50,6 +52,7 @@ def isolated_layer_types_state():
finally:
lt_mod._PATCHED = saved_patched
lt_mod._HOOK_INSTALLED = saved_hook_installed
lt_mod._VALIDATOR_RELAXED = saved_validator_relaxed
sys.meta_path[:] = saved_meta_path
_restore(sys.modules, "transformers", saved_transformers)
_restore(sys.modules, lt_mod._TARGET_MODULE, saved_cu)
Expand Down Expand Up @@ -215,3 +218,59 @@ def find_spec(self, fullname, path=None, target=None):
assert lt_mod._PATCHED is True
for extra in lt_mod.DEFAULT_EXTRA_LAYER_TYPES:
assert extra in fake_cu.ALLOWED_LAYER_TYPES


def _install_fake_pretrained_config_tree():
    """Register a minimal ``transformers`` stub exposing ``PretrainedConfig``.

    The stub base class and one subclass both carry an always-raising
    validator named ``validate_layer_type`` in ``__class_validators__``,
    mimicking the structure the relax helper has to rewrite.

    Returns:
        Tuple of ``(base class, subclass, original validator function)``.
    """

    def _failing_validator(self):
        raise ValueError("`num_hidden_layers` must equal number of layer types")

    # The relax helper matches validator entries by __name__, so the fake
    # validator must advertise the real validator's name.
    _failing_validator.__name__ = "validate_layer_type"

    class _FakePretrainedConfig:
        validate_layer_type = _failing_validator
        __class_validators__ = [_failing_validator]

    class _FakeChildConfig(_FakePretrainedConfig):
        __class_validators__ = [_failing_validator]

    stub_module = types.ModuleType("transformers")
    stub_module.__path__ = []
    stub_module.PretrainedConfig = _FakePretrainedConfig
    sys.modules["transformers"] = stub_module
    return _FakePretrainedConfig, _FakeChildConfig, _failing_validator


class TestRelaxLayerTypesValidator:
    """relax_layer_types_validator must neutralize the validator tree-wide."""

    def test_replaces_validator_on_base_and_subclass(self, isolated_layer_types_state):
        base_cls, child_cls, _original = _install_fake_pretrained_config_tree()

        assert lt_mod.relax_layer_types_validator() is True

        noop = lt_mod._noop_validate_layer_type
        assert base_cls.validate_layer_type is noop
        assert base_cls.__class_validators__[0] is noop
        assert child_cls.__class_validators__[0] is noop

        # Every registered validator must now run without raising.
        dummy = child_cls.__new__(child_cls)
        for registered in child_cls.__class_validators__:
            registered(dummy)

    def test_idempotent(self, isolated_layer_types_state):
        _install_fake_pretrained_config_tree()

        # First call applies the patch; the second reports nothing to do.
        assert lt_mod.relax_layer_types_validator() is True
        assert lt_mod.relax_layer_types_validator() is False

    def test_missing_attribute_is_tolerated(self, isolated_layer_types_state):
        class _Bare:
            pass

        # A PretrainedConfig without validate_layer_type must be a no-op.
        stub_module = types.ModuleType("transformers")
        stub_module.__path__ = []
        stub_module.PretrainedConfig = _Bare
        sys.modules["transformers"] = stub_module

        assert lt_mod.relax_layer_types_validator() is False
Loading
Loading