diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index f7a7cfbf3..580487d21 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -1170,7 +1170,7 @@ def format_hf_custom_error_message(error: HfHubHTTPError): "Please check the revision identifier and try again.", service_payload={"error": "RevisionNotFoundError"}, ) - + raise AquaRuntimeError( reason=f"An error occurred while accessing `{url}` " "Please check your network connection and try again. " diff --git a/ads/aqua/shaperecommend/constants.py b/ads/aqua/shaperecommend/constants.py index 608b5b1ea..dec3b017f 100644 --- a/ads/aqua/shaperecommend/constants.py +++ b/ads/aqua/shaperecommend/constants.py @@ -13,6 +13,8 @@ MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet) + +EXCLUDED_MODELS contains a set of model identifiers that are known to be unsupported for shape recommendation, such as audio and speech models. """ LLAMA_REQUIRED_FIELDS = [ @@ -131,3 +133,7 @@ "ARM": "CPU", "UNKNOWN_ENUM_VALUE": "N/A", } +EXCLUDED_MODELS = { + "t5", "gemma", "bart", "bert", "roberta", "albert", + "whisper", "wav2vec", "speech", "audio" + } \ No newline at end of file diff --git a/ads/aqua/shaperecommend/llm_config.py b/ads/aqua/shaperecommend/llm_config.py index 9c5c00f13..b756b2874 100644 --- a/ads/aqua/shaperecommend/llm_config.py +++ b/ads/aqua/shaperecommend/llm_config.py @@ -3,7 +3,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import re -from typing import Optional +from typing import Optional, Any, Dict from pydantic import BaseModel, Field @@ -17,6 +17,7 @@ QUANT_MAPPING, QUANT_METHODS, RUNTIME_WEIGHTS, + EXCLUDED_MODELS ) from ads.common.utils import parse_bool @@ -24,7 +25,7 @@ class GeneralConfig(BaseModel): num_hidden_layers: int = Field( ..., - description="Number of transformer blocks (layers) in the model’s neural network stack.", + description="Number of transformer blocks (layers) in the model's neural network stack.", ) hidden_size: int = Field( ..., description="Embedding dimension or hidden size of each layer." @@ -46,6 +47,27 @@ class GeneralConfig(BaseModel): description="Parameter data type: 'float32', 'float16', etc.", ) + @staticmethod + def _get_required_int(raw: dict[str, Any], keys: list[str], field_name: str) -> int: + """ + Helper to safely extract a required integer field from multiple possible keys. + Raises AquaRecommendationError if the value is missing or None. + """ + for key in keys: + val = raw.get(key) + if val is not None: + try: + return int(val) + except (ValueError, TypeError): + pass # If value exists but isn't a number, keep looking or fail later + + # If we reach here, no valid key was found + raise AquaRecommendationError( + f"Could not determine '{field_name}' from the model configuration. " + f"Checked keys: {keys}. " + "This indicates the model architecture might not be supported or uses a non-standard config structure." + ) + @classmethod def get_weight_dtype(cls, raw: dict) -> str: # some configs use a different weight dtype at runtime @@ -173,21 +195,26 @@ class VisionConfig(GeneralConfig): @classmethod def from_raw_config(cls, vision_section: dict) -> "VisionConfig": weight_dtype = cls.get_weight_dtype(vision_section) - num_layers = ( - vision_section.get("num_layers") - or vision_section.get("vision_layers") - or vision_section.get("num_hidden_layers") - or vision_section.get("n_layer") + + num_layers = cls._get_required_int( + vision_section, + ["num_layers", "vision_layers", "num_hidden_layers", "n_layer"], + "num_hidden_layers" ) - hidden_size = vision_section.get("hidden_size") or vision_section.get( - "embed_dim" + hidden_size = cls._get_required_int( + vision_section, + ["hidden_size", "embed_dim"], + "hidden_size" ) - mlp_dim = vision_section.get("mlp_dim") or vision_section.get( - "intermediate_size" + mlp_dim = cls._get_required_int( + vision_section, + ["mlp_dim", "intermediate_size"], + "mlp_dim" ) + # Optional fields can use standard .get() num_attention_heads = ( vision_section.get("num_attention_heads") or vision_section.get("vision_num_attention_heads") @@ -202,10 +229,10 @@ def from_raw_config(cls, vision_section: dict) -> "VisionConfig": weight_dtype = str(cls.get_weight_dtype(vision_section)) return cls( - num_hidden_layers=int(num_layers), - hidden_size=int(hidden_size), - mlp_dim=int(mlp_dim), - patch_size=int(patch_size), + num_hidden_layers=num_layers, + hidden_size=hidden_size, + mlp_dim=mlp_dim, + patch_size=int(patch_size) if patch_size else 0, num_attention_heads=int(num_attention_heads) if num_attention_heads else None, @@ -311,18 +338,28 @@ def optimal_config(self): return configs @classmethod - def validate_model_support(cls, raw: dict) -> ValueError: + def validate_model_support(cls, raw: dict): """ Validates if model is decoder-only. Check for text-generation model occurs at DataScienceModel level. + Also explicitly checks for unsupported audio/speech models. """ - excluded_models = {"t5", "gemma", "bart", "bert", "roberta", "albert"} + # Known unsupported model architectures or types + excluded_models = EXCLUDED_MODELS + + model_type = raw.get("model_type", "").lower() + + if model_type in excluded_models: + raise AquaRecommendationError( + f"The model type '{model_type}' is not supported. " + "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). " + "Encoder-decoder models (ex. T5, Gemma), encoder-only (BERT), and audio models (Whisper) are not supported at this time." + ) + if ( raw.get("is_encoder_decoder", False) # exclude encoder-decoder models or ( raw.get("is_decoder") is False ) # exclude explicit encoder-only models (altho no text-generation task ones, just dbl check) - or raw.get("model_type", "").lower() # exclude by known model types - in excluded_models ): raise AquaRecommendationError( "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). " @@ -337,14 +374,33 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig": """ cls.validate_model_support(raw) - # Field mappings with fallback - num_hidden_layers = ( - raw.get("num_hidden_layers") or raw.get("n_layer") or raw.get("num_layers") + # Field mappings with fallback using safe extraction + num_hidden_layers = cls._get_required_int( + raw, + ["num_hidden_layers", "n_layer", "num_layers"], + "num_hidden_layers" ) - weight_dtype = cls.get_weight_dtype(raw) - hidden_size = raw.get("hidden_size") or raw.get("n_embd") or raw.get("d_model") - vocab_size = raw.get("vocab_size") + hidden_size = cls._get_required_int( + raw, + ["hidden_size", "n_embd", "d_model"], + "hidden_size" + ) + + num_attention_heads = cls._get_required_int( + raw, + ["num_attention_heads", "n_head", "num_heads"], + "num_attention_heads" + ) + + # Vocab size might be missing in some architectures, but usually required for memory calc + vocab_size = cls._get_required_int( + raw, + ["vocab_size"], + "vocab_size" + ) + + weight_dtype = cls.get_weight_dtype(raw) quantization = cls.detect_quantization_bits(raw) quantization_type = cls.detect_quantization_type(raw) @@ -355,15 +411,18 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig": raw.get("num_key_value_heads") # GQA models (ex. Llama-type) ) - num_attention_heads = ( - raw.get("num_attention_heads") or raw.get("n_head") or raw.get("num_heads") - ) - head_dim = raw.get("head_dim") or ( int(hidden_size) // int(num_attention_heads) if hidden_size and num_attention_heads else None ) + + # Ensure head_dim is not None if calculation failed + if head_dim is None: + raise AquaRecommendationError( + "Could not determine 'head_dim' and it could not be calculated from 'hidden_size' and 'num_attention_heads'." + ) + max_seq_len = ( raw.get("max_position_embeddings") or raw.get("n_positions") @@ -388,12 +447,12 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig": ) # trust-remote-code is always needed when this key is present return cls( - num_hidden_layers=int(num_hidden_layers), - hidden_size=int(hidden_size), - num_attention_heads=int(num_attention_heads), + num_hidden_layers=num_hidden_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, num_key_value_heads=num_key_value_heads, head_dim=int(head_dim), - vocab_size=int(vocab_size), + vocab_size=vocab_size, weight_dtype=weight_dtype, quantization=quantization, quantization_type=quantization_type, @@ -511,4 +570,4 @@ def get_model_config(cls, raw: dict): # Neither found -- explicit failure raise AquaRecommendationError( "Config could not be parsed as either text, vision, or multimodal model. Check your fields/structure." - ) + ) \ No newline at end of file diff --git a/ads/aqua/shaperecommend/recommend.py b/ads/aqua/shaperecommend/recommend.py index d41c771a2..0e84f2395 100644 --- a/ads/aqua/shaperecommend/recommend.py +++ b/ads/aqua/shaperecommend/recommend.py @@ -8,7 +8,7 @@ from typing import Dict, List, Optional, Tuple, Union from huggingface_hub import hf_hub_download -from huggingface_hub.utils import HfHubHTTPError +from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError from pydantic import ValidationError from rich.table import Table @@ -182,17 +182,34 @@ def _get_model_config_and_name( return config, model_name def _fetch_hf_config(self, model_id: str) -> Dict: - """ - Downloads a model's config.json from Hugging Face Hub using the - huggingface_hub library. - """ - try: - config_path = hf_hub_download(repo_id=model_id, filename="config.json") - with open(config_path, encoding="utf-8") as f: - return json.load(f) - except HfHubHTTPError as e: - format_hf_custom_error_message(e) - + """ + Downloads a model's config.json from Hugging Face Hub. + """ + try: + config_path = hf_hub_download(repo_id=model_id, filename="config.json") + with open(config_path, encoding="utf-8") as f: + return json.load(f) + + except EntryNotFoundError as e: + # EXPLICIT HANDLING: This covers the GGUF case + logger.error(f"config.json not found for model '{model_id}': {e}") + raise AquaRecommendationError( + f"The configuration file 'config.json' was not found in the repository '{model_id}'. " + "This often happens with GGUF models (which are not supported) or invalid repositories. " + "Please ensure the model ID is correct and the repository contains a 'config.json'." + ) from e + + except HfHubHTTPError as e: + # For other errors (Auth, Network), use the shared formatter. + logger.error(f"HTTP error fetching config for '{model_id}': {e}") + format_hf_custom_error_message(e) + + except Exception as e: + logger.error(f"Unexpected error fetching config for '{model_id}': {e}") + raise AquaRecommendationError( + f"An unexpected error occurred while fetching the model configuration: {e}" + ) from e + def valid_compute_shapes( self, compartment_id: Optional[str] = None ) -> List["ComputeShapeSummary"]: diff --git a/pyproject.toml b/pyproject.toml index 824acc78f..9f4d62211 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -141,7 +141,7 @@ opctl = [ "rich", "fire", "cachetools", - "huggingface_hub==0.26.2" + "huggingface_hub" ] optuna = ["optuna==2.9.0", "oracle_ads[viz]"] spark = ["pyspark>=3.0.0"] diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/nemotron-vl-8b.json b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/nemotron-vl-8b.json new file mode 100644 index 000000000..c83b20832 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/nemotron-vl-8b.json @@ -0,0 +1,295 @@ +{ + "architectures": [ + "Llama_Nemotron_Nano_VL" + ], + "auto_map": { + "AutoConfig": "configuration.Llama_Nemotron_Nano_VL_Config", + "AutoModel": "modeling.Llama_Nemotron_Nano_VL", + "AutoModelForCausalLM": "modeling.Llama_Nemotron_Nano_VL" + }, + "max_sequence_length": 16384, + "downsample_ratio": 0.5, + "force_image_size": 512, + "patch_size": 16, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "model_type": "Llama_Nemotron_Nano_VL", + "ps_version": "v2", + "template": "llama_3p1", + "torch_dtype": "bfloat16", + "image_tag_type": "internvl", + + "vit_hidden_size": 1280, + "projector_hidden_size": 4096, + + "llm_config": { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "low_freq_factor": 1.0, + "high_freq_factor": 4.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.42.3", + "use_cache": true, + "vocab_size": 128512 + }, + + "vision_config": { + "auto_map": { + "AutoConfig": "nvidia/C-RADIOv2-H--hf_model.RADIOConfig", + "AutoModel": "nvidia/C-RADIOv2-H--hf_model.RADIOModel" + }, + "adaptor_configs": {}, + "adaptor_names": null, + "architectures": [ + "RADIOModel" + ], + "args": { + "aa": null, + "amp": true, + "amp_dtype": "bfloat16", + "amp_impl": "native", + "aug_repeats": 0, + "aug_splits": 0, + "bn_eps": null, + "bn_momentum": null, + "cache_dir": null, + "channels_last": false, + "checkpoint_hist": 10, + "chk_keep_forever": 100, + "class_map": "", + "clip_grad": null, + "clip_mode": "norm", + "cls_token_per_teacher": true, + "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json", + "coco_image_dir": "/datasets/coco2017-adlsa/val2017", + "color_jitter": 0.4, + "cooldown_epochs": 0, + "cpe_max_size": 2048, + "crd_loss": false, + "crd_loss_weight": 0.8, + "crop_pct": null, + "cutmix": 0.0, + "cutmix_minmax": null, + "dataset_download": false, + "debug_full_knn": false, + "decay_epochs": 90, + "decay_milestones": [ + 90, + 180, + 270 + ], + "decay_rate": 0.1, + "depchain": true, + "dist_bn": "reduce", + "dist_norm_weight": 0.0, + "distributed": true, + "drop": 0.0, + "drop_block": null, + "drop_connect": null, + "drop_path": null, + "dtype": "bfloat16", + "epoch_repeats": 0.0, + "eval": false, + "eval_metric": "knn_top1", + "eval_teacher": false, + "eval_teacher_only": false, + "eval_throughput": false, + "fast_norm": false, + "fd_loss_fn": "MSE", + "feature_normalization": "SHIP_NORM", + "feature_summarizer": "cls_token", + "feature_upscale_factor": null, + "force_new_wandb_id": false, + "force_spectral_reparam": true, + "freeze_bn": false, + "fsdp": false, + "fuser": "", + "gp": null, + "grad_accum_steps": 1, + "grad_checkpointing": false, + "head_init_bias": null, + "head_init_scale": null, + "head_warmup": 5, + "head_weight_decay": 0.001, + "hflip": 0.5, + "img_size": null, + "in_chans": null, + "initial_checkpoint": null, + "input_size": null, + "interpolation": "", + "layer_decay": null, + "local_rank": 0, + "log_interval": 50, + "log_mlflow": false, + "log_wandb": true, + "loss_auto_balance": false, + "lr_base": 0.1, + "lr_base_scale": "", + "lr_base_size": 256, + "lr_cycle_decay": 0.5, + "lr_cycle_limit": 1, + "lr_cycle_mul": 1.0, + "lr_k_decay": 1.0, + "lr_noise": null, + "lr_noise_pct": 0.67, + "lr_noise_std": 1.0, + "mean": null, + "mesa": false, + "min_lr": 0, + "mixup": 0.0, + "mixup_mode": "batch", + "mixup_off_epoch": 0, + "mixup_prob": 1.0, + "mixup_switch_prob": 0.5, + "mlp_hidden_size": 1520, + "mlp_num_inner": 3, + "mlp_version": "v2", + "model": "vit_huge_patch16_224", + "model_kwargs": {}, + "model_norm": false, + "momentum": 0.9, + "no_aug": false, + "no_ddp_bb": true, + "no_prefetcher": false, + "no_resume_opt": false, + "num_classes": null, + "opt_betas": null, + "opt_eps": null, + "patience_epochs": 10, + "pin_mem": false, + "prefetcher": true, + "pretrained": false, + "rank": 0, + "ratio": [ + 0.75, + 1.3333333333333333 + ], + "recount": 1, + "recovery_interval": 0, + "register_multiple": 8, + "remode": "pixel", + "reprob": 0.0, + "reset_loss_state": false, + "resplit": false, + "save_images": false, + "scale": [ + 0.5, + 1.0 + ], + "sched": "cosine", + "seed": 42, + "smoothing": 0.1, + "spectral_heads": false, + "spectral_reparam": false, + "split_bn": false, + "start_epoch": null, + "std": null, + "stream_teachers": true, + "sync_bn": false, + "synchronize_step": false, + "teachers": [ + { + "fd_normalize": false, + "feature_distillation": true, + "input_size": 378, + "model": "ViT-H-14-378-quickgelu", + "name": "clip", + "pretrained": "dfn5b", + "type": "open_clip", + "use_summary": true + }, + { + "fd_normalize": false, + "feature_distillation": true, + "input_size": 378, + "model": "ViT-SO400M-14-SigLIP-384", + "name": "siglip", + "pretrained": "webli", + "type": "open_clip", + "use_summary": true + }, + { + "fd_normalize": false, + "feature_distillation": true, + "input_size": 378, + "model": "dinov2_vitg14_reg", + "name": "dino_v2", + "type": "dino_v2", + "use_summary": true + }, + { + "fd_normalize": false, + "feature_distillation": true, + "input_size": 1024, + "model": "vit-h", + "name": "sam", + "type": "sam", + "use_summary": false + } + ], + "torchcompile": null, + "torchscript": false, + "train_interpolation": "random", + "train_split": "train", + "tta": 0, + "use_coco": false, + "use_multi_epochs_loader": false, + "val_ema_only": false, + "val_split": "val", + "vflip": 0.0, + "vitdet_version": 1, + "wandb_entity": "", + "wandb_job_type": "", + "wandb_name": "", + "wandb_project": "", + "warmup_lr": 1e-05, + "warmup_prefix": false, + "worker_seeding": "all", + "workers": 8, + "world_size": 256 + }, + "feature_normalizer_config": null, + "inter_feature_normalizer_config": null, + "max_resolution": 2048, + "patch_size": 16, + "preferred_resolution": [ + 768, + 768 + ], + "torch_dtype": "bfloat16", + "version": "radio_v2.5-h", + "vitdet_window_size": null + }, + "attn_implementation": "flash_attention_2" +} diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/whisper-large-v3.json b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/whisper-large-v3.json new file mode 100644 index 000000000..14c6c8cf4 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/whisper-large-v3.json @@ -0,0 +1,50 @@ +{ + "_name_or_path": "openai/whisper-large-v3", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 128, + "pad_token_id": 50256, + "scale_embedding": false, + "torch_dtype": "float16", + "transformers_version": "4.36.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51866 +} diff --git a/tests/unitary/with_extras/aqua/test_recommend.py b/tests/unitary/with_extras/aqua/test_recommend.py index 33b2503a8..198d8f165 100644 --- a/tests/unitary/with_extras/aqua/test_recommend.py +++ b/tests/unitary/with_extras/aqua/test_recommend.py @@ -208,6 +208,22 @@ def test_llm_config_from_raw_config_file( assert config.weight_dtype.lower() == expected_dtype assert config.head_dim == expected_head_dim assert config.quantization == expected_quant + + @pytest.mark.parametrize( + "config_file, error_match", + [ + # CASE 1: Whisper (Audio model) -> Should trigger "model type not supported" + ("config-json-files/whisper-large-v3.json", "model type.*not supported"), + + # CASE 2: Nemotron (VLM) -> Should trigger "Could not determine 'num_hidden_layers'" + ("config-json-files/nemotron-vl-8b.json", "Could not determine.*num_hidden_layers"), + ], + ) + def test_llm_config_unsupported_models(self, config_file, error_match): + raw = load_config(config_file) + # We expect a clean AquaRecommendationError, NOT a TypeError crash + with pytest.raises(AquaRecommendationError, match=error_match): + LLMConfig.from_raw_config(raw) def test_suggested_quantizations(self): c = LLMConfig( @@ -316,7 +332,7 @@ def test_fetch_hf_config_http_error(self, mock_format_error, mock_download): @pytest.mark.parametrize( "config, expected_recs, expected_troubleshoot", [ - ( # decoder-only model + ( # 1. Decoder-only model (Standard Case - Should Work) { "num_hidden_layers": 2, "hidden_size": 64, @@ -328,7 +344,7 @@ def test_fetch_hf_config_http_error(self, mock_format_error, mock_download): [], "", ), - ( # encoder-decoder model + ( # 2. Encoder-Decoder model (e.g., T5 - Known Unsupported) { "num_hidden_layers": 2, "hidden_size": 64, @@ -341,6 +357,29 @@ def test_fetch_hf_config_http_error(self, mock_format_error, mock_download): [], "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). Encoder-decoder models (ex. T5, Gemma) and encoder-only (BERT) are not supported at this time.", ), + ( # 3. Whisper (Audio Model) - Explicitly blocked by model_type + { + "model_type": "whisper", + "d_model": 1280, + "encoder_layers": 32, + "vocab_size": 51865 + }, + [], + # Matches the full error string from llm_config.py + "The model type 'whisper' is not supported. Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). Encoder-decoder models (ex. T5, Gemma), encoder-only (BERT), and audio models (Whisper) are not supported at this time.", + ), + ( # 4. Nemotron (VLM) - Fails because keys are nested in 'text_config' + { + "model_type": "llama-3.1-nemotron-nano-vl", + "vocab_size": 128256, + "text_config": { # Parser doesn't look here yet, so it fails finding layers at top level + "num_hidden_layers": 32 + } + }, + [], + # Matches the 'missing key' error from llm_config.py + "Could not determine 'num_hidden_layers' from the model configuration. Checked keys: ['num_hidden_layers', 'n_layer', 'num_layers']. This indicates the model architecture might not be supported or uses a non-standard config structure." + ), ], ) def test_which_shapes_valid( @@ -364,6 +403,7 @@ def test_which_shapes_valid( model_id="ocid1.datasciencemodel.oc1.TEST", generate_table=False ) result = app.which_shapes(request) + assert result == expected_result # If troubleshoot is populated (error case), _summarize_shapes_for_seq_lens should not have been called