diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py
index f7a7cfbf3..580487d21 100644
--- a/ads/aqua/common/utils.py
+++ b/ads/aqua/common/utils.py
@@ -1170,7 +1170,7 @@ def format_hf_custom_error_message(error: HfHubHTTPError):
             "Please check the revision identifier and try again.",
             service_payload={"error": "RevisionNotFoundError"},
         )
-
+                
     raise AquaRuntimeError(
         reason=f"An error occurred while accessing `{url}` "
         "Please check your network connection and try again. "
diff --git a/ads/aqua/shaperecommend/constants.py b/ads/aqua/shaperecommend/constants.py
index 608b5b1ea..dec3b017f 100644
--- a/ads/aqua/shaperecommend/constants.py
+++ b/ads/aqua/shaperecommend/constants.py
@@ -13,6 +13,8 @@
 MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models
 
 NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)
+
+EXCLUDED_MODELS is the set of lower-cased `model_type` values that are rejected for shape recommendation: encoder-decoder and encoder-only families (e.g. T5, BERT) as well as audio and speech models (e.g. Whisper).
 """
 
 LLAMA_REQUIRED_FIELDS = [
@@ -131,3 +133,7 @@
     "ARM": "CPU",
     "UNKNOWN_ENUM_VALUE": "N/A",
 }
+EXCLUDED_MODELS = {  # compared by exact match against the lower-cased `model_type`
+    "t5", "gemma", "bart", "bert", "roberta", "albert",
+    "whisper", "wav2vec", "wav2vec2", "speech", "speecht5", "speech_to_text", "audio",
+}
\ No newline at end of file
diff --git a/ads/aqua/shaperecommend/llm_config.py b/ads/aqua/shaperecommend/llm_config.py
index 9c5c00f13..b756b2874 100644
--- a/ads/aqua/shaperecommend/llm_config.py
+++ b/ads/aqua/shaperecommend/llm_config.py
@@ -3,7 +3,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import re
-from typing import Optional
+from typing import Optional, Any, Dict
 
 from pydantic import BaseModel, Field
 
@@ -17,6 +17,7 @@
     QUANT_MAPPING,
     QUANT_METHODS,
     RUNTIME_WEIGHTS,
+    EXCLUDED_MODELS
 )
 from ads.common.utils import parse_bool
 
@@ -24,7 +25,7 @@
 class GeneralConfig(BaseModel):
     num_hidden_layers: int = Field(
         ...,
-        description="Number of transformer blocks (layers) in the model’s neural network stack.",
+        description="Number of transformer blocks (layers) in the model's neural network stack.",
     )
     hidden_size: int = Field(
         ..., description="Embedding dimension or hidden size of each layer."
@@ -46,6 +47,27 @@ class GeneralConfig(BaseModel):
         description="Parameter data type: 'float32', 'float16', etc.",
     )
 
+    @staticmethod
+    def _get_required_int(raw: dict[str, Any], keys: list[str], field_name: str) -> int:
+        """
+        Returns the first value found under `keys` (tried in order) that converts to int.
+        Raises AquaRecommendationError if every key is missing, None, or not int-convertible.
+        """
+        for key in keys:
+            val = raw.get(key)
+            if val is None:
+                continue
+            try:
+                return int(val)
+            except (ValueError, TypeError, OverflowError):
+                continue  # unusable value (text, non-finite float): try the next key
+
+        raise AquaRecommendationError(
+            f"Could not determine '{field_name}' from the model configuration. "
+            f"Checked keys: {keys}. "
+            "This indicates the model architecture might not be supported or uses a non-standard config structure."
+        )
+
     @classmethod
     def get_weight_dtype(cls, raw: dict) -> str:
         # some configs use a different weight dtype at runtime
@@ -173,21 +195,26 @@ class VisionConfig(GeneralConfig):
     @classmethod
     def from_raw_config(cls, vision_section: dict) -> "VisionConfig":
         weight_dtype = cls.get_weight_dtype(vision_section)
-        num_layers = (
-            vision_section.get("num_layers")
-            or vision_section.get("vision_layers")
-            or vision_section.get("num_hidden_layers")
-            or vision_section.get("n_layer")
+        
+        num_layers = cls._get_required_int(
+            vision_section, 
+            ["num_layers", "vision_layers", "num_hidden_layers", "n_layer"], 
+            "num_hidden_layers"
         )
 
-        hidden_size = vision_section.get("hidden_size") or vision_section.get(
-            "embed_dim"
+        hidden_size = cls._get_required_int(
+            vision_section,
+            ["hidden_size", "embed_dim"],
+            "hidden_size"
         )
 
-        mlp_dim = vision_section.get("mlp_dim") or vision_section.get(
-            "intermediate_size"
+        mlp_dim = cls._get_required_int(
+            vision_section,
+            ["mlp_dim", "intermediate_size"],
+            "mlp_dim"
         )
 
+        # Optional fields can use standard .get()
         num_attention_heads = (
             vision_section.get("num_attention_heads")
             or vision_section.get("vision_num_attention_heads")
@@ -202,10 +229,10 @@ def from_raw_config(cls, vision_section: dict) -> "VisionConfig":
         weight_dtype = str(cls.get_weight_dtype(vision_section))
 
         return cls(
-            num_hidden_layers=int(num_layers),
-            hidden_size=int(hidden_size),
-            mlp_dim=int(mlp_dim),
-            patch_size=int(patch_size),
+            num_hidden_layers=num_layers,
+            hidden_size=hidden_size,
+            mlp_dim=mlp_dim,
+            patch_size=int(patch_size) if patch_size else 0,
             num_attention_heads=int(num_attention_heads)
             if num_attention_heads
             else None,
@@ -311,18 +338,28 @@ def optimal_config(self):
         return configs
 
     @classmethod
-    def validate_model_support(cls, raw: dict) -> ValueError:
+    def validate_model_support(cls, raw: dict):
         """
         Validates if model is decoder-only. Check for text-generation model occurs at DataScienceModel level.
+        Also explicitly checks for unsupported audio/speech models.
         """
-        excluded_models = {"t5", "gemma", "bart", "bert", "roberta", "albert"}
+        # Known unsupported model architectures or types
+        excluded_models = EXCLUDED_MODELS
+        
+        model_type = raw.get("model_type", "").lower()
+        
+        if model_type in excluded_models:
+            raise AquaRecommendationError(
+                f"The model type '{model_type}' is not supported. "
+                "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). "
+                "Encoder-decoder models (ex. T5, Gemma), encoder-only (BERT), and audio models (Whisper) are not supported at this time."
+            )
+
         if (
             raw.get("is_encoder_decoder", False)  # exclude encoder-decoder models
             or (
                 raw.get("is_decoder") is False
             )  # exclude explicit encoder-only models (altho no text-generation task ones, just dbl check)
-            or raw.get("model_type", "").lower()  # exclude by known model types
-            in excluded_models
         ):
             raise AquaRecommendationError(
                 "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). "
@@ -337,14 +374,33 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig":
         """
         cls.validate_model_support(raw)
 
-        # Field mappings with fallback
-        num_hidden_layers = (
-            raw.get("num_hidden_layers") or raw.get("n_layer") or raw.get("num_layers")
+        # Field mappings with fallback using safe extraction
+        num_hidden_layers = cls._get_required_int(
+            raw, 
+            ["num_hidden_layers", "n_layer", "num_layers"], 
+            "num_hidden_layers"
         )
-        weight_dtype = cls.get_weight_dtype(raw)
 
-        hidden_size = raw.get("hidden_size") or raw.get("n_embd") or raw.get("d_model")
-        vocab_size = raw.get("vocab_size")
+        hidden_size = cls._get_required_int(
+            raw,
+            ["hidden_size", "n_embd", "d_model"],
+            "hidden_size"
+        )
+        
+        num_attention_heads = cls._get_required_int(
+            raw,
+            ["num_attention_heads", "n_head", "num_heads"],
+            "num_attention_heads"
+        )
+        
+        # Vocab size might be missing in some architectures, but usually required for memory calc
+        vocab_size = cls._get_required_int(
+            raw,
+            ["vocab_size"],
+            "vocab_size"
+        )
+
+        weight_dtype = cls.get_weight_dtype(raw)
         quantization = cls.detect_quantization_bits(raw)
         quantization_type = cls.detect_quantization_type(raw)
 
@@ -355,15 +411,18 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig":
             raw.get("num_key_value_heads")  # GQA models (ex. Llama-type)
         )
 
-        num_attention_heads = (
-            raw.get("num_attention_heads") or raw.get("n_head") or raw.get("num_heads")
-        )
-
         head_dim = raw.get("head_dim") or (
             int(hidden_size) // int(num_attention_heads)
             if hidden_size and num_attention_heads
             else None
         )
+        
+        # Ensure head_dim is not None if calculation failed
+        if head_dim is None:
+            raise AquaRecommendationError(
+                "Could not determine 'head_dim' and it could not be calculated from 'hidden_size' and 'num_attention_heads'."
+            )
+
         max_seq_len = (
             raw.get("max_position_embeddings")
             or raw.get("n_positions")
@@ -388,12 +447,12 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig":
         )  # trust-remote-code is always needed when this key is present
 
         return cls(
-            num_hidden_layers=int(num_hidden_layers),
-            hidden_size=int(hidden_size),
-            num_attention_heads=int(num_attention_heads),
+            num_hidden_layers=num_hidden_layers,
+            hidden_size=hidden_size,
+            num_attention_heads=num_attention_heads,
             num_key_value_heads=num_key_value_heads,
             head_dim=int(head_dim),
-            vocab_size=int(vocab_size),
+            vocab_size=vocab_size,
             weight_dtype=weight_dtype,
             quantization=quantization,
             quantization_type=quantization_type,
@@ -511,4 +570,4 @@ def get_model_config(cls, raw: dict):
         # Neither found -- explicit failure
         raise AquaRecommendationError(
             "Config could not be parsed as either text, vision, or multimodal model. Check your fields/structure."
-        )
+        )
\ No newline at end of file
diff --git a/ads/aqua/shaperecommend/recommend.py b/ads/aqua/shaperecommend/recommend.py
index d41c771a2..0e84f2395 100644
--- a/ads/aqua/shaperecommend/recommend.py
+++ b/ads/aqua/shaperecommend/recommend.py
@@ -8,7 +8,7 @@
 from typing import Dict, List, Optional, Tuple, Union
 
 from huggingface_hub import hf_hub_download
-from huggingface_hub.utils import HfHubHTTPError
+from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError
 from pydantic import ValidationError
 from rich.table import Table
 
@@ -182,17 +182,34 @@ def _get_model_config_and_name(
         return config, model_name
 
     def _fetch_hf_config(self, model_id: str) -> Dict:
-        """
-        Downloads a model's config.json from Hugging Face Hub using the
-        huggingface_hub library.
-        """
-        try:
-            config_path = hf_hub_download(repo_id=model_id, filename="config.json")
-            with open(config_path, encoding="utf-8") as f:
-                return json.load(f)
-        except HfHubHTTPError as e:
-            format_hf_custom_error_message(e)
-
+        """
+        Downloads a model's config.json from Hugging Face Hub and returns it parsed.
+
+        Raises AquaRecommendationError when the repository has no config.json or an
+        unexpected error occurs; other Hub HTTP errors go to format_hf_custom_error_message.
+        """
+        try:
+            config_path = hf_hub_download(repo_id=model_id, filename="config.json")
+            with open(config_path, encoding="utf-8") as f:
+                return json.load(f)
+        except EntryNotFoundError as e:
+            # Repository exists but has no config.json; this covers the GGUF case.
+            logger.error(f"config.json not found for model '{model_id}': {e}")
+            raise AquaRecommendationError(
+                f"The configuration file 'config.json' was not found in the repository '{model_id}'. "
+                "This often happens with GGUF models (which are not supported) or invalid repositories. "
+                "Please ensure the model ID is correct and the repository contains a 'config.json'."
+            ) from e
+        except HfHubHTTPError as e:
+            # Remaining Hub HTTP failures (auth, network, ...) use the shared formatter.
+            logger.error(f"HTTP error fetching config for '{model_id}': {e}")
+            format_hf_custom_error_message(e)
+        except Exception as e:
+            logger.error(f"Unexpected error fetching config for '{model_id}': {e}")
+            raise AquaRecommendationError(
+                f"An unexpected error occurred while fetching the model configuration: {e}"
+            ) from e
+
     def valid_compute_shapes(
         self, compartment_id: Optional[str] = None
     ) -> List["ComputeShapeSummary"]:
diff --git a/pyproject.toml b/pyproject.toml
index 824acc78f..9f4d62211 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -141,7 +141,7 @@ opctl = [
   "rich",
   "fire",
   "cachetools",
-  "huggingface_hub==0.26.2"
+  "huggingface_hub"
 ]
 optuna = ["optuna==2.9.0", "oracle_ads[viz]"]
 spark = ["pyspark>=3.0.0"]
diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/nemotron-vl-8b.json b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/nemotron-vl-8b.json
new file mode 100644
index 000000000..c83b20832
--- /dev/null
+++ b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/nemotron-vl-8b.json
@@ -0,0 +1,295 @@
+{
+  "architectures": [
+    "Llama_Nemotron_Nano_VL"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration.Llama_Nemotron_Nano_VL_Config",
+    "AutoModel": "modeling.Llama_Nemotron_Nano_VL",
+    "AutoModelForCausalLM": "modeling.Llama_Nemotron_Nano_VL"
+  },
+  "max_sequence_length": 16384,
+  "downsample_ratio": 0.5,
+  "force_image_size": 512,
+  "patch_size": 16,
+  "eos_token_id": [
+    128001,
+    128008,
+    128009
+  ],
+  "model_type": "Llama_Nemotron_Nano_VL",
+  "ps_version": "v2",
+  "template": "llama_3p1",
+  "torch_dtype": "bfloat16",
+  "image_tag_type": "internvl",
+
+  "vit_hidden_size": 1280,
+  "projector_hidden_size": 4096,
+
+  "llm_config": {
+    "architectures": [
+      "LlamaForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 128000,
+    "eos_token_id": [
+      128001,
+      128008,
+      128009
+    ],
+    "hidden_act": "silu",
+    "hidden_size": 4096,
+    "initializer_range": 0.02,
+    "intermediate_size": 14336,
+    "max_position_embeddings": 131072,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "num_key_value_heads": 8,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+      "factor": 8.0,
+      "low_freq_factor": 1.0,
+      "high_freq_factor": 4.0,
+      "original_max_position_embeddings": 8192,
+      "rope_type": "llama3"
+    },
+    "rope_theta": 500000.0,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.42.3",
+    "use_cache": true,
+    "vocab_size": 128512
+  },
+
+  "vision_config": {
+    "auto_map": {
+      "AutoConfig": "nvidia/C-RADIOv2-H--hf_model.RADIOConfig",
+      "AutoModel": "nvidia/C-RADIOv2-H--hf_model.RADIOModel"
+    },
+    "adaptor_configs": {},
+    "adaptor_names": null,
+    "architectures": [
+      "RADIOModel"
+    ],
+    "args": {
+      "aa": null,
+      "amp": true,
+      "amp_dtype": "bfloat16",
+      "amp_impl": "native",
+      "aug_repeats": 0,
+      "aug_splits": 0,
+      "bn_eps": null,
+      "bn_momentum": null,
+      "cache_dir": null,
+      "channels_last": false,
+      "checkpoint_hist": 10,
+      "chk_keep_forever": 100,
+      "class_map": "",
+      "clip_grad": null,
+      "clip_mode": "norm",
+      "cls_token_per_teacher": true,
+      "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json",
+      "coco_image_dir": "/datasets/coco2017-adlsa/val2017",
+      "color_jitter": 0.4,
+      "cooldown_epochs": 0,
+      "cpe_max_size": 2048,
+      "crd_loss": false,
+      "crd_loss_weight": 0.8,
+      "crop_pct": null,
+      "cutmix": 0.0,
+      "cutmix_minmax": null,
+      "dataset_download": false,
+      "debug_full_knn": false,
+      "decay_epochs": 90,
+      "decay_milestones": [
+        90,
+        180,
+        270
+      ],
+      "decay_rate": 0.1,
+      "depchain": true,
+      "dist_bn": "reduce",
+      "dist_norm_weight": 0.0,
+      "distributed": true,
+      "drop": 0.0,
+      "drop_block": null,
+      "drop_connect": null,
+      "drop_path": null,
+      "dtype": "bfloat16",
+      "epoch_repeats": 0.0,
+      "eval": false,
+      "eval_metric": "knn_top1",
+      "eval_teacher": false,
+      "eval_teacher_only": false,
+      "eval_throughput": false,
+      "fast_norm": false,
+      "fd_loss_fn": "MSE",
+      "feature_normalization": "SHIP_NORM",
+      "feature_summarizer": "cls_token",
+      "feature_upscale_factor": null,
+      "force_new_wandb_id": false,
+      "force_spectral_reparam": true,
+      "freeze_bn": false,
+      "fsdp": false,
+      "fuser": "",
+      "gp": null,
+      "grad_accum_steps": 1,
+      "grad_checkpointing": false,
+      "head_init_bias": null,
+      "head_init_scale": null,
+      "head_warmup": 5,
+      "head_weight_decay": 0.001,
+      "hflip": 0.5,
+      "img_size": null,
+      "in_chans": null,
+      "initial_checkpoint": null,
+      "input_size": null,
+      "interpolation": "",
+      "layer_decay": null,
+      "local_rank": 0,
+      "log_interval": 50,
+      "log_mlflow": false,
+      "log_wandb": true,
+      "loss_auto_balance": false,
+      "lr_base": 0.1,
+      "lr_base_scale": "",
+      "lr_base_size": 256,
+      "lr_cycle_decay": 0.5,
+      "lr_cycle_limit": 1,
+      "lr_cycle_mul": 1.0,
+      "lr_k_decay": 1.0,
+      "lr_noise": null,
+      "lr_noise_pct": 0.67,
+      "lr_noise_std": 1.0,
+      "mean": null,
+      "mesa": false,
+      "min_lr": 0,
+      "mixup": 0.0,
+      "mixup_mode": "batch",
+      "mixup_off_epoch": 0,
+      "mixup_prob": 1.0,
+      "mixup_switch_prob": 0.5,
+      "mlp_hidden_size": 1520,
+      "mlp_num_inner": 3,
+      "mlp_version": "v2",
+      "model": "vit_huge_patch16_224",
+      "model_kwargs": {},
+      "model_norm": false,
+      "momentum": 0.9,
+      "no_aug": false,
+      "no_ddp_bb": true,
+      "no_prefetcher": false,
+      "no_resume_opt": false,
+      "num_classes": null,
+      "opt_betas": null,
+      "opt_eps": null,
+      "patience_epochs": 10,
+      "pin_mem": false,
+      "prefetcher": true,
+      "pretrained": false,
+      "rank": 0,
+      "ratio": [
+        0.75,
+        1.3333333333333333
+      ],
+      "recount": 1,
+      "recovery_interval": 0,
+      "register_multiple": 8,
+      "remode": "pixel",
+      "reprob": 0.0,
+      "reset_loss_state": false,
+      "resplit": false,
+      "save_images": false,
+      "scale": [
+        0.5,
+        1.0
+      ],
+      "sched": "cosine",
+      "seed": 42,
+      "smoothing": 0.1,
+      "spectral_heads": false,
+      "spectral_reparam": false,
+      "split_bn": false,
+      "start_epoch": null,
+      "std": null,
+      "stream_teachers": true,
+      "sync_bn": false,
+      "synchronize_step": false,
+      "teachers": [
+        {
+          "fd_normalize": false,
+          "feature_distillation": true,
+          "input_size": 378,
+          "model": "ViT-H-14-378-quickgelu",
+          "name": "clip",
+          "pretrained": "dfn5b",
+          "type": "open_clip",
+          "use_summary": true
+        },
+        {
+          "fd_normalize": false,
+          "feature_distillation": true,
+          "input_size": 378,
+          "model": "ViT-SO400M-14-SigLIP-384",
+          "name": "siglip",
+          "pretrained": "webli",
+          "type": "open_clip",
+          "use_summary": true
+        },
+        {
+          "fd_normalize": false,
+          "feature_distillation": true,
+          "input_size": 378,
+          "model": "dinov2_vitg14_reg",
+          "name": "dino_v2",
+          "type": "dino_v2",
+          "use_summary": true
+        },
+        {
+          "fd_normalize": false,
+          "feature_distillation": true,
+          "input_size": 1024,
+          "model": "vit-h",
+          "name": "sam",
+          "type": "sam",
+          "use_summary": false
+        }
+      ],
+      "torchcompile": null,
+      "torchscript": false,
+      "train_interpolation": "random",
+      "train_split": "train",
+      "tta": 0,
+      "use_coco": false,
+      "use_multi_epochs_loader": false,
+      "val_ema_only": false,
+      "val_split": "val",
+      "vflip": 0.0,
+      "vitdet_version": 1,
+      "wandb_entity": "",
+      "wandb_job_type": "",
+      "wandb_name": "",
+      "wandb_project": "",
+      "warmup_lr": 1e-05,
+      "warmup_prefix": false,
+      "worker_seeding": "all",
+      "workers": 8,
+      "world_size": 256
+    },
+    "feature_normalizer_config": null,
+    "inter_feature_normalizer_config": null,
+    "max_resolution": 2048,
+    "patch_size": 16,
+    "preferred_resolution": [
+      768,
+      768
+    ],
+    "torch_dtype": "bfloat16",
+    "version": "radio_v2.5-h",
+    "vitdet_window_size": null
+  },
+  "attn_implementation": "flash_attention_2"
+}
diff --git a/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/whisper-large-v3.json b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/whisper-large-v3.json
new file mode 100644
index 000000000..14c6c8cf4
--- /dev/null
+++ b/tests/unitary/with_extras/aqua/test_data/recommend/config-json-files/whisper-large-v3.json
@@ -0,0 +1,50 @@
+{
+  "_name_or_path": "openai/whisper-large-v3",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 1280,
+  "decoder_attention_heads": 20,
+  "decoder_ffn_dim": 5120,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 32,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 20,
+  "encoder_ffn_dim": 5120,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 32,
+  "eos_token_id": 50257,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 32,
+  "num_mel_bins": 128,
+  "pad_token_id": 50256,
+  "scale_embedding": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.36.0.dev0",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51866
+}
diff --git a/tests/unitary/with_extras/aqua/test_recommend.py b/tests/unitary/with_extras/aqua/test_recommend.py
index 33b2503a8..198d8f165 100644
--- a/tests/unitary/with_extras/aqua/test_recommend.py
+++ b/tests/unitary/with_extras/aqua/test_recommend.py
@@ -208,6 +208,22 @@ def test_llm_config_from_raw_config_file(
         assert config.weight_dtype.lower() == expected_dtype
         assert config.head_dim == expected_head_dim
         assert config.quantization == expected_quant
+        
+    @pytest.mark.parametrize(
+        "config_file, error_match",
+        [
+            # Audio model (Whisper): rejected up front by its model_type.
+            ("config-json-files/whisper-large-v3.json", "model type.*not supported"),
+
+            # VLM (Nemotron): no layer-count key at the top level of the config.
+            ("config-json-files/nemotron-vl-8b.json", "Could not determine.*num_hidden_layers"),
+        ],
+    )
+    def test_llm_config_unsupported_models(self, config_file, error_match):
+        """Unsupported configs raise AquaRecommendationError rather than crash with a TypeError."""
+        unsupported_config = load_config(config_file)
+        with pytest.raises(AquaRecommendationError, match=error_match):
+            LLMConfig.from_raw_config(unsupported_config)
 
     def test_suggested_quantizations(self):
         c = LLMConfig(
@@ -316,7 +332,7 @@ def test_fetch_hf_config_http_error(self, mock_format_error, mock_download):
     @pytest.mark.parametrize(
         "config, expected_recs, expected_troubleshoot",
         [
-            (  # decoder-only model
+            (  # 1. Decoder-only model (Standard Case - Should Work)
                 {
                     "num_hidden_layers": 2,
                     "hidden_size": 64,
@@ -328,7 +344,7 @@ def test_fetch_hf_config_http_error(self, mock_format_error, mock_download):
                 [],
                 "",
             ),
-            (  # encoder-decoder model
+            (  # 2. Encoder-Decoder model (e.g., T5 - Known Unsupported)
                 {
                     "num_hidden_layers": 2,
                     "hidden_size": 64,
@@ -341,6 +357,29 @@ def test_fetch_hf_config_http_error(self, mock_format_error, mock_download):
                 [],
                 "Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). Encoder-decoder models (ex. T5, Gemma) and encoder-only (BERT) are not supported at this time.",
             ),
+            (  # 3. Whisper (Audio Model) - Explicitly blocked by model_type
+                {
+                    "model_type": "whisper",
+                    "d_model": 1280,
+                    "encoder_layers": 32, 
+                    "vocab_size": 51865
+                },
+                [], 
+                # Matches the full error string from llm_config.py
+                "The model type 'whisper' is not supported. Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). Encoder-decoder models (ex. T5, Gemma), encoder-only (BERT), and audio models (Whisper) are not supported at this time.", 
+            ),
+            (  # 4. Nemotron (VLM) - Fails because keys are nested in 'text_config'
+                {
+                    "model_type": "llama-3.1-nemotron-nano-vl",
+                    "vocab_size": 128256,
+                    "text_config": { # Parser doesn't look here yet, so it fails finding layers at top level
+                        "num_hidden_layers": 32 
+                    }
+                },
+                [],
+                # Matches the 'missing key' error from llm_config.py
+                "Could not determine 'num_hidden_layers' from the model configuration. Checked keys: ['num_hidden_layers', 'n_layer', 'num_layers']. This indicates the model architecture might not be supported or uses a non-standard config structure."
+            ),
         ],
     )
     def test_which_shapes_valid(
@@ -364,6 +403,7 @@ def test_which_shapes_valid(
             model_id="ocid1.datasciencemodel.oc1.TEST", generate_table=False
         )
         result = app.which_shapes(request)
+        
         assert result == expected_result
 
         # If troubleshoot is populated (error case), _summarize_shapes_for_seq_lens should not have been called