Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ads/aqua/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ def format_hf_custom_error_message(error: HfHubHTTPError):
"Please check the revision identifier and try again.",
service_payload={"error": "RevisionNotFoundError"},
)

raise AquaRuntimeError(
reason=f"An error occurred while accessing `{url}` "
"Please check your network connection and try again. "
Expand Down
6 changes: 6 additions & 0 deletions ads/aqua/shaperecommend/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
MOE_REQUIRED_FIELDS refer to fields necessary for Mixture of Experts (MoE) Architecture Models

NEXT_QUANT suggests the next quantization level based on the current quantization (if applied) or the model weights (if no quantization yet)

EXCLUDED_MODELS contains a set of model identifiers that are known to be unsupported for shape recommendation, such as audio and speech models.
"""

LLAMA_REQUIRED_FIELDS = [
Expand Down Expand Up @@ -131,3 +133,7 @@
"ARM": "CPU",
"UNKNOWN_ENUM_VALUE": "N/A",
}
# Model identifiers known to be unsupported for shape recommendation:
# encoder-decoder / encoder-only families plus audio/speech models.
# Compared against the config's lowercased "model_type" value.
EXCLUDED_MODELS = {
    "albert",
    "audio",
    "bart",
    "bert",
    "gemma",
    "roberta",
    "speech",
    "t5",
    "wav2vec",
    "whisper",
}
127 changes: 93 additions & 34 deletions ads/aqua/shaperecommend/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import re
from typing import Optional
from typing import Optional, Any, Dict

from pydantic import BaseModel, Field

Expand All @@ -17,14 +17,15 @@
QUANT_MAPPING,
QUANT_METHODS,
RUNTIME_WEIGHTS,
EXCLUDED_MODELS
)
from ads.common.utils import parse_bool


class GeneralConfig(BaseModel):
num_hidden_layers: int = Field(
...,
description="Number of transformer blocks (layers) in the models neural network stack.",
description="Number of transformer blocks (layers) in the model's neural network stack.",
)
hidden_size: int = Field(
..., description="Embedding dimension or hidden size of each layer."
Expand All @@ -46,6 +47,27 @@ class GeneralConfig(BaseModel):
description="Parameter data type: 'float32', 'float16', etc.",
)

@staticmethod
def _get_required_int(raw: dict[str, Any], keys: list[str], field_name: str) -> int:
    """
    Extract a required integer field, trying each candidate key in order.

    Parameters
    ----------
    raw : dict[str, Any]
        The raw model configuration mapping.
    keys : list[str]
        Candidate key names to probe, in priority order.
    field_name : str
        Canonical field name used in the error message.

    Returns
    -------
    int
        The first value found that is not None and converts cleanly to int.

    Raises
    ------
    AquaRecommendationError
        If no candidate key yields a usable integer value.
    """
    for candidate in keys:
        value = raw.get(candidate)
        if value is None:
            continue
        try:
            return int(value)
        except (ValueError, TypeError):
            # Present but non-numeric; fall through to the next candidate key.
            continue

    # Exhausted every candidate without a usable integer.
    raise AquaRecommendationError(
        f"Could not determine '{field_name}' from the model configuration. "
        f"Checked keys: {keys}. "
        "This indicates the model architecture might not be supported or uses a non-standard config structure."
    )

@classmethod
def get_weight_dtype(cls, raw: dict) -> str:
# some configs use a different weight dtype at runtime
Expand Down Expand Up @@ -173,21 +195,26 @@ class VisionConfig(GeneralConfig):
@classmethod
def from_raw_config(cls, vision_section: dict) -> "VisionConfig":
weight_dtype = cls.get_weight_dtype(vision_section)
num_layers = (
vision_section.get("num_layers")
or vision_section.get("vision_layers")
or vision_section.get("num_hidden_layers")
or vision_section.get("n_layer")

num_layers = cls._get_required_int(
vision_section,
["num_layers", "vision_layers", "num_hidden_layers", "n_layer"],
"num_hidden_layers"
)

hidden_size = vision_section.get("hidden_size") or vision_section.get(
"embed_dim"
hidden_size = cls._get_required_int(
vision_section,
["hidden_size", "embed_dim"],
"hidden_size"
)

mlp_dim = vision_section.get("mlp_dim") or vision_section.get(
"intermediate_size"
mlp_dim = cls._get_required_int(
vision_section,
["mlp_dim", "intermediate_size"],
"mlp_dim"
)

# Optional fields can use standard .get()
num_attention_heads = (
vision_section.get("num_attention_heads")
or vision_section.get("vision_num_attention_heads")
Expand All @@ -202,10 +229,10 @@ def from_raw_config(cls, vision_section: dict) -> "VisionConfig":
weight_dtype = str(cls.get_weight_dtype(vision_section))

return cls(
num_hidden_layers=int(num_layers),
hidden_size=int(hidden_size),
mlp_dim=int(mlp_dim),
patch_size=int(patch_size),
num_hidden_layers=num_layers,
hidden_size=hidden_size,
mlp_dim=mlp_dim,
patch_size=int(patch_size) if patch_size else 0,
num_attention_heads=int(num_attention_heads)
if num_attention_heads
else None,
Expand Down Expand Up @@ -311,18 +338,28 @@ def optimal_config(self):
return configs

@classmethod
def validate_model_support(cls, raw: dict) -> ValueError:
def validate_model_support(cls, raw: dict):
"""
Validates if model is decoder-only. Check for text-generation model occurs at DataScienceModel level.
Also explicitly checks for unsupported audio/speech models.
"""
excluded_models = {"t5", "gemma", "bart", "bert", "roberta", "albert"}
# Known unsupported model architectures or types
excluded_models = EXCLUDED_MODELS

model_type = raw.get("model_type", "").lower()

if model_type in excluded_models:
raise AquaRecommendationError(
f"The model type '{model_type}' is not supported. "
"Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). "
"Encoder-decoder models (ex. T5, Gemma), encoder-only (BERT), and audio models (Whisper) are not supported at this time."
)

if (
raw.get("is_encoder_decoder", False) # exclude encoder-decoder models
or (
raw.get("is_decoder") is False
) # exclude explicit encoder-only models (altho no text-generation task ones, just dbl check)
or raw.get("model_type", "").lower() # exclude by known model types
in excluded_models
):
raise AquaRecommendationError(
"Please provide a decoder-only text-generation model (ex. Llama, Falcon, etc). "
Expand All @@ -337,14 +374,33 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig":
"""
cls.validate_model_support(raw)

# Field mappings with fallback
num_hidden_layers = (
raw.get("num_hidden_layers") or raw.get("n_layer") or raw.get("num_layers")
# Field mappings with fallback using safe extraction
num_hidden_layers = cls._get_required_int(
raw,
["num_hidden_layers", "n_layer", "num_layers"],
"num_hidden_layers"
)
weight_dtype = cls.get_weight_dtype(raw)

hidden_size = raw.get("hidden_size") or raw.get("n_embd") or raw.get("d_model")
vocab_size = raw.get("vocab_size")
hidden_size = cls._get_required_int(
raw,
["hidden_size", "n_embd", "d_model"],
"hidden_size"
)

num_attention_heads = cls._get_required_int(
raw,
["num_attention_heads", "n_head", "num_heads"],
"num_attention_heads"
)

# Vocab size might be missing in some architectures, but usually required for memory calc
vocab_size = cls._get_required_int(
raw,
["vocab_size"],
"vocab_size"
)

weight_dtype = cls.get_weight_dtype(raw)
quantization = cls.detect_quantization_bits(raw)
quantization_type = cls.detect_quantization_type(raw)

Expand All @@ -355,15 +411,18 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig":
raw.get("num_key_value_heads") # GQA models (ex. Llama-type)
)

num_attention_heads = (
raw.get("num_attention_heads") or raw.get("n_head") or raw.get("num_heads")
)

head_dim = raw.get("head_dim") or (
int(hidden_size) // int(num_attention_heads)
if hidden_size and num_attention_heads
else None
)

# Ensure head_dim is not None if calculation failed
if head_dim is None:
raise AquaRecommendationError(
"Could not determine 'head_dim' and it could not be calculated from 'hidden_size' and 'num_attention_heads'."
)

max_seq_len = (
raw.get("max_position_embeddings")
or raw.get("n_positions")
Expand All @@ -388,12 +447,12 @@ def from_raw_config(cls, raw: dict) -> "LLMConfig":
) # trust-remote-code is always needed when this key is present

return cls(
num_hidden_layers=int(num_hidden_layers),
hidden_size=int(hidden_size),
num_attention_heads=int(num_attention_heads),
num_hidden_layers=num_hidden_layers,
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
num_key_value_heads=num_key_value_heads,
head_dim=int(head_dim),
vocab_size=int(vocab_size),
vocab_size=vocab_size,
weight_dtype=weight_dtype,
quantization=quantization,
quantization_type=quantization_type,
Expand Down Expand Up @@ -511,4 +570,4 @@ def get_model_config(cls, raw: dict):
# Neither found -- explicit failure
raise AquaRecommendationError(
"Config could not be parsed as either text, vision, or multimodal model. Check your fields/structure."
)
)
41 changes: 29 additions & 12 deletions ads/aqua/shaperecommend/recommend.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Dict, List, Optional, Tuple, Union

from huggingface_hub import hf_hub_download
from huggingface_hub.utils import HfHubHTTPError
from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError
from pydantic import ValidationError
from rich.table import Table

Expand Down Expand Up @@ -182,17 +182,34 @@ def _get_model_config_and_name(
return config, model_name

def _fetch_hf_config(self, model_id: str) -> Dict:
"""
Downloads a model's config.json from Hugging Face Hub using the
huggingface_hub library.
"""
try:
config_path = hf_hub_download(repo_id=model_id, filename="config.json")
with open(config_path, encoding="utf-8") as f:
return json.load(f)
except HfHubHTTPError as e:
format_hf_custom_error_message(e)

"""
Downloads a model's config.json from Hugging Face Hub.
"""
try:
config_path = hf_hub_download(repo_id=model_id, filename="config.json")
with open(config_path, encoding="utf-8") as f:
return json.load(f)

except EntryNotFoundError as e:
# EXPLICIT HANDLING: This covers the GGUF case
logger.error(f"config.json not found for model '{model_id}': {e}")
raise AquaRecommendationError(
f"The configuration file 'config.json' was not found in the repository '{model_id}'. "
"This often happens with GGUF models (which are not supported) or invalid repositories. "
"Please ensure the model ID is correct and the repository contains a 'config.json'."
) from e

except HfHubHTTPError as e:
# For other errors (Auth, Network), use the shared formatter.
logger.error(f"HTTP error fetching config for '{model_id}': {e}")
format_hf_custom_error_message(e)

except Exception as e:
logger.error(f"Unexpected error fetching config for '{model_id}': {e}")
raise AquaRecommendationError(
f"An unexpected error occurred while fetching the model configuration: {e}"
) from e

def valid_compute_shapes(
self, compartment_id: Optional[str] = None
) -> List["ComputeShapeSummary"]:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ opctl = [
"rich",
"fire",
"cachetools",
"huggingface_hub==0.26.2"
"huggingface_hub"
]
optuna = ["optuna==2.9.0", "oracle_ads[viz]"]
spark = ["pyspark>=3.0.0"]
Expand Down
Loading
Loading