|
| 1 | +import ast |
| 2 | +from typing import Any |
| 3 | + |
| 4 | +from tests.model_registry.model_catalog.constants import HF_SOURCE_ID |
| 5 | +from tests.model_registry.model_catalog.utils import LOGGER |
| 6 | +from tests.model_registry.utils import execute_get_command |
| 7 | +from huggingface_hub import HfApi |
| 8 | + |
| 9 | + |
def get_huggingface_model_params(model_name: str, huggingface_api: HfApi) -> dict[str, Any]:
    """
    Get some of the fields from HuggingFace API for validation against our model catalog data.

    Args:
        model_name: Repository id passed to ``huggingface_api.model_info``.
        huggingface_api: HuggingFace client used to fetch the model info.

    Returns:
        A dict with the keys ``tags``, ``gated``, ``private``, ``architectures``
        and ``model_type``. A field that cannot be resolved on the model info is
        ``None``. When present, ``tags`` has every entry starting with
        ``"license:"`` removed, and ``gated`` / ``private`` are lower-cased strings.
    """
    hf_model_info = huggingface_api.model_info(repo_id=model_name)
    # Result key -> dotted attribute path on the model info object.
    fields_mapping = {
        "tags": "tags",
        "gated": "gated",
        "private": "private",
        "architectures": "config.architectures",
        "model_type": "config.model_type",
    }

    result: dict[str, Any] = {}
    for key, path in fields_mapping.items():
        value = get_huggingface_nested_attributes(obj=hf_model_info, attr_path=path)
        # A missing field is reported as None; only post-process values that exist,
        # otherwise filtering the tags of a model without tags raises TypeError.
        if value is not None:
            if key == "tags":
                # License tags are excluded from the comparison
                # (presumably the catalog does not store them as tags - confirm).
                value = [tag for tag in value if not tag.startswith("license:")]
            elif key in ("gated", "private"):
                # model registry converts them to lower case. So before validation we need to do the same
                value = str(value).lower()
        result[key] = value
    return result
| 34 | + |
| 35 | + |
def get_huggingface_nested_attributes(obj: Any, attr_path: str) -> Any:
    """
    Get nested attribute using dot notation like 'config.architectures'.

    Each path segment is resolved with key access when the current value is a
    dict, and with attribute access otherwise.

    Args:
        obj: Object (or dict) to start the lookup from.
        attr_path: Dot-separated path of attribute names / dictionary keys.

    Returns:
        The value found at the end of the path, or ``None`` when any segment is
        missing or an error occurs while resolving it (the error is logged).
    """
    # Sentinel distinguishes "attribute missing" from an attribute whose value is None.
    missing = object()
    try:
        current_obj = obj
        for attr in attr_path.split("."):
            if isinstance(current_obj, dict):
                # For dictionaries, use key access
                if attr not in current_obj:
                    return None
                current_obj = current_obj[attr]
            else:
                # For objects, use a single attribute lookup so properties are
                # evaluated once rather than once for hasattr and once for getattr.
                current_obj = getattr(current_obj, attr, missing)
                if current_obj is missing:
                    return None
        return current_obj
    except AttributeError as e:
        LOGGER.error(f"AttributeError getting '{attr_path}': {e}")
        return None
    except Exception as e:
        # Best-effort lookup: any failure is logged and reported as "not found".
        LOGGER.error(f"Unexpected error getting '{attr_path}': {e}")
        return None
| 61 | + |
| 62 | + |
def assert_huggingface_values_matches_model_catalog_api_values(
    model_catalog_rest_url: list[str],
    model_registry_rest_headers: dict[str, str],
    expected_catalog_values: dict[str, str],
    huggingface_api: HfApi,
) -> None:
    """
    Compare HuggingFace API metadata with the model catalog API for each expected model.

    For every model name in ``expected_catalog_values`` the model is fetched from
    the catalog (first URL in ``model_catalog_rest_url``) and from HuggingFace, and
    the ``hf_gated``, ``hf_private``, ``hf_model_type``, ``hf_architectures`` and
    ``hf_tags`` custom properties are compared with the HuggingFace values.

    Args:
        model_catalog_rest_url: Catalog REST base URLs; only the first entry is used.
        model_registry_rest_headers: Headers sent with each catalog GET request.
        expected_catalog_values: Mapping whose keys are the model names to validate.
        huggingface_api: HuggingFace client used to fetch the reference values.

    Raises:
        AssertionError: If the catalog returns a different model name, or if any
            compared field differs for at least one model.
    """
    mismatch = {}
    LOGGER.info("Validating HuggingFace model metadata:")
    for model_name in expected_catalog_values:
        url = f"{model_catalog_rest_url[0]}sources/{HF_SOURCE_ID}/models/{model_name}"
        result = execute_get_command(
            url=url,
            headers=model_registry_rest_headers,
        )
        assert result["name"] == model_name
        hf_api_values = get_huggingface_model_params(model_name=model_name, huggingface_api=huggingface_api)
        custom_properties = result["customProperties"]
        errors = []

        # Scalar fields are stored as strings in the catalog, so compare string forms.
        for field_name in ("gated", "private", "model_type"):
            model_catalog_value = custom_properties[f"hf_{field_name}"]["string_value"]
            if model_catalog_value != str(hf_api_values[field_name]):
                errors.append(
                    f"HuggingFace api value for {field_name} is {hf_api_values[field_name]} and "
                    f"value found from model catalog api call is {model_catalog_value}"
                )

        # List fields are stored in the catalog as the string form of a Python list;
        # parse them and compare order-insensitively.
        for field_name in ("architectures", "tags"):
            model_catalog_value = sorted(ast.literal_eval(custom_properties[f"hf_{field_name}"]["string_value"]))
            # A field missing on the HuggingFace side is None; treat it as an empty
            # list so it is reported as a mismatch instead of crashing in sorted().
            hf_api_value = sorted(hf_api_values[field_name] or [])
            if model_catalog_value != hf_api_value:
                errors.append(
                    f"HuggingFace api value for {field_name} is {hf_api_value} and "
                    f"value found from model catalog api call is {model_catalog_value}"
                )

        if errors:
            mismatch[model_name] = "; ".join(errors)

    if mismatch:
        LOGGER.error(f"mismatches are: {mismatch}")
        raise AssertionError(f"HF api call and model catalog hf models has value mismatch: {mismatch}")
0 commit comments