Skip to content

Commit 7ebdb05

Browse files
authored
feat: Add tests for duplicate models in multiple HF sources (#1221)
* feat: Add tests for dupplicate models in multiple HF sources Signed-off-by: Debarati Basu-Nag <dbasunag@redhat.com> * fix: address review comments Signed-off-by: Debarati Basu-Nag <dbasunag@redhat.com> * fix: address review comments Signed-off-by: Debarati Basu-Nag <dbasunag@redhat.com> --------- Signed-off-by: Debarati Basu-Nag <dbasunag@redhat.com>
1 parent 4e3cb17 commit 7ebdb05

3 files changed

Lines changed: 156 additions & 1 deletion

File tree

tests/model_registry/model_catalog/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@
7272
"ibm-granite/granite-4.0-h-micro",
7373
],
7474
"custom": [HF_CUSTOM_MODE],
75+
"overlapping_mixed": [
76+
# Shared with "mixed" - tests that same model across sources is not silently dropped
77+
"ibm-granite/granite-4.0-h-1b",
78+
# Unique to this source
79+
"ibm-granite/granite-4.0-h-small",
80+
],
7581
}
7682
EXPECTED_HF_CATALOG_VALUES: list[dict[str, str]] = [{"id": HF_SOURCE_ID, "model_name": HF_MODELS["mixed"][0]}]
7783
EXPECTED_MULTIPLE_HF_CATALOG_VALUES: list[dict[str, str]] = [
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
from typing import Self
2+
3+
import pytest
4+
from ocp_resources.config_map import ConfigMap
5+
from simple_logger.logger import get_logger
6+
7+
from tests.model_registry.model_catalog.utils import get_hf_catalog_str, get_models_from_catalog_api
8+
from tests.model_registry.utils import execute_get_command
9+
10+
LOGGER = get_logger(name=__name__)
11+
12+
pytestmark = [
13+
pytest.mark.skip_on_disconnected,
14+
pytest.mark.usefixtures("updated_dsc_component_state_scope_session", "model_registry_namespace"),
15+
]
16+
17+
# Source IDs generated by get_hf_catalog_str: "huggingface_{id}"
18+
MIXED_SOURCE_ID = "huggingface_mixed"
19+
OVERLAPPING_SOURCE_ID = "huggingface_overlapping_mixed"
20+
# Model shared across both sources - the core scenario for silent drop bug
21+
SHARED_MODEL = "ibm-granite/granite-4.0-h-1b"
22+
23+
24+
@pytest.mark.parametrize(
25+
"updated_catalog_config_map",
26+
[
27+
pytest.param(
28+
{"sources_yaml": get_hf_catalog_str(ids=["mixed", "overlapping_mixed"])},
29+
id="test_shared_models_across_hf_sources",
30+
marks=pytest.mark.install,
31+
),
32+
],
33+
indirect=["updated_catalog_config_map"],
34+
)
35+
@pytest.mark.usefixtures("updated_catalog_config_map")
36+
class TestHuggingFaceModelsMultipleSources:
37+
"""
38+
Verifies that identical models across multiple HuggingFace sources are not silently dropped.
39+
"""
40+
41+
def test_source_status_duplicate_models(
42+
self: Self,
43+
updated_catalog_config_map: ConfigMap,
44+
model_catalog_rest_url: list[str],
45+
model_registry_rest_headers: dict[str, str],
46+
):
47+
"""Verify both HF sources report 'available' status after catalog sync."""
48+
response = execute_get_command(
49+
url=f"{model_catalog_rest_url[0]}sources",
50+
headers=model_registry_rest_headers,
51+
)
52+
sources = response.get("items", [])
53+
expected_source_ids = {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}
54+
found_source_ids = set()
55+
for source in sources:
56+
if source["id"] in expected_source_ids:
57+
found_source_ids.add(source["id"])
58+
assert source["status"] == "available", (
59+
f"Source '{source['id']}' has status '{source['status']}', expected 'available'. "
60+
f"Error: {source.get('error', 'N/A')}"
61+
)
62+
missing_sources = expected_source_ids - found_source_ids
63+
assert not missing_sources, (
64+
f"Expected sources {missing_sources} not found in response. "
65+
f"Available source IDs: {[s['id'] for s in sources]}"
66+
)
67+
68+
def test_shared_model_present_in_both_sources(
69+
self: Self,
70+
updated_catalog_config_map: ConfigMap,
71+
model_catalog_rest_url: list[str],
72+
model_registry_rest_headers: dict[str, str],
73+
):
74+
"""Verify that a model included in two HF sources appears in both, not silently dropped from one."""
75+
for source_id, source_label in [
76+
(MIXED_SOURCE_ID, "HuggingFace Source mixed"),
77+
(OVERLAPPING_SOURCE_ID, "HuggingFace Source overlapping_mixed"),
78+
]:
79+
LOGGER.info(f"Checking source '{source_id}' for shared model '{SHARED_MODEL}'")
80+
response = get_models_from_catalog_api(
81+
model_catalog_rest_url=model_catalog_rest_url,
82+
model_registry_rest_headers=model_registry_rest_headers,
83+
source_label=source_label,
84+
page_size=1000,
85+
)
86+
model_names = [model["name"] for model in response.get("items", [])]
87+
assert SHARED_MODEL in model_names, (
88+
f"Shared model '{SHARED_MODEL}' not found in source '{source_id}'. "
89+
f"Models found: {model_names}. This indicates the model was silently dropped."
90+
)
91+
92+
def test_shared_model_retrievable_per_source(
93+
self: Self,
94+
updated_catalog_config_map: ConfigMap,
95+
model_catalog_rest_url: list[str],
96+
model_registry_rest_headers: dict[str, str],
97+
):
98+
"""Verify the shared model can be fetched individually from each source."""
99+
for source_id in [MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID]:
100+
LOGGER.info(f"Fetching model '{SHARED_MODEL}' from source '{source_id}'")
101+
url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
102+
result = execute_get_command(url=url, headers=model_registry_rest_headers)
103+
assert result["name"] == SHARED_MODEL, (
104+
f"Expected model name '{SHARED_MODEL}', got '{result['name']}' from source '{source_id}'"
105+
)
106+
107+
def test_external_id_has_no_namespace_prefix(
108+
self: Self,
109+
updated_catalog_config_map: ConfigMap,
110+
model_catalog_rest_url: list[str],
111+
model_registry_rest_headers: dict[str, str],
112+
):
113+
"""Verify the API response does not leak internal sourceId: prefix in externalId."""
114+
for source_id in [MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID]:
115+
url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
116+
result = execute_get_command(url=url, headers=model_registry_rest_headers)
117+
external_id = result.get("externalId", "")
118+
assert not external_id.startswith(f"{source_id}:"), (
119+
f"externalId '{external_id}' leaks internal namespace prefix '{source_id}:'. "
120+
f"The API should strip the source prefix for backward compatibility."
121+
)
122+
123+
@pytest.mark.parametrize(
124+
"filter_field",
125+
[
126+
pytest.param("name", id="filter_by_name", marks=pytest.mark.xfail(reason="RHOAIENG-53498")),
127+
pytest.param("externalId", id="filter_by_external_id"),
128+
],
129+
)
130+
def test_filter_returns_model_from_all_sources(
131+
self: Self,
132+
updated_catalog_config_map: ConfigMap,
133+
model_catalog_rest_url: list[str],
134+
model_registry_rest_headers: dict[str, str],
135+
filter_field: str,
136+
):
137+
"""Verify filtering by model name or externalId returns the model from all sources."""
138+
response = get_models_from_catalog_api(
139+
model_catalog_rest_url=model_catalog_rest_url,
140+
model_registry_rest_headers=model_registry_rest_headers,
141+
additional_params=f"&filterQuery={filter_field}='{SHARED_MODEL}'",
142+
page_size=1000,
143+
)
144+
matching_items = response.get("items", [])
145+
source_ids = {item["source_id"] for item in matching_items}
146+
assert {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}.issubset(source_ids), (
147+
f"Expected model '{SHARED_MODEL}' from both sources {MIXED_SOURCE_ID} and {OVERLAPPING_SOURCE_ID}, "
148+
f"but found it only in sources: {source_ids}"
149+
)

tests/model_registry/model_catalog/huggingface/test_huggingface_source_error_validation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
"model_registry_namespace",
5050
)
5151
class TestHuggingFaceSourceErrorValidation:
52-
"""Partial model fetching errors should not affect other models."""
52+
"""Test cases for Partial model fetching errors should not affect other models."""
5353

5454
def test_source_state_and_message(
5555
self: Self,

0 commit comments

Comments
 (0)