Skip to content

Commit 9d4cf15

Browse files
committed
feat: Add tests for duplicate models in multiple HF sources
Signed-off-by: Debarati Basu-Nag <dbasunag@redhat.com>
1 parent 9d4eb3a commit 9d4cf15

3 files changed

Lines changed: 170 additions & 4 deletions

File tree

tests/model_registry/model_catalog/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@
7272
"ibm-granite/granite-4.0-h-micro",
7373
],
7474
"custom": [HF_CUSTOM_MODE],
75+
"overlapping_mixed": [
76+
# Shared with "mixed" - tests that the same model appearing in multiple sources is not silently dropped
77+
"ibm-granite/granite-4.0-h-1b",
78+
# Unique to this source
79+
"ibm-granite/granite-4.0-h-small",
80+
],
7581
}
7682
EXPECTED_HF_CATALOG_VALUES: list[dict[str, str]] = [{"id": HF_SOURCE_ID, "model_name": HF_MODELS["mixed"][0]}]
7783
EXPECTED_MULTIPLE_HF_CATALOG_VALUES: list[dict[str, str]] = [
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
from typing import Self
2+
3+
import pytest
4+
from ocp_resources.config_map import ConfigMap
5+
from simple_logger.logger import get_logger
6+
7+
from tests.model_registry.model_catalog.utils import get_hf_catalog_str, get_models_from_catalog_api
8+
from tests.model_registry.utils import execute_get_command
9+
10+
LOGGER = get_logger(name=__name__)

# Marks applied to every test in this module.
pytestmark = [
    pytest.mark.skip_on_disconnected,
    pytest.mark.usefixtures(
        "updated_dsc_component_state_scope_session",
        "model_registry_namespace",
    ),
]

# get_hf_catalog_str generates source IDs of the form "huggingface_{id}".
MIXED_SOURCE_ID = "huggingface_mixed"
OVERLAPPING_SOURCE_ID = "huggingface_overlapping_mixed"

# The model listed in both sources; this overlap is the scenario that
# exercises the silent-drop bug.
SHARED_MODEL = "ibm-granite/granite-4.0-h-1b"
22+
23+
24+
@pytest.mark.parametrize(
    "updated_catalog_config_map",
    [
        pytest.param(
            {"sources_yaml": get_hf_catalog_str(ids=["mixed", "overlapping_mixed"])},
            id="test_shared_models_across_hf_sources",
            marks=pytest.mark.install,
        ),
    ],
    indirect=["updated_catalog_config_map"],
)
@pytest.mark.usefixtures("updated_catalog_config_map")
class TestHuggingFaceModelsMultipleSources:
    """
    Verifies that identical models across multiple HuggingFace sources are not silently dropped.

    The catalog is configured with two HuggingFace sources ("mixed" and
    "overlapping_mixed") that both list SHARED_MODEL. Every test receives:

    - updated_catalog_config_map: the parametrized catalog ConfigMap fixture.
    - model_catalog_rest_url: list of catalog REST base URLs; only the first
      entry is used. Endpoint paths are appended without a separator, so the
      base URL presumably ends with "/" (confirm against the fixture).
    - model_registry_rest_headers: HTTP headers sent with each request.
    """

    # Source ID -> label passed as ``source_label`` to get_models_from_catalog_api.
    # Kept in one place so every test iterates the same two sources.
    SOURCE_LABELS: dict[str, str] = {
        MIXED_SOURCE_ID: "HuggingFace Source mixed",
        OVERLAPPING_SOURCE_ID: "HuggingFace Source overlapping_mixed",
    }

    def test_source_status_duplicate_models(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify both HF sources are listed and report 'available' status after catalog sync."""
        response = execute_get_command(
            url=f"{model_catalog_rest_url[0]}sources",
            headers=model_registry_rest_headers,
        )
        sources_by_id = {source["id"]: source for source in response.get("items", [])}

        # Fail explicitly when an expected source is absent; otherwise the
        # status check below would have nothing to check and the test would
        # pass vacuously.
        missing_sources = [source_id for source_id in self.SOURCE_LABELS if source_id not in sources_by_id]
        assert not missing_sources, (
            f"Expected sources {missing_sources} not returned by the sources API. "
            f"Sources found: {list(sources_by_id)}"
        )

        for source_id in self.SOURCE_LABELS:
            source = sources_by_id[source_id]
            assert source["status"] == "available", (
                f"Source '{source['id']}' has status '{source['status']}', expected 'available'. "
                f"Error: {source.get('error', 'N/A')}"
            )

    def test_shared_model_present_in_both_sources(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify that a model included in two HF sources appears in both, not silently dropped from one."""
        for source_id, source_label in self.SOURCE_LABELS.items():
            LOGGER.info(f"Checking source '{source_id}' for shared model '{SHARED_MODEL}'")
            response = get_models_from_catalog_api(
                model_catalog_rest_url=model_catalog_rest_url,
                model_registry_rest_headers=model_registry_rest_headers,
                source_label=source_label,
                page_size=1000,
            )
            model_names = [model["name"] for model in response.get("items", [])]
            assert SHARED_MODEL in model_names, (
                f"Shared model '{SHARED_MODEL}' not found in source '{source_id}'. "
                f"Models found: {model_names}. This indicates the model was silently dropped."
            )

    def test_shared_model_retrievable_per_source(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify the shared model can be fetched individually from each source."""
        for source_id in self.SOURCE_LABELS:
            LOGGER.info(f"Fetching model '{SHARED_MODEL}' from source '{source_id}'")
            url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
            result = execute_get_command(url=url, headers=model_registry_rest_headers)
            assert result["name"] == SHARED_MODEL, (
                f"Expected model name '{SHARED_MODEL}', got '{result['name']}' from source '{source_id}'"
            )

    def test_external_id_has_no_namespace_prefix(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify the API response does not leak internal sourceId: prefix in externalId."""
        for source_id in self.SOURCE_LABELS:
            url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
            result = execute_get_command(url=url, headers=model_registry_rest_headers)
            # Treat a missing or null externalId as empty so the prefix check
            # fails or passes cleanly instead of raising AttributeError on None.
            external_id = result.get("externalId") or ""
            assert not external_id.startswith(f"{source_id}:"), (
                f"externalId '{external_id}' leaks internal namespace prefix '{source_id}:'. "
                f"The API should strip the source prefix for backward compatibility."
            )

    @pytest.mark.xfail(reason="Name and sourceLabel filter query is not working")
    def test_filter_by_name_and_source_label(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify filtering by model name with a source label returns the model only from that source."""
        for source_id, source_label in self.SOURCE_LABELS.items():
            LOGGER.info(f"Filtering model '{SHARED_MODEL}' with source label '{source_label}'")
            response = get_models_from_catalog_api(
                model_catalog_rest_url=model_catalog_rest_url,
                model_registry_rest_headers=model_registry_rest_headers,
                source_label=source_label,
                additional_params=f"&filterQuery=name='{SHARED_MODEL}'",
                page_size=1000,
            )
            matching_items = response.get("items", [])
            assert len(matching_items) == 1, (
                f"Expected exactly 1 model for source '{source_id}', got {len(matching_items)}"
            )
            assert matching_items[0]["name"] == SHARED_MODEL
            assert matching_items[0]["source_id"] == source_id

    def test_filter_by_name_returns_model_from_all_sources(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """
        Verify filtering on the shared model without a source label returns it from all sources.

        The query filters on ``externalId`` (set to the shared model's name)
        rather than on ``name``.
        """
        response = get_models_from_catalog_api(
            model_catalog_rest_url=model_catalog_rest_url,
            model_registry_rest_headers=model_registry_rest_headers,
            additional_params=f"&filterQuery=externalId='{SHARED_MODEL}'",
            page_size=1000,
        )
        matching_items = response.get("items", [])
        source_ids = {item["source_id"] for item in matching_items}
        # Subset check: other sources may also contain the model; only the two
        # configured sources are required.
        assert {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}.issubset(source_ids), (
            f"Expected model '{SHARED_MODEL}' from both sources {MIXED_SOURCE_ID} and {OVERLAPPING_SOURCE_ID}, "
            f"but found it only in sources: {source_ids}"
        )

tests/model_registry/model_catalog/huggingface/test_huggingface_source_error_validation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
"model_registry_namespace",
5050
)
5151
class TestHuggingFaceSourceErrorValidation:
52-
"""Test cases for RHOAIENG-47934 - Partial model fetching errors should not affect other models."""
52+
"""Test cases verifying that partial model fetching errors do not affect other models."""
5353

5454
def test_source_state_and_message(
5555
self: Self,
@@ -58,7 +58,7 @@ def test_source_state_and_message(
5858
model_registry_rest_headers: dict[str, str],
5959
):
6060
"""
61-
RHOAIENG-47934: Verify source shows error state with correct error message.
61+
Verify source shows error state with correct error message.
6262
6363
This test verifies that:
6464
1. The source is in error state due to private model fetch failure
@@ -108,7 +108,7 @@ def test_accessible_models_catalog_api_no_source_id(
108108
model_registry_rest_headers: dict[str, str],
109109
):
110110
"""
111-
RHOAIENG-47934: Check that accessible models are visible through catalog API.
111+
Check that accessible models are visible through catalog API.
112112
113113
This test verifies that accessible models are still returned by the catalog API
114114
even when the source is in error state.
@@ -127,7 +127,7 @@ def test_inaccessible_models_not_found_via_api_calls(
127127
model_registry_rest_headers: dict[str, str],
128128
):
129129
"""
130-
RHOAIENG-47934: Ensure inaccessible models are not found via API calls.
130+
Ensure inaccessible models are not found via API calls.
131131
132132
This test verifies that inaccessible models (private/gated) correctly return
133133
"Not Found" errors when accessed via individual model API endpoints.

0 commit comments

Comments
 (0)