opendatahub-io · dbasunag · Mar 17, 2026 · Mar 14, 2026 · Mar 16, 2026 · Mar 16, 2026
@@ -72,6 +72,12 @@
         "ibm-granite/granite-4.0-h-micro",
     ],
     "custom": [HF_CUSTOM_MODE],
+    "overlapping_mixed": [
+        # Shared with "mixed" - verifies that a model listed in more than one source is not silently dropped
+        "ibm-granite/granite-4.0-h-1b",
+        # Unique to this source
+        "ibm-granite/granite-4.0-h-small",
+    ],
 }
 EXPECTED_HF_CATALOG_VALUES: list[dict[str, str]] = [{"id": HF_SOURCE_ID, "model_name": HF_MODELS["mixed"][0]}]
 EXPECTED_MULTIPLE_HF_CATALOG_VALUES: list[dict[str, str]] = [

@@ -0,0 +1,149 @@
+from typing import Self
+
+import pytest
+from ocp_resources.config_map import ConfigMap
+from simple_logger.logger import get_logger
+
+from tests.model_registry.model_catalog.utils import get_hf_catalog_str, get_models_from_catalog_api
+from tests.model_registry.utils import execute_get_command
+
+# Module-level logger, named after this module.
+LOGGER = get_logger(name=__name__)
+
+# pytest applies the marks in `pytestmark` to every test in this module.
+pytestmark = [
+    pytest.mark.skip_on_disconnected,
+    pytest.mark.usefixtures("updated_dsc_component_state_scope_session", "model_registry_namespace"),
+]
+
+# Source IDs generated by get_hf_catalog_str follow the pattern "huggingface_{id}",
+# where {id} is the key passed in its `ids` argument ("mixed", "overlapping_mixed").
+MIXED_SOURCE_ID = "huggingface_mixed"
+OVERLAPPING_SOURCE_ID = "huggingface_overlapping_mixed"
+# Model configured in both sources - the core scenario for the silent-drop bug.
+# NOTE(review): must stay in sync with the shared entry in the HF model lists - confirm on change.
+SHARED_MODEL = "ibm-granite/granite-4.0-h-1b"
+
+
+@pytest.mark.parametrize(
+    "updated_catalog_config_map",
+    [
+        pytest.param(
+            {"sources_yaml": get_hf_catalog_str(ids=["mixed", "overlapping_mixed"])},
+            id="test_shared_models_across_hf_sources",
+            marks=pytest.mark.install,
+        ),
+    ],
+    indirect=["updated_catalog_config_map"],
+)
+@pytest.mark.usefixtures("updated_catalog_config_map")
+class TestHuggingFaceModelsMultipleSources:
+    """
+    Verifies that identical models across multiple HuggingFace sources are not silently dropped.
+
+    The catalog is configured with the "mixed" and "overlapping_mixed" HF sources through the
+    indirectly parametrized ``updated_catalog_config_map`` fixture. Every test queries the
+    catalog REST API rooted at ``model_catalog_rest_url[0]`` using ``model_registry_rest_headers``.
+    """
+
+    def test_source_status_duplicate_models(
+        self: Self,
+        updated_catalog_config_map: ConfigMap,
+        model_catalog_rest_url: list[str],
+        model_registry_rest_headers: dict[str, str],
+    ):
+        """
+        Verify both HF sources report 'available' status after catalog sync.
+
+        Fetches the ``sources`` endpoint, asserts that each expected source found in the
+        response has status ``available``, and finally asserts that no expected source is
+        missing from the response.
+        """
+        response = execute_get_command(
+            url=f"{model_catalog_rest_url[0]}sources",
+            headers=model_registry_rest_headers,
+        )
+        sources = response.get("items", [])
+        expected_source_ids = {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}
+        found_source_ids = set()
+        for source in sources:
+            # Other sources may be present in the catalog; only the two under test are checked.
+            if source["id"] in expected_source_ids:
+                found_source_ids.add(source["id"])
+                assert source["status"] == "available", (
+                    f"Source '{source['id']}' has status '{source['status']}', expected 'available'. "
+                    f"Error: {source.get('error', 'N/A')}"
+                )
+        missing_sources = expected_source_ids - found_source_ids
+        assert not missing_sources, (
+            f"Expected sources {missing_sources} not found in response. "
+            f"Available source IDs: {[s['id'] for s in sources]}"
+        )
+
+    def test_shared_model_present_in_both_sources(
+        self: Self,
+        updated_catalog_config_map: ConfigMap,
+        model_catalog_rest_url: list[str],
+        model_registry_rest_headers: dict[str, str],
+    ):
+        """
+        Verify that a model included in two HF sources appears in both, not silently dropped from one.
+
+        For each source, lists the catalog models filtered by that source's label and asserts
+        that ``SHARED_MODEL`` is among the returned model names.
+        """
+        for source_id, source_label in [
+            (MIXED_SOURCE_ID, "HuggingFace Source mixed"),
+            (OVERLAPPING_SOURCE_ID, "HuggingFace Source overlapping_mixed"),
+        ]:
+            LOGGER.info(f"Checking source '{source_id}' for shared model '{SHARED_MODEL}'")
+            response = get_models_from_catalog_api(
+                model_catalog_rest_url=model_catalog_rest_url,
+                model_registry_rest_headers=model_registry_rest_headers,
+                source_label=source_label,
+                page_size=1000,
+            )
+            model_names = [model["name"] for model in response.get("items", [])]
+            assert SHARED_MODEL in model_names, (
+                f"Shared model '{SHARED_MODEL}' not found in source '{source_id}'. "
+                f"Models found: {model_names}. This indicates the model was silently dropped."
+            )
+
+    def test_shared_model_retrievable_per_source(
+        self: Self,
+        updated_catalog_config_map: ConfigMap,
+        model_catalog_rest_url: list[str],
+        model_registry_rest_headers: dict[str, str],
+    ):
+        """
+        Verify the shared model can be fetched individually from each source.
+
+        Requests ``sources/{source_id}/models/{SHARED_MODEL}`` for both sources and asserts
+        that the returned ``name`` equals ``SHARED_MODEL``.
+        """
+        for source_id in [MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID]:
+            LOGGER.info(f"Fetching model '{SHARED_MODEL}' from source '{source_id}'")
+            url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
+            result = execute_get_command(url=url, headers=model_registry_rest_headers)
+            assert result["name"] == SHARED_MODEL, (
+                f"Expected model name '{SHARED_MODEL}', got '{result['name']}' from source '{source_id}'"
+            )
+
+    def test_external_id_has_no_namespace_prefix(
+        self: Self,
+        updated_catalog_config_map: ConfigMap,
+        model_catalog_rest_url: list[str],
+        model_registry_rest_headers: dict[str, str],
+    ):
+        """
+        Verify the API response does not leak internal sourceId: prefix in externalId.
+
+        Fetches the shared model from each source and asserts that ``externalId`` is present
+        and does not start with ``"{source_id}:"``.
+        """
+        for source_id in [MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID]:
+            url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
+            result = execute_get_command(url=url, headers=model_registry_rest_headers)
+            external_id = result.get("externalId")
+            # A missing/empty externalId would make the prefix check below pass without
+            # verifying anything, so require the field before inspecting it.
+            assert external_id, (
+                f"Model '{SHARED_MODEL}' from source '{source_id}' has no externalId in the response; "
+                f"cannot verify namespace prefix handling."
+            )
+            assert not external_id.startswith(f"{source_id}:"), (
+                f"externalId '{external_id}' leaks internal namespace prefix '{source_id}:'. "
+                f"The API should strip the source prefix for backward compatibility."
+            )
+
+    @pytest.mark.parametrize(
+        "filter_field",
+        [
+            pytest.param("name", id="filter_by_name", marks=pytest.mark.xfail(reason="RHOAIENG-53498")),
+            pytest.param("externalId", id="filter_by_external_id"),
+        ],
+    )
+    def test_filter_returns_model_from_all_sources(
+        self: Self,
+        updated_catalog_config_map: ConfigMap,
+        model_catalog_rest_url: list[str],
+        model_registry_rest_headers: dict[str, str],
+        filter_field: str,
+    ):
+        """
+        Verify filtering by model name or externalId returns the model from all sources.
+
+        Issues a ``filterQuery`` on ``filter_field`` for ``SHARED_MODEL`` and asserts that the
+        ``source_id`` values of the returned items include both sources under test. The
+        ``name`` variant is marked xfail (RHOAIENG-53498).
+        """
+        response = get_models_from_catalog_api(
+            model_catalog_rest_url=model_catalog_rest_url,
+            model_registry_rest_headers=model_registry_rest_headers,
+            additional_params=f"&filterQuery={filter_field}='{SHARED_MODEL}'",
+            page_size=1000,
+        )
+        matching_items = response.get("items", [])
+        source_ids = {item["source_id"] for item in matching_items}
+        assert {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}.issubset(source_ids), (
+            f"Expected model '{SHARED_MODEL}' from both sources {MIXED_SOURCE_ID} and {OVERLAPPING_SOURCE_ID}, "
+            f"but found it only in sources: {source_ids}"
+        )
@@ -49,7 +49,7 @@
     "model_registry_namespace",
 )
 class TestHuggingFaceSourceErrorValidation:
-    """Test cases for RHOAIENG-47934 - Partial model fetching errors should not affect other models."""
+    """Test cases verifying that partial model fetching errors do not affect other models."""
 
     def test_source_state_and_message(
         self: Self,
@@ -58,7 +58,7 @@ def test_source_state_and_message(
         model_registry_rest_headers: dict[str, str],
     ):
         """
-        RHOAIENG-47934: Verify source shows error state with correct error message.
+        Verify source shows error state with correct error message.
 
         This test verifies that:
         1. The source is in error state due to private model fetch failure
@@ -108,7 +108,7 @@ def test_accessible_models_catalog_api_no_source_id(
         model_registry_rest_headers: dict[str, str],
     ):
         """
-        RHOAIENG-47934: Check that accessible models are visible through catalog API.
+        Check that accessible models are visible through catalog API.
 
         This test verifies that accessible models are still returned by the catalog API
         even when the source is in error state.
@@ -127,7 +127,7 @@ def test_inaccessible_models_not_found_via_api_calls(
         model_registry_rest_headers: dict[str, str],
     ):
         """
-        RHOAIENG-47934: Ensure inaccessible models are not found via API calls.
+        Ensure inaccessible models are not found via API calls.
 
         This test verifies that inaccessible models (private/gated) correctly return
         "Not Found" errors when accessed via individual model API endpoints.