-
Notifications
You must be signed in to change notification settings - Fork 64
feat: Add tests for duplicate models in multiple HF sources #1221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
9d4cf15
feat: Add tests for duplicate models in multiple HF sources
dbasunag 527dc66
fix: address review comments
dbasunag b6691c1
fix: address review comments
dbasunag 1c551e5
Merge branch 'main' into hf_dup_model
dbasunag b74cc01
Merge branch 'main' into hf_dup_model
dbasunag d035ad0
Merge branch 'main' into hf_dup_model
dbasunag f19e334
Merge branch 'main' into hf_dup_model
dbasunag File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
149 changes: 149 additions & 0 deletions
149
tests/model_registry/model_catalog/huggingface/test_huggingface_models_multiple_sources.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| from typing import Self | ||
|
|
||
| import pytest | ||
| from ocp_resources.config_map import ConfigMap | ||
| from simple_logger.logger import get_logger | ||
|
|
||
| from tests.model_registry.model_catalog.utils import get_hf_catalog_str, get_models_from_catalog_api | ||
| from tests.model_registry.utils import execute_get_command | ||
|
|
||
| # Module-level logger, named after this module. | ||
| LOGGER = get_logger(name=__name__) | ||
|
|
||
| # Marks applied to every test in this module: the custom skip_on_disconnected marker | ||
| # (presumably skips the module on disconnected clusters - confirm in conftest) and two | ||
| # fixtures requested by name through usefixtures. | ||
| pytestmark = [ | ||
| pytest.mark.skip_on_disconnected, | ||
| pytest.mark.usefixtures("updated_dsc_component_state_scope_session", "model_registry_namespace"), | ||
| ] | ||
|
|
||
| # IDs of the two catalog sources under test. get_hf_catalog_str is expected to generate | ||
| # them as "huggingface_{id}" for the ids "mixed" and "overlapping_mixed" passed below. | ||
| MIXED_SOURCE_ID = "huggingface_mixed" | ||
| OVERLAPPING_SOURCE_ID = "huggingface_overlapping_mixed" | ||
| # Model expected to be present in both sources. The scenario under test is that such a | ||
| # shared model must not be silently dropped from either source. | ||
| SHARED_MODEL = "ibm-granite/granite-4.0-h-1b" | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "updated_catalog_config_map",
    [
        pytest.param(
            {"sources_yaml": get_hf_catalog_str(ids=["mixed", "overlapping_mixed"])},
            id="test_shared_models_across_hf_sources",
            marks=pytest.mark.install,
        ),
    ],
    indirect=["updated_catalog_config_map"],
)
@pytest.mark.usefixtures("updated_catalog_config_map")
class TestHuggingFaceModelsMultipleSources:
    """
    Verifies that identical models across multiple HuggingFace sources are not silently dropped.

    The class is parametrized (indirectly, through the ``updated_catalog_config_map`` fixture)
    with a catalog configuration built from the ids "mixed" and "overlapping_mixed". Every test
    talks to the catalog REST API at ``model_catalog_rest_url[0]`` using
    ``model_registry_rest_headers``.
    """

    @staticmethod
    def _fetch_shared_model(
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
        source_id: str,
    ):
        """Fetch SHARED_MODEL from the given source and return the parsed response.

        The URL is built by plain concatenation, so ``model_catalog_rest_url[0]`` is
        assumed to already end with a trailing slash (as the other requests in this
        class assume too).
        """
        url = f"{model_catalog_rest_url[0]}sources/{source_id}/models/{SHARED_MODEL}"
        return execute_get_command(url=url, headers=model_registry_rest_headers)

    def test_source_status_duplicate_models(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify both HF sources report 'available' status after catalog sync."""
        response = execute_get_command(
            url=f"{model_catalog_rest_url[0]}sources",
            headers=model_registry_rest_headers,
        )
        sources = response.get("items", [])
        expected_source_ids = {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}
        found_source_ids = set()
        for source in sources:
            # Sources other than the two under test are ignored.
            if source["id"] in expected_source_ids:
                found_source_ids.add(source["id"])
                assert source["status"] == "available", (
                    f"Source '{source['id']}' has status '{source['status']}', expected 'available'. "
                    f"Error: {source.get('error', 'N/A')}"
                )

        # A source that is absent from the listing is a failure too, not only one
        # that is listed with a bad status.
        missing_sources = expected_source_ids - found_source_ids
        assert not missing_sources, (
            f"Expected sources {missing_sources} not found in response. "
            f"Available source IDs: {[s['id'] for s in sources]}"
        )

    def test_shared_model_present_in_both_sources(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify that a model included in two HF sources appears in both, not silently dropped from one."""
        for source_id, source_label in [
            (MIXED_SOURCE_ID, "HuggingFace Source mixed"),
            (OVERLAPPING_SOURCE_ID, "HuggingFace Source overlapping_mixed"),
        ]:
            LOGGER.info(f"Checking source '{source_id}' for shared model '{SHARED_MODEL}'")
            response = get_models_from_catalog_api(
                model_catalog_rest_url=model_catalog_rest_url,
                model_registry_rest_headers=model_registry_rest_headers,
                source_label=source_label,
                page_size=1000,
            )
            model_names = [model["name"] for model in response.get("items", [])]
            assert SHARED_MODEL in model_names, (
                f"Shared model '{SHARED_MODEL}' not found in source '{source_id}'. "
                f"Models found: {model_names}. This indicates the model was silently dropped."
            )

    def test_shared_model_retrievable_per_source(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify the shared model can be fetched individually from each source."""
        for source_id in [MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID]:
            LOGGER.info(f"Fetching model '{SHARED_MODEL}' from source '{source_id}'")
            result = self._fetch_shared_model(
                model_catalog_rest_url=model_catalog_rest_url,
                model_registry_rest_headers=model_registry_rest_headers,
                source_id=source_id,
            )
            # .get() keeps a response without a "name" field a readable assertion
            # failure instead of a KeyError raised while evaluating the check.
            model_name = result.get("name")
            assert model_name == SHARED_MODEL, (
                f"Expected model name '{SHARED_MODEL}', got '{model_name}' from source '{source_id}'"
            )

    def test_external_id_has_no_namespace_prefix(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
    ) -> None:
        """Verify the API response does not leak internal sourceId: prefix in externalId."""
        for source_id in [MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID]:
            result = self._fetch_shared_model(
                model_catalog_rest_url=model_catalog_rest_url,
                model_registry_rest_headers=model_registry_rest_headers,
                source_id=source_id,
            )
            # `or ""` also covers an explicit null externalId; a plain .get() default
            # only applies when the key is absent, and None has no .startswith().
            external_id = result.get("externalId") or ""
            assert not external_id.startswith(f"{source_id}:"), (
                f"externalId '{external_id}' leaks internal namespace prefix '{source_id}:'. "
                f"The API should strip the source prefix for backward compatibility."
            )

    @pytest.mark.parametrize(
        "filter_field",
        [
            pytest.param("name", id="filter_by_name", marks=pytest.mark.xfail(reason="RHOAIENG-53498")),
            pytest.param("externalId", id="filter_by_external_id"),
        ],
    )
    def test_filter_returns_model_from_all_sources(
        self: Self,
        updated_catalog_config_map: ConfigMap,
        model_catalog_rest_url: list[str],
        model_registry_rest_headers: dict[str, str],
        filter_field: str,
    ) -> None:
        """Verify filtering by model name or externalId returns the model from all sources."""
        response = get_models_from_catalog_api(
            model_catalog_rest_url=model_catalog_rest_url,
            model_registry_rest_headers=model_registry_rest_headers,
            additional_params=f"&filterQuery={filter_field}='{SHARED_MODEL}'",
            page_size=1000,
        )
        matching_items = response.get("items", [])
        # Each returned item carries the id of the source it came from; both
        # sources under test must be represented among the matches.
        source_ids = {item["source_id"] for item in matching_items}
        assert {MIXED_SOURCE_ID, OVERLAPPING_SOURCE_ID}.issubset(source_ids), (
            f"Expected model '{SHARED_MODEL}' from both sources {MIXED_SOURCE_ID} and {OVERLAPPING_SOURCE_ID}, "
            f"but found it only in sources: {source_ids}"
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.