-
Notifications
You must be signed in to change notification settings - Fork 63
feat(evalhub): Update EvalHub provider tests and add RBAC fixtures #1178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
1fe44f3
eb77148
783e4f6
55b31df
d297049
6fc160b
c26a84f
121348f
c9c8790
04a3b59
3771ceb
03dc1d4
663ce8d
a5812e4
4e7a02d
2017d59
8a19c7d
d975393
cb1b962
056e22c
06d8faf
7ed9fed
d7a0f95
c5772cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,178 @@ | ||
| import pytest | ||
| import requests | ||
| from ocp_resources.namespace import Namespace | ||
| from ocp_resources.role_binding import RoleBinding | ||
| from ocp_resources.route import Route | ||
|
|
||
| from tests.model_explainability.evalhub.utils import get_evalhub_provider, list_evalhub_providers | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "model_namespace",
    [
        pytest.param(
            {"name": "test-evalhub-providers"},
        ),
    ],
    indirect=True,
)
@pytest.mark.sanity
@pytest.mark.model_explainability
class TestEvalHubProviders:
    """Tests for the EvalHub providers API using a scoped non-admin ServiceAccount."""

    def test_scoped_user_can_list_providers(
        self,
        evalhub_providers_response: dict,
    ) -> None:
        """Verify that a scoped user with providers access can list providers.

        Checks the pagination envelope of the list response: 'items' (a list),
        'total_count', and 'limit' must all be present.
        """
        assert "items" in evalhub_providers_response, "Response missing 'items' field"
        assert isinstance(evalhub_providers_response["items"], list), "'items' must be a list"
        assert "total_count" in evalhub_providers_response, "Response missing 'total_count' field"
        assert "limit" in evalhub_providers_response, "Response missing 'limit' field"

    def test_list_providers_has_registered_providers(
        self,
        evalhub_providers_response: dict,
    ) -> None:
        """Verify that at least one provider is registered."""
        assert evalhub_providers_response["total_count"] > 0, "Expected at least one registered provider"
        assert len(evalhub_providers_response["items"]) > 0, "Expected at least one provider in items"

    def test_provider_has_required_fields(
        self,
        evalhub_providers_response: dict,
    ) -> None:
        """Verify that each provider contains the expected resource metadata and config fields."""
        for provider in evalhub_providers_response["items"]:
            assert "resource" in provider, f"Provider missing 'resource': {provider}"
            assert "id" in provider["resource"], f"Provider resource missing 'id': {provider}"
            assert provider["resource"]["id"], "Provider ID must not be empty"
            assert "name" in provider, f"Provider missing 'name': {provider}"
            assert "benchmarks" in provider, f"Provider missing 'benchmarks': {provider}"

    def test_provider_benchmarks_have_required_fields(
        self,
        evalhub_providers_response: dict,
    ) -> None:
        """Verify that benchmarks within each provider have id, name, and category."""
        for provider in evalhub_providers_response["items"]:
            provider_name = provider.get("name", "unknown")
            for benchmark in provider.get("benchmarks", []):
                assert "id" in benchmark, f"Benchmark in provider '{provider_name}' missing 'id'"
                assert "name" in benchmark, f"Benchmark in provider '{provider_name}' missing 'name'"
                assert "category" in benchmark, f"Benchmark in provider '{provider_name}' missing 'category'"

    def test_lm_evaluation_harness_provider_exists(
        self,
        evalhub_providers_response: dict,
    ) -> None:
        """Verify that the lm_evaluation_harness provider is registered and has benchmarks."""
        provider_ids = [provider["resource"]["id"] for provider in evalhub_providers_response["items"]]
        assert "lm_evaluation_harness" in provider_ids, (
            f"Expected 'lm_evaluation_harness' in providers, got: {provider_ids}"
        )

        # Use next() with an explicit default rather than letting it raise
        # StopIteration on an empty match: the assert below yields a clear test
        # failure instead of an opaque StopIteration, and explicit defaults are
        # the forward-compatible idiom as Python tightens next() usage.
        lmeval_provider = next(
            (
                provider
                for provider in evalhub_providers_response["items"]
                if provider["resource"]["id"] == "lm_evaluation_harness"
            ),
            None,
        )
        assert lmeval_provider is not None, "lm_evaluation_harness provider not found in 'items'"

        assert len(lmeval_provider["benchmarks"]) > 0, (
            "lm_evaluation_harness provider should have at least one benchmark"
        )

    def test_get_single_provider(
        self,
        evalhub_providers_response: dict,
        evalhub_scoped_token: str,
        evalhub_ca_bundle_file: str,
        evalhub_route: Route,
        model_namespace: Namespace,
    ) -> None:
        """Verify that a single provider can be retrieved by ID.

        Picks the first provider returned by the list endpoint and fetches it
        individually, then checks the single-provider payload is consistent.
        """
        assert evalhub_providers_response.get("items") and len(evalhub_providers_response["items"]) > 0, (
            "No providers registered; cannot test single-provider retrieval"
        )
        first_provider_id = evalhub_providers_response["items"][0]["resource"]["id"]

        data = get_evalhub_provider(
            host=evalhub_route.host,
            token=evalhub_scoped_token,
            ca_bundle_file=evalhub_ca_bundle_file,
            provider_id=first_provider_id,
            tenant=model_namespace.name,
        )

        assert data["resource"]["id"] == first_provider_id
        assert "name" in data
        assert "benchmarks" in data

    def test_get_nonexistent_provider_returns_error(
        self,
        model_namespace: Namespace,
        evalhub_scoped_token: str,
        evalhub_providers_role_binding: RoleBinding,
        evalhub_ca_bundle_file: str,
        evalhub_route: Route,
    ) -> None:
        """Verify that requesting a non-existent provider ID returns 404."""
        with pytest.raises(requests.exceptions.HTTPError) as excinfo:
            get_evalhub_provider(
                host=evalhub_route.host,
                token=evalhub_scoped_token,
                ca_bundle_file=evalhub_ca_bundle_file,
                provider_id="nonexistent-provider-id",
                tenant=model_namespace.name,
            )
        # Guard against HTTPError raised without an attached response before
        # dereferencing status_code.
        assert excinfo.value.response is not None
        assert excinfo.value.response.status_code == 404
|
|
||
|
|
||
@pytest.mark.parametrize(
    "model_namespace",
    [
        pytest.param(
            {"name": "test-evalhub-providers"},
        ),
    ],
    indirect=True,
)
@pytest.mark.sanity
@pytest.mark.model_explainability
class TestEvalHubProvidersUnauthorised:
    """Tests verifying that a user without providers RBAC is denied access."""

    def test_list_providers_denied_without_role_binding(
        self,
        model_namespace: Namespace,
        evalhub_unauthorised_token: str,
        evalhub_ca_bundle_file: str,
        evalhub_route: Route,
    ) -> None:
        """Verify that a user without providers ClusterRole binding gets 403."""
        # Assert on the HTTP status code directly instead of match="403":
        # substring matching on the exception message can spuriously match a
        # port, request ID, or URL fragment containing "403".
        with pytest.raises(requests.exceptions.HTTPError) as excinfo:
            list_evalhub_providers(
                host=evalhub_route.host,
                token=evalhub_unauthorised_token,
                ca_bundle_file=evalhub_ca_bundle_file,
                tenant=model_namespace.name,
            )
        assert excinfo.value.response is not None
        assert excinfo.value.response.status_code == 403

    def test_get_provider_denied_without_role_binding(
        self,
        model_namespace: Namespace,
        evalhub_unauthorised_token: str,
        evalhub_ca_bundle_file: str,
        evalhub_route: Route,
        evalhub_providers_response: dict,
    ) -> None:
        """Verify that a user without providers ClusterRole binding cannot get a provider."""
        # Use a provider ID that is known to exist (fetched via the authorised
        # fixture) so the 403 reflects RBAC denial rather than a missing resource.
        provider_id = evalhub_providers_response["items"][0]["resource"]["id"]

        # Same rationale as above: assert the status-code contract, not a
        # substring of the error message.
        with pytest.raises(requests.exceptions.HTTPError) as excinfo:
            get_evalhub_provider(
                host=evalhub_route.host,
                token=evalhub_unauthorised_token,
                ca_bundle_file=evalhub_ca_bundle_file,
                provider_id=provider_id,
                tenant=model_namespace.name,
            )
        assert excinfo.value.response is not None
        assert excinfo.value.response.status_code == 403
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Uh oh!
There was an error while loading. Please reload this page.