Skip to content

Update response helpfulness threshold #79

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [1.0.15] 2025-04-24

- Update default threshold for response helpfulness to 0.23 in `Validator` API.

## [1.0.14] 2025-04-23
- Update `codex-sdk` dependency to `0.1.0-alpha.17`.
- Capture data for the number of times the validator API is called on a Codex project.
Expand Down Expand Up @@ -71,7 +75,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Initial release of the `cleanlab-codex` client library.

[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.14...HEAD
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.15...HEAD
[1.0.15]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.14...v1.0.15
[1.0.14]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.13...v1.0.14
[1.0.13]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.12...v1.0.13
[1.0.12]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.11...v1.0.12
Expand Down
2 changes: 1 addition & 1 deletion src/cleanlab_codex/__about__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: MIT
__version__ = "1.0.14"
__version__ = "1.0.15"
2 changes: 1 addition & 1 deletion src/cleanlab_codex/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ class BadResponseThresholds(BaseModel):
)
response_helpfulness: float = Field(
description="Threshold for response helpfulness.",
default=0.7,
default=0.23,
ge=0.0,
le=1.0,
)
Expand Down
8 changes: 4 additions & 4 deletions tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_get_threshold(self) -> None:
def test_default_threshold(self) -> None:
thresholds = BadResponseThresholds()
assert thresholds.get_threshold("trustworthiness") == 0.7
assert thresholds.get_threshold("response_helpfulness") == 0.7
assert thresholds.get_threshold("response_helpfulness") == 0.23

def test_unspecified_threshold(self) -> None:
thresholds = BadResponseThresholds()
Expand Down Expand Up @@ -147,7 +147,7 @@ def test_user_provided_thresholds(self, mock_project: Mock, mock_trustworthy_rag
# Test with user-provided thresholds that match evals
validator = Validator(codex_access_key="test", bad_response_thresholds={"trustworthiness": 0.6})
assert_threshold_equal(validator, "trustworthiness", 0.6)
assert_threshold_equal(validator, "response_helpfulness", 0.7)
assert_threshold_equal(validator, "response_helpfulness", 0.23)

# Test with extra thresholds that should raise ValueError
with pytest.raises(ValueError, match="Found thresholds for metrics that are not available"):
Expand All @@ -157,7 +157,7 @@ def test_default_thresholds(self, mock_project: Mock, mock_trustworthy_rag: Mock
# Test with default thresholds (bad_response_thresholds is None)
validator = Validator(codex_access_key="test")
assert_threshold_equal(validator, "trustworthiness", 0.7)
assert_threshold_equal(validator, "response_helpfulness", 0.7)
assert_threshold_equal(validator, "response_helpfulness", 0.23)

def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002
# Note, the `"evals"` field should not be a list of strings in practice, but an Eval from cleanlab_tlm
Expand All @@ -173,7 +173,7 @@ def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> Non
# No extra Evals
validator = Validator(codex_access_key="test", trustworthy_rag_config={"evals": []})
assert_threshold_equal(validator, "trustworthiness", 0.7) # Default should apply
assert_threshold_equal(validator, "response_helpfulness", 0.7) # Default should apply
assert_threshold_equal(validator, "response_helpfulness", 0.23) # Default should apply

# Test with non-existent evals in trustworthy_rag_config
with pytest.raises(ValueError, match="Found thresholds for metrics that are not available"):
Expand Down