Skip to content

Commit ffae565

Browse files
authored
Update response helpfulness threshold (#79)
* Update default response helpfulness threshold in `BadResponseThresholds` from 0.7 to 0.23
* Bump version and add to changelog
* Update tests
1 parent 13686e9 commit ffae565

File tree

4 files changed

+12
-7
lines changed

4 files changed

+12
-7
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [1.0.15] 2025-04-24
11+
12+
- Update default threshold for response helpfulness to 0.23 in `Validator` API.
13+
1014
## [1.0.14] 2025-04-23
1115
- Update `codex-sdk` dependency to `0.1.0-alpha.17`.
1216
- Capture data for the number of times the validator API is called on a Codex project.
@@ -71,7 +75,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7175

7276
- Initial release of the `cleanlab-codex` client library.
7377

74-
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.14...HEAD
78+
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.15...HEAD
79+
[1.0.15]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.14...v1.0.15
7580
[1.0.14]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.13...v1.0.14
7681
[1.0.13]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.12...v1.0.13
7782
[1.0.12]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.11...v1.0.12

src/cleanlab_codex/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# SPDX-License-Identifier: MIT
2-
__version__ = "1.0.14"
2+
__version__ = "1.0.15"

src/cleanlab_codex/validator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ class BadResponseThresholds(BaseModel):
315315
)
316316
response_helpfulness: float = Field(
317317
description="Threshold for response helpfulness.",
318-
default=0.7,
318+
default=0.23,
319319
ge=0.0,
320320
le=1.0,
321321
)

tests/test_validator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_get_threshold(self) -> None:
1919
def test_default_threshold(self) -> None:
2020
thresholds = BadResponseThresholds()
2121
assert thresholds.get_threshold("trustworthiness") == 0.7
22-
assert thresholds.get_threshold("response_helpfulness") == 0.7
22+
assert thresholds.get_threshold("response_helpfulness") == 0.23
2323

2424
def test_unspecified_threshold(self) -> None:
2525
thresholds = BadResponseThresholds()
@@ -147,7 +147,7 @@ def test_user_provided_thresholds(self, mock_project: Mock, mock_trustworthy_rag
147147
# Test with user-provided thresholds that match evals
148148
validator = Validator(codex_access_key="test", bad_response_thresholds={"trustworthiness": 0.6})
149149
assert_threshold_equal(validator, "trustworthiness", 0.6)
150-
assert_threshold_equal(validator, "response_helpfulness", 0.7)
150+
assert_threshold_equal(validator, "response_helpfulness", 0.23)
151151

152152
# Test with extra thresholds that should raise ValueError
153153
with pytest.raises(ValueError, match="Found thresholds for metrics that are not available"):
@@ -157,7 +157,7 @@ def test_default_thresholds(self, mock_project: Mock, mock_trustworthy_rag: Mock
157157
# Test with default thresholds (bad_response_thresholds is None)
158158
validator = Validator(codex_access_key="test")
159159
assert_threshold_equal(validator, "trustworthiness", 0.7)
160-
assert_threshold_equal(validator, "response_helpfulness", 0.7)
160+
assert_threshold_equal(validator, "response_helpfulness", 0.23)
161161

162162
def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> None: # noqa: ARG002
163163
# Note, the `"evals"` field should not be a list of strings in practice, but an Eval from cleanlab_tlm
@@ -173,7 +173,7 @@ def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> Non
173173
# No extra Evals
174174
validator = Validator(codex_access_key="test", trustworthy_rag_config={"evals": []})
175175
assert_threshold_equal(validator, "trustworthiness", 0.7) # Default should apply
176-
assert_threshold_equal(validator, "response_helpfulness", 0.7) # Default should apply
176+
assert_threshold_equal(validator, "response_helpfulness", 0.23) # Default should apply
177177

178178
# Test with non-existent evals in trustworthy_rag_config
179179
with pytest.raises(ValueError, match="Found thresholds for metrics that are not available"):

0 commit comments

Comments
 (0)