Skip to content

Commit e57fd31

Browse files
elisnou and lya-tkch
authored
Default threshold to zero (#75)
* Update default thresholds in BadResponseThresholds to 0.0 and adjust related tests accordingly * update changelog * bump version * Update src/cleanlab_codex/validator.py Co-authored-by: Ulyana <[email protected]> --------- Co-authored-by: Ulyana <[email protected]>
1 parent 4e92233 commit e57fd31

File tree

4 files changed

+13
-8
lines changed

4 files changed

+13
-8
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [1.0.11] - 2025-04-16
11+
12+
- Update default thresholds for custom evals to 0.0 in `Validator` API.
13+
1014
## [1.0.10] - 2025-04-15
1115

1216
- Add async support to `Validator` API.
@@ -55,7 +59,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5559

5660
- Initial release of the `cleanlab-codex` client library.
5761

58-
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.10...HEAD
62+
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.11...HEAD
63+
[1.0.11]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.10...v1.0.11
5964
[1.0.10]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.9...v1.0.10
6065
[1.0.9]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.8...v1.0.9
6166
[1.0.8]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.7...v1.0.8

src/cleanlab_codex/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# SPDX-License-Identifier: MIT
2-
__version__ = "1.0.10"
2+
__version__ = "1.0.11"

src/cleanlab_codex/validator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ class BadResponseThresholds(BaseModel):
278278
Default Thresholds:
279279
- trustworthiness: 0.7
280280
- response_helpfulness: 0.7
281-
- Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds)
281+
- Any custom eval: 0.0 (if not explicitly specified in bad_response_thresholds). A threshold of 0.0 means the associated eval is not used to determine whether a response is bad unless explicitly specified in bad_response_thresholds, while still allowing those scores to be reported.
282282
"""
283283

284284
trustworthiness: float = Field(
@@ -296,8 +296,8 @@ class BadResponseThresholds(BaseModel):
296296

297297
@property
298298
def default_threshold(self) -> float:
299-
"""The default threshold to use when an evaluation metric's threshold is not specified. This threshold is set to 0.5."""
300-
return 0.5
299+
"""The default threshold to use when an evaluation metric's threshold is not specified. This threshold is set to 0.0."""
300+
return 0.0
301301

302302
def get_threshold(self, eval_name: str) -> float:
303303
"""Get threshold for an eval, if it exists.

tests/test_validator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def test_default_threshold(self) -> None:
2323

2424
def test_unspecified_threshold(self) -> None:
2525
thresholds = BadResponseThresholds()
26-
assert thresholds.get_threshold("unspecified_threshold") == 0.5
26+
assert thresholds.get_threshold("unspecified_threshold") == 0.0
2727

2828
def test_threshold_value(self) -> None:
2929
thresholds = BadResponseThresholds(valid_threshold=0.3) # type: ignore
@@ -168,7 +168,7 @@ def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> Non
168168
assert_threshold_equal(validator, "trustworthiness", 0.7) # Default should apply
169169

170170
validator = Validator(codex_access_key="test", trustworthy_rag_config={"evals": ["non_existent_eval"]})
171-
assert_threshold_equal(validator, "non_existent_eval", 0.5) # Default should apply for undefined thresholds
171+
assert_threshold_equal(validator, "non_existent_eval", 0.0) # Default should apply for undefined thresholds
172172

173173
# No extra Evals
174174
validator = Validator(codex_access_key="test", trustworthy_rag_config={"evals": []})
@@ -177,7 +177,7 @@ def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> Non
177177

178178
# Test with non-existent evals in trustworthy_rag_config
179179
with pytest.raises(ValueError, match="Found thresholds for metrics that are not available"):
180-
Validator(codex_access_key="test", bad_response_thresholds={"non_existent_eval": 0.5})
180+
Validator(codex_access_key="test", bad_response_thresholds={"non_existent_eval": 0.0})
181181

182182

183183
def test_validator_with_empty_evals(mock_project: Mock) -> None: # noqa: ARG001

0 commit comments

Comments
 (0)