Skip to content

Commit e57fd31

Browse files
elisnou and lya-tkch
authored
Default threshold to zero (#75)
* Update default thresholds in BadResponseThresholds to 0.0 and adjust related tests accordingly * update changelog * bump version * Update src/cleanlab_codex/validator.py Co-authored-by: Ulyana <[email protected]> --------- Co-authored-by: Ulyana <[email protected]>
1 parent 4e92233 commit e57fd31

File tree

4 files changed

+13
-8
lines changed

4 files changed

+13
-8
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [1.0.11] - 2025-04-16
11+
12+
- Update default thresholds for custom evals to 0.0 in `Validator` API.
13+
1014
## [1.0.10] - 2025-04-15
1115

1216
- Add async support to `Validator` API.
@@ -55,7 +59,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5559

5660
- Initial release of the `cleanlab-codex` client library.
5761

58-
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.10...HEAD
62+
[Unreleased]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.11...HEAD
63+
[1.0.11]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.10...v1.0.11
5964
[1.0.10]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.9...v1.0.10
6065
[1.0.9]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.8...v1.0.9
6166
[1.0.8]: https://github.com/cleanlab/cleanlab-codex/compare/v1.0.7...v1.0.8

src/cleanlab_codex/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# SPDX-License-Identifier: MIT
2-
__version__ = "1.0.10"
2+
__version__ = "1.0.11"

src/cleanlab_codex/validator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ class BadResponseThresholds(BaseModel):
278278
Default Thresholds:
279279
- trustworthiness: 0.7
280280
- response_helpfulness: 0.7
281-
- Any custom eval: 0.5 (if not explicitly specified in bad_response_thresholds)
281+
- Any custom eval: 0.0 (if not explicitly specified in bad_response_thresholds). A threshold of 0.0 means the associated eval is not used to determine whether a response is bad unless explicitly specified in bad_response_thresholds, while still allowing those scores to be reported.
282282
"""
283283

284284
trustworthiness: float = Field(
@@ -296,8 +296,8 @@ class BadResponseThresholds(BaseModel):
296296

297297
@property
298298
def default_threshold(self) -> float:
299-
"""The default threshold to use when an evaluation metric's threshold is not specified. This threshold is set to 0.5."""
300-
return 0.5
299+
"""The default threshold to use when an evaluation metric's threshold is not specified. This threshold is set to 0.0."""
300+
return 0.0
301301

302302
def get_threshold(self, eval_name: str) -> float:
303303
"""Get threshold for an eval, if it exists.

tests/test_validator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def test_default_threshold(self) -> None:
2323

2424
def test_unspecified_threshold(self) -> None:
2525
thresholds = BadResponseThresholds()
26-
assert thresholds.get_threshold("unspecified_threshold") == 0.5
26+
assert thresholds.get_threshold("unspecified_threshold") == 0.0
2727

2828
def test_threshold_value(self) -> None:
2929
thresholds = BadResponseThresholds(valid_threshold=0.3) # type: ignore
@@ -168,7 +168,7 @@ def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> Non
168168
assert_threshold_equal(validator, "trustworthiness", 0.7) # Default should apply
169169

170170
validator = Validator(codex_access_key="test", trustworthy_rag_config={"evals": ["non_existent_eval"]})
171-
assert_threshold_equal(validator, "non_existent_eval", 0.5) # Default should apply for undefined thresholds
171+
assert_threshold_equal(validator, "non_existent_eval", 0.0) # Default should apply for undefined thresholds
172172

173173
# No extra Evals
174174
validator = Validator(codex_access_key="test", trustworthy_rag_config={"evals": []})
@@ -177,7 +177,7 @@ def test_edge_cases(self, mock_project: Mock, mock_trustworthy_rag: Mock) -> Non
177177

178178
# Test with non-existent evals in trustworthy_rag_config
179179
with pytest.raises(ValueError, match="Found thresholds for metrics that are not available"):
180-
Validator(codex_access_key="test", bad_response_thresholds={"non_existent_eval": 0.5})
180+
Validator(codex_access_key="test", bad_response_thresholds={"non_existent_eval": 0.0})
181181

182182

183183
def test_validator_with_empty_evals(mock_project: Mock) -> None: # noqa: ARG001

0 commit comments

Comments
 (0)