Keep ToolCall hashable when output/inputs hold unhashable objects (#2815)

Zain Dana Harper · claude · Zain Dana Harper · commit 4cefc1cf6549 · 2026-06-29T20:10:39.000-07:00
ToolCorrectnessMetric places ToolCall instances into a set during component-level evaluation. ToolCall.__hash__ routes input_parameters and output through _make_hashable, whose final branch returned arbitrary objects unchanged, assuming they were primitive hashable types. When the output (or a value nested in input_parameters) is an unhashable object, such as a LangChain ToolMessage or a pydantic model that defines __eq__ without __hash__, hashing raised `TypeError: unhashable type`, breaking evals_iterator() on agent traces. Fall back to repr() for any value that is not hashable; the resulting string is value-based (and therefore the same for equal objects) only when the object defines its own __repr__. Primitives and already-hashable objects are returned unchanged. Adds a regression test covering an unhashable object both as the output and nested in input_parameters. Closes #2815 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
diff --git a/deepeval/test_case/llm_test_case.py b/deepeval/test_case/llm_test_case.py
@@ -229,7 +229,16 @@ def _make_hashable(obj):
         # Handle frozenset that might contain unhashable elements
         return frozenset(_make_hashable(item) for item in obj)
     else:
-        # For primitive hashable types (str, int, float, bool, etc.)
+        # Primitive hashable types (str, int, float, bool, etc.) pass through
+        # unchanged. Some objects reach here while being unhashable, e.g. a
+        # LangChain ToolMessage or a pydantic model that defines __eq__ without
+        # __hash__. Returning those as-is makes ToolCall.__hash__ raise
+        # ``TypeError: unhashable type`` (#2815), so fall back to repr(), which
+        # is only value-based when the object defines its own __repr__.
+        try:
+            hash(obj)
+        except TypeError:
+            return repr(obj)
         return obj
 
 
diff --git a/tests/test_core/test_test_case/test_single_turn.py b/tests/test_core/test_test_case/test_single_turn.py
@@ -450,6 +450,43 @@ def test_tool_call_hashing_with_complex_types(self):
         hash_value = hash(tool_call)
         assert isinstance(hash_value, int)
 
+    def test_tool_call_hashing_with_unhashable_types(self):
+        """Regression test for #2815.
+
+        ToolCall must stay hashable even when its output (or a value nested in
+        input_parameters) is an arbitrary unhashable object, such as a
+        LangChain ToolMessage or a pydantic model that defines __eq__ without
+        __hash__. ToolCorrectnessMetric puts ToolCall instances into a set, so a
+        crash here breaks component-level evaluation of agent traces.
+        """
+
+        class _Unhashable:
+            # __hash__ = None is what Python sets implicitly on a class that
+            # defines __eq__ without __hash__; it makes instances unhashable.
+            __hash__ = None
+
+            def __init__(self, value):
+                self.value = value
+
+            def __repr__(self):
+                return f"_Unhashable({self.value!r})"
+
+        # Sanity check: hash() on the helper object raises TypeError.
+        with pytest.raises(TypeError):
+            hash(_Unhashable("x"))
+
+        # Case 1: unhashable object as the output.
+        tool_output = ToolCall(name="tool", output=_Unhashable("x"))
+        assert isinstance(hash(tool_output), int)
+        assert len({tool_output}) == 1  # usable in a set
+
+        # Case 2: unhashable object nested inside input_parameters.
+        tool_input = ToolCall(
+            name="tool", input_parameters={"arg": _Unhashable("y")}
+        )
+        assert isinstance(hash(tool_input), int)
+        assert len({tool_input}) == 1  # usable in a set
+
     def test_tool_call_repr(self):
         tool_call = ToolCall(
             name="test_tool",