Skip to content

Commit dc7a009

Browse files
feat(core): introduce ToolSchema as root schema cache; replace TypedDict conversion with TypeAdapter (#37103)
Builds on #37101.

---

Two changes in one commit, both motivated by the same principle: a single, clean owner for everything schema-related on a tool.

## `ToolSchema` — the root cache

Previously `BaseTool` had three independent `cached_property` slots (`tool_call_schema`, `args`, `_approximate_schema_chars`) that all computed overlapping data and each needed individual invalidation. This PR replaces them with a single `ToolSchema` dataclass and one `tool_schema` cached property that is the sole root:

```python
@dataclass
class ToolSchema:
    name: str
    description: str
    validator: TypeAdapter  # validates tool call inputs
    json_schema: dict  # sent to LLMs
    pydantic_schema: Any  # model class or dict (backward compat)
    args: dict  # properties from json_schema
    approximate_chars: int  # precomputed for token estimation
```

`BaseTool.tool_call_schema`, `BaseTool.args`, and `BaseTool._approximate_schema_chars` are now plain `@property` delegates to `tool_schema`. `__setattr__` now pops two keys on mutation (`tool_schema` and `_inferred_input_schema`) instead of four. The `is`-identity caching tests still pass because all delegates read from the same cached `ToolSchema` object. `ToolSchema` is exported from `langchain_core.tools` and can be used directly by integrations that want to consume both the validator and the schema without going through `BaseTool`.

## `TypeAdapter`-based TypedDict conversion

`_convert_any_typed_dicts_to_pydantic` was a ~70-line recursive function that converted TypedDicts to throwaway pydantic v1 model classes just to call `.schema()`. Replaced with:

```python
adapter = TypeAdapter(typed_dict)
schema = adapter.json_schema()
```

Pydantic v2's `TypeAdapter` handles everything the old code did — nested TypedDicts, generic containers, `Annotated` metadata — and also correctly handles `NotRequired` and `Required` annotations, which the v1 path did not.
A new test `test__convert_typed_dict_not_required` verifies this:

```python
class Tool(TypedDict):
    required_field: str
    optional_field: NotRequired[int]

result = _convert_typed_dict_to_openai_function(Tool)
assert "required_field" in result["parameters"]["required"]
assert "optional_field" not in result["parameters"]["required"]
```

Field descriptions from Google-style docstrings and `Annotated[T, ..., "description"]` metadata are preserved by post-processing the schema after generation.

The old `test__convert_typed_dict_to_openai_function_fail` test expected a `TypeError` for `MutableSet` because pydantic v1 didn't support it. Pydantic v2 does; the test is updated to verify successful conversion instead.

## What stays unchanged

- All public `BaseTool` API signatures — `tool_call_schema`, `args`, `get_input_schema()` all have the same signatures and return types as before.
- `pydantic.v1` acceptance for `args_schema` — tools with v1 model schemas continue to work.

> AI-agent assisted contribution.

---------

Co-authored-by: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent c832f7c commit dc7a009

6 files changed

Lines changed: 308 additions & 363 deletions

File tree

libs/core/langchain_core/tools/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"StructuredTool",
5252
"Tool",
5353
"ToolException",
54+
"ToolSchema",
5455
"ToolsRenderer",
5556
"_get_runnable_config_param",
5657
"convert_runnable_to_tool",
@@ -70,6 +71,7 @@
7071
"InjectedToolCallId": "base",
7172
"SchemaAnnotationError": "base",
7273
"ToolException": "base",
74+
"ToolSchema": "schema",
7375
"_get_runnable_config_param": "base",
7476
"create_schema_from_function": "base",
7577
"convert_runnable_to_tool": "convert",

libs/core/langchain_core/tools/base.py

Lines changed: 80 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ConfigDict,
3030
Field,
3131
SkipValidation,
32+
TypeAdapter,
3233
ValidationError,
3334
create_model,
3435
)
@@ -69,6 +70,8 @@
6970
import uuid
7071
from collections.abc import Sequence
7172

73+
from langchain_core.tools.schema import ToolSchema
74+
7275
FILTERED_ARGS = ("run_manager", "callbacks")
7376
TOOL_MESSAGE_BLOCK_TYPES = (
7477
"text",
@@ -495,65 +498,100 @@ def __init__(self, **kwargs: Any) -> None:
495498
def __setattr__(self, name: str, value: object) -> None:
496499
"""Clear schema caches when schema-influencing fields are mutated."""
497500
if name in self._SCHEMA_INVALIDATING_FIELDS:
498-
self.__dict__.pop("tool_call_schema", None)
499-
self.__dict__.pop("args", None)
501+
# tool_schema is the single root cache; _inferred_input_schema is
502+
# kept separate since it's also used outside the tool_schema path.
503+
self.__dict__.pop("tool_schema", None)
500504
self.__dict__.pop("_inferred_input_schema", None)
501-
self.__dict__.pop("_approximate_schema_chars", None)
502505
super().__setattr__(name, value)
503506

507+
@functools.cached_property
def tool_schema(self) -> ToolSchema:
    """Build this tool's `ToolSchema`; computed once and cached per instance.

    The result bundles the tool-call schema (a model class or a dict), the
    JSON schema derived from it, that JSON schema's `properties`, a
    `TypeAdapter` built from `get_input_schema()`, and the length of the
    JSON-serialized name/description/schema payload.

    Returns:
        The `ToolSchema` describing this tool.
    """
    from langchain_core.tools.schema import ToolSchema  # noqa: PLC0415

    declared = self.args_schema
    call_schema: ArgsSchema
    if isinstance(declared, dict):
        # A dict schema is used as given. A truthy description is merged in
        # under the "description" key of a copy, leaving `args_schema` intact.
        call_schema = (
            {**declared, "description": self.description}
            if self.description
            else declared
        )
    else:
        # Model-backed schema: keep only the fields whose annotation is not
        # an injected-argument type, then build the subset model from them.
        input_model = self.get_input_schema()
        visible_fields = [
            field_name
            for field_name, annotation in get_all_basemodel_annotations(
                input_model
            ).items()
            if not _is_injected_arg_type(annotation)
        ]
        call_schema = _create_subset_model(
            self.name,
            input_model,
            visible_fields,
            fn_description=self.description,
        )

    # Resolve the JSON schema: dicts are already JSON schema, models with
    # `model_json_schema` use it, anything else falls back to `.schema()`.
    schema_dict: dict
    if isinstance(call_schema, dict):
        schema_dict = call_schema
    elif hasattr(call_schema, "model_json_schema"):
        schema_dict = call_schema.model_json_schema()
    else:
        schema_dict = call_schema.schema()  # type: ignore[deprecated]  # pydantic v1

    properties = cast("dict", schema_dict.get("properties", {}))

    # Size of the serialized name/description/schema payload; values json
    # cannot encode natively are stringified via `default=str`.
    serialized_payload = json.dumps(
        {
            "name": self.name,
            "description": self.description,
            "schema": schema_dict,
        },
        default=str,
    )

    return ToolSchema(
        name=self.name,
        description=self.description or "",
        validator=TypeAdapter(self.get_input_schema()),
        json_schema=schema_dict,
        pydantic_schema=call_schema,
        args=properties,
        approximate_chars=len(serialized_payload),
    )
563+
504564
@property
505-
def is_single_input(self) -> bool:
506-
"""Check if the tool accepts only a single input argument.
565+
def tool_call_schema(self) -> ArgsSchema:
566+
"""The schema for tool calls, excluding injected arguments.
507567
508568
Returns:
509-
`True` if the tool has only one input argument, `False` otherwise.
569+
The schema used for tool calls from language models.
510570
"""
511-
keys = {k for k in self.args if k != "kwargs"}
512-
return len(keys) == 1
571+
return self.tool_schema.pydantic_schema # type: ignore[no-any-return]
513572

514-
@functools.cached_property
573+
@property
515574
def args(self) -> dict:
516-
"""Get the tool's input arguments schema.
575+
"""The tool's input argument properties.
517576
518577
Returns:
519578
`dict` containing the tool's argument properties.
520579
"""
521-
if isinstance(self.args_schema, dict):
522-
json_schema = self.args_schema
523-
elif self.args_schema and issubclass(self.args_schema, BaseModelV1):
524-
json_schema = self.args_schema.schema()
525-
else:
526-
input_schema = self.tool_call_schema
527-
if isinstance(input_schema, dict):
528-
json_schema = input_schema
529-
else:
530-
json_schema = input_schema.model_json_schema()
531-
return cast("dict", json_schema["properties"])
580+
return self.tool_schema.args
532581

533-
@functools.cached_property
534-
def tool_call_schema(self) -> ArgsSchema:
535-
"""Get the schema for tool calls, excluding injected arguments.
582+
@property
583+
def _approximate_schema_chars(self) -> int:
584+
return self.tool_schema.approximate_chars
585+
586+
@property
587+
def is_single_input(self) -> bool:
588+
"""Check if the tool accepts only a single input argument.
536589
537590
Returns:
538-
The schema that should be used for tool calls from language models.
591+
`True` if the tool has only one input argument, `False` otherwise.
539592
"""
540-
if isinstance(self.args_schema, dict):
541-
if self.description:
542-
return {
543-
**self.args_schema,
544-
"description": self.description,
545-
}
546-
547-
return self.args_schema
548-
549-
full_schema = self.get_input_schema()
550-
fields = []
551-
for name, type_ in get_all_basemodel_annotations(full_schema).items():
552-
if not _is_injected_arg_type(type_):
553-
fields.append(name)
554-
return _create_subset_model(
555-
self.name, full_schema, fields, fn_description=self.description
556-
)
593+
keys = {k for k in self.args if k != "kwargs"}
594+
return len(keys) == 1
557595

558596
@functools.cached_property
559597
def _injected_args_keys(self) -> frozenset[str]:
@@ -583,18 +621,6 @@ def _inferred_input_schema(self) -> type[BaseModel]:
583621
"""Schema inferred from `_run` signature; computed once."""
584622
return create_schema_from_function(self.name, self._run)
585623

586-
@functools.cached_property
587-
def _approximate_schema_chars(self) -> int:
588-
"""Cached char count of the neutral tool payload for token estimation."""
589-
schema = self.tool_call_schema
590-
schema_dict = schema if isinstance(schema, dict) else schema.model_json_schema()
591-
payload = {
592-
"name": self.name,
593-
"description": self.description,
594-
"schema": schema_dict,
595-
}
596-
return len(json.dumps(payload, default=str))
597-
598624
@override
599625
def invoke(
600626
self,
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Schema dataclass for LangChain tool definitions."""
2+
3+
from __future__ import annotations
4+
5+
from dataclasses import dataclass
6+
from typing import TYPE_CHECKING, Any
7+
8+
if TYPE_CHECKING:
9+
from pydantic import TypeAdapter
10+
11+
12+
@dataclass
class ToolSchema:
    """Everything schema-related that is derived from a single tool.

    `BaseTool.tool_schema` builds and caches one instance of this class;
    `BaseTool.tool_call_schema`, `BaseTool.args`, and
    `BaseTool._approximate_schema_chars` are plain properties that read
    their values from it.

    Attributes:
        name: Name of the tool.
        description: Description of the tool that is sent to the LLM.
        validator: `TypeAdapter` used to validate and coerce the inputs of a
            tool call.
        json_schema: JSON schema dict for the tool's parameters, computed up
            front so it can be handed to an LLM's tool/function calling API.
        pydantic_schema: Pydantic model class or dict that `json_schema` was
            produced from. Kept so that callers of `tool_call_schema` that
            check `issubclass(schema, BaseModel)` keep working.
        args: The `"properties"` entry of `json_schema`, computed up front;
            this is what `BaseTool.args` returns.
        approximate_chars: Character count of the neutral tool payload
            (name + description + schema), computed up front for token
            estimation.
    """

    name: str
    description: str
    validator: TypeAdapter
    json_schema: dict[str, Any]
    pydantic_schema: Any
    args: dict[str, Any]
    approximate_chars: int

    def validate_python(self, data: Any) -> Any:
        """Run raw tool call input through `validator`.

        Args:
            data: The unvalidated input data.

        Returns:
            The result of `validator.validate_python(data)`.
        """
        adapter = self.validator
        return adapter.validate_python(data)

libs/core/langchain_core/utils/function_calling.py

Lines changed: 44 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
from pydantic import BaseModel
2525
from pydantic.errors import PydanticInvalidForJsonSchema
2626
from pydantic.v1 import BaseModel as BaseModelV1
27-
from pydantic.v1 import Field as Field_v1
28-
from pydantic.v1 import create_model as create_model_v1
2927
from typing_extensions import TypedDict, is_typeddict
3028

3129
import langchain_core
@@ -243,86 +241,55 @@ def _convert_python_function_to_openai_function(
243241

244242

245243
def _convert_typed_dict_to_openai_function(typed_dict: type) -> FunctionDescription:
246-
visited: dict = {}
244+
"""Convert a TypedDict to an OpenAI function description using `TypeAdapter`.
247245
248-
model = cast(
249-
"type[BaseModel]",
250-
_convert_any_typed_dicts_to_pydantic(typed_dict, visited=visited),
251-
)
252-
return _convert_pydantic_to_openai_function(model)
246+
Uses pydantic v2's `TypeAdapter` directly rather than creating an
247+
intermediate Pydantic model class. This correctly handles `NotRequired`
248+
and `Required` annotations, nested TypedDicts, and `Annotated` metadata.
253249
250+
Field descriptions are sourced from:
251+
1. Google-style docstring arg descriptions.
252+
2. Plain string metadata in `Annotated[T, ..., "description"]` annotations.
253+
3. `Field(description=...)` metadata (handled natively by `TypeAdapter`).
254+
"""
255+
from pydantic import TypeAdapter # noqa: PLC0415
254256

255-
_MAX_TYPED_DICT_RECURSION = 25
257+
adapter: TypeAdapter = TypeAdapter(typed_dict)
258+
schema = adapter.json_schema()
256259

260+
docstring = inspect.getdoc(typed_dict)
261+
try:
262+
annotations_ = get_type_hints(typed_dict, include_extras=True)
263+
except Exception:
264+
annotations_ = getattr(typed_dict, "__annotations__", {})
257265

258-
def _convert_any_typed_dicts_to_pydantic(
259-
type_: type,
260-
*,
261-
visited: dict[type, type],
262-
depth: int = 0,
263-
) -> type:
264-
if type_ in visited:
265-
return visited[type_]
266-
if depth >= _MAX_TYPED_DICT_RECURSION:
267-
return type_
268-
if is_typeddict(type_):
269-
typed_dict = type_
270-
docstring = inspect.getdoc(typed_dict)
271-
# Use get_type_hints to properly resolve forward references and
272-
# string annotations in Python 3.14+ (PEP 649 deferred annotations).
273-
# include_extras=True preserves Annotated metadata.
274-
try:
275-
annotations_ = get_type_hints(typed_dict, include_extras=True)
276-
except Exception:
277-
# Fallback for edge cases where get_type_hints might fail
278-
annotations_ = typed_dict.__annotations__
279-
description, arg_descriptions = _parse_google_docstring(
280-
docstring, list(annotations_)
281-
)
282-
fields: dict = {}
283-
for arg, arg_type in annotations_.items():
284-
if get_origin(arg_type) in {Annotated, typing_extensions.Annotated}:
285-
annotated_args = get_args(arg_type)
286-
new_arg_type = _convert_any_typed_dicts_to_pydantic(
287-
annotated_args[0], depth=depth + 1, visited=visited
288-
)
289-
field_kwargs = dict(
290-
zip(("default", "description"), annotated_args[1:], strict=False)
291-
)
292-
if (field_desc := field_kwargs.get("description")) and not isinstance(
293-
field_desc, str
294-
):
295-
msg = (
296-
f"Invalid annotation for field {arg}. Third argument to "
297-
f"Annotated must be a string description, received value of "
298-
f"type {type(field_desc)}."
299-
)
300-
raise ValueError(msg)
301-
if arg_desc := arg_descriptions.get(arg):
302-
field_kwargs["description"] = arg_desc
303-
fields[arg] = (new_arg_type, Field_v1(**field_kwargs))
304-
else:
305-
new_arg_type = _convert_any_typed_dicts_to_pydantic(
306-
arg_type, depth=depth + 1, visited=visited
307-
)
308-
field_kwargs = {"default": ...}
309-
if arg_desc := arg_descriptions.get(arg):
310-
field_kwargs["description"] = arg_desc
311-
fields[arg] = (new_arg_type, Field_v1(**field_kwargs))
312-
model = cast(
313-
"type[BaseModelV1]", create_model_v1(typed_dict.__name__, **fields)
314-
)
315-
model.__doc__ = description
316-
visited[typed_dict] = model
317-
return model
318-
if (origin := get_origin(type_)) and (type_args := get_args(type_)):
319-
subscriptable_origin = _py_38_safe_origin(origin)
320-
type_args = tuple(
321-
_convert_any_typed_dicts_to_pydantic(arg, depth=depth + 1, visited=visited)
322-
for arg in type_args
323-
)
324-
return cast("type", subscriptable_origin[type_args]) # type: ignore[index]
325-
return type_
266+
description, arg_descriptions = _parse_google_docstring(
267+
docstring, list(annotations_)
268+
)
269+
270+
# Extract plain-string descriptions from Annotated[T, ..., "description"] style.
271+
for field_name, annotation in annotations_.items():
272+
if field_name in arg_descriptions:
273+
continue
274+
if get_origin(annotation) in {Annotated, typing_extensions.Annotated}:
275+
for meta in get_args(annotation)[1:]:
276+
if isinstance(meta, str):
277+
arg_descriptions[field_name] = meta
278+
break
279+
280+
# Inject descriptions into schema properties.
281+
if arg_descriptions and "properties" in schema:
282+
for field_name, field_desc in arg_descriptions.items():
283+
if field_name in schema["properties"] and isinstance(
284+
schema["properties"][field_name], dict
285+
):
286+
schema["properties"][field_name].setdefault("description", field_desc)
287+
288+
return _convert_json_schema_to_openai_function(
289+
schema,
290+
name=typed_dict.__name__,
291+
description=description or None,
292+
)
326293

327294

328295
def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:

0 commit comments

Comments
 (0)