Skip to content

Commit 95fe150

Browse files
alvinttang, gambletan, mdrxy
authored
fix(core): _parse_google_docstring mishandling continuation lines with colons (#35680)
## Description

`_parse_google_docstring` incorrectly parses multi-line argument descriptions when a continuation line contains a colon. The continuation line is treated as a new argument definition instead of being appended to the current argument's description.

### Example

```python
def search(query: str, top_k: int = 5) -> str:
    """Search the knowledge base.

    Args:
        query: The search query to use
            for finding things: important ones
        top_k: Number of results to return
    """
```

**Before (broken):** The parser creates 3 args: `query`, `for finding things`, `top_k`

**After (fixed):** The parser correctly creates 2 args: `query` (with full description including "for finding things: important ones"), `top_k`

### Root Cause

The parser used `if ":" in line` to detect new argument lines without considering indentation. In Google-style docstrings, continuation lines have deeper indentation than argument definition lines.

### Fix

Detect the base indentation level from the first argument line and treat any line with deeper indentation as a continuation of the current argument's description, regardless of whether it contains a colon.

## Issue

Fixes #35679

## Dependencies

None.

## Testing

Added 4 unit tests in `test_function_calling.py::TestParseGoogleDocstring`:

- `test_continuation_line_with_colon` — the core bug scenario
- `test_simple_args_still_work` — regression check for basic args
- `test_continuation_line_without_colon` — multi-line descriptions without colons
- `test_multiple_continuation_lines_with_colons` — multiple continuation lines each containing colons

All tests pass locally with Python 3.12.

---------

Co-authored-by: gambletan <ethanchang32@gmail.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
1 parent 64177b6 commit 95fe150

2 files changed

Lines changed: 140 additions & 3 deletions

File tree

libs/core/langchain_core/utils/function_calling.py

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -781,11 +781,27 @@ def _parse_google_docstring(
781781
raise ValueError(msg)
782782
description = ""
783783
args_block = None
784-
arg_descriptions = {}
784+
arg_descriptions: dict[str, str] = {}
785785
if args_block:
786-
arg = None
786+
arg: str | None = None
787+
# Base indentation, latched once from the first argument line, lets us
788+
# distinguish new argument definitions from continuation lines. This
789+
# assumes Google-style uniform indentation of argument names: a line
790+
# indented deeper than the first argument is treated as a continuation
791+
# (even if it contains a colon), so a more-indented later `name:` line
792+
# in a malformed, non-uniformly-indented block folds into the previous
793+
# argument rather than starting a new one.
794+
arg_indent: int | None = None
787795
for line in args_block.split("\n")[1:]:
788-
if ":" in line:
796+
if not line.strip():
797+
continue
798+
current_indent = len(line) - len(line.lstrip())
799+
if arg_indent is None and ":" in line:
800+
arg_indent = current_indent
801+
is_continuation = arg_indent is not None and current_indent > arg_indent
802+
if arg is not None and is_continuation:
803+
arg_descriptions[arg] += " " + line.strip()
804+
elif ":" in line:
789805
arg, desc = line.split(":", maxsplit=1)
790806
arg = arg.strip()
791807
arg_name, _, annotations_ = arg.partition(" ")

libs/core/tests/unit_tests/utils/test_function_calling.py

Lines changed: 121 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from langchain_core.tools import BaseTool, StructuredTool, Tool, tool
3131
from langchain_core.utils.function_calling import (
3232
_convert_typed_dict_to_openai_function,
33+
_parse_google_docstring,
3334
convert_to_json_schema,
3435
convert_to_openai_function,
3536
convert_to_openai_tool,
@@ -1250,6 +1251,126 @@ def test_convert_to_openai_function_json_schema_missing_title_includes_schema()
12501251
convert_to_openai_function(schema_without_title)
12511252

12521253

1254+
class TestParseGoogleDocstring:
1255+
"""Tests for _parse_google_docstring continuation-line handling."""
1256+
1257+
def test_continuation_line_with_colon(self) -> None:
1258+
"""Continuation lines containing colons should not be treated as new args."""
1259+
# inspect.getdoc() returns dedented docstrings, so match that format
1260+
docstring = (
1261+
"Search the knowledge base.\n"
1262+
"\n"
1263+
"Args:\n"
1264+
" query: The search query to use\n"
1265+
" for finding things: important ones\n"
1266+
" top_k: Number of results to return"
1267+
)
1268+
_desc, args = _parse_google_docstring(docstring, ["query", "top_k"])
1269+
assert "query" in args
1270+
assert "top_k" in args
1271+
assert len(args) == 2
1272+
assert "for finding things: important ones" in args["query"]
1273+
1274+
def test_simple_args_still_work(self) -> None:
1275+
"""Basic single-line argument descriptions should still parse correctly."""
1276+
docstring = "Do something.\n\nArgs:\n x: The x value\n y: The y value"
1277+
_desc, args = _parse_google_docstring(docstring, ["x", "y"])
1278+
assert args == {"x": "The x value", "y": "The y value"}
1279+
1280+
def test_continuation_line_without_colon(self) -> None:
1281+
"""Colon-free continuation lines append to the current arg.
1282+
1283+
Documents preserved behavior: this case parsed correctly before the
1284+
continuation-detection fix (via the colon-free fallback branch) and
1285+
must continue to.
1286+
"""
1287+
docstring = (
1288+
"Do something.\n"
1289+
"\n"
1290+
"Args:\n"
1291+
" name: A very long description that\n"
1292+
" spans multiple lines\n"
1293+
" age: The age"
1294+
)
1295+
_desc, args = _parse_google_docstring(docstring, ["name", "age"])
1296+
assert "spans multiple lines" in args["name"]
1297+
assert args["age"] == "The age"
1298+
1299+
def test_multiple_continuation_lines_with_colons(self) -> None:
1300+
"""Multiple continuation lines with colons should all be appended."""
1301+
docstring = (
1302+
"Process data.\n"
1303+
"\n"
1304+
"Args:\n"
1305+
" config: Configuration string in format\n"
1306+
" key1: value1\n"
1307+
" key2: value2\n"
1308+
" verbose: Enable verbose output"
1309+
)
1310+
_desc, args = _parse_google_docstring(docstring, ["config", "verbose"])
1311+
assert "key1: value1" in args["config"]
1312+
assert "key2: value2" in args["config"]
1313+
assert args["verbose"] == "Enable verbose output"
1314+
1315+
def test_annotated_arg_with_colon_continuation(self) -> None:
1316+
"""A `(type)` annotation strips correctly alongside a colon continuation.
1317+
1318+
Exercises both code paths the fix touches at once: the parenthesized
1319+
type annotation is stripped from the arg name, and the colon-bearing
1320+
continuation line folds into that arg rather than creating a phantom
1321+
key (the original bug).
1322+
"""
1323+
docstring = (
1324+
"Run a query.\n"
1325+
"\n"
1326+
"Args:\n"
1327+
" query (str): The query to run\n"
1328+
" details: extra info\n"
1329+
" k (int): Number of results"
1330+
)
1331+
_desc, args = _parse_google_docstring(docstring, ["query", "k"])
1332+
assert set(args) == {"query", "k"}
1333+
assert "details: extra info" in args["query"]
1334+
assert args["k"] == "Number of results"
1335+
1336+
def test_returns_section_after_args_excluded(self) -> None:
1337+
"""A well-formed Returns: block after Args: must not leak in as an arg.
1338+
1339+
The blank line separating the sections terminates the Args block, so
1340+
`Returns`/`Raises` and their indented bodies stay out of
1341+
`arg_descriptions`.
1342+
"""
1343+
docstring = (
1344+
"Do work.\n\nArgs:\n x: The x value\n\nReturns:\n result: yes\n"
1345+
)
1346+
_desc, args = _parse_google_docstring(docstring, ["x"])
1347+
assert args == {"x": "The x value"}
1348+
1349+
def test_same_indent_colon_line_is_new_arg(self) -> None:
1350+
"""A colon line at the base arg indent starts a new arg, not a continuation.
1351+
1352+
Pins the `current_indent > arg_indent` boundary: only deeper-indented
1353+
lines are continuations.
1354+
"""
1355+
docstring = "Do work.\n\nArgs:\n a: first\n b: second"
1356+
_desc, args = _parse_google_docstring(docstring, ["a", "b"])
1357+
assert args == {"a": "first", "b": "second"}
1358+
1359+
def test_more_indented_second_arg_folds_into_previous(self) -> None:
1360+
"""Non-uniform indentation: a deeper second arg folds into the previous one.
1361+
1362+
Documents the intentional trade-off of indentation-based detection.
1363+
Google style requires uniform argument indentation; when a later arg is
1364+
indented deeper than the first, it is indistinguishable from a
1365+
colon-bearing continuation and is merged into the prior arg. This pins
1366+
that behavior so it stays intentional rather than incidental.
1367+
"""
1368+
docstring = "Do work.\n\nArgs:\n x: the x value\n y: the y value"
1369+
_desc, args = _parse_google_docstring(docstring, ["x", "y"])
1370+
assert set(args) == {"x"}
1371+
assert "y: the y value" in args["x"]
1372+
1373+
12531374
def test_convert_to_openai_tool_apply_patch_passthrough() -> None:
12541375
"""Test apply_patch is passed through as an OpenAI built-in tool."""
12551376
tool = {"type": "apply_patch"}

0 commit comments

Comments
 (0)