Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions libs/core/langchain_core/document_loaders/langsmith.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import datetime
import json
import uuid
from collections.abc import Callable, Iterator, Sequence
from collections.abc import Callable, Iterator, Mapping, Sequence
from typing import Any

from langsmith import Client as LangSmithClient
Expand Down Expand Up @@ -94,7 +94,11 @@ def __init__(
ValueError: If both `client` and `client_kwargs` are provided.
""" # noqa: E501
if client and client_kwargs:
raise ValueError
msg = (
"Received both `client` and `client_kwargs`. "
"Pass `client_kwargs` only when `client` is not provided."
)
raise ValueError(msg)
self._client = client or LangSmithClient(**client_kwargs)
self.content_key = list(content_key.split(".")) if content_key else []
self.format_content = format_content or _stringify
Expand Down Expand Up @@ -123,9 +127,7 @@ def lazy_load(self) -> Iterator[Document]:
metadata=self.metadata,
filter=self.filter,
):
content: Any = example.inputs
for key in self.content_key:
content = content[key]
content = _get_content_from_inputs(example.inputs, self.content_key)
content_str = self.format_content(content)
metadata = pydantic_to_dict(example)
# Stringify datetime and UUID types.
Expand All @@ -134,6 +136,44 @@ def lazy_load(self) -> Iterator[Document]:
yield Document(content_str, metadata=metadata)


def _get_content_from_inputs(inputs: Any, content_key: Sequence[str]) -> Any:
"""Resolve nested example input content for `LangSmithLoader`.

Args:
inputs: Example input payload returned by LangSmith.
content_key: Ordered key path used to extract the document content.

Returns:
The extracted content value.

Raises:
KeyError: If the requested `content_key` path cannot be resolved.
"""
content = inputs
full_path = ".".join(content_key)
traversed_keys: list[str] = []

for key in content_key:
if not isinstance(content, Mapping):
current_path = ".".join(traversed_keys) or "<root>"
msg = (
f"Could not resolve content_key {full_path!r}: expected a mapping at "
f"{current_path!r}, but found {type(content).__name__}."
)
raise KeyError(msg)
if key not in content:
current_path = ".".join(traversed_keys) or "<root>"
msg = (
f"Could not resolve content_key {full_path!r}: missing key {key!r} "
f"under {current_path!r}."
)
raise KeyError(msg)
content = content[key]
traversed_keys.append(key)

return content


def _stringify(x: str | dict[str, Any]) -> str:
if isinstance(x, str):
return x
Expand Down
20 changes: 20 additions & 0 deletions libs/core/tests/unit_tests/document_loaders/test_langsmith.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import uuid
from unittest.mock import MagicMock, patch

import pytest
from langsmith.schemas import Example

from langchain_core.document_loaders import LangSmithLoader
Expand All @@ -13,6 +14,13 @@ def test_init() -> None:
LangSmithLoader(api_key="secret")


def test_init_with_client_and_client_kwargs_raises() -> None:
    """Supplying a client together with extra client kwargs raises `ValueError`."""
    expected_message = "Received both `client` and `client_kwargs`"

    with pytest.raises(ValueError, match=expected_message):
        LangSmithLoader(client=MagicMock(), api_key="secret")


EXAMPLES = [
Example(
inputs={"first": {"second": "foo"}},
Expand Down Expand Up @@ -60,3 +68,15 @@ def test_lazy_load() -> None:
)
actual = list(loader.lazy_load())
assert expected == actual


@patch("langsmith.Client.list_examples", MagicMock(return_value=iter(EXAMPLES[:1])))
def test_lazy_load_with_missing_content_key_raises() -> None:
    """Loading with a `content_key` that cannot be resolved raises `KeyError`.

    `list_examples` is patched to yield only the first entry of `EXAMPLES`.
    """
    expected_message = r"Could not resolve content_key 'first\.third'"
    loader_kwargs = {
        "api_key": "dummy",
        "dataset_id": "mock",
        "content_key": "first.third",
    }
    loader = LangSmithLoader(**loader_kwargs)

    # `lazy_load` is consumed with `list(...)` inside the `raises` context.
    with pytest.raises(KeyError, match=expected_message):
        list(loader.lazy_load())
Loading