Skip to content

Commit 789a298

Browse files
jlowin and claude authored
Graceful fallback for unsupported regex patterns in json_schema_to_type (#3959)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent 7fb037f commit 789a298

3 files changed

Lines changed: 150 additions & 5 deletions

File tree

src/fastmcp/utilities/json_schema_type.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,24 @@
1414
- Enums and constants
1515
- Union types
1616
17+
## Unsupported regex patterns
18+
19+
Pydantic uses a Rust-based regex engine that does not support all regex
20+
features found in real-world JSON Schemas (particularly those from AWS,
21+
Azure, and other large OpenAPI providers). Unsupported constructs include
22+
lookahead/lookbehind assertions (`(?!...)`, `(?<=...)`), Java/POSIX-style
23+
property classes (`\\p{Graph}`, `\\p{Print}`), and very large compiled patterns.
24+
25+
When a `pattern` constraint cannot be compiled, `json_schema_to_type`
26+
degrades gracefully:
27+
28+
1. The pattern is **dropped** from the Pydantic `StringConstraints` so
29+
the type will not raise a `SchemaError`.
30+
2. A `UserWarning` is emitted with the unsupported pattern.
31+
3. The original pattern is preserved in the type metadata as
32+
`x-unsupported-pattern` (visible via `TypeAdapter(T).json_schema()`).
33+
4. Other constraints (`minLength`, `maxLength`) are still enforced.
34+
1735
Example:
1836
```python
1937
schema = {
@@ -38,6 +56,7 @@
3856
import json
3957
import keyword
4058
import re
59+
import warnings
4160
from collections.abc import Callable, Mapping
4261
from copy import deepcopy
4362
from dataclasses import MISSING, field, make_dataclass
@@ -60,8 +79,10 @@
6079
Field,
6180
Json,
6281
StringConstraints,
82+
TypeAdapter,
6383
model_validator,
6484
)
85+
from pydantic_core import SchemaError as _PydanticSchemaError
6586
from typing_extensions import NotRequired, TypedDict
6687

6788
__all__ = ["JSONSchema", "json_schema_to_type"]
@@ -265,7 +286,33 @@ def _create_string_type(schema: Mapping[str, Any]) -> type | Annotated[Any, ...]
265286
if v is not None
266287
}
267288

268-
return Annotated[str, StringConstraints(**constraints)] if constraints else str
289+
if not constraints:
290+
return str
291+
292+
annotated: Any = Annotated[str, StringConstraints(**constraints)]
293+
294+
if "pattern" in constraints:
295+
try:
296+
TypeAdapter(annotated)
297+
except _PydanticSchemaError as exc:
298+
if "regex" not in str(exc).lower():
299+
raise
300+
pattern = constraints.pop("pattern")
301+
warnings.warn(
302+
f"Pattern {pattern!r} is not supported by Pydantic's regex engine "
303+
f"and will not be enforced.",
304+
UserWarning,
305+
stacklevel=2,
306+
)
307+
pattern_field = Field(json_schema_extra={"x-unsupported-pattern": pattern})
308+
if constraints:
309+
annotated = Annotated[
310+
str, StringConstraints(**constraints), pattern_field
311+
] # type: ignore[valid-type]
312+
else:
313+
annotated = Annotated[str, pattern_field] # type: ignore[valid-type]
314+
315+
return annotated
269316

270317

271318
def _create_numeric_type(

tests/utilities/json_schema_type/conftest.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,15 @@ def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
7070
)
7171

7272
# Snapshot baselines (openapi-directory@f7207cf0).
73-
# Ratcheted 2026-04-17: TypeErrors 420→0 (already fixed on main since
74-
# original 2026-04-10 capture). SchemaErrors unchanged — all 279 are
75-
# Pydantic Rust-regex rejections (lookahead, \p{…}, size limits).
73+
# Ratcheted 2026-04-17: TypeErrors 420→0 (already fixed on main).
74+
# SchemaError cap 300→5 (279 observed before): graceful pattern fallback in _create_string_type
75+
# now catches unsupported Rust-regex patterns (lookahead, \p{…}, size
76+
# limits) and degrades to str with a warning instead of crashing.
77+
# 1 non-regex SchemaError remains: api.video's video-thumbnail-pick-payload
78+
# declares `"pattern": 0.0` (a float, not a string) — a bug in the spec,
79+
# intentionally not caught by the regex-only fallback guard.
7680
MAX_TYPE_ERRORS = 0
77-
MAX_SCHEMA_ERRORS = 300 # was 277 — Pydantic regex rejections (not our code)
81+
MAX_SCHEMA_ERRORS = 5 # was 279; ~1 remains as a legitimate non-regex error
7882
MAX_TIMEOUTS = 5 # was 0
7983
MAX_OTHER_ERRORS = 50 # was 0
8084

tests/utilities/json_schema_type/test_json_schema_type.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Core JSON schema type conversion tests."""
22

33
import dataclasses
4+
import warnings
45
from dataclasses import Field
56
from enum import Enum
67
from typing import Any, Literal
@@ -310,3 +311,96 @@ def test_empty_property_name(self):
310311
field_names = [f.name for f in dataclasses.fields(T)]
311312
assert len(field_names) == 2
312313
assert len(set(field_names)) == 2
314+
315+
316+
class TestUnsupportedPatternFallback:
317+
"""Patterns that Pydantic's Rust regex engine cannot compile are dropped
318+
gracefully: a warning is emitted, the pattern is preserved in
319+
json_schema_extra, and the field still validates as str."""
320+
321+
def test_lookahead_pattern_falls_back_to_str(self):
322+
"""Lookahead patterns (unsupported by Rust regex) degrade to plain str."""
323+
schema = {"type": "string", "pattern": "^(?!aws:).+"}
324+
with pytest.warns(UserWarning, match="not supported by Pydantic"):
325+
T = json_schema_to_type(schema)
326+
ta = TypeAdapter(T)
327+
ta.validate_python("hello")
328+
ta.validate_python("aws:forbidden")
329+
330+
def test_unsupported_pattern_preserved_in_schema_extra(self):
331+
"""The original pattern is preserved via json_schema_extra."""
332+
schema = {"type": "string", "pattern": "^(?!aws:).+"}
333+
with pytest.warns(UserWarning):
334+
T = json_schema_to_type(schema)
335+
json_schema = TypeAdapter(T).json_schema()
336+
assert json_schema.get("x-unsupported-pattern") == "^(?!aws:).+"
337+
338+
def test_length_constraints_kept_when_pattern_dropped(self):
339+
"""minLength/maxLength are still enforced after pattern fallback."""
340+
schema = {
341+
"type": "string",
342+
"minLength": 3,
343+
"maxLength": 10,
344+
"pattern": "(?!x).+",
345+
}
346+
with pytest.warns(UserWarning):
347+
T = json_schema_to_type(schema)
348+
ta = TypeAdapter(T)
349+
ta.validate_python("abc")
350+
with pytest.raises(ValidationError):
351+
ta.validate_python("ab")
352+
with pytest.raises(ValidationError):
353+
ta.validate_python("a" * 11)
354+
355+
def test_supported_pattern_still_enforced(self):
356+
"""Valid patterns are not affected by the fallback logic."""
357+
schema = {"type": "string", "pattern": "^[a-z]+$"}
358+
T = json_schema_to_type(schema)
359+
ta = TypeAdapter(T)
360+
ta.validate_python("hello")
361+
with pytest.raises(ValidationError):
362+
ta.validate_python("HELLO")
363+
364+
def test_unicode_property_pattern_falls_back(self):
365+
"""Unicode \\p{...} patterns (unsupported by Rust regex) degrade gracefully."""
366+
schema = {"type": "string", "pattern": r"[\p{Graph}\x20]*"}
367+
with pytest.warns(UserWarning, match="not supported by Pydantic"):
368+
T = json_schema_to_type(schema)
369+
ta = TypeAdapter(T)
370+
ta.validate_python("anything")
371+
372+
def test_object_with_unsupported_pattern_field(self):
373+
"""An object schema containing a field with an unsupported pattern
374+
should not crash TypeAdapter construction."""
375+
schema = {
376+
"type": "object",
377+
"properties": {
378+
"tag_key": {"type": "string", "pattern": "^(?!aws:)[a-zA-Z]+$"},
379+
"value": {"type": "string"},
380+
},
381+
"required": ["tag_key", "value"],
382+
}
383+
with pytest.warns(UserWarning):
384+
T = json_schema_to_type(schema)
385+
ta = TypeAdapter(T)
386+
result = ta.validate_python({"tag_key": "Name", "value": "test"})
387+
assert result.tag_key == "Name" # ty:ignore[unresolved-attribute]
388+
389+
def test_fallback_only_triggers_for_regex_errors(self):
390+
"""Non-regex SchemaErrors must not be swallowed by the fallback path.
391+
392+
This test does not construct a non-regex SchemaError directly. It only
393+
checks the complementary case: a pattern that Pydantic can compile must
394+
build without emitting the fallback warning, so the fallback path is
395+
not taken when no regex error occurs.
396+
"""
397+
398+
# A pattern that will fail with a non-regex SchemaError is hard to
399+
# construct deliberately; instead we verify that the guard condition
400+
# (message containing "regex") is checked: a valid schema must NOT
401+
# emit a warning.
402+
schema = {"type": "string", "pattern": "^[a-z]+$"}
403+
with warnings.catch_warnings():
404+
warnings.simplefilter("error", UserWarning)
405+
T = json_schema_to_type(schema) # must not warn
406+
TypeAdapter(T).validate_python("hello")

0 commit comments

Comments
 (0)