Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions instructor/processing/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,9 @@ def handle_response_model(
Mode.MISTRAL_TOOLS: handle_mistral_tools,
Mode.MISTRAL_STRUCTURED_OUTPUTS: handle_mistral_structured_outputs,
Mode.JSON_O1: handle_json_o1,
Mode.JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON), # type: ignore
Mode.MD_JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.MD_JSON), # type: ignore
Mode.JSON_SCHEMA: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON_SCHEMA), # type: ignore
Mode.JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON, nk.pop("json_system_prompt", None)), # type: ignore
Mode.MD_JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.MD_JSON, nk.pop("json_system_prompt", None)), # type: ignore
Mode.JSON_SCHEMA: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON_SCHEMA, nk.pop("json_system_prompt", None)), # type: ignore
Mode.ANTHROPIC_TOOLS: handle_anthropic_tools,
Mode.ANTHROPIC_REASONING_TOOLS: handle_anthropic_reasoning_tools,
Mode.ANTHROPIC_JSON: handle_anthropic_json,
Expand Down
49 changes: 39 additions & 10 deletions instructor/providers/openai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,10 @@ def handle_json_o1(


def handle_json_modes(
response_model: type[Any] | None, new_kwargs: dict[str, Any], mode: Mode
response_model: type[Any] | None,
new_kwargs: dict[str, Any],
mode: Mode,
json_system_prompt: str | None = None,
) -> tuple[type[Any] | None, dict[str, Any]]:
"""
Handle OpenAI JSON modes (JSON, MD_JSON, JSON_SCHEMA).
Expand All @@ -500,20 +503,45 @@ def handle_json_modes(
- Mode.JSON_SCHEMA: Adds "response_format" with json_schema
- Mode.JSON: Adds "response_format" with type="json_object", modifies system message
- Mode.MD_JSON: Appends user message for markdown JSON response

Args:
response_model: The Pydantic model to use for parsing responses.
new_kwargs: The kwargs to modify for the API call.
mode: The JSON mode to use (JSON, MD_JSON, or JSON_SCHEMA).
json_system_prompt: Custom system prompt for JSON mode. Use {schema} placeholder
for the JSON schema. If None, uses the default prompt. If empty string "",
no system prompt modification is made. Example:
"You are a helpful assistant. Respond with JSON matching: {schema}"
"""
if response_model is None:
return None, new_kwargs

message = dedent(
f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n
# Generate the JSON schema string
json_schema = json.dumps(
response_model.model_json_schema(), indent=2, ensure_ascii=False
)

{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}
# Determine the system prompt to use
if json_system_prompt == "":
# Empty string means skip system prompt modification entirely
message = None
elif json_system_prompt is not None:
# Custom prompt provided - substitute {schema} placeholder
# Using str.replace() instead of str.format() to safely handle prompts
# that contain curly braces (e.g., JSON examples like {"id": 1})
message = json_system_prompt.replace("{schema}", json_schema)
else:
# Default prompt (backward compatible)
message = dedent(
f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n

Make sure to return an instance of the JSON, not the schema itself
"""
)
{json_schema}

Make sure to return an instance of the JSON, not the schema itself
"""
)

if mode == Mode.JSON:
new_kwargs["response_format"] = {"type": "json_object"}
Expand All @@ -534,7 +562,8 @@ def handle_json_modes(
)
new_kwargs["messages"] = merge_consecutive_messages(new_kwargs["messages"])

if mode != Mode.JSON_SCHEMA:
# Only modify system message if we have a message to add
if message is not None and mode != Mode.JSON_SCHEMA:
if new_kwargs["messages"][0]["role"] != "system":
new_kwargs["messages"].insert(
0,
Expand Down
149 changes: 149 additions & 0 deletions tests/test_json_system_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""
Tests for json_system_prompt customization feature.

Tests verify that:
- Users can customize the JSON mode system prompt
- {schema} placeholder is correctly substituted
- Empty string disables system prompt modification
- Default behavior is backward compatible
"""

import json
import pytest
from unittest.mock import MagicMock

from instructor.providers.openai.utils import handle_json_modes
from instructor.mode import Mode
from pydantic import BaseModel


class SimpleModel(BaseModel):
    """Test model for JSON schema generation."""
    # Fields chosen so the generated JSON schema contains recognizable
    # keys ("name", "age", and the title "SimpleModel") for assertions.
    name: str
    age: int


class TestJsonSystemPromptCustomization:
    """Tests for Issue #1514 - Customizable JSON mode system prompt."""

    def test_default_prompt_backward_compatible(self):
        """With no custom prompt, the default system message is inserted."""
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(SimpleModel, kwargs, Mode.JSON)

        first = updated["messages"][0]
        # A system message must have been prepended, carrying the
        # historical default wording plus the embedded JSON schema.
        assert first["role"] == "system"
        assert "genius expert" in first["content"]
        assert (
            "SimpleModel" in first["content"]
            or "name" in first["content"]
        )

    def test_custom_prompt_with_schema_placeholder(self):
        """A custom prompt's {schema} placeholder is substituted."""
        custom_prompt = "You are a helpful assistant. Return JSON matching:\n{schema}"
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON, json_system_prompt=custom_prompt
        )

        first = updated["messages"][0]
        assert first["role"] == "system"
        # The custom text replaces the default wording entirely.
        assert "genius expert" not in first["content"]
        assert "helpful assistant" in first["content"]
        # {schema} was expanded into the real schema (field name visible).
        assert "name" in first["content"]

    def test_empty_string_skips_system_prompt(self):
        """An empty-string prompt disables system message injection."""
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON, json_system_prompt=""
        )

        first = updated["messages"][0]
        # No system message prepended; the user message is untouched.
        assert first["role"] == "user"
        assert first["content"] == "Extract data"

    def test_custom_prompt_appends_to_existing_system(self):
        """An existing system message is extended, not replaced."""
        custom_prompt = "Respond with JSON: {schema}"
        kwargs = {
            "messages": [
                {"role": "system", "content": "You are a pirate."},
                {"role": "user", "content": "Tell me about treasure"}
            ]
        }

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON, json_system_prompt=custom_prompt
        )

        first = updated["messages"][0]
        # Both the original system content and the JSON instruction survive.
        assert first["role"] == "system"
        assert "pirate" in first["content"]
        assert "JSON" in first["content"]

    def test_json_schema_mode_ignores_system_prompt(self):
        """JSON_SCHEMA mode relies on response_format, never the prompt."""
        custom_prompt = "Custom prompt {schema}"
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON_SCHEMA, json_system_prompt=custom_prompt
        )

        # Structured output goes through response_format...
        assert "response_format" in updated
        assert updated["response_format"]["type"] == "json_schema"
        # ...so no system message is injected for this mode.
        assert updated["messages"][0]["role"] == "user"

    def test_md_json_mode_with_custom_prompt(self):
        """MD_JSON mode honors a custom prompt as well."""
        custom_prompt = "Return markdown JSON: {schema}"
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.MD_JSON, json_system_prompt=custom_prompt
        )

        first = updated["messages"][0]
        assert first["role"] == "system"
        assert "markdown JSON" in first["content"]

    def test_none_response_model_returns_early(self):
        """A None response_model short-circuits with no modification."""
        kwargs = {"messages": [{"role": "user", "content": "Hello"}]}

        model, updated = handle_json_modes(
            None, kwargs, Mode.JSON, json_system_prompt="custom"
        )

        assert model is None
        # Messages pass through untouched.
        assert updated["messages"][0]["role"] == "user"


# Allow running this file directly (e.g. `python test_json_system_prompt.py`)
# instead of only via a pytest invocation.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
Loading