Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions instructor/processing/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,9 +461,9 @@ def handle_response_model(
Mode.MISTRAL_TOOLS: handle_mistral_tools,
Mode.MISTRAL_STRUCTURED_OUTPUTS: handle_mistral_structured_outputs,
Mode.JSON_O1: handle_json_o1,
Mode.JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON), # type: ignore
Mode.MD_JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.MD_JSON), # type: ignore
Mode.JSON_SCHEMA: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON_SCHEMA), # type: ignore
Mode.JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON, nk.pop("json_system_prompt", None)), # type: ignore
Mode.MD_JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.MD_JSON, nk.pop("json_system_prompt", None)), # type: ignore
Mode.JSON_SCHEMA: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON_SCHEMA, nk.pop("json_system_prompt", None)), # type: ignore
Mode.ANTHROPIC_TOOLS: handle_anthropic_tools,
Mode.ANTHROPIC_REASONING_TOOLS: handle_anthropic_reasoning_tools,
Mode.ANTHROPIC_JSON: handle_anthropic_json,
Expand Down
49 changes: 39 additions & 10 deletions instructor/providers/openai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,10 @@ def handle_json_o1(


def handle_json_modes(
response_model: type[Any] | None, new_kwargs: dict[str, Any], mode: Mode
response_model: type[Any] | None,
new_kwargs: dict[str, Any],
mode: Mode,
json_system_prompt: str | None = None,
) -> tuple[type[Any] | None, dict[str, Any]]:
"""
Handle OpenAI JSON modes (JSON, MD_JSON, JSON_SCHEMA).
Expand All @@ -500,20 +503,45 @@ def handle_json_modes(
- Mode.JSON_SCHEMA: Adds "response_format" with json_schema
- Mode.JSON: Adds "response_format" with type="json_object", modifies system message
- Mode.MD_JSON: Appends user message for markdown JSON response

Args:
response_model: The Pydantic model to use for parsing responses.
new_kwargs: The kwargs to modify for the API call.
mode: The JSON mode to use (JSON, MD_JSON, or JSON_SCHEMA).
json_system_prompt: Custom system prompt for JSON mode. Use {schema} placeholder
for the JSON schema. If None, uses the default prompt. If empty string "",
no system prompt modification is made. Example:
"You are a helpful assistant. Respond with JSON matching: {schema}"
"""
if response_model is None:
return None, new_kwargs

message = dedent(
f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n
# Generate the JSON schema string
json_schema = json.dumps(
response_model.model_json_schema(), indent=2, ensure_ascii=False
)

{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}
# Determine the system prompt to use
if json_system_prompt == "":
# Empty string means skip system prompt modification entirely
message = None
elif json_system_prompt is not None:
# Custom prompt provided - substitute {schema} placeholder
# Using str.replace() instead of str.format() to safely handle prompts
# that contain curly braces (e.g., JSON examples like {"id": 1})
message = json_system_prompt.replace("{schema}", json_schema)
else:
# Default prompt (backward compatible)
message = dedent(
f"""
As a genius expert, your task is to understand the content and provide
the parsed objects in json that match the following json_schema:\n

Make sure to return an instance of the JSON, not the schema itself
"""
)
{json_schema}

Make sure to return an instance of the JSON, not the schema itself
"""
)

if mode == Mode.JSON:
new_kwargs["response_format"] = {"type": "json_object"}
Expand All @@ -534,7 +562,8 @@ def handle_json_modes(
)
new_kwargs["messages"] = merge_consecutive_messages(new_kwargs["messages"])

if mode != Mode.JSON_SCHEMA:
# Only modify system message if we have a message to add
if message is not None and mode != Mode.JSON_SCHEMA:
if new_kwargs["messages"][0]["role"] != "system":
new_kwargs["messages"].insert(
0,
Expand Down
149 changes: 149 additions & 0 deletions tests/test_json_system_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""
Tests for json_system_prompt customization feature.

Tests verify that:
- Users can customize the JSON mode system prompt
- {schema} placeholder is correctly substituted
- Empty string disables system prompt modification
- Default behavior is backward compatible
"""

import json
import pytest
from unittest.mock import MagicMock

from instructor.providers.openai.utils import handle_json_modes
from instructor.mode import Mode
from pydantic import BaseModel


class SimpleModel(BaseModel):
    """Test model for JSON schema generation."""
    # Fields chosen so the generated JSON schema contains recognizable
    # keys ("name", "age", and the title "SimpleModel") for assertions.
    name: str
    age: int


class TestJsonSystemPromptCustomization:
    """Tests for Issue #1514 - Customizable JSON mode system prompt."""

    def test_default_prompt_backward_compatible(self):
        """With no custom prompt, the default system message is inserted."""
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(SimpleModel, kwargs, Mode.JSON)

        first = updated["messages"][0]
        # A system message must have been prepended, carrying the
        # historical default wording plus the embedded JSON schema.
        assert first["role"] == "system"
        assert "genius expert" in first["content"]
        assert (
            "SimpleModel" in first["content"]
            or "name" in first["content"]
        )

    def test_custom_prompt_with_schema_placeholder(self):
        """A custom prompt's {schema} placeholder is substituted."""
        custom_prompt = "You are a helpful assistant. Return JSON matching:\n{schema}"
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON, json_system_prompt=custom_prompt
        )

        first = updated["messages"][0]
        assert first["role"] == "system"
        # The custom text replaces the default wording entirely.
        assert "genius expert" not in first["content"]
        assert "helpful assistant" in first["content"]
        # {schema} was expanded into the real schema (field name visible).
        assert "name" in first["content"]

    def test_empty_string_skips_system_prompt(self):
        """An empty-string prompt disables system message injection."""
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON, json_system_prompt=""
        )

        first = updated["messages"][0]
        # No system message prepended; the user message is untouched.
        assert first["role"] == "user"
        assert first["content"] == "Extract data"

    def test_custom_prompt_appends_to_existing_system(self):
        """An existing system message is extended, not replaced."""
        custom_prompt = "Respond with JSON: {schema}"
        kwargs = {
            "messages": [
                {"role": "system", "content": "You are a pirate."},
                {"role": "user", "content": "Tell me about treasure"}
            ]
        }

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON, json_system_prompt=custom_prompt
        )

        first = updated["messages"][0]
        # Both the original system content and the JSON instruction survive.
        assert first["role"] == "system"
        assert "pirate" in first["content"]
        assert "JSON" in first["content"]

    def test_json_schema_mode_ignores_system_prompt(self):
        """JSON_SCHEMA mode relies on response_format, never the prompt."""
        custom_prompt = "Custom prompt {schema}"
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.JSON_SCHEMA, json_system_prompt=custom_prompt
        )

        # Structured output goes through response_format...
        assert "response_format" in updated
        assert updated["response_format"]["type"] == "json_schema"
        # ...so no system message is injected for this mode.
        assert updated["messages"][0]["role"] == "user"

    def test_md_json_mode_with_custom_prompt(self):
        """MD_JSON mode honors a custom prompt as well."""
        custom_prompt = "Return markdown JSON: {schema}"
        kwargs = {"messages": [{"role": "user", "content": "Extract data"}]}

        model, updated = handle_json_modes(
            SimpleModel, kwargs, Mode.MD_JSON, json_system_prompt=custom_prompt
        )

        first = updated["messages"][0]
        assert first["role"] == "system"
        assert "markdown JSON" in first["content"]

    def test_none_response_model_returns_early(self):
        """A None response_model short-circuits with no modification."""
        kwargs = {"messages": [{"role": "user", "content": "Hello"}]}

        model, updated = handle_json_modes(
            None, kwargs, Mode.JSON, json_system_prompt="custom"
        )

        assert model is None
        # Messages pass through untouched.
        assert updated["messages"][0]["role"] == "user"


# Allow running this file directly (e.g. `python test_json_system_prompt.py`)
# instead of only via a pytest invocation.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
Loading