liamlaverty · liamlaverty · Apr 22, 2026 · Apr 21, 2026 · Apr 21, 2026 · Apr 22, 2026
diff --git a/src/datafiles/prompts/stroke_system_prompt.txt b/src/datafiles/prompts/stroke_system_prompt.txt
@@ -0,0 +1,28 @@
+You are an expert artist creating a piece in the style of {artist_name}.
+
+{stroke_types_section}
+
+Canvas dimensions: {canvas_width}x{canvas_height} pixels
+All coordinates must be within bounds (0 to {canvas_width} for x, 0 to {canvas_height} for y).
+Use 0 for the left/top edge and {canvas_width}/{canvas_height} for the right/bottom edge.
+
+Stroke constraints:
+- Thickness: {min_stroke_thickness} to {max_stroke_thickness} pixels
+- Opacity: {min_stroke_opacity} to {max_stroke_opacity} (0.0 = transparent, 1.0 = opaque)
+
+Consider:
+- {artist_name}'s characteristic techniques, color palette, and composition style
+- The current state of the canvas and how to build upon it
+- Creating cohesive, original artwork (not copying specific existing pieces)
+- Using varied stroke types to achieve different artistic effects{dry_chalk_consider}{wet_brush_consider}
+
+RESPONSE FORMAT (JSON only):
+{{
+  "strokes": [
+    // stroke object(s) here - each must include all required fields for its type
+  ],
+  "updated_strategy": "<optional strategy update for future iterations, or null>",
+  "batch_reasoning": "<REQUIRED: explanation for this batch of strokes>"{layer_complete_field}
+}}
+
+IMPORTANT: Respond ONLY with valid JSON. Do not include any markdown formatting, code blocks, or text before/after the JSON.
diff --git a/src/datafiles/prompts/stroke_user_prompt.txt b/src/datafiles/prompts/stroke_user_prompt.txt
@@ -0,0 +1,5 @@
+Current Canvas: [Image attached]
+{subject_section}
+Iteration: {iteration}{strategy_section}{plan_section}
+
+Task: Suggest {num_strokes} stroke(s) to add to this canvas that evoke the artist's style.
diff --git a/src/paint_by_language_model/config.py b/src/paint_by_language_model/config.py
@@ -51,8 +51,8 @@
 MISTRAL_BASE_URL = "https://api.mistral.ai/v1"
 MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")
 MISTRAL_DEFAULT_MODEL = "mistral-small-latest"
-MISTRAL_VLM_MODEL = "mistral-large-2512"
-MISTRAL_EVALUATION_VLM_MODEL = "mistral-large-2512"
+MISTRAL_VLM_MODEL = "mistral-large-latest"
+MISTRAL_EVALUATION_VLM_MODEL = "mistral-large-latest"
 
 # LMStudio API settings (for local development)
 LMSTUDIO_BASE_URL = "http://localhost:1234/v1"

diff --git a/src/paint_by_language_model/generation_orchestrator.py b/src/paint_by_language_model/generation_orchestrator.py
@@ -530,9 +530,9 @@ def _check_stopping_conditions(self, iteration: int, evaluation: EvaluationResul
         # Condition 2: Target score achieved (after minimum iterations)
         if iteration >= MIN_ITERATIONS and evaluation["score"] >= self.target_style_score:
             logger.info(
-                f"Target score ({self.target_style_score}) reached with score {evaluation['score']:.1f}"
+                f"Target score ({self.target_style_score}) reached with score {evaluation['score']:.1f}, but continuing until max_iterations has been reached"
             )
-            return True
+            # return True  -- early stop on target score is intentionally disabled; the run continues until max_iterations
 
         # Condition 3: Score plateauing (optional, future enhancement)
         # Check if score hasn't improved significantly in recent iterations

diff --git a/src/paint_by_language_model/services/clients/evaluation_vlm_client.py b/src/paint_by_language_model/services/clients/evaluation_vlm_client.py
@@ -91,8 +91,8 @@ def evaluate_style(
         """
         logger.info(f"Requesting style evaluation for iteration {iteration}")
 
-        # Build prompt
-        prompt = self._build_evaluation_prompt(
+        # Build prompts (system = static per-run, user = dynamic per-evaluation)
+        system_prompt, user_prompt = self._build_evaluation_prompts(
             artist_name=artist_name,
             subject=subject,
             iteration=iteration,
@@ -102,7 +102,11 @@ def evaluate_style(
 
         # Query VLM
         try:
-            response_text = self.client.query_multimodal(prompt=prompt, image_bytes=canvas_image)
+            response_text = self.client.query_multimodal(
+                prompt=user_prompt,
+                image_bytes=canvas_image,
+                system_prompt=system_prompt,
+            )
 
             # Store raw response immediately so it is available even if parsing fails
             self.last_raw_response = response_text
@@ -122,15 +126,15 @@ def evaluate_style(
                 iteration=iteration,
                 artist_name=artist_name,
                 subject=subject,
-                prompt=prompt,
+                prompt=user_prompt,
                 raw_response=response_text,
                 parsed_response=evaluation,
             )
 
             if self.prompt_logger:
                 self.prompt_logger.log_interaction(
                     prompt_type="evaluation",
-                    prompt=prompt,
+                    prompt=user_prompt,
                     raw_response=response_text,
                     model=self.model,
                     provider=self.client.provider,
@@ -159,33 +163,36 @@ def evaluate_style(
             logger.error(f"Unexpected error during VLM evaluation: {e}")
             raise RuntimeError(f"VLM evaluation failed: {e}") from e
 
-    def _build_evaluation_prompt(
+    def _build_evaluation_prompts(
         self,
         artist_name: str,
         subject: str,
         iteration: int,
         painting_plan: PaintingPlan | None = None,
         current_layer: PlanLayer | None = None,
-    ) -> str:
+    ) -> tuple[str, str]:
         """
-        Build prompt for style evaluation.
+        Build system and user prompts for style evaluation.
+
+        The system prompt is stable within a run for a given artist (persona,
+        scoring rubric, scale definitions, JSON format spec) so Anthropic prompt
+        caching can engage across repeated evaluations. The user prompt is dynamic
+        per evaluation (canvas image reference, current subject, task line).
 
         Args:
             artist_name (str): Target artist name
             subject (str): Subject being painted
-            iteration (int): Current iteration number
+            iteration (int): Current iteration number (reserved for future use)
             painting_plan (PaintingPlan | None): Complete painting plan
             current_layer (PlanLayer | None): Current layer information
 
         Returns:
-            str: Formatted prompt
+            tuple[str, str]: A ``(system_prompt, user_prompt)`` pair where
+                ``system_prompt`` is the static art-critic persona, rubric, and
+                JSON format specification, and ``user_prompt`` is the dynamic
+                per-evaluation request referencing the attached canvas image.
         """
-        prompt = f"""You are an art critic evaluating artwork for stylistic similarity to {artist_name}.
-
-Current Canvas: [Image attached]
-Subject: {subject}
-
-Task: Rate how well this image embodies {artist_name}'s artistic style on a scale of 0-100.
+        system_prompt = f"""You are an art critic evaluating artwork for stylistic similarity to {artist_name}.
 
 Consider:
 - Color palette characteristic of {artist_name}
@@ -207,7 +214,12 @@ def _build_evaluation_prompt(
 
 IMPORTANT: Respond ONLY with valid JSON. Do not include any text before or after the JSON object."""
 
-        return prompt
+        user_prompt = f"""Current Canvas: [Image attached]
+Subject: {subject}
+
+Task: Rate how well this image embodies {artist_name}'s artistic style."""
+
+        return system_prompt, user_prompt
 
     def _parse_evaluation_response(
         self,

diff --git a/src/paint_by_language_model/services/clients/planner_llm_client.py b/src/paint_by_language_model/services/clients/planner_llm_client.py
@@ -165,8 +165,8 @@ def generate_plan(
         """
         logger.info(f"Generating painting plan for '{subject}' in style of {artist_name}")
 
-        # Build prompt
-        prompt = self._build_planning_prompt(
+        # Build prompts (system = static persona/format, user = dynamic per-run context)
+        system_prompt, user_prompt = self._build_planning_prompts(
             artist_name=artist_name,
             subject=subject,
             expanded_subject=expanded_subject,
@@ -175,7 +175,9 @@ def generate_plan(
 
         # Query LLM (text-only, no image) with sufficient max_tokens for detailed plans
         try:
-            response_text = self.client.query(prompt=prompt, max_tokens=PLANNER_MAX_TOKENS)
+            response_text = self.client.query(
+                prompt=user_prompt, max_tokens=PLANNER_MAX_TOKENS, system_prompt=system_prompt
+            )
 
             # Store raw response immediately so it is available even if parsing fails
             self.last_raw_response = response_text
@@ -192,7 +194,7 @@ def generate_plan(
             self._record_interaction(
                 artist_name=artist_name,
                 subject=subject,
-                prompt=prompt,
+                prompt=user_prompt,
                 raw_response=response_text,
                 parsed_response=painting_plan,
                 layer_count=painting_plan["total_layers"],
@@ -201,7 +203,7 @@ def generate_plan(
             if self.prompt_logger:
                 self.prompt_logger.log_interaction(
                     prompt_type="plan",
-                    prompt=prompt,
+                    prompt=user_prompt,
                     raw_response=response_text,
                     model=self.model,
                     provider=self.client.provider,
@@ -232,7 +234,7 @@ def generate_plan(
                     subject=subject,
                     raw_response=response_text,
                     exception=e,
-                    prompt=prompt if "prompt" in locals() else "",
+                    prompt=user_prompt if "user_prompt" in locals() else "",
                 )
                 logger.error(f"Raw LLM response saved to: {log_path}")
             raise ValueError(f"LLM returned invalid JSON: {e}") from e
@@ -244,19 +246,23 @@ def generate_plan(
                     subject=subject,
                     raw_response=response_text,
                     exception=e,
-                    prompt=prompt if "prompt" in locals() else "",
+                    prompt=user_prompt if "user_prompt" in locals() else "",
                 )
             raise RuntimeError(f"LLM query failed: {e}") from e
 
-    def _build_planning_prompt(
+    def _build_planning_prompts(
         self,
         artist_name: str,
         subject: str,
         expanded_subject: str | None,
         stroke_types: list[str],
-    ) -> str:
+    ) -> tuple[str, str]:
         """
-        Build prompt for painting plan generation.
+        Build system and user prompts for painting plan generation.
+
+        The system prompt is stable (planner persona, layer planning rules, JSON
+        response format) while the user prompt supplies the dynamic per-run
+        context (artist name, subject, stroke types, canvas and layer constraints).
 
         Args:
             artist_name (str): Target artist whose style to emulate
@@ -265,24 +271,14 @@ def _build_planning_prompt(
             stroke_types (list[str]): Available stroke types for the painting
 
         Returns:
-            str: Formatted prompt
+            tuple[str, str]: A ``(system_prompt, user_prompt)`` pair where
+                ``system_prompt`` is the static planner persona and JSON format
+                specification, and ``user_prompt`` is the dynamic per-run task
+                with artist, subject, and stroke-type details.
         """
-        expanded_section = f"\nExpanded description: {expanded_subject}" if expanded_subject else ""
-
-        prompt = f"""You are an expert art director planning a painting in the style of {artist_name}.
-
-Subject: {subject}{expanded_section}
-
-Available stroke types: {", ".join(stroke_types)}
-Canvas dimensions: {CANVAS_WIDTH}x{CANVAS_HEIGHT} pixels
-Minimum iterations per layer: {self.min_strokes_per_layer} (each iteration applies up to
-{DEFAULT_STROKES_PER_QUERY} strokes, so each layer will contain at most
-{self.min_strokes_per_layer * DEFAULT_STROKES_PER_QUERY} individual strokes before it can be
-marked complete)
-
-Task: Create a step-by-step layer plan for painting this image. Each layer will be
-executed sequentially — the painter can only ADD onto the canvas, not remove or switch
-between layers. Earlier layers will be painted over by later ones.
+        system_prompt = """You are an expert art director tasked with creating detailed, layered painting plans.
+Your plans guide an AI painter to create artwork in a specific artist's style. Each painting plan
+consists of ordered layers that are painted sequentially — earlier layers are painted over by later ones.
 
 Plan for 4-8 layers total. Fewer, well-defined layers work better than many small ones.
 Common layer sequence: background → mid-ground → main subjects → details/highlights.
@@ -296,21 +292,14 @@ def _build_planning_prompt(
 - shapes: Typical shapes and forms (e.g. "horizontal bands", "organic curves")
 - highlights: Guidance on emphasis, lighting, and texture in this layer
 
-Consider {artist_name}'s characteristic:
-- Colour choices and palette
-- Brushwork and mark-making style
-- Compositional approach
-- Treatment of light and shadow
-- Overall mood and atmosphere
-
 RESPONSE FORMAT (JSON only):
-{{
-  "artist_name": "{artist_name}",
-  "subject": "{subject}",
-  "expanded_subject": {json.dumps(expanded_subject)},
+{
+  "artist_name": "<artist name>",
+  "subject": "<subject>",
+  "expanded_subject": "<expanded subject string or null>",
   "total_layers": <int>,
   "layers": [
-    {{
+    {
       "layer_number": 1,
       "name": "...",
       "description": "...",
@@ -319,15 +308,36 @@ def _build_planning_prompt(
       "techniques": "...",
       "shapes": "...",
       "highlights": "..."
-    }}
+    }
     // ... more layers
   ],
   "overall_notes": "..."
-}}
+}
 
 IMPORTANT: Respond ONLY with valid JSON. Do not include markdown formatting."""
 
-        return prompt
+        expanded_section = f"\nExpanded description: {expanded_subject}" if expanded_subject else ""
+
+        user_prompt = f"""Create a step-by-step layer plan for painting this image.
+
+Artist: {artist_name}
+Subject: {subject}{expanded_section}
+
+Available stroke types: {", ".join(stroke_types)}
+Canvas dimensions: {CANVAS_WIDTH}x{CANVAS_HEIGHT} pixels
+Minimum iterations per layer: {self.min_strokes_per_layer} (each iteration applies up to
+{DEFAULT_STROKES_PER_QUERY} strokes, so each layer will contain at most
+{self.min_strokes_per_layer * DEFAULT_STROKES_PER_QUERY} individual strokes before it can be
+marked complete)
+
+Consider {artist_name}'s characteristic:
+- Colour choices and palette
+- Brushwork and mark-making style
+- Compositional approach
+- Treatment of light and shadow
+- Overall mood and atmosphere"""
+
+        return system_prompt, user_prompt
 
     def _parse_plan_response(
         self, response_text: str, allowed_stroke_types: list[str] | None = None