@@ -87,10 +87,13 @@ def _log_request(
8787 payload (dict): Request body sent to the API
8888 response (requests.Response): The HTTP response received
8989 """
90- log_dir = Path (GLOBAL_PROMPT_LOG_DIR )
90+ now = datetime .now ()
91+ log_dir = (
92+ Path (GLOBAL_PROMPT_LOG_DIR ) / f"{ now .year :04d} " / f"{ now .month :02d} " / f"{ now .day :02d} "
93+ )
9194 log_dir .mkdir (parents = True , exist_ok = True )
9295
93- timestamp = datetime . now () .strftime ("%Y%m%d_%H%M%S_%f" )
96+ timestamp = now .strftime ("%Y%m%d_%H%M%S_%f" )
9497 log_path = log_dir / f"{ timestamp } -request.log"
9598
9699 # Mask sensitive header values
@@ -475,27 +478,71 @@ def _build_multi_image_payload(
475478 max_tokens : int ,
476479 * ,
477480 system_prompt : str ,
481+ cached_images : list [tuple [bytes , str ]] | None = None ,
478482 ) -> dict :
479483 """
480484 Build request payload for a multi-image multimodal query.
481485
482486 Each image is preceded by a text label block. A final text block
483487 containing the main prompt is appended after all image blocks.
484488
489+ When ``cached_images`` is provided, those (image_bytes, label) pairs are
490+ prepended to the user message before the dynamic ``images``. On
491+ Anthropic, ``cache_control: ephemeral`` is placed on the **last cached
492+ image block**, telling Anthropic to cache the entire prompt prefix up
493+ to and including that block (system prompt + all cached images). The
494+ dynamic per-request ``images`` and the final prompt text follow and are
495+ not cached.
496+
485497 Args:
486498 prompt (str): The main text prompt appended after all images
487- images (list[tuple[bytes, str]]): List of (image_bytes, label) pairs
499+ images (list[tuple[bytes, str]]): Dynamic per-request
500+ (image_bytes, label) pairs that change between calls.
488501 max_tokens (int): Maximum tokens in the response
489502 system_prompt (str): System-level instructions; provider-agnostic.
490503 Anthropic: placed in top-level ``system`` field as a content
491504 block array with block-level ``cache_control``. OpenAI-compatible
492505 providers: prepended as a ``role: system`` message.
506+ cached_images (list[tuple[bytes, str]] | None): Optional list of
507+ static (image_bytes, label) pairs that are byte-identical
508+ across requests. Prepended to the user message before the
509+ dynamic ``images``. On Anthropic, the **last** of these image
510+ blocks carries ``cache_control: ephemeral`` to mark the cache
511+ prefix boundary. On OpenAI-compatible providers, they are
512+ still prepended (no cache marker — caching is Anthropic-only).
493513
494514 Returns:
495515 dict: Request payload structure for the API
496516 """
497517 message_content : list [dict ] = []
498518
519+ # Prepend cached static images (with cache_control on the last image
520+ # block for Anthropic). Anthropic caches everything up to and
521+ # including the marked block, so the system prefix + all of these
522+ # images become the cache prefix.
523+ cached_list = cached_images or []
524+ for idx , (image_bytes , label ) in enumerate (cached_list ):
525+ base64_image = base64 .b64encode (image_bytes ).decode ("utf-8" )
526+ message_content .append ({"type" : "text" , "text" : label })
527+ is_last_cached = idx == len (cached_list ) - 1
528+ if self .provider == "anthropic" :
529+ image_block : dict = {
530+ "type" : "image" ,
531+ "source" : {
532+ "type" : "base64" ,
533+ "media_type" : "image/png" ,
534+ "data" : base64_image ,
535+ },
536+ }
537+ if is_last_cached :
538+ image_block ["cache_control" ] = {"type" : "ephemeral" }
539+ message_content .append (image_block )
540+ else :
541+ # OpenAI-compatible: use data URL format, no cache support
542+ data_url = f"data:image/png;base64,{ base64_image } "
543+ message_content .append ({"type" : "image_url" , "image_url" : {"url" : data_url }})
544+
545+ # Append dynamic per-request images
499546 for image_bytes , label in images :
500547 base64_image = base64 .b64encode (image_bytes ).decode ("utf-8" )
501548 # Label block before each image
@@ -523,6 +570,11 @@ def _build_multi_image_payload(
523570 payload ["model" ] = self .model
524571
525572 if self .provider == "anthropic" :
573+ # When cached_images are present, the cache breakpoint lives on
574+ # the last cached image block in the user message. The system
575+ # block keeps its own breakpoint for text-only / fallback cases,
576+ # which is harmless (Anthropic allows up to 4 breakpoints per
577+ # request and caches the longest matching prefix).
526578 payload ["system" ] = [
527579 {
528580 "type" : "text" ,
@@ -549,6 +601,7 @@ def query_multimodal_multi_image(
549601 max_tokens : int = MAX_TOKENS ,
550602 * ,
551603 system_prompt : str ,
604+ cached_images : list [tuple [bytes , str ]] | None = None ,
552605 ) -> str :
553606 """
554607 Send multiple labelled images and a text prompt to the VLM in one request.
@@ -559,13 +612,20 @@ def query_multimodal_multi_image(
559612
560613 Args:
561614 prompt (str): The main text prompt sent after all images
562- images (list[tuple[bytes, str]]): List of (image_bytes, label) pairs
615+ images (list[tuple[bytes, str]]): Dynamic per-request
616+ (image_bytes, label) pairs that change between calls.
563617 max_tokens (int): Maximum tokens in response (default from config)
564618 system_prompt (str): System-level instructions sent to the model.
565619 Required keyword-only argument. Anthropic: placed in the
566620 top-level ``system`` field as a content block with
567621 ``cache_control``. OpenAI-compatible providers: prepended as
568622 the first ``role: system`` message.
623+ cached_images (list[tuple[bytes, str]] | None): Optional list of
624+ static images to include in the cached prompt prefix.
625+ Prepended to the user message. On Anthropic, ``cache_control``
626+ is placed on the last cached image block to mark the cache
627+ boundary. Ignored as a cache marker on OpenAI-compatible
628+ providers (still prepended for content parity).
569629
570630 Returns:
571631 str: The VLM's response text
@@ -575,14 +635,22 @@ def query_multimodal_multi_image(
575635 ValueError: If image encoding fails
576636 requests.RequestException: For other HTTP errors
577637 """
638+ cached_count = len (cached_images ) if cached_images else 0
578639 total_bytes = sum (len (img_bytes ) for img_bytes , _ in images )
640+ cached_bytes = sum (len (img_bytes ) for img_bytes , _ in cached_images ) if cached_images else 0
579641 logger .info (
580- f"Sending multi-image query to VLM ({ len (images )} images, { total_bytes } total bytes)"
642+ f"Sending multi-image query to VLM "
643+ f"({ len (images )} dynamic images / { total_bytes } bytes; "
644+ f"{ cached_count } cached images / { cached_bytes } bytes)"
581645 )
582646
583647 try :
584648 payload = self ._build_multi_image_payload (
585- prompt , images , max_tokens , system_prompt = system_prompt
649+ prompt ,
650+ images ,
651+ max_tokens ,
652+ system_prompt = system_prompt ,
653+ cached_images = cached_images ,
586654 )
587655
588656 # Retry loop for rate limiting
@@ -624,6 +692,7 @@ def query_multimodal_multi_image(
624692 response .raise_for_status ()
625693
626694 response_data = response .json ()
695+ self .last_usage = response_data .get ("usage" )
627696 response_text : str = self ._extract_response_text (response_data )
628697
629698 logger .info (f"Received VLM response ({ len (response_text )} characters)" )
0 commit comments