Skip to content

Commit ce50e82

Browse files
authored
Merge pull request #127 from Aleph-Alpha/fix-optimized-prompt-parsing
Fix optimized prompt parsing
2 parents 04fcd22 + ba2cd25 commit ce50e82

File tree

4 files changed

+96
-26
lines changed

4 files changed

+96
-26
lines changed

Changelog.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
# Changelog
22

3-
# 3.2.4
3+
## next release
4+
5+
### Features
6+
7+
- Add `PromptTemplate` to support easy creation of multi-modal prompts
8+
9+
### Bugs
10+
11+
- Fix parsing of optimized prompt returned in a `CompletionResponse`
12+
13+
## 3.2.4
414

515
- Make sure `control_factor` gets passed along with `ExplanationRequest`
616

7-
# 3.2.3
17+
## 3.2.3
818

919
- Make sure model name gets passed along for async batch semantic embed
1020

11-
# 3.2.2
21+
## 3.2.2
1222

1323
- Re-release 3.2.1 again because of deployment issue
1424

15-
# 3.2.1
25+
## 3.2.1
1626

1727
- Add progress_bar option to batch semantic embedding API
1828
- Add batch_size option to batch semantic embedding API
@@ -35,7 +45,7 @@
3545

3646
### Bug fixes
3747

38-
- Add missing import of **PromptGranularity** in *__init__.py*.
48+
- Add missing import of **PromptGranularity** in `__init__.py`.
3949

4050
## 3.1.2
4151

aleph_alpha_client/completion.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,16 +239,19 @@ def from_json(json: Dict[str, Any]) -> "CompletionResult":
239239
class CompletionResponse(NamedTuple):
240240
model_version: str
241241
completions: Sequence[CompletionResult]
242-
optimized_prompt: Optional[Sequence[str]] = None
242+
optimized_prompt: Optional[Prompt] = None
243243

244244
@staticmethod
245245
def from_json(json: Dict[str, Any]) -> "CompletionResponse":
246+
optimized_prompt_json = json.get("optimized_prompt")
246247
return CompletionResponse(
247248
model_version=json["model_version"],
248249
completions=[
249250
CompletionResult.from_json(item) for item in json["completions"]
250251
],
251-
optimized_prompt=json.get("optimized_prompt"),
252+
optimized_prompt=Prompt.from_json(optimized_prompt_json)
253+
if optimized_prompt_json
254+
else None,
252255
)
253256

254257
def to_json(self) -> Mapping[str, Any]:

aleph_alpha_client/prompt.py

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ def to_json(self) -> Mapping[str, Any]:
9999
"controls": [c.to_json() for c in self.controls],
100100
}
101101

102+
@staticmethod
103+
def from_json(json: Mapping[str, Any]) -> "Tokens":
104+
return Tokens(tokens=json["data"], controls=[])
105+
102106
@staticmethod
103107
def from_token_ids(token_ids: Sequence[int]) -> "Tokens":
104108
return Tokens(token_ids, [])
@@ -173,20 +177,23 @@ def to_json(self) -> Mapping[str, Any]:
173177
"controls": [control.to_json() for control in self.controls],
174178
}
175179

180+
@staticmethod
181+
def from_json(json: Mapping[str, Any]) -> "Text":
182+
return Text.from_text(json["data"])
183+
176184
@staticmethod
177185
def from_text(text: str) -> "Text":
178186
return Text(text, [])
179187

180188

181-
class Cropping:
189+
class Cropping(NamedTuple):
182190
"""
183191
Describes a quadratic crop of the file.
184192
"""
185193

186-
def __init__(self, upper_left_x: int, upper_left_y: int, size: int):
187-
self.upper_left_x = upper_left_x
188-
self.upper_left_y = upper_left_y
189-
self.size = size
194+
upper_left_x: int
195+
upper_left_y: int
196+
size: int
190197

191198

192199
class ImageControl(NamedTuple):
@@ -254,7 +261,7 @@ def to_json(self) -> Mapping[str, Any]:
254261
return payload
255262

256263

257-
class Image:
264+
class Image(NamedTuple):
258265
"""
259266
An image send as part of a prompt to a model. The image is represented as
260267
base64.
@@ -272,17 +279,11 @@ class Image:
272279
>>> image = Image.from_url(url)
273280
"""
274281

275-
def __init__(
276-
self,
277-
base_64: str,
278-
cropping: Optional[Cropping],
279-
controls: Sequence[ImageControl],
280-
):
281-
# We use a base_64 reperesentation, because we want to embed the image
282-
# into a prompt send in JSON.
283-
self.base_64 = base_64
284-
self.cropping = cropping
285-
self.controls: Sequence[ImageControl] = controls
282+
# We use a base_64 representation, because we want to embed the image
283+
# into a prompt sent in JSON.
284+
base_64: str
285+
cropping: Optional[Cropping]
286+
controls: Sequence[ImageControl]
286287

287288
@classmethod
288289
def from_image_source(
@@ -357,7 +358,9 @@ def from_url_with_cropping(
357358
return cls.from_bytes(bytes, cropping=cropping, controls=controls or [])
358359

359360
@classmethod
360-
def from_file(cls, path: Union[str, Path], controls: Optional[Sequence[ImageControl]] = None):
361+
def from_file(
362+
cls, path: Union[str, Path], controls: Optional[Sequence[ImageControl]] = None
363+
):
361364
"""
362365
Load an image from disk and prepare it to be used in a prompt
363366
If they are not provided then the image will be [center cropped](https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.CenterCrop)
@@ -412,6 +415,10 @@ def to_json(self) -> Dict[str, Any]:
412415
"controls": [control.to_json() for control in self.controls],
413416
}
414417

418+
@staticmethod
419+
def from_json(json: Mapping[str, Any]) -> "Image":
420+
return Image(base_64=json["data"], cropping=None, controls=[])
421+
415422
def to_image(self) -> PILImage:
416423
return PIL.Image.open(io.BytesIO(base64.b64decode(self.base_64)))
417424

@@ -464,6 +471,29 @@ def from_tokens(
464471
def to_json(self) -> Sequence[Mapping[str, Any]]:
465472
return [_to_json(item) for item in self.items]
466473

474+
@staticmethod
475+
def from_json(items_json: Sequence[Mapping[str, Any]]) -> "Prompt":
476+
return Prompt(
477+
[
478+
item
479+
for item in (_prompt_item_from_json(item) for item in items_json)
480+
if item
481+
]
482+
)
483+
484+
485+
def _prompt_item_from_json(item: Mapping[str, Any]) -> Optional[PromptItem]:
486+
item_type = item.get("type")
487+
if item_type == "text":
488+
return Text.from_json(item)
489+
if item_type == "image":
490+
return Image.from_json(item)
491+
if item_type == "token_ids":
492+
return Tokens.from_json(item)
493+
# Skip item instead of raising an error to prevent failures of old clients
494+
# when item types are extended
495+
return None
496+
467497

468498
def _to_json(item: PromptItem) -> Mapping[str, Any]:
469499
if hasattr(item, "to_json"):

tests/test_complete.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
import pytest
22
from aleph_alpha_client import AsyncClient, Client
33
from aleph_alpha_client.completion import CompletionRequest
4-
from aleph_alpha_client.prompt import ControlTokenOverlap, Prompt, Text, TextControl
4+
from aleph_alpha_client.prompt import (
5+
ControlTokenOverlap,
6+
Image,
7+
Prompt,
8+
Text,
9+
TextControl,
10+
Tokens,
11+
)
512

613
from tests.common import (
714
sync_client,
815
async_client,
916
model_name,
17+
prompt_image,
1018
)
1119

1220

@@ -72,3 +80,22 @@ def test_complete_with_token_ids(sync_client: Client, model_name: str):
7280

7381
assert len(response.completions) == 1
7482
assert response.model_version is not None
83+
84+
85+
@pytest.mark.system_test
86+
def test_complete_with_optimized_prompt(
87+
sync_client: Client, model_name: str, prompt_image: Image
88+
):
89+
prompt_text = " Hello World! "
90+
prompt_tokens = Tokens.from_token_ids([1, 2])
91+
request = CompletionRequest(
92+
prompt=Prompt([Text.from_text(prompt_text), prompt_image, prompt_tokens]),
93+
maximum_tokens=5,
94+
)
95+
96+
response = sync_client.complete(request, model=model_name)
97+
98+
assert response.optimized_prompt
99+
assert response.optimized_prompt.items[0] == Text.from_text(prompt_text.strip())
100+
assert response.optimized_prompt.items[2] == prompt_tokens
101+
assert isinstance(response.optimized_prompt.items[1], Image)

0 commit comments

Comments
 (0)