Skip to content

Commit 922681a

Browse files
committed
Fixed tests and made adjustments
1 parent d121c18 commit 922681a

File tree

6 files changed

+167
-43
lines changed

6 files changed

+167
-43
lines changed

adalflow/adalflow/components/model_client/openai_client.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ class OpenAIClient(ModelClient):
110110
api_key (Optional[str], optional): OpenAI API key. Defaults to None.
111111
chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
112112
input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text".
113-
model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
114113
115114
Note:
116115
We suggest users not to use `response_format` to enforce output data type or `tools` and `tool_choice` in your model_kwargs when calling the API.
@@ -142,15 +141,13 @@ def __init__(
142141
api_key: Optional[str] = None,
143142
chat_completion_parser: Callable[[Completion], Any] = None,
144143
input_type: Literal["text", "messages"] = "text",
145-
model_type: ModelType = ModelType.LLM,
146144
):
147145
r"""It is recommended to set the OPENAI_API_KEY environment variable instead of passing it as an argument.
148146
149147
Args:
150148
api_key (Optional[str], optional): OpenAI API key. Defaults to None.
151149
chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion to a str. Defaults to None.
152150
input_type (Literal["text", "messages"], optional): The type of input to use. Defaults to "text".
153-
model_type (ModelType, optional): The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Defaults to ModelType.LLM.
154151
"""
155152
super().__init__()
156153
self._api_key = api_key
@@ -160,7 +157,6 @@ def __init__(
160157
chat_completion_parser or get_first_message_content
161158
)
162159
self._input_type = input_type
163-
self.model_type = model_type
164160

165161
def init_sync_client(self):
166162
api_key = self._api_key or os.getenv("OPENAI_API_KEY")
@@ -235,6 +231,7 @@ def convert_inputs_to_api_kwargs(
235231
self,
236232
input: Optional[Any] = None,
237233
model_kwargs: Dict = {},
234+
model_type: ModelType = ModelType.UNDEFINED, # Now required in practice
238235
) -> Dict:
239236
r"""
240237
Specify the API input type and output api_kwargs that will be used in _call and _acall methods.
@@ -259,20 +256,23 @@ def convert_inputs_to_api_kwargs(
259256
- mask: Path to the mask image
260257
For variations (DALL-E 2 only):
261258
- image: Path to the input image
259+
model_type: The type of model to use (EMBEDDER, LLM, or IMAGE_GENERATION). Required.
262260
263261
Returns:
264262
Dict: API-specific kwargs for the model call
265263
"""
264+
if model_type == ModelType.UNDEFINED:
265+
raise ValueError("model_type must be specified")
266266

267267
final_model_kwargs = model_kwargs.copy()
268-
if self.model_type == ModelType.EMBEDDER:
268+
if model_type == ModelType.EMBEDDER:
269269
if isinstance(input, str):
270270
input = [input]
271271
# convert input to input
272272
if not isinstance(input, Sequence):
273273
raise TypeError("input must be a sequence of text")
274274
final_model_kwargs["input"] = input
275-
elif self.model_type == ModelType.LLM:
275+
elif model_type == ModelType.LLM:
276276
# convert input to messages
277277
messages: List[Dict[str, str]] = []
278278
images = final_model_kwargs.pop("images", None)
@@ -317,7 +317,7 @@ def convert_inputs_to_api_kwargs(
317317
else:
318318
messages.append({"role": "system", "content": input})
319319
final_model_kwargs["messages"] = messages
320-
elif self.model_type == ModelType.IMAGE_GENERATION:
320+
elif model_type == ModelType.IMAGE_GENERATION:
321321
# For image generation, input is the prompt
322322
final_model_kwargs["prompt"] = input
323323
# Ensure model is specified
@@ -362,7 +362,7 @@ def convert_inputs_to_api_kwargs(
362362
else:
363363
raise ValueError(f"Invalid operation: {operation}")
364364
else:
365-
raise ValueError(f"model_type {self.model_type} is not supported")
365+
raise ValueError(f"model_type {model_type} is not supported")
366366
return final_model_kwargs
367367

368368
def parse_image_generation_response(self, response: List[Image]) -> GeneratorOutput:
@@ -379,11 +379,7 @@ def parse_image_generation_response(self, response: List[Image]) -> GeneratorOut
379379
)
380380
except Exception as e:
381381
log.error(f"Error parsing image generation response: {e}")
382-
return GeneratorOutput(
383-
data=None,
384-
error=str(e),
385-
raw_response=str(response)
386-
)
382+
return GeneratorOutput(data=None, error=str(e), raw_response=str(response))
387383

388384
@backoff.on_exception(
389385
backoff.expo,
@@ -400,6 +396,9 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE
400396
"""
401397
kwargs is the combined input and model_kwargs. Support streaming call.
402398
"""
399+
if model_type == ModelType.UNDEFINED:
400+
raise ValueError("model_type must be specified")
401+
403402
log.info(f"api_kwargs: {api_kwargs}")
404403
if model_type == ModelType.EMBEDDER:
405404
return self.sync_client.embeddings.create(**api_kwargs)
@@ -449,6 +448,9 @@ async def acall(
449448
"""
450449
kwargs is the combined input and model_kwargs
451450
"""
451+
if model_type == ModelType.UNDEFINED:
452+
raise ValueError("model_type must be specified")
453+
452454
if self.async_client is None:
453455
self.async_client = self.init_async_client()
454456
if model_type == ModelType.EMBEDDER:

adalflow/adalflow/core/generator.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ class Generator(GradComponent, CachedEngine, CallbackManager):
7373
name (Optional[str], optional): The name of the generator. Defaults to None.
7474
cache_path (Optional[str], optional): The path to save the cache. Defaults to None.
7575
use_cache (bool, optional): Whether to use cache. Defaults to False.
76+
model_type (ModelType, optional): The type of the model. Defaults to ModelType.LLM.
7677
"""
7778

7879
def __init__(
@@ -90,6 +91,7 @@ def __init__(
9091
# args for the cache
9192
cache_path: Optional[str] = None,
9293
use_cache: bool = False,
94+
model_type: ModelType = ModelType.LLM, # Add model_type parameter with default
9395
) -> None:
9496
r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables:
9597
- task_desc_str
@@ -122,7 +124,7 @@ def __init__(
122124
CallbackManager.__init__(self)
123125

124126
self.name = name or self.__class__.__name__
125-
self.model_type = model_client.model_type # Get model type from client
127+
self.model_type = model_type # Use the passed model_type instead of getting from client
126128

127129
self._init_prompt(template, prompt_kwargs)
128130

@@ -326,6 +328,7 @@ def _pre_call(self, prompt_kwargs: Dict, model_kwargs: Dict) -> Dict[str, Any]:
326328
api_kwargs = self.model_client.convert_inputs_to_api_kwargs(
327329
input=prompt_str,
328330
model_kwargs=composed_model_kwargs,
331+
model_type=self.model_type,
329332
)
330333
return api_kwargs
331334

adalflow/tests/test_generator.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from adalflow.core.model_client import ModelClient
1616
from adalflow.components.model_client.groq_client import GroqAPIClient
1717
from adalflow.tracing import GeneratorStateLogger
18+
from adalflow.core.types import ModelType
1819

1920

2021
class TestGenerator(IsolatedAsyncioTestCase):
@@ -32,7 +33,7 @@ def setUp(self):
3233
)
3334
self.mock_api_client = mock_api_client
3435

35-
self.generator = Generator(model_client=mock_api_client)
36+
self.generator = Generator(model_client=mock_api_client, model_type=ModelType.LLM)
3637
self.save_dir = "./tests/log"
3738
self.project_name = "TestGenerator"
3839
self.filename = "prompt_logger_test.json"
@@ -182,7 +183,7 @@ def test_groq_client_call(self, mock_call):
182183
template = "Hello, {{ input_str }}!"
183184

184185
# Initialize the Generator with the mocked client
185-
generator = Generator(model_client=self.client, template=template)
186+
generator = Generator(model_client=self.client, template=template, model_type=ModelType.LLM)
186187

187188
# Call the generator and get the output
188189
output = generator.call(prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs)
+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
Multimodal Client Tutorial
2+
==========================
3+
4+
This tutorial demonstrates how to use the OpenAI client for different types of tasks: text generation, vision analysis, and image generation.
5+
6+
Model Types
7+
-----------
8+
9+
The OpenAI client supports three types of operations:
10+
11+
1. Text/Chat Completion (``ModelType.LLM``)
12+
- Standard text generation
13+
- Vision analysis (with GPT-4V)
14+
2. Image Generation (``ModelType.IMAGE_GENERATION``)
15+
- DALL-E image generation
16+
3. Embeddings (``ModelType.EMBEDDER``)
17+
- Text embeddings
18+
19+
Basic Usage
20+
-----------
21+
22+
The model type is specified when creating a ``Generator`` instance:
23+
24+
.. code-block:: python
25+
26+
from adalflow.core import Generator
27+
from adalflow.components.model_client.openai_client import OpenAIClient
28+
from adalflow.core.types import ModelType
29+
30+
# Create the client
31+
client = OpenAIClient()
32+
33+
# For text generation
34+
gen = Generator(
35+
model_client=client,
36+
model_kwargs={"model": "gpt-4", "max_tokens": 100},
37+
model_type=ModelType.LLM # Specify LLM type
38+
)
39+
response = gen({"input_str": "Hello, world!"})
40+
41+
Vision Tasks
42+
------------
43+
44+
Vision tasks use ``ModelType.LLM`` since they are handled by GPT-4V:
45+
46+
.. code-block:: python
47+
48+
# Vision analysis
49+
vision_gen = Generator(
50+
model_client=client,
51+
model_kwargs={
52+
"model": "gpt-4o-mini",
53+
"images": "path/to/image.jpg",
54+
"max_tokens": 300,
55+
},
56+
model_type=ModelType.LLM # Vision uses LLM type
57+
)
58+
response = vision_gen({"input_str": "What do you see in this image?"})
59+
60+
Image Generation
61+
----------------
62+
63+
For DALL-E image generation, use ``ModelType.IMAGE_GENERATION``:
64+
65+
.. code-block:: python
66+
67+
# Image generation with DALL-E
68+
dalle_gen = Generator(
69+
model_client=client,
70+
model_kwargs={
71+
"model": "dall-e-3",
72+
"size": "1024x1024",
73+
"quality": "standard",
74+
"n": 1,
75+
},
76+
model_type=ModelType.IMAGE_GENERATION # Specify image generation type
77+
)
78+
response = dalle_gen({"input_str": "A cat playing with yarn"})
79+
80+
Backward Compatibility
81+
----------------------
82+
83+
For backward compatibility with existing code:
84+
85+
1. ``model_type`` defaults to ``ModelType.LLM`` if not specified
86+
2. Older models that only support text continue to work with ``ModelType.LLM``
87+
3. The OpenAI client handles the appropriate API endpoints based on the model type
88+
89+
Error Handling
90+
--------------
91+
92+
The client includes error handling for:
93+
94+
1. Invalid model types for operations
95+
2. Invalid image URLs or file paths
96+
3. Unsupported model capabilities
97+
4. API errors and rate limits
98+
99+
Complete Example
100+
----------------
101+
102+
See the complete example in ``tutorials/multimodal_client_testing_examples.py``, which demonstrates:
103+
104+
1. Basic text generation
105+
2. Vision analysis with image input
106+
3. DALL-E image generation
107+
4. Error handling for invalid inputs

tests/test_generator.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

0 commit comments

Comments
 (0)