feat: add generate function to ai voice generator (#102)

davidhu2000 · web-flow · commit 5929172f5cfb · 2025-09-20T12:05:03.000-07:00
diff --git a/.sdk.json b/.sdk.json
@@ -1,5 +1,5 @@
 {
-  "id": "719cfc51-c189-4e81-9bf6-d25195f86e4f",
+  "id": "2d9e6849-88f7-4a65-9c0f-1532d0c32953",
   "tracked_paths": [
     {
       "editable": true,
diff --git a/README.md b/README.md
@@ -211,6 +211,7 @@ download_urls = result.downloads
 ### [v1.ai_voice_generator](magic_hour/resources/v1/ai_voice_generator/README.md)
 
 * [create](magic_hour/resources/v1/ai_voice_generator/README.md#create) - AI Voice Generator
+* [generate](magic_hour/resources/v1/ai_voice_generator/README.md#generate) - AI Voice Generator Generate Workflow
 
 ### [v1.animation](magic_hour/resources/v1/animation/README.md)
 
diff --git a/magic_hour/resources/v1/ai_voice_generator/README.md b/magic_hour/resources/v1/ai_voice_generator/README.md
@@ -2,6 +2,62 @@
 
 ## Module Functions
 
+<!-- CUSTOM DOCS START -->
+
+### AI Voice Generator Generate Workflow <a name="generate"></a>
+
+The workflow performs the following actions:
+
+1. upload local assets to Magic Hour storage. So you can pass in a local path instead of having to upload files yourself
+2. trigger a generation
+3. poll for a completion status. This is configurable
+4. if success, download the output to local directory
+
+> [!TIP]
+> This is the recommended way to use the SDK unless you have specific needs where it is necessary to split up the actions.
+
+#### Parameters
+
+In addition to the parameters listed in the `.create` section below, `.generate` introduces 3 new parameters:
+
+- `wait_for_completion` (bool, default True): Whether to wait for the project to complete.
+- `download_outputs` (bool, default True): Whether to download the generated files
+- `download_directory` (str, optional): Directory to save downloaded files (defaults to current directory)
+
+#### Synchronous Client
+
+```python
+from magic_hour import Client
+from os import getenv
+
+client = Client(token=getenv("API_TOKEN"))
+res = client.v1.ai_voice_generator.generate(
+    style={"prompt": "Hello, how are you?", "voice_name": "Elon Musk"},
+    name="Voice Generator audio",
+    wait_for_completion=True,
+    download_outputs=True,
+    download_directory="outputs"
+)
+```
+
+#### Asynchronous Client
+
+```python
+from magic_hour import AsyncClient
+from os import getenv
+
+client = AsyncClient(token=getenv("API_TOKEN"))
+res = await client.v1.ai_voice_generator.generate(
+    style={"prompt": "Hello, how are you?", "voice_name": "Elon Musk"},
+    name="Voice Generator audio",
+    wait_for_completion=True,
+    download_outputs=True,
+    download_directory="outputs"
+)
+```
+
+<!-- CUSTOM DOCS END -->
+
 ### AI Voice Generator <a name="create"></a>
 
 Generate speech from text. Each character costs 0.05 credits. The cost is rounded up to the nearest whole number.
diff --git a/magic_hour/resources/v1/ai_voice_generator/client.py b/magic_hour/resources/v1/ai_voice_generator/client.py
@@ -1,5 +1,10 @@
 import typing
 
+from magic_hour.helpers.logger import get_sdk_logger
+from magic_hour.resources.v1.audio_projects.client import (
+    AsyncAudioProjectsClient,
+    AudioProjectsClient,
+)
 from magic_hour.types import models, params
 from make_api_request import (
     AsyncBaseClient,
@@ -11,10 +16,70 @@
 )
 
 
+logger = get_sdk_logger(__name__)
+
+
 class AiVoiceGeneratorClient:
     def __init__(self, *, base_client: SyncBaseClient):
         self._base_client = base_client
 
+    def generate(
+        self,
+        *,
+        style: params.V1AiVoiceGeneratorCreateBodyStyle,
+        name: typing.Union[
+            typing.Optional[str], type_utils.NotGiven
+        ] = type_utils.NOT_GIVEN,
+        wait_for_completion: bool = True,
+        download_outputs: bool = True,
+        download_directory: typing.Optional[str] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ):
+        """
+        Create an AI voice generation via `create`, then check its result via the audio projects client.
+
+        Generate speech from text. Each character costs 0.05 credits. The cost is rounded up to the nearest whole number.
+
+        Args:
+            style: The content used to generate speech.
+            name: The name of audio. This value is mainly used for your own identification of the audio.
+            wait_for_completion: Whether to wait for the audio project to complete
+            download_outputs: Whether to download the outputs
+            download_directory: The directory to download the outputs to. If not provided, the outputs will be downloaded to the current working directory
+            request_options: Additional options to customize the HTTP request. Forwarded to `create` only, not to `check_result`
+
+        Returns:
+            V1AudioProjectsGetResponseWithDownloads: The result of `AudioProjectsClient.check_result` for the created audio project, with the downloaded paths if `download_outputs` is True.
+
+        Examples:
+        ```py
+        response = client.v1.ai_voice_generator.generate(
+            style={"prompt": "Hello, how are you?", "voice_name": "Elon Musk"},
+            name="Generated Voice",
+            wait_for_completion=True,
+            download_outputs=True,
+            download_directory="outputs/",
+        )
+        ```
+        """
+
+        create_response = self.create(
+            style=style,
+            name=name,
+            request_options=request_options,
+        )
+        logger.info(f"AI Voice Generator response: {create_response}")
+        # Hand the new project's id and the wait/download options to the audio projects client.
+        audio_projects_client = AudioProjectsClient(base_client=self._base_client)
+        response = audio_projects_client.check_result(
+            id=create_response.id,
+            wait_for_completion=wait_for_completion,
+            download_outputs=download_outputs,
+            download_directory=download_directory,
+        )
+
+        return response
+
     def create(
         self,
         *,
@@ -69,6 +134,63 @@ class AsyncAiVoiceGeneratorClient:
     def __init__(self, *, base_client: AsyncBaseClient):
         self._base_client = base_client
 
+    async def generate(
+        self,
+        *,
+        style: params.V1AiVoiceGeneratorCreateBodyStyle,
+        name: typing.Union[
+            typing.Optional[str], type_utils.NotGiven
+        ] = type_utils.NOT_GIVEN,
+        wait_for_completion: bool = True,
+        download_outputs: bool = True,
+        download_directory: typing.Optional[str] = None,
+        request_options: typing.Optional[RequestOptions] = None,
+    ):
+        """
+        Create an AI voice generation via `create`, then check its result via the async audio projects client.
+
+        Generate speech from text. Each character costs 0.05 credits. The cost is rounded up to the nearest whole number.
+
+        Args:
+            style: The content used to generate speech.
+            name: The name of audio. This value is mainly used for your own identification of the audio.
+            wait_for_completion: Whether to wait for the audio project to complete
+            download_outputs: Whether to download the outputs
+            download_directory: The directory to download the outputs to. If not provided, the outputs will be downloaded to the current working directory
+            request_options: Additional options to customize the HTTP request. Forwarded to `create` only, not to `check_result`
+
+        Returns:
+            V1AudioProjectsGetResponseWithDownloads: The result of `AsyncAudioProjectsClient.check_result` for the created audio project, with the downloaded paths if `download_outputs` is True.
+
+        Examples:
+        ```py
+        response = await client.v1.ai_voice_generator.generate(
+            style={"prompt": "Hello, how are you?", "voice_name": "Elon Musk"},
+            name="Generated Voice",
+            wait_for_completion=True,
+            download_outputs=True,
+            download_directory="outputs/",
+        )
+        ```
+        """
+
+        create_response = await self.create(
+            style=style,
+            name=name,
+            request_options=request_options,
+        )
+        logger.info(f"AI Voice Generator response: {create_response}")
+        # Hand the new project's id and the wait/download options to the async audio projects client.
+        audio_projects_client = AsyncAudioProjectsClient(base_client=self._base_client)
+        response = await audio_projects_client.check_result(
+            id=create_response.id,
+            wait_for_completion=wait_for_completion,
+            download_outputs=download_outputs,
+            download_directory=download_directory,
+        )
+
+        return response
+
     async def create(
         self,
         *,
diff --git a/magic_hour/resources/v1/audio_projects/__init__.py b/magic_hour/resources/v1/audio_projects/__init__.py
@@ -1,12 +1,12 @@
 from .client import (
     AsyncAudioProjectsClient,
-    V1AudioProjectsGetResponseWithDownloads,
     AudioProjectsClient,
+    V1AudioProjectsGetResponseWithDownloads,
 )
 
 
 __all__ = [
     "AsyncAudioProjectsClient",
-    "V1AudioProjectsGetResponseWithDownloads",
     "AudioProjectsClient",
+    "V1AudioProjectsGetResponseWithDownloads",
 ]
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "magic_hour"
-version = "0.41.1"
+version = "0.42.0"
 description = "Python SDK for Magic Hour API"
 readme = "README.md"
 authors = []

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`{`
`2`		`- "id": "719cfc51-c189-4e81-9bf6-d25195f86e4f",`
	`2`	`+ "id": "2d9e6849-88f7-4a65-9c0f-1532d0c32953",`
`3`	`3`	`"tracked_paths": [`
`4`	`4`	`{`
`5`	`5`	`"editable": true,`
Original file line number	Diff line number	Diff line change
`@@ -1,12 +1,12 @@`
`1`	`1`	`from .client import (`
`2`	`2`	`AsyncAudioProjectsClient,`
`3`		`- V1AudioProjectsGetResponseWithDownloads,`
`4`	`3`	`AudioProjectsClient,`
	`4`	`+ V1AudioProjectsGetResponseWithDownloads,`
`5`	`5`	`)`
`6`	`6`
`7`	`7`
`8`	`8`	`__all__ = [`
`9`	`9`	`"AsyncAudioProjectsClient",`
`10`		`- "V1AudioProjectsGetResponseWithDownloads",`
`11`	`10`	`"AudioProjectsClient",`
	`11`	`+ "V1AudioProjectsGetResponseWithDownloads",`
`12`	`12`	`]`