diff --git a/.mock/definition/empathic-voice/chat.yml b/.mock/definition/empathic-voice/chat.yml
index 6d9d6ac5..ee5e412a 100644
--- a/.mock/definition/empathic-voice/chat.yml
+++ b/.mock/definition/empathic-voice/chat.yml
@@ -113,12 +113,12 @@ channel:
       For more details, refer to the [Authentication Strategies
       Guide](/docs/introduction/api-key#authentication-strategies).
   messages:
-    publish:
-      origin: client
-      body: PublishEvent
     subscribe:
      origin: server
      body: SubscribeEvent
+    publish:
+      origin: client
+      body: PublishEvent
   examples:
     - messages:
         - type: publish
@@ -131,19 +131,6 @@ channel:
 imports:
   root: __package__.yml
 types:
-  PublishEvent:
-    discriminated: false
-    union:
-      - type: root.AudioInput
-      - type: root.SessionSettings
-      - type: root.UserInput
-      - type: root.AssistantInput
-      - type: root.ToolResponseMessage
-      - type: root.ToolErrorMessage
-      - type: root.PauseAssistantMessage
-      - type: root.ResumeAssistantMessage
-    source:
-      openapi: evi-asyncapi.json
   SubscribeEvent:
     discriminated: false
     union:
@@ -159,3 +146,16 @@ types:
       - type: root.ToolErrorMessage
     source:
       openapi: evi-asyncapi.json
+  PublishEvent:
+    discriminated: false
+    union:
+      - type: root.AudioInput
+      - type: root.SessionSettings
+      - type: root.UserInput
+      - type: root.AssistantInput
+      - type: root.ToolResponseMessage
+      - type: root.ToolErrorMessage
+      - type: root.PauseAssistantMessage
+      - type: root.ResumeAssistantMessage
+    source:
+      openapi: evi-asyncapi.json
diff --git a/.mock/definition/empathic-voice/configs.yml b/.mock/definition/empathic-voice/configs.yml
index 04623260..430d92d3 100644
--- a/.mock/definition/empathic-voice/configs.yml
+++ b/.mock/definition/empathic-voice/configs.yml
@@ -658,6 +658,7 @@ service:
       response:
         docs: Success
         type: text
+        status-code: 200
       errors:
         - root.BadRequestError
       examples:
diff --git a/.mock/definition/empathic-voice/prompts.yml b/.mock/definition/empathic-voice/prompts.yml
index ad364ad5..9b00c47d 100644
--- a/.mock/definition/empathic-voice/prompts.yml
+++ b/.mock/definition/empathic-voice/prompts.yml
@@ -377,6 +377,7 @@ service:
       response:
         docs: Success
         type: text
+        status-code: 200
       errors:
         - root.BadRequestError
       examples:
diff --git a/.mock/definition/empathic-voice/tools.yml b/.mock/definition/empathic-voice/tools.yml
index 1a0964bf..c926e230 100644
--- a/.mock/definition/empathic-voice/tools.yml
+++ b/.mock/definition/empathic-voice/tools.yml
@@ -427,6 +427,8 @@ service:
         content-type: application/json
       response:
         docs: Success
+        type: text
+        status-code: 200
       errors:
         - root.BadRequestError
       examples:
diff --git a/.mock/definition/expression-measurement/stream/stream.yml b/.mock/definition/expression-measurement/stream/stream.yml
index bdb6f041..1ed74b71 100644
--- a/.mock/definition/expression-measurement/stream/stream.yml
+++ b/.mock/definition/expression-measurement/stream/stream.yml
@@ -7,14 +7,14 @@ channel:
       type: string
       name: humeApiKey
   messages:
+    subscribe:
+      origin: server
+      body: SubscribeEvent
     publish:
       origin: client
       body:
         type: StreamModelsEndpointPayload
         docs: Models endpoint payload
-    subscribe:
-      origin: server
-      body: SubscribeEvent
   examples:
     - messages:
         - type: publish
@@ -22,209 +22,6 @@ channel:
         - type: subscribe
           body: {}
 types:
-  StreamFace:
-    docs: >
-      Configuration for the facial expression emotion model.
-
-
-      Note: Using the `reset_stream` parameter does not have any effect on face
-      identification. A single face identifier cache is maintained over a full
-      session whether `reset_stream` is used or not.
-    properties:
-      facs:
-        type: optional<map<string, unknown>>
-        docs: >-
-          Configuration for FACS predictions. If missing or null, no FACS
-          predictions will be generated.
-      descriptions:
-        type: optional<map<string, unknown>>
-        docs: >-
-          Configuration for Descriptions predictions. If missing or null, no
-          Descriptions predictions will be generated.
-      identify_faces:
-        type: optional<boolean>
-        docs: >
-          Whether to return identifiers for faces across frames. If true, unique
-          identifiers will be assigned to face bounding boxes to differentiate
-          different faces. If false, all faces will be tagged with an "unknown"
-          ID.
-        default: false
-      fps_pred:
-        type: optional<double>
-        docs: >
-          Number of frames per second to process. Other frames will be omitted
-          from the response.
-        default: 3
-      prob_threshold:
-        type: optional<double>
-        docs: >
-          Face detection probability threshold. Faces detected with a
-          probability less than this threshold will be omitted from the
-          response.
-        default: 3
-      min_face_size:
-        type: optional<double>
-        docs: >
-          Minimum bounding box side length in pixels to treat as a face. Faces
-          detected with a bounding box side length in pixels less than this
-          threshold will be omitted from the response.
-        default: 3
-    source:
-      openapi: streaming-asyncapi.yml
-    inline: true
-  StreamLanguage:
-    docs: Configuration for the language emotion model.
-    properties:
-      sentiment:
-        type: optional<map<string, unknown>>
-        docs: >-
-          Configuration for sentiment predictions. If missing or null, no
-          sentiment predictions will be generated.
-      toxicity:
-        type: optional<map<string, unknown>>
-        docs: >-
-          Configuration for toxicity predictions. If missing or null, no
-          toxicity predictions will be generated.
-      granularity:
-        type: optional<string>
-        docs: >-
-          The granularity at which to generate predictions. Values are `word`,
-          `sentence`, `utterance`, or `passage`. To get a single prediction for
-          the entire text of your streaming payload use `passage`. Default value
-          is `word`.
-    source:
-      openapi: streaming-asyncapi.yml
-    inline: true
-  Config:
-    docs: >
-      Configuration used to specify which models should be used and with what
-      settings.
-    properties:
-      burst:
-        type: optional<map<string, unknown>>
-        docs: |
-          Configuration for the vocal burst emotion model.
-
-          Note: Model configuration is not currently available in streaming.
-
-          Please use the default configuration by passing an empty object `{}`.
-      face:
-        type: optional<StreamFace>
-        docs: >
-          Configuration for the facial expression emotion model.
-
-
-          Note: Using the `reset_stream` parameter does not have any effect on
-          face identification. A single face identifier cache is maintained over
-          a full session whether `reset_stream` is used or not.
-      facemesh:
-        type: optional<map<string, unknown>>
-        docs: |
-          Configuration for the facemesh emotion model.
-
-          Note: Model configuration is not currently available in streaming.
-
-          Please use the default configuration by passing an empty object `{}`.
-      language:
-        type: optional<StreamLanguage>
-        docs: Configuration for the language emotion model.
-      prosody:
-        type: optional<map<string, unknown>>
-        docs: |
-          Configuration for the speech prosody emotion model.
-
-          Note: Model configuration is not currently available in streaming.
-
-          Please use the default configuration by passing an empty object `{}`.
-    source:
-      openapi: streaming-asyncapi.yml
-    inline: true
-  StreamModelsEndpointPayload:
-    docs: Models endpoint payload
-    properties:
-      data:
-        type: optional<string>
-      models:
-        type: optional<Config>
-        docs: >
-          Configuration used to specify which models should be used and with
-          what settings.
-      stream_window_ms:
-        type: optional<double>
-        docs: >
-          Length in milliseconds of streaming sliding window.
-
-
-          Extending the length of this window will prepend media context from
-          past payloads into the current payload.
-
-
-          For example, if on the first payload you send 500ms of data and on the
-          second payload you send an additional 500ms of data, a window of at
-          least 1000ms will allow the model to process all 1000ms of stream
-          data.
-
-
-          A window of 600ms would append the full 500ms of the second payload to
-          the last 100ms of the first payload.
-
-
-          Note: This feature is currently only supported for audio data and
-          audio models. For other file types and models this parameter will be
-          ignored.
-        default: 5000
-        validation:
-          min: 500
-          max: 10000
-      reset_stream:
-        type: optional<boolean>
-        docs: >
-          Whether to reset the streaming sliding window before processing the
-          current payload.
-
-
-          If this parameter is set to `true` then past context will be deleted
-          before processing the current payload.
-
-
-          Use reset_stream when one audio file is done being processed and you
-          do not want context to leak across files.
-        default: false
-      raw_text:
-        type: optional<boolean>
-        docs: >
-          Set to `true` to enable the data parameter to be parsed as raw text
-          rather than base64 encoded bytes.
-
-          This parameter is useful if you want to send text to be processed by
-          the language model, but it cannot be used with other file types like
-          audio, image, or video.
-        default: false
-      job_details:
-        type: optional<boolean>
-        docs: >
-          Set to `true` to get details about the job.
-
-
-          This parameter can be set in the same payload as data or it can be set
-          without data and models configuration to get the job details between
-          payloads.
-
-
-          This parameter is useful to get the unique job ID.
-        default: false
-      payload_id:
-        type: optional<string>
-        docs: >
-          Pass an arbitrary string as the payload ID and get it back at the top
-          level of the socket response.
-
-
-          This can be useful if you have multiple requests running
-          asynchronously and want to disambiguate responses as they are
-          received.
-    source:
-      openapi: streaming-asyncapi.yml
   StreamModelPredictionsJobDetails:
     docs: >
       If the job_details flag was set in the request, details about the current
@@ -434,5 +231,208 @@ types:
       docs: Warning message
     source:
       openapi: streaming-asyncapi.yml
+  StreamFace:
+    docs: >
+      Configuration for the facial expression emotion model.
+
+
+      Note: Using the `reset_stream` parameter does not have any effect on face
+      identification. A single face identifier cache is maintained over a full
+      session whether `reset_stream` is used or not.
+    properties:
+      facs:
+        type: optional<map<string, unknown>>
+        docs: >-
+          Configuration for FACS predictions. If missing or null, no FACS
+          predictions will be generated.
+      descriptions:
+        type: optional<map<string, unknown>>
+        docs: >-
+          Configuration for Descriptions predictions. If missing or null, no
+          Descriptions predictions will be generated.
+      identify_faces:
+        type: optional<boolean>
+        docs: >
+          Whether to return identifiers for faces across frames. If true, unique
+          identifiers will be assigned to face bounding boxes to differentiate
+          different faces. If false, all faces will be tagged with an "unknown"
+          ID.
+        default: false
+      fps_pred:
+        type: optional<double>
+        docs: >
+          Number of frames per second to process. Other frames will be omitted
+          from the response.
+        default: 3
+      prob_threshold:
+        type: optional<double>
+        docs: >
+          Face detection probability threshold. Faces detected with a
+          probability less than this threshold will be omitted from the
+          response.
+        default: 3
+      min_face_size:
+        type: optional<double>
+        docs: >
+          Minimum bounding box side length in pixels to treat as a face. Faces
+          detected with a bounding box side length in pixels less than this
+          threshold will be omitted from the response.
+        default: 3
+    source:
+      openapi: streaming-asyncapi.yml
+    inline: true
+  StreamLanguage:
+    docs: Configuration for the language emotion model.
+    properties:
+      sentiment:
+        type: optional<map<string, unknown>>
+        docs: >-
+          Configuration for sentiment predictions. If missing or null, no
+          sentiment predictions will be generated.
+      toxicity:
+        type: optional<map<string, unknown>>
+        docs: >-
+          Configuration for toxicity predictions. If missing or null, no
+          toxicity predictions will be generated.
+      granularity:
+        type: optional<string>
+        docs: >-
+          The granularity at which to generate predictions. Values are `word`,
+          `sentence`, `utterance`, or `passage`. To get a single prediction for
+          the entire text of your streaming payload use `passage`. Default value
+          is `word`.
+    source:
+      openapi: streaming-asyncapi.yml
+    inline: true
+  Config:
+    docs: >
+      Configuration used to specify which models should be used and with what
+      settings.
+    properties:
+      burst:
+        type: optional<map<string, unknown>>
+        docs: |
+          Configuration for the vocal burst emotion model.
+
+          Note: Model configuration is not currently available in streaming.
+
+          Please use the default configuration by passing an empty object `{}`.
+      face:
+        type: optional<StreamFace>
+        docs: >
+          Configuration for the facial expression emotion model.
+
+
+          Note: Using the `reset_stream` parameter does not have any effect on
+          face identification. A single face identifier cache is maintained over
+          a full session whether `reset_stream` is used or not.
+      facemesh:
+        type: optional<map<string, unknown>>
+        docs: |
+          Configuration for the facemesh emotion model.
+
+          Note: Model configuration is not currently available in streaming.
+
+          Please use the default configuration by passing an empty object `{}`.
+      language:
+        type: optional<StreamLanguage>
+        docs: Configuration for the language emotion model.
+      prosody:
+        type: optional<map<string, unknown>>
+        docs: |
+          Configuration for the speech prosody emotion model.
+
+          Note: Model configuration is not currently available in streaming.
+
+          Please use the default configuration by passing an empty object `{}`.
+    source:
+      openapi: streaming-asyncapi.yml
+    inline: true
+  StreamModelsEndpointPayload:
+    docs: Models endpoint payload
+    properties:
+      data:
+        type: optional<string>
+      models:
+        type: optional<Config>
+        docs: >
+          Configuration used to specify which models should be used and with
+          what settings.
+      stream_window_ms:
+        type: optional<double>
+        docs: >
+          Length in milliseconds of streaming sliding window.
+
+
+          Extending the length of this window will prepend media context from
+          past payloads into the current payload.
+
+
+          For example, if on the first payload you send 500ms of data and on the
+          second payload you send an additional 500ms of data, a window of at
+          least 1000ms will allow the model to process all 1000ms of stream
+          data.
+
+
+          A window of 600ms would append the full 500ms of the second payload to
+          the last 100ms of the first payload.
+
+
+          Note: This feature is currently only supported for audio data and
+          audio models. For other file types and models this parameter will be
+          ignored.
+        default: 5000
+        validation:
+          min: 500
+          max: 10000
+      reset_stream:
+        type: optional<boolean>
+        docs: >
+          Whether to reset the streaming sliding window before processing the
+          current payload.
+
+
+          If this parameter is set to `true` then past context will be deleted
+          before processing the current payload.
+
+
+          Use reset_stream when one audio file is done being processed and you
+          do not want context to leak across files.
+        default: false
+      raw_text:
+        type: optional<boolean>
+        docs: >
+          Set to `true` to enable the data parameter to be parsed as raw text
+          rather than base64 encoded bytes.
+
+          This parameter is useful if you want to send text to be processed by
+          the language model, but it cannot be used with other file types like
+          audio, image, or video.
+        default: false
+      job_details:
+        type: optional<boolean>
+        docs: >
+          Set to `true` to get details about the job.
+
+
+          This parameter can be set in the same payload as data or it can be set
+          without data and models configuration to get the job details between
+          payloads.
+
+
+          This parameter is useful to get the unique job ID.
+        default: false
+      payload_id:
+        type: optional<string>
+        docs: >
+          Pass an arbitrary string as the payload ID and get it back at the top
+          level of the socket response.
+
+
+          This can be useful if you have multiple requests running
+          asynchronously and want to disambiguate responses as they are
+          received.
+    source:
+      openapi: streaming-asyncapi.yml
 imports:
   streamRoot: __package__.yml
diff --git a/.mock/definition/tts/__package__.yml b/.mock/definition/tts/__package__.yml
index 5c7af469..e094b295 100644
--- a/.mock/definition/tts/__package__.yml
+++ b/.mock/definition/tts/__package__.yml
@@ -73,11 +73,12 @@ service:
                 audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
                 snippets:
                   - - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
+                      generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
                       id: 37b1b1b1-1b1b-1b1b-1b1b-1b1b1b1b1b1b
                       text: >-
                         Beauty is no quality in things themselves: It exists
                         merely in the mind which contemplates them.
-                      generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
+                      utterance_index: 0
                 request_id: 66e01f90-4501-4aa0-bbaf-74f45dc15aa725906
   synthesize-file:
     path: /v0/tts/file
@@ -260,9 +261,10 @@ types:
         multiple requests.
       snippets:
         docs: >-
-          A list of speech segments, each containing a portion of the original
-          text optimized for natural speech delivery. These segments represent
-          the input text divided into more natural-sounding units.
+          A list of snippet groups where each group corresponds to an utterance
+          in the request. Each group contains segmented snippets that represent
+          the original utterance divided into more natural-sounding units
+          optimized for speech delivery.
         type: list<list<Snippet>>
     source:
       openapi: tts-openapi.yml
diff --git a/.mock/definition/tts/voices.yml b/.mock/definition/tts/voices.yml
index 8718b05c..e7a2b496 100644
--- a/.mock/definition/tts/voices.yml
+++ b/.mock/definition/tts/voices.yml
@@ -44,7 +44,8 @@ service:
               For example, if `page_size` is set to 10, each page will include up
               to 10 items. Defaults to 10.
-          ascending_order: optional<boolean>
+          ascending_order:
+            type: optional<boolean>
       response:
         docs: Success
         type: root.ReturnPagedVoices
@@ -59,8 +60,10 @@ service:
           voices_page:
             - name: David Hume
               id: c42352c0-4566-455d-b180-0f654b65b525
+              provider: CUSTOM_VOICE
             - name: Goliath Hume
               id: d87352b0-26a3-4b11-081b-d157a5674d19
+              provider: CUSTOM_VOICE
     create:
       path: /v0/tts/voices
       method: POST
@@ -100,6 +103,7 @@ service:
         body:
           name: David Hume
          id: c42352c0-4566-455d-b180-0f654b65b525
+          provider: CUSTOM_VOICE
     delete:
       path: /v0/tts/voices
      method: DELETE
diff --git a/.mock/fern.config.json b/.mock/fern.config.json
index 9d619049..a8152b41 100644
--- a/.mock/fern.config.json
+++ b/.mock/fern.config.json
@@ -1,4 +1,4 @@
 {
   "organization" : "hume",
-  "version" : "0.57.14"
+  "version" : "0.56.23"
 }
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 36ae50e1..1e56dcda 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "hume"
-version = "0.7.13"
+version = "0.7.14"
 description = "A Python SDK for Hume AI"
 readme = "README.md"
 authors = []
diff --git a/src/hume/tts/types/return_generation.py b/src/hume/tts/types/return_generation.py
index 27672325..13af43e1 100644
--- a/src/hume/tts/types/return_generation.py
+++ b/src/hume/tts/types/return_generation.py
@@ -32,7 +32,7 @@ class ReturnGeneration(UniversalBaseModel):
     snippets: typing.List[typing.List[Snippet]] = pydantic.Field()
     """
-    A list of speech segments, each containing a portion of the original text optimized for natural speech delivery. These segments represent the input text divided into more natural-sounding units.
+    A list of snippet groups where each group corresponds to an utterance in the request. Each group contains segmented snippets that represent the original utterance divided into more natural-sounding units optimized for speech delivery.
""" if IS_PYDANTIC_V2: diff --git a/tests/tts/test_root.py b/tests/tts/test_root.py index c95a8215..c70eb237 100644 --- a/tests/tts/test_root.py +++ b/tests/tts/test_root.py @@ -22,9 +22,10 @@ async def test_synthesize_json(client: HumeClient, async_client: AsyncHumeClient [ { "audio": "//PExAA0DDYRvkpNfhv3JI5JZ...etc.", + "generation_id": "795c949a-1510-4a80-9646-7d0863b023ab", "id": "37b1b1b1-1b1b-1b1b-1b1b-1b1b1b1b1b1b", "text": "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.", - "generation_id": "795c949a-1510-4a80-9646-7d0863b023ab", + "utterance_index": 0, } ] ], @@ -44,7 +45,20 @@ async def test_synthesize_json(client: HumeClient, async_client: AsyncHumeClient "audio": None, "snippets": ( "list", - {0: ("list", {0: {"audio": None, "id": None, "text": None, "generation_id": None}})}, + { + 0: ( + "list", + { + 0: { + "audio": None, + "generation_id": None, + "id": None, + "text": None, + "utterance_index": "integer", + } + }, + ) + }, ), } }, diff --git a/tests/tts/test_voices.py b/tests/tts/test_voices.py index bdb8f88c..564a3007 100644 --- a/tests/tts/test_voices.py +++ b/tests/tts/test_voices.py @@ -9,12 +9,15 @@ async def test_list_(client: HumeClient, async_client: AsyncHumeClient) -> None: expected_response: typing.Any = { "voices_page": [ - {"name": "David Hume", "id": "c42352c0-4566-455d-b180-0f654b65b525"}, - {"name": "Goliath Hume", "id": "d87352b0-26a3-4b11-081b-d157a5674d19"}, + {"name": "David Hume", "id": "c42352c0-4566-455d-b180-0f654b65b525", "provider": "CUSTOM_VOICE"}, + {"name": "Goliath Hume", "id": "d87352b0-26a3-4b11-081b-d157a5674d19", "provider": "CUSTOM_VOICE"}, ] } expected_types: typing.Any = { - "voices_page": ("list", {0: {"name": None, "id": None}, 1: {"name": None, "id": None}}) + "voices_page": ( + "list", + {0: {"name": None, "id": None, "provider": None}, 1: {"name": None, "id": None, "provider": None}}, + ) } response = client.tts.voices.list(provider="CUSTOM_VOICE") validate_response(response, expected_response, expected_types) @@ -24,8 +27,12 @@ async def test_list_(client: HumeClient, async_client: AsyncHumeClient) -> None: async def test_create(client: HumeClient, async_client: AsyncHumeClient) -> None: - expected_response: typing.Any = {"name": "David Hume", "id": "c42352c0-4566-455d-b180-0f654b65b525"} - expected_types: typing.Any = {"name": None, "id": None} + expected_response: typing.Any = { + "name": "David Hume", + "id": "c42352c0-4566-455d-b180-0f654b65b525", + "provider": "CUSTOM_VOICE", + } + expected_types: typing.Any = {"name": None, "id": None, "provider": None} response = client.tts.voices.create(generation_id="795c949a-1510-4a80-9646-7d0863b023ab", name="David Hume") validate_response(response, expected_response, expected_types)