From 34b4ad77e01d61c96f9586819af806bcc0408389 Mon Sep 17 00:00:00 2001
From: Darren Cohen <39422044+dargilco@users.noreply.github.com>
Date: Fri, 14 Feb 2025 11:21:07 -0800
Subject: [PATCH 1/4] Update version to beta 10

---
 sdk/ai/azure-ai-inference/CHANGELOG.md                   | 8 ++++++++
 sdk/ai/azure-ai-inference/azure/ai/inference/_version.py | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sdk/ai/azure-ai-inference/CHANGELOG.md b/sdk/ai/azure-ai-inference/CHANGELOG.md
index 2d32b28ae69c..958fa5d51d29 100644
--- a/sdk/ai/azure-ai-inference/CHANGELOG.md
+++ b/sdk/ai/azure-ai-inference/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Release History
 
+## 1.0.0b10 (Unreleased)
+
+### Features Added
+
+### Bugs Fixed
+
+### Breaking Changes
+
 ## 1.0.0b9 (2025-02-14)
 
 ### Features Added
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py
index b1c2836b6921..9ab0a006e0d0 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py
@@ -6,4 +6,4 @@
 # Changes may cause incorrect behavior and will be lost if the code is regenerated.
 # --------------------------------------------------------------------------
 
-VERSION = "1.0.0b9"
+VERSION = "1.0.0b10"

From aedffe70619daf4dba7a3c2d624b8e3e53712f68 Mon Sep 17 00:00:00 2001
From: Travis Angevine <trangevi@microsoft.com>
Date: Thu, 20 Feb 2025 10:37:39 -0800
Subject: [PATCH 2/4] Generation with new body parameter (#39803)

* Changes after generation

Signed-off-by: trangevi <trangevi@microsoft.com>

* pylint fix

Signed-off-by: trangevi <trangevi@microsoft.com>

---------

Signed-off-by: trangevi <trangevi@microsoft.com>
---
 .../azure/ai/inference/_model_base.py         |   2 +-
 .../ai/inference/_operations/_operations.py   | 242 +-------
 .../azure/ai/inference/_serialization.py      |   4 +-
 .../inference/aio/_operations/_operations.py  | 243 +-------
 .../azure/ai/inference/aio/_patch.py          |   2 +-
 .../azure/ai/inference/models/_models.py      | 536 +++++++++++++++---
 .../azure/ai/inference/prompts/_invoker.py    |   1 +
 .../azure/ai/inference/prompts/_mustache.py   |   1 +
 .../azure/ai/inference/prompts/_parsers.py    |   1 +
 .../azure/ai/inference/prompts/_patch.py      |   1 +
 .../ai/inference/prompts/_prompty_utils.py    |   1 +
 .../azure/ai/inference/tracing.py             |   1 +
 ..._chat_completions_from_input_dict_async.py |   1 +
 ...ompletions_streaming_azure_openai_async.py |   1 +
 .../async_samples/sample_embeddings_async.py  |   1 +
 .../async_samples/sample_load_client_async.py |   1 +
 .../sample_chat_completions_azure_openai.py   |   1 +
 ...sample_chat_completions_from_input_dict.py |   1 +
 ...pletions_from_input_dict_with_image_url.py |   1 +
 ...at_completions_from_input_prompt_string.py |   1 +
 ...e_chat_completions_streaming_with_tools.py |   1 +
 .../sample_chat_completions_with_history.py   |   1 +
 .../sample_chat_completions_with_image_url.py |   1 +
 ...chat_completions_with_structured_output.py |   1 +
 ...letions_with_structured_output_pydantic.py |   1 +
 .../sample_chat_completions_with_tools.py     |   1 +
 .../samples/sample_embeddings_azure_openai.py |   1 +
 .../tests/gen_ai_trace_verifier.py            |   1 +
 .../tests/model_inference_test_base.py        |   1 +
 .../tests/test_chat_completions_client.py     |   1 +
 .../test_chat_completions_client_async.py     |   1 +
 .../tests/test_client_tracing.py              |   2 +-
 .../tests/test_embeddings_client_async.py     |   1 +
 .../test_image_embeddings_client_async.py     |   1 +
 .../azure-ai-inference/tests/test_prompts.py  |   1 +
 .../tests/test_unit_tests.py                  |   1 +
 sdk/ai/azure-ai-inference/tsp-location.yaml   |   2 +-
 37 files changed, 516 insertions(+), 546 deletions(-)

diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py
index 359ecebe23f7..3072ee252ed9 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-lines,arguments-differ,signature-differs,no-member
+# pylint: disable=too-many-lines
 # coding=utf-8
 # --------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py
index 78e5ee353228..b48a0dc52af5 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py
@@ -1,4 +1,3 @@
-# pylint: disable=too-many-locals
 # coding=utf-8
 # --------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
@@ -9,7 +8,7 @@
 from io import IOBase
 import json
 import sys
-from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload
+from typing import Any, Callable, Dict, IO, Optional, TypeVar, Union, overload
 
 from azure.core.exceptions import (
     ClientAuthenticationError,
@@ -36,7 +35,6 @@
 else:
     from typing import MutableMapping  # type: ignore
 JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
-_Unset: Any = object()
 T = TypeVar("T")
 ClsType = Optional[Callable[[PipelineResponse[HttpRequest, HttpResponse], T, Dict[str, Any]], Any]]
 
@@ -184,24 +182,10 @@ class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):
     @overload
     def _complete(
         self,
+        body: _models._models.ChatCompletionsOptions,
         *,
-        messages: List[_models._models.ChatRequestMessage],
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
         content_type: str = "application/json",
-        frequency_penalty: Optional[float] = None,
-        stream_parameter: Optional[bool] = None,
-        presence_penalty: Optional[float] = None,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        max_tokens: Optional[int] = None,
-        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
-        stop: Optional[List[str]] = None,
-        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
-        tool_choice: Optional[
-            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
-        ] = None,
-        seed: Optional[int] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.ChatCompletions: ...
     @overload
@@ -226,24 +210,9 @@ def _complete(
     @distributed_trace
     def _complete(
         self,
-        body: Union[JSON, IO[bytes]] = _Unset,
+        body: Union[_models._models.ChatCompletionsOptions, JSON, IO[bytes]],
         *,
-        messages: List[_models._models.ChatRequestMessage] = _Unset,
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
-        frequency_penalty: Optional[float] = None,
-        stream_parameter: Optional[bool] = None,
-        presence_penalty: Optional[float] = None,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        max_tokens: Optional[int] = None,
-        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
-        stop: Optional[List[str]] = None,
-        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
-        tool_choice: Optional[
-            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
-        ] = None,
-        seed: Optional[int] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.ChatCompletions:
         """Gets chat completions for the provided chat messages.
@@ -252,93 +221,14 @@ def _complete(
         provided prompt data. The method makes a REST API call to the ``/chat/completions`` route
         on the given endpoint.
 
-        :param body: Is either a JSON type or a IO[bytes] type. Required.
-        :type body: JSON or IO[bytes]
-        :keyword messages: The collection of context messages associated with this chat completions
-         request.
-         Typical usage begins with a chat message for the System role that provides instructions for
-         the behavior of the assistant, followed by alternating messages between the User and
-         Assistant roles. Required.
-        :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+        :param body: The options for chat completions. Is one of the following types:
+         ChatCompletionsOptions, JSON, IO[bytes]. Required.
+        :type body: ~azure.ai.inference.models._models.ChatCompletionsOptions or JSON or IO[bytes]
         :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
          are passed in the JSON request payload.
          This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
          "pass-through". Default value is None.
         :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
-        :keyword frequency_penalty: A value that influences the probability of generated tokens
-         appearing based on their cumulative
-         frequency in generated text.
-         Positive values will make tokens less likely to appear as their frequency increases and
-         decrease the likelihood of the model repeating the same statements verbatim.
-         Supported range is [-2, 2]. Default value is None.
-        :paramtype frequency_penalty: float
-        :keyword stream_parameter: A value indicating whether chat completions should be streamed for
-         this request. Default value is None.
-        :paramtype stream_parameter: bool
-        :keyword presence_penalty: A value that influences the probability of generated tokens
-         appearing based on their existing
-         presence in generated text.
-         Positive values will make tokens less likely to appear when they already exist and increase
-         the
-         model's likelihood to output new topics.
-         Supported range is [-2, 2]. Default value is None.
-        :paramtype presence_penalty: float
-        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
-         generated completions.
-         Higher values will make output more random while lower values will make results more focused
-         and deterministic.
-         It is not recommended to modify temperature and top_p for the same completions request as the
-         interaction of these two settings is difficult to predict.
-         Supported range is [0, 1]. Default value is None.
-        :paramtype temperature: float
-        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
-         causes the
-         model to consider the results of tokens with the provided probability mass. As an example, a
-         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-         considered.
-         It is not recommended to modify temperature and top_p for the same completions request as the
-         interaction of these two settings is difficult to predict.
-         Supported range is [0, 1]. Default value is None.
-        :paramtype top_p: float
-        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
-        :paramtype max_tokens: int
-        :keyword response_format: An object specifying the format that the model must output.
-
-         Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs
-         which ensures the model will match your supplied JSON schema.
-
-         Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the
-         model generates is valid JSON.
-
-         **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
-         yourself via a system or user message. Without this, the model may generate an unending stream
-         of whitespace until the generation reaches the token limit, resulting in a long-running and
-         seemingly "stuck" request. Also note that the message content may be partially cut off if
-         ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
-         conversation exceeded the max context length. Default value is None.
-        :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
-        :keyword stop: A collection of textual sequences that will end completions generation. Default
-         value is None.
-        :paramtype stop: list[str]
-        :keyword tools: A list of tools the model may request to call. Currently, only functions are
-         supported as a tool. The model
-         may response with a function call request and provide the input arguments in JSON format for
-         that function. Default value is None.
-        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
-        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
-         use for the chat completions response. Is either a Union[str,
-         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
-         Default value is None.
-        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
-         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
-        :keyword seed: If specified, the system will make a best effort to sample deterministically
-         such that repeated requests with the
-         same seed and parameters should return the same result. Determinism is not guaranteed. Default
-         value is None.
-        :paramtype seed: int
-        :keyword model: ID of the specific AI model to use, if more than one model is available on the
-         endpoint. Default value is None.
-        :paramtype model: str
         :return: ChatCompletions. The ChatCompletions is compatible with MutableMapping
         :rtype: ~azure.ai.inference.models.ChatCompletions
         :raises ~azure.core.exceptions.HttpResponseError:
@@ -357,25 +247,6 @@ def _complete(
         content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
         cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None)
 
-        if body is _Unset:
-            if messages is _Unset:
-                raise TypeError("missing required argument: messages")
-            body = {
-                "frequency_penalty": frequency_penalty,
-                "max_tokens": max_tokens,
-                "messages": messages,
-                "model": model,
-                "presence_penalty": presence_penalty,
-                "response_format": response_format,
-                "seed": seed,
-                "stop": stop,
-                "stream": stream_parameter,
-                "temperature": temperature,
-                "tool_choice": tool_choice,
-                "tools": tools,
-                "top_p": top_p,
-            }
-            body = {k: v for k, v in body.items() if v is not None}
         content_type = content_type or "application/json"
         _content = None
         if isinstance(body, (IOBase, bytes)):
@@ -488,14 +359,10 @@ class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
     @overload
     def _embed(
         self,
+        body: _models._models.EmbeddingsOptions,
         *,
-        input: List[str],
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
         content_type: str = "application/json",
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult: ...
     @overload
@@ -520,46 +387,22 @@ def _embed(
     @distributed_trace
     def _embed(
         self,
-        body: Union[JSON, IO[bytes]] = _Unset,
+        body: Union[_models._models.EmbeddingsOptions, JSON, IO[bytes]],
         *,
-        input: List[str] = _Unset,
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult:
         """Return the embedding vectors for given text prompts.
         The method makes a REST API call to the ``/embeddings`` route on the given endpoint.
 
-        :param body: Is either a JSON type or a IO[bytes] type. Required.
-        :type body: JSON or IO[bytes]
-        :keyword input: Input text to embed, encoded as a string or array of tokens.
-         To embed multiple inputs in a single request, pass an array
-         of strings or array of token arrays. Required.
-        :paramtype input: list[str]
+        :param body: The body of the request containing the options for generating embeddings. Is one
+         of the following types: EmbeddingsOptions, JSON, IO[bytes]. Required.
+        :type body: ~azure.ai.inference.models._models.EmbeddingsOptions or JSON or IO[bytes]
         :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
          are passed in the JSON request payload.
          This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
          "pass-through". Default value is None.
         :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
-        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
-         have.
-         Passing null causes the model to use its default value.
-         Returns a 422 error if the model doesn't support the value or parameter. Default value is
-         None.
-        :paramtype dimensions: int
-        :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
-         values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
-        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
-        :keyword input_type: Optional. The type of the input.
-         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
-         "text", "query", and "document". Default value is None.
-        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
-        :keyword model: ID of the specific AI model to use, if more than one model is available on the
-         endpoint. Default value is None.
-        :paramtype model: str
         :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
         :rtype: ~azure.ai.inference.models.EmbeddingsResult
         :raises ~azure.core.exceptions.HttpResponseError:
@@ -578,17 +421,6 @@ def _embed(
         content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
         cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
 
-        if body is _Unset:
-            if input is _Unset:
-                raise TypeError("missing required argument: input")
-            body = {
-                "dimensions": dimensions,
-                "encoding_format": encoding_format,
-                "input": input,
-                "input_type": input_type,
-                "model": model,
-            }
-            body = {k: v for k, v in body.items() if v is not None}
         content_type = content_type or "application/json"
         _content = None
         if isinstance(body, (IOBase, bytes)):
@@ -701,14 +533,10 @@ class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
     @overload
     def _embed(
         self,
+        body: _models._models.ImageEmbeddingsOptions,
         *,
-        input: List[_models.ImageEmbeddingInput],
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
         content_type: str = "application/json",
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult: ...
     @overload
@@ -733,49 +561,22 @@ def _embed(
     @distributed_trace
     def _embed(
         self,
-        body: Union[JSON, IO[bytes]] = _Unset,
+        body: Union[_models._models.ImageEmbeddingsOptions, JSON, IO[bytes]],
         *,
-        input: List[_models.ImageEmbeddingInput] = _Unset,
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult:
         """Return the embedding vectors for given images.
         The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
 
-        :param body: Is either a JSON type or a IO[bytes] type. Required.
-        :type body: JSON or IO[bytes]
-        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
-         array.
-         The input must not exceed the max input tokens for the model. Required.
-        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :param body: The body of the request containing options for image embeddings. Is one of the
+         following types: ImageEmbeddingsOptions, JSON, IO[bytes]. Required.
+        :type body: ~azure.ai.inference.models._models.ImageEmbeddingsOptions or JSON or IO[bytes]
         :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
          are passed in the JSON request payload.
          This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
          "pass-through". Default value is None.
         :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
-        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
-         have.
-         Passing null causes the model to use its default value.
-         Returns a 422 error if the model doesn't support the value or parameter. Default value is
-         None.
-        :paramtype dimensions: int
-        :keyword encoding_format: Optional. The number of dimensions the resulting output embeddings
-         should have.
-         Passing null causes the model to use its default value.
-         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
-         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
-        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
-        :keyword input_type: Optional. The type of the input.
-         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
-         "text", "query", and "document". Default value is None.
-        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
-        :keyword model: ID of the specific AI model to use, if more than one model is available on the
-         endpoint. Default value is None.
-        :paramtype model: str
         :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
         :rtype: ~azure.ai.inference.models.EmbeddingsResult
         :raises ~azure.core.exceptions.HttpResponseError:
@@ -794,17 +595,6 @@ def _embed(
         content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
         cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
 
-        if body is _Unset:
-            if input is _Unset:
-                raise TypeError("missing required argument: input")
-            body = {
-                "dimensions": dimensions,
-                "encoding_format": encoding_format,
-                "input": input,
-                "input_type": input_type,
-                "model": model,
-            }
-            body = {k: v for k, v in body.items() if v is not None}
         content_type = content_type or "application/json"
         _content = None
         if isinstance(body, (IOBase, bytes)):
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py
index a066e16a64dd..e2a20b1d534c 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-lines
+# pylint: disable=line-too-long,useless-suppression,too-many-lines
 # --------------------------------------------------------------------------
 #
 # Copyright (c) Microsoft Corporation. All rights reserved.
@@ -1361,7 +1361,7 @@ def xml_key_extractor(attr, attr_desc, data):  # pylint: disable=unused-argument
         # Iter and wrapped, should have found one node only (the wrap one)
         if len(children) != 1:
             raise DeserializationError(
-                "Tried to deserialize an array not wrapped, and found several nodes '{}'. Maybe you should declare this array as wrapped?".format(  # pylint: disable=line-too-long
+                "Tried to deserialize an array not wrapped, and found several nodes '{}'. Maybe you should declare this array as wrapped?".format(
                     xml_name
                 )
             )
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py
index 62ec772f6dae..c481e4719835 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-locals
+# pylint: disable=line-too-long,useless-suppression
 # coding=utf-8
 # --------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
@@ -9,7 +9,7 @@
 from io import IOBase
 import json
 import sys
-from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload
+from typing import Any, Callable, Dict, IO, Optional, TypeVar, Union, overload
 
 from azure.core.exceptions import (
     ClientAuthenticationError,
@@ -43,7 +43,6 @@
 else:
     from typing import MutableMapping  # type: ignore
 JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
-_Unset: Any = object()
 T = TypeVar("T")
 ClsType = Optional[Callable[[PipelineResponse[HttpRequest, AsyncHttpResponse], T, Dict[str, Any]], Any]]
 
@@ -53,24 +52,10 @@ class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):
     @overload
     async def _complete(
         self,
+        body: _models._models.ChatCompletionsOptions,
         *,
-        messages: List[_models._models.ChatRequestMessage],
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
         content_type: str = "application/json",
-        frequency_penalty: Optional[float] = None,
-        stream_parameter: Optional[bool] = None,
-        presence_penalty: Optional[float] = None,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        max_tokens: Optional[int] = None,
-        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
-        stop: Optional[List[str]] = None,
-        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
-        tool_choice: Optional[
-            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
-        ] = None,
-        seed: Optional[int] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.ChatCompletions: ...
     @overload
@@ -95,24 +80,9 @@ async def _complete(
     @distributed_trace_async
     async def _complete(
         self,
-        body: Union[JSON, IO[bytes]] = _Unset,
+        body: Union[_models._models.ChatCompletionsOptions, JSON, IO[bytes]],
         *,
-        messages: List[_models._models.ChatRequestMessage] = _Unset,
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
-        frequency_penalty: Optional[float] = None,
-        stream_parameter: Optional[bool] = None,
-        presence_penalty: Optional[float] = None,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        max_tokens: Optional[int] = None,
-        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
-        stop: Optional[List[str]] = None,
-        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
-        tool_choice: Optional[
-            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
-        ] = None,
-        seed: Optional[int] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.ChatCompletions:
         """Gets chat completions for the provided chat messages.
@@ -121,93 +91,14 @@ async def _complete(
         provided prompt data. The method makes a REST API call to the ``/chat/completions`` route
         on the given endpoint.
 
-        :param body: Is either a JSON type or a IO[bytes] type. Required.
-        :type body: JSON or IO[bytes]
-        :keyword messages: The collection of context messages associated with this chat completions
-         request.
-         Typical usage begins with a chat message for the System role that provides instructions for
-         the behavior of the assistant, followed by alternating messages between the User and
-         Assistant roles. Required.
-        :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+        :param body: The options for chat completions. Is one of the following types:
+         ChatCompletionsOptions, JSON, IO[bytes] Required.
+        :type body: ~azure.ai.inference.models._models.ChatCompletionsOptions or JSON or IO[bytes]
         :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
          are passed in the JSON request payload.
          This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
          "pass-through". Default value is None.
         :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
-        :keyword frequency_penalty: A value that influences the probability of generated tokens
-         appearing based on their cumulative
-         frequency in generated text.
-         Positive values will make tokens less likely to appear as their frequency increases and
-         decrease the likelihood of the model repeating the same statements verbatim.
-         Supported range is [-2, 2]. Default value is None.
-        :paramtype frequency_penalty: float
-        :keyword stream_parameter: A value indicating whether chat completions should be streamed for
-         this request. Default value is None.
-        :paramtype stream_parameter: bool
-        :keyword presence_penalty: A value that influences the probability of generated tokens
-         appearing based on their existing
-         presence in generated text.
-         Positive values will make tokens less likely to appear when they already exist and increase
-         the
-         model's likelihood to output new topics.
-         Supported range is [-2, 2]. Default value is None.
-        :paramtype presence_penalty: float
-        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
-         generated completions.
-         Higher values will make output more random while lower values will make results more focused
-         and deterministic.
-         It is not recommended to modify temperature and top_p for the same completions request as the
-         interaction of these two settings is difficult to predict.
-         Supported range is [0, 1]. Default value is None.
-        :paramtype temperature: float
-        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
-         causes the
-         model to consider the results of tokens with the provided probability mass. As an example, a
-         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-         considered.
-         It is not recommended to modify temperature and top_p for the same completions request as the
-         interaction of these two settings is difficult to predict.
-         Supported range is [0, 1]. Default value is None.
-        :paramtype top_p: float
-        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
-        :paramtype max_tokens: int
-        :keyword response_format: An object specifying the format that the model must output.
-
-         Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs
-         which ensures the model will match your supplied JSON schema.
-
-         Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the
-         model generates is valid JSON.
-
-         **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
-         yourself via a system or user message. Without this, the model may generate an unending stream
-         of whitespace until the generation reaches the token limit, resulting in a long-running and
-         seemingly "stuck" request. Also note that the message content may be partially cut off if
-         ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
-         conversation exceeded the max context length. Default value is None.
-        :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
-        :keyword stop: A collection of textual sequences that will end completions generation. Default
-         value is None.
-        :paramtype stop: list[str]
-        :keyword tools: A list of tools the model may request to call. Currently, only functions are
-         supported as a tool. The model
-         may response with a function call request and provide the input arguments in JSON format for
-         that function. Default value is None.
-        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
-        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
-         use for the chat completions response. Is either a Union[str,
-         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
-         Default value is None.
-        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
-         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
-        :keyword seed: If specified, the system will make a best effort to sample deterministically
-         such that repeated requests with the
-         same seed and parameters should return the same result. Determinism is not guaranteed. Default
-         value is None.
-        :paramtype seed: int
-        :keyword model: ID of the specific AI model to use, if more than one model is available on the
-         endpoint. Default value is None.
-        :paramtype model: str
         :return: ChatCompletions. The ChatCompletions is compatible with MutableMapping
         :rtype: ~azure.ai.inference.models.ChatCompletions
         :raises ~azure.core.exceptions.HttpResponseError:
@@ -226,25 +117,6 @@ async def _complete(
         content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
         cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None)
 
-        if body is _Unset:
-            if messages is _Unset:
-                raise TypeError("missing required argument: messages")
-            body = {
-                "frequency_penalty": frequency_penalty,
-                "max_tokens": max_tokens,
-                "messages": messages,
-                "model": model,
-                "presence_penalty": presence_penalty,
-                "response_format": response_format,
-                "seed": seed,
-                "stop": stop,
-                "stream": stream_parameter,
-                "temperature": temperature,
-                "tool_choice": tool_choice,
-                "tools": tools,
-                "top_p": top_p,
-            }
-            body = {k: v for k, v in body.items() if v is not None}
         content_type = content_type or "application/json"
         _content = None
         if isinstance(body, (IOBase, bytes)):
@@ -357,14 +229,10 @@ class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
     @overload
     async def _embed(
         self,
+        body: _models._models.EmbeddingsOptions,
         *,
-        input: List[str],
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
         content_type: str = "application/json",
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult: ...
     @overload
@@ -389,46 +257,22 @@ async def _embed(
     @distributed_trace_async
     async def _embed(
         self,
-        body: Union[JSON, IO[bytes]] = _Unset,
+        body: Union[_models._models.EmbeddingsOptions, JSON, IO[bytes]],
         *,
-        input: List[str] = _Unset,
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult:
         """Return the embedding vectors for given text prompts.
         The method makes a REST API call to the ``/embeddings`` route on the given endpoint.
 
-        :param body: Is either a JSON type or a IO[bytes] type. Required.
-        :type body: JSON or IO[bytes]
-        :keyword input: Input text to embed, encoded as a string or array of tokens.
-         To embed multiple inputs in a single request, pass an array
-         of strings or array of token arrays. Required.
-        :paramtype input: list[str]
+        :param body: The body of the request containing the options for generating embeddings. Is one
+         of the following types: EmbeddingsOptions, JSON, IO[bytes] Required.
+        :type body: ~azure.ai.inference.models._models.EmbeddingsOptions or JSON or IO[bytes]
         :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
          are passed in the JSON request payload.
          This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
          "pass-through". Default value is None.
         :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
-        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
-         have.
-         Passing null causes the model to use its default value.
-         Returns a 422 error if the model doesn't support the value or parameter. Default value is
-         None.
-        :paramtype dimensions: int
-        :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
-         values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
-        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
-        :keyword input_type: Optional. The type of the input.
-         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
-         "text", "query", and "document". Default value is None.
-        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
-        :keyword model: ID of the specific AI model to use, if more than one model is available on the
-         endpoint. Default value is None.
-        :paramtype model: str
         :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
         :rtype: ~azure.ai.inference.models.EmbeddingsResult
         :raises ~azure.core.exceptions.HttpResponseError:
@@ -447,17 +291,6 @@ async def _embed(
         content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
         cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
 
-        if body is _Unset:
-            if input is _Unset:
-                raise TypeError("missing required argument: input")
-            body = {
-                "dimensions": dimensions,
-                "encoding_format": encoding_format,
-                "input": input,
-                "input_type": input_type,
-                "model": model,
-            }
-            body = {k: v for k, v in body.items() if v is not None}
         content_type = content_type or "application/json"
         _content = None
         if isinstance(body, (IOBase, bytes)):
@@ -570,14 +403,10 @@ class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
     @overload
     async def _embed(
         self,
+        body: _models._models.ImageEmbeddingsOptions,
         *,
-        input: List[_models.ImageEmbeddingInput],
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
         content_type: str = "application/json",
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult: ...
     @overload
@@ -602,49 +431,22 @@ async def _embed(
     @distributed_trace_async
     async def _embed(
         self,
-        body: Union[JSON, IO[bytes]] = _Unset,
+        body: Union[_models._models.ImageEmbeddingsOptions, JSON, IO[bytes]],
         *,
-        input: List[_models.ImageEmbeddingInput] = _Unset,
         extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
-        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
-        model: Optional[str] = None,
         **kwargs: Any
     ) -> _models.EmbeddingsResult:
         """Return the embedding vectors for given images.
         The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
 
-        :param body: Is either a JSON type or a IO[bytes] type. Required.
-        :type body: JSON or IO[bytes]
-        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
-         array.
-         The input must not exceed the max input tokens for the model. Required.
-        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :param body: The body of the request containing options for image embeddings. Is one of the
+         following types: ImageEmbeddingsOptions, JSON, IO[bytes] Required.
+        :type body: ~azure.ai.inference.models._models.ImageEmbeddingsOptions or JSON or IO[bytes]
         :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
          are passed in the JSON request payload.
          This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
          "pass-through". Default value is None.
         :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
-        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
-         have.
-         Passing null causes the model to use its default value.
-         Returns a 422 error if the model doesn't support the value or parameter. Default value is
-         None.
-        :paramtype dimensions: int
-        :keyword encoding_format: Optional. The number of dimensions the resulting output embeddings
-         should have.
-         Passing null causes the model to use its default value.
-         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
-         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
-        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
-        :keyword input_type: Optional. The type of the input.
-         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
-         "text", "query", and "document". Default value is None.
-        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
-        :keyword model: ID of the specific AI model to use, if more than one model is available on the
-         endpoint. Default value is None.
-        :paramtype model: str
         :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
         :rtype: ~azure.ai.inference.models.EmbeddingsResult
         :raises ~azure.core.exceptions.HttpResponseError:
@@ -663,17 +465,6 @@ async def _embed(
         content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
         cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
 
-        if body is _Unset:
-            if input is _Unset:
-                raise TypeError("missing required argument: input")
-            body = {
-                "dimensions": dimensions,
-                "encoding_format": encoding_format,
-                "input": input,
-                "input_type": input_type,
-                "model": model,
-            }
-            body = {k: v for k, v in body.items() if v is not None}
         content_type = content_type or "application/json"
         _content = None
         if isinstance(body, (IOBase, bytes)):
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py
index 2f9873805aa6..9e084d8dd6d4 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-lines
+# pylint: disable=too-many-lines,line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
index 53934528434f..85598618489a 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-lines
+# pylint: disable=line-too-long,useless-suppression,too-many-lines
 # coding=utf-8
 # --------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
@@ -30,7 +30,7 @@ class ContentItem(_model_base.Model):
     """
 
     __mapping__: Dict[str, _model_base.Model] = {}
-    type: str = rest_discriminator(name="type")
+    type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])
     """The discriminated object type. Required. Default value is None."""
 
     @overload
@@ -61,10 +61,10 @@ class AudioContentItem(ContentItem, discriminator="input_audio"):
     :vartype input_audio: ~azure.ai.inference.models.InputAudio
     """
 
-    type: Literal["input_audio"] = rest_discriminator(name="type")  # type: ignore
+    type: Literal["input_audio"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The discriminated object type: always 'input_audio' for this type. Required. Default value is
      \"input_audio\"."""
-    input_audio: "_models.InputAudio" = rest_field()
+    input_audio: "_models.InputAudio" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The details of the input audio. Required."""
 
     @overload
@@ -101,12 +101,14 @@ class ChatChoice(_model_base.Model):
     :vartype message: ~azure.ai.inference.models.ChatResponseMessage
     """
 
-    index: int = rest_field()
+    index: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The ordered index associated with this chat completions choice. Required."""
-    finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field()
+    finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The reason that this chat completions choice completed its generated. Required. Known values
      are: \"stop\", \"length\", \"content_filter\", and \"tool_calls\"."""
-    message: "_models.ChatResponseMessage" = rest_field()
+    message: "_models.ChatResponseMessage" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The chat message for a given chat completions prompt. Required."""
 
     @overload
@@ -153,18 +155,20 @@ class ChatCompletions(_model_base.Model):
     :vartype usage: ~azure.ai.inference.models.CompletionsUsage
     """
 
-    id: str = rest_field()
+    id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """A unique identifier associated with this chat completions response. Required."""
-    created: datetime.datetime = rest_field(format="unix-timestamp")
+    created: datetime.datetime = rest_field(
+        visibility=["read", "create", "update", "delete", "query"], format="unix-timestamp"
+    )
     """The first timestamp associated with generation activity for this completions response,
      represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required."""
-    model: str = rest_field()
+    model: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The model used for the chat completion. Required."""
-    choices: List["_models.ChatChoice"] = rest_field()
+    choices: List["_models.ChatChoice"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The collection of completions choices associated with this completions response.
      Generally, ``n`` choices are generated per provided prompt with a default value of 1.
      Token limits and other settings may limit the number of choices generated. Required."""
-    usage: "_models.CompletionsUsage" = rest_field()
+    usage: "_models.CompletionsUsage" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Usage information for tokens processed and generated as part of this completions operation.
      Required."""
 
@@ -201,10 +205,12 @@ class ChatCompletionsNamedToolChoice(_model_base.Model):
     :vartype function: ~azure.ai.inference.models.ChatCompletionsNamedToolChoiceFunction
     """
 
-    type: Literal["function"] = rest_field()
+    type: Literal["function"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The type of the tool. Currently, only ``function`` is supported. Required. Default value is
      \"function\"."""
-    function: "_models.ChatCompletionsNamedToolChoiceFunction" = rest_field()
+    function: "_models.ChatCompletionsNamedToolChoiceFunction" = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The function that should be called. Required."""
 
     @overload
@@ -234,7 +240,7 @@ class ChatCompletionsNamedToolChoiceFunction(_model_base.Model):
     :vartype name: str
     """
 
-    name: str = rest_field()
+    name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The name of the function that should be called. Required."""
 
     @overload
@@ -255,6 +261,198 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
 
+class ChatCompletionsOptions(_model_base.Model):
+    """The configuration information for a chat completions request.
+    Completions support a wide variety of tasks and generate text that continues from or
+    "completes"
+    provided prompt data.
+
+    :ivar messages: The collection of context messages associated with this chat completions
+     request.
+     Typical usage begins with a chat message for the System role that provides instructions for
+     the behavior of the assistant, followed by alternating messages between the User and
+     Assistant roles. Required.
+    :vartype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+    :ivar frequency_penalty: A value that influences the probability of generated tokens appearing
+     based on their cumulative
+     frequency in generated text.
+     Positive values will make tokens less likely to appear as their frequency increases and
+     decrease the likelihood of the model repeating the same statements verbatim.
+     Supported range is [-2, 2].
+    :vartype frequency_penalty: float
+    :ivar stream: A value indicating whether chat completions should be streamed for this request.
+    :vartype stream: bool
+    :ivar presence_penalty: A value that influences the probability of generated tokens appearing
+     based on their existing
+     presence in generated text.
+     Positive values will make tokens less likely to appear when they already exist and increase the
+     model's likelihood to output new topics.
+     Supported range is [-2, 2].
+    :vartype presence_penalty: float
+    :ivar temperature: The sampling temperature to use that controls the apparent creativity of
+     generated completions.
+     Higher values will make output more random while lower values will make results more focused
+     and deterministic.
+     It is not recommended to modify temperature and top_p for the same completions request as the
+     interaction of these two settings is difficult to predict.
+     Supported range is [0, 1].
+    :vartype temperature: float
+    :ivar top_p: An alternative to sampling with temperature called nucleus sampling. This value
+     causes the
+     model to consider the results of tokens with the provided probability mass. As an example, a
+     value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+     considered.
+     It is not recommended to modify temperature and top_p for the same completions request as the
+     interaction of these two settings is difficult to predict.
+     Supported range is [0, 1].
+    :vartype top_p: float
+    :ivar max_tokens: The maximum number of tokens to generate.
+    :vartype max_tokens: int
+    :ivar response_format: An object specifying the format that the model must output.
+
+     Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs which
+     ensures the model will match your supplied JSON schema.
+
+     Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the model
+     generates is valid JSON.
+
+     **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
+     yourself via a system or user message. Without this, the model may generate an unending stream
+     of whitespace until the generation reaches the token limit, resulting in a long-running and
+     seemingly "stuck" request. Also note that the message content may be partially cut off if
+     ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
+     conversation exceeded the max context length.
+    :vartype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
+    :ivar stop: A collection of textual sequences that will end completions generation.
+    :vartype stop: list[str]
+    :ivar tools: A list of tools the model may request to call. Currently, only functions are
+     supported as a tool. The model
+     may respond with a function call request and provide the input arguments in JSON format for
+     that function.
+    :vartype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+    :ivar tool_choice: If specified, the model will configure which of the provided tools it can
+     use for the chat completions response. Is either a Union[str,
+     "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+    :vartype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+     ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+    :ivar seed: If specified, the system will make a best effort to sample deterministically such
+     that repeated requests with the
+     same seed and parameters should return the same result. Determinism is not guaranteed.
+    :vartype seed: int
+    :ivar model: ID of the specific AI model to use, if more than one model is available on the
+     endpoint.
+    :vartype model: str
+    """
+
+    messages: List["_models._models.ChatRequestMessage"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """The collection of context messages associated with this chat completions request.
+     Typical usage begins with a chat message for the System role that provides instructions for
+     the behavior of the assistant, followed by alternating messages between the User and
+     Assistant roles. Required."""
+    frequency_penalty: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A value that influences the probability of generated tokens appearing based on their cumulative
+     frequency in generated text.
+     Positive values will make tokens less likely to appear as their frequency increases and
+     decrease the likelihood of the model repeating the same statements verbatim.
+     Supported range is [-2, 2]."""
+    stream: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A value indicating whether chat completions should be streamed for this request."""
+    presence_penalty: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A value that influences the probability of generated tokens appearing based on their existing
+     presence in generated text.
+     Positive values will make tokens less likely to appear when they already exist and increase the
+     model's likelihood to output new topics.
+     Supported range is [-2, 2]."""
+    temperature: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The sampling temperature to use that controls the apparent creativity of generated completions.
+     Higher values will make output more random while lower values will make results more focused
+     and deterministic.
+     It is not recommended to modify temperature and top_p for the same completions request as the
+     interaction of these two settings is difficult to predict.
+     Supported range is [0, 1]."""
+    top_p: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """An alternative to sampling with temperature called nucleus sampling. This value causes the
+     model to consider the results of tokens with the provided probability mass. As an example, a
+     value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+     considered.
+     It is not recommended to modify temperature and top_p for the same completions request as the
+     interaction of these two settings is difficult to predict.
+     Supported range is [0, 1]."""
+    max_tokens: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The maximum number of tokens to generate."""
+    response_format: Optional["_models._models.ChatCompletionsResponseFormat"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """An object specifying the format that the model must output.
+     
+     Setting to ``{ \"type\": \"json_schema\", \"json_schema\": {...} }`` enables Structured Outputs
+     which ensures the model will match your supplied JSON schema.
+     
+     Setting to ``{ \"type\": \"json_object\" }`` enables JSON mode, which ensures the message the
+     model generates is valid JSON.
+     
+     **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
+     yourself via a system or user message. Without this, the model may generate an unending stream
+     of whitespace until the generation reaches the token limit, resulting in a long-running and
+     seemingly \"stuck\" request. Also note that the message content may be partially cut off if
+     ``finish_reason=\"length\"``\\ , which indicates the generation exceeded ``max_tokens`` or the
+     conversation exceeded the max context length."""
+    stop: Optional[List[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """A collection of textual sequences that will end completions generation."""
+    tools: Optional[List["_models.ChatCompletionsToolDefinition"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """A list of tools the model may request to call. Currently, only functions are supported as a
+     tool. The model
+     may respond with a function call request and provide the input arguments in JSON format for
+     that function."""
+    tool_choice: Optional[
+        Union[str, "_models.ChatCompletionsToolChoicePreset", "_models.ChatCompletionsNamedToolChoice"]
+    ] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """If specified, the model will configure which of the provided tools it can use for the chat
+     completions response. Is either a Union[str, \"_models.ChatCompletionsToolChoicePreset\"] type
+     or a ChatCompletionsNamedToolChoice type."""
+    seed: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """If specified, the system will make a best effort to sample deterministically such that repeated
+     requests with the
+     same seed and parameters should return the same result. Determinism is not guaranteed."""
+    model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """ID of the specific AI model to use, if more than one model is available on the endpoint."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        messages: List["_models._models.ChatRequestMessage"],
+        frequency_penalty: Optional[float] = None,
+        stream: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional["_models._models.ChatCompletionsResponseFormat"] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List["_models.ChatCompletionsToolDefinition"]] = None,
+        tool_choice: Optional[
+            Union[str, "_models.ChatCompletionsToolChoicePreset", "_models.ChatCompletionsNamedToolChoice"]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
 class ChatCompletionsResponseFormat(_model_base.Model):
     """Represents the format that the model must output. Use this to enable JSON mode instead of the
     default text mode.
@@ -272,7 +470,7 @@ class ChatCompletionsResponseFormat(_model_base.Model):
     """
 
     __mapping__: Dict[str, _model_base.Model] = {}
-    type: str = rest_discriminator(name="type")
+    type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])
     """The response format type to use for chat completions. Required. Default value is None."""
 
     @overload
@@ -304,7 +502,7 @@ class ChatCompletionsResponseFormatJsonObject(ChatCompletionsResponseFormat, dis
     :vartype type: str
     """
 
-    type: Literal["json_object"] = rest_discriminator(name="type")  # type: ignore
+    type: Literal["json_object"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """Response format type: always 'json_object' for this object. Required. Default value is
      \"json_object\"."""
 
@@ -337,10 +535,10 @@ class ChatCompletionsResponseFormatJsonSchema(ChatCompletionsResponseFormat, dis
     :vartype json_schema: ~azure.ai.inference.models.JsonSchemaFormat
     """
 
-    type: Literal["json_schema"] = rest_discriminator(name="type")  # type: ignore
+    type: Literal["json_schema"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The type of response format being defined: ``json_schema``. Required. Default value is
      \"json_schema\"."""
-    json_schema: "_models.JsonSchemaFormat" = rest_field()
+    json_schema: "_models.JsonSchemaFormat" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The definition of the required JSON schema in the response, and associated metadata. Required."""
 
     @overload
@@ -370,7 +568,7 @@ class ChatCompletionsResponseFormatText(ChatCompletionsResponseFormat, discrimin
     :vartype type: str
     """
 
-    type: Literal["text"] = rest_discriminator(name="type")  # type: ignore
+    type: Literal["text"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """Response format type: always 'text' for this object. Required. Default value is \"text\"."""
 
     @overload
@@ -392,6 +590,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 class ChatCompletionsToolCall(_model_base.Model):
     """A function tool call requested by the AI model.
 
+
     :ivar id: The ID of the tool call. Required.
     :vartype id: str
     :ivar type: The type of tool call. Currently, only ``function`` is supported. Required. Default
@@ -401,12 +600,12 @@ class ChatCompletionsToolCall(_model_base.Model):
     :vartype function: ~azure.ai.inference.models.FunctionCall
     """
 
-    id: str = rest_field()
+    id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The ID of the tool call. Required."""
-    type: Literal["function"] = rest_field()
+    type: Literal["function"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The type of tool call. Currently, only ``function`` is supported. Required. Default value is
      \"function\"."""
-    function: "_models.FunctionCall" = rest_field()
+    function: "_models.FunctionCall" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The details of the function call requested by the AI model. Required."""
 
     @overload
@@ -439,10 +638,10 @@ class ChatCompletionsToolDefinition(_model_base.Model):
     :vartype function: ~azure.ai.inference.models.FunctionDefinition
     """
 
-    type: Literal["function"] = rest_field()
+    type: Literal["function"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The type of the tool. Currently, only ``function`` is supported. Required. Default value is
      \"function\"."""
-    function: "_models.FunctionDefinition" = rest_field()
+    function: "_models.FunctionDefinition" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The function definition details for the function tool. Required."""
 
     @overload
@@ -477,7 +676,7 @@ class ChatRequestMessage(_model_base.Model):
     """
 
     __mapping__: Dict[str, _model_base.Model] = {}
-    role: str = rest_discriminator(name="role")
+    role: str = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"])
     """The chat role associated with this message. Required. Known values are: \"system\", \"user\",
      \"assistant\", \"tool\", and \"developer\"."""
 
@@ -514,12 +713,14 @@ class ChatRequestAssistantMessage(ChatRequestMessage, discriminator="assistant")
     :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall]
     """
 
-    role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role")  # type: ignore
+    role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The chat role associated with this message, which is always 'assistant' for assistant messages.
      Required. The role that provides responses to system-instructed, user-prompted input."""
-    content: Optional[str] = rest_field()
+    content: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The content of the message."""
-    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field()
+    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The tool calls that must be resolved and have their outputs appended to subsequent input
      messages for the chat
      completions request to resolve as configured."""
@@ -544,7 +745,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 
 
 class ChatRequestDeveloperMessage(ChatRequestMessage, discriminator="developer"):
-    """A request chat message containing system instructions that influence how the model will
+    """A request chat message containing developer instructions that influence how the model will
     generate a chat completions
     response. Some AI models support a developer message instead of a system message.
 
@@ -556,11 +757,11 @@ class ChatRequestDeveloperMessage(ChatRequestMessage, discriminator="developer")
     :vartype content: str
     """
 
-    role: Literal[ChatRole.DEVELOPER] = rest_discriminator(name="role")  # type: ignore
+    role: Literal[ChatRole.DEVELOPER] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The chat role associated with this message, which is always 'developer' for developer messages.
      Required. The role that instructs or sets the behavior of the assistant. Some AI models support
      this role instead of the 'system' role."""
-    content: str = rest_field()
+    content: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The contents of the developer message. Required."""
 
     @overload
@@ -593,10 +794,10 @@ class ChatRequestSystemMessage(ChatRequestMessage, discriminator="system"):
     :vartype content: str
     """
 
-    role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role")  # type: ignore
+    role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The chat role associated with this message, which is always 'system' for system messages.
      Required. The role that instructs or sets the behavior of the assistant."""
-    content: str = rest_field()
+    content: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The contents of the system message. Required."""
 
     @overload
@@ -630,12 +831,12 @@ class ChatRequestToolMessage(ChatRequestMessage, discriminator="tool"):
     :vartype tool_call_id: str
     """
 
-    role: Literal[ChatRole.TOOL] = rest_discriminator(name="role")  # type: ignore
+    role: Literal[ChatRole.TOOL] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The chat role associated with this message, which is always 'tool' for tool messages. Required.
      The role that represents extension tool activity within a chat completions operation."""
-    content: Optional[str] = rest_field()
+    content: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The content of the message."""
-    tool_call_id: str = rest_field()
+    tool_call_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The ID of the tool call resolved by the provided content. Required."""
 
     @overload
@@ -668,10 +869,12 @@ class ChatRequestUserMessage(ChatRequestMessage, discriminator="user"):
     :vartype content: str or list[~azure.ai.inference.models.ContentItem]
     """
 
-    role: Literal[ChatRole.USER] = rest_discriminator(name="role")  # type: ignore
+    role: Literal[ChatRole.USER] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The chat role associated with this message, which is always 'user' for user messages. Required.
      The role that provides input for chat completions."""
-    content: Union["str", List["_models.ContentItem"]] = rest_field()
+    content: Union["str", List["_models.ContentItem"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The contents of the user message, with available input types varying by selected model.
      Required. Is either a str type or a [ContentItem] type."""
 
@@ -708,12 +911,14 @@ class ChatResponseMessage(_model_base.Model):
     :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall]
     """
 
-    role: Union[str, "_models.ChatRole"] = rest_field()
+    role: Union[str, "_models.ChatRole"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The chat role associated with the message. Required. Known values are: \"system\", \"user\",
      \"assistant\", \"tool\", and \"developer\"."""
-    content: str = rest_field()
+    content: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The content of the message. Required."""
-    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field()
+    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The tool calls that must be resolved and have their outputs appended to subsequent input
      messages for the chat
      completions request to resolve as configured."""
@@ -755,11 +960,11 @@ class CompletionsUsage(_model_base.Model):
     :vartype total_tokens: int
     """
 
-    completion_tokens: int = rest_field()
+    completion_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The number of tokens generated across all completions emissions. Required."""
-    prompt_tokens: int = rest_field()
+    prompt_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The number of tokens in the provided prompts for the completions request. Required."""
-    total_tokens: int = rest_field()
+    total_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The total number of tokens processed for the completions request and response. Required."""
 
     @overload
@@ -795,11 +1000,11 @@ class EmbeddingItem(_model_base.Model):
     :vartype index: int
     """
 
-    embedding: Union["str", List[float]] = rest_field()
+    embedding: Union["str", List[float]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """List of embedding values for the input prompt. These represent a measurement of the
      vector-based relatedness of the provided input. Or a base64 encoded string of the embedding
      vector. Required. Is either a str type or a [float] type."""
-    index: int = rest_field()
+    index: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Index of the prompt to which the EmbeddingItem corresponds. Required."""
 
     @overload
@@ -821,6 +1026,74 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
 
+class EmbeddingsOptions(_model_base.Model):
+    """The configuration information for an embeddings request.
+
+    :ivar input: Input text to embed, encoded as a string or array of tokens.
+     To embed multiple inputs in a single request, pass an array
+     of strings or array of token arrays. Required.
+    :vartype input: list[str]
+    :ivar dimensions: Optional. The number of dimensions the resulting output embeddings should
+     have.
+     Passing null causes the model to use its default value.
+     Returns a 422 error if the model doesn't support the value or parameter.
+    :vartype dimensions: int
+    :ivar encoding_format: Optional. The desired format for the returned embeddings. Known values
+     are: "base64", "binary", "float", "int8", "ubinary", and "uint8".
+    :vartype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :ivar input_type: Optional. The type of the input.
+     Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+     "text", "query", and "document".
+    :vartype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :ivar model: ID of the specific AI model to use, if more than one model is available on the
+     endpoint.
+    :vartype model: str
+    """
+
+    input: List[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Input text to embed, encoded as a string or array of tokens.
+     To embed multiple inputs in a single request, pass an array
+     of strings or array of token arrays. Required."""
+    dimensions: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Optional. The number of dimensions the resulting output embeddings should have.
+     Passing null causes the model to use its default value.
+     Returns a 422 error if the model doesn't support the value or parameter."""
+    encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Optional. The desired format for the returned embeddings. Known values are: \"base64\",
+     \"binary\", \"float\", \"int8\", \"ubinary\", and \"uint8\"."""
+    input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Optional. The type of the input.
+     Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+     \"text\", \"query\", and \"document\"."""
+    model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """ID of the specific AI model to use, if more than one model is available on the endpoint."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        input: List[str],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = None,
+        input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = None,
+        model: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
 class EmbeddingsResult(_model_base.Model):
     """Representation of the response data from an embeddings request.
     Embeddings measure the relatedness of text strings and are commonly used for search,
@@ -838,13 +1111,13 @@ class EmbeddingsResult(_model_base.Model):
     :vartype model: str
     """
 
-    id: str = rest_field()
+    id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Unique identifier for the embeddings result. Required."""
-    data: List["_models.EmbeddingItem"] = rest_field()
+    data: List["_models.EmbeddingItem"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Embedding values for the prompts submitted in the request. Required."""
-    usage: "_models.EmbeddingsUsage" = rest_field()
+    usage: "_models.EmbeddingsUsage" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Usage counts for tokens input using the embeddings API. Required."""
-    model: str = rest_field()
+    model: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The model ID used to generate this result. Required."""
 
     @overload
@@ -880,9 +1153,9 @@ class EmbeddingsUsage(_model_base.Model):
     :vartype total_tokens: int
     """
 
-    prompt_tokens: int = rest_field()
+    prompt_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Number of tokens in the request. Required."""
-    total_tokens: int = rest_field()
+    total_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Total number of tokens transacted in this request/response. Should equal the
      number of tokens in the request. Required."""
 
@@ -919,9 +1192,9 @@ class FunctionCall(_model_base.Model):
     :vartype arguments: str
     """
 
-    name: str = rest_field()
+    name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The name of the function to call. Required."""
-    arguments: str = rest_field()
+    arguments: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The arguments to call the function with, as generated by the model in JSON format.
      Note that the model does not always generate valid JSON, and may hallucinate parameters
      not defined by your function schema. Validate the arguments in your code before calling
@@ -960,13 +1233,13 @@ class FunctionDefinition(_model_base.Model):
     :vartype parameters: any
     """
 
-    name: str = rest_field()
+    name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The name of the function to be called. Required."""
-    description: Optional[str] = rest_field()
+    description: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """A description of what the function does. The model will use this description when selecting the
      function and
      interpreting its parameters."""
-    parameters: Optional[Any] = rest_field()
+    parameters: Optional[Any] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The parameters the function accepts, described as a JSON Schema object."""
 
     @overload
@@ -1000,10 +1273,10 @@ class ImageContentItem(ContentItem, discriminator="image_url"):
     :vartype image_url: ~azure.ai.inference.models.ImageUrl
     """
 
-    type: Literal["image_url"] = rest_discriminator(name="type")  # type: ignore
+    type: Literal["image_url"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The discriminated object type: always 'image_url' for this type. Required. Default value is
      \"image_url\"."""
-    image_url: "_models.ImageUrl" = rest_field()
+    image_url: "_models.ImageUrl" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """An internet location, which must be accessible to the model,from which the image may be
      retrieved. Required."""
 
@@ -1036,10 +1309,10 @@ class ImageEmbeddingInput(_model_base.Model):
     :vartype text: str
     """
 
-    image: str = rest_field()
+    image: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The input image encoded in base64 string as a data URL. Example:
      ``data:image/{format};base64,{data}``. Required."""
-    text: Optional[str] = rest_field()
+    text: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Optional. The text input to feed into the model (like DINO, CLIP).
      Returns a 422 error if the model doesn't support the value or parameter."""
 
@@ -1062,6 +1335,77 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
 
+class ImageEmbeddingsOptions(_model_base.Model):
+    """The configuration information for an image embeddings request.
+
+    :ivar input: Input image to embed. To embed multiple inputs in a single request, pass an array.
+     The input must not exceed the max input tokens for the model. Required.
+    :vartype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+    :ivar dimensions: Optional. The number of dimensions the resulting output embeddings should
+     have.
+     Passing null causes the model to use its default value.
+     Returns a 422 error if the model doesn't support the value or parameter.
+    :vartype dimensions: int
+    :ivar encoding_format: Optional. The desired format for the returned
+     embeddings.
+     Passing null causes the model to use its default value.
+     Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+     "base64", "binary", "float", "int8", "ubinary", and "uint8".
+    :vartype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :ivar input_type: Optional. The type of the input.
+     Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+     "text", "query", and "document".
+    :vartype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :ivar model: ID of the specific AI model to use, if more than one model is available on the
+     endpoint.
+    :vartype model: str
+    """
+
+    input: List["_models.ImageEmbeddingInput"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Input image to embed. To embed multiple inputs in a single request, pass an array.
+     The input must not exceed the max input tokens for the model. Required."""
+    dimensions: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """Optional. The number of dimensions the resulting output embeddings should have.
+     Passing null causes the model to use its default value.
+     Returns a 422 error if the model doesn't support the value or parameter."""
+    encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Optional. The desired format for the returned embeddings.
+     Passing null causes the model to use its default value.
+     Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+     \"base64\", \"binary\", \"float\", \"int8\", \"ubinary\", and \"uint8\"."""
+    input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
+    """Optional. The type of the input.
+     Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+     \"text\", \"query\", and \"document\"."""
+    model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """ID of the specific AI model to use, if more than one model is available on the endpoint."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        input: List["_models.ImageEmbeddingInput"],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = None,
+        input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = None,
+        model: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
 class ImageUrl(_model_base.Model):
     """An internet location from which the model may retrieve an image.
 
@@ -1073,9 +1417,11 @@ class ImageUrl(_model_base.Model):
     :vartype detail: str or ~azure.ai.inference.models.ImageDetailLevel
     """
 
-    url: str = rest_field()
+    url: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The URL of the image. Required."""
-    detail: Optional[Union[str, "_models.ImageDetailLevel"]] = rest_field()
+    detail: Optional[Union[str, "_models.ImageDetailLevel"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The evaluation quality setting to use, which controls relative prioritization of speed, token
      consumption, and
      accuracy. Known values are: \"auto\", \"low\", and \"high\"."""
@@ -1109,9 +1455,11 @@ class InputAudio(_model_base.Model):
     :vartype format: str or ~azure.ai.inference.models.AudioContentFormat
     """
 
-    data: str = rest_field()
+    data: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Base64 encoded audio data. Required."""
-    format: Union[str, "_models.AudioContentFormat"] = rest_field()
+    format: Union[str, "_models.AudioContentFormat"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The audio format of the audio content. Required. Known values are: \"wav\" and \"mp3\"."""
 
     @overload
@@ -1157,17 +1505,17 @@ class JsonSchemaFormat(_model_base.Model):
     :vartype strict: bool
     """
 
-    name: str = rest_field()
+    name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
      with a maximum length of 64. Required."""
-    schema: Dict[str, Any] = rest_field()
+    schema: Dict[str, Any] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.
      Note that AI models usually only support a subset of the keywords defined by JSON schema.
      Consult your AI model documentation to determine what is supported. Required."""
-    description: Optional[str] = rest_field()
+    description: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """A description of the response format, used by the AI model to determine how to generate
      responses in this format."""
-    strict: Optional[bool] = rest_field()
+    strict: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """If set to true, the service will error out if the provided JSON schema contains keywords
      not supported by the AI model. An example of such keyword may be ``maxLength`` for JSON type
      ``string``.
@@ -1210,13 +1558,13 @@ class ModelInfo(_model_base.Model):
     :vartype model_provider_name: str
     """
 
-    model_name: str = rest_field()
+    model_name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The name of the AI model. For example: ``Phi21``. Required."""
-    model_type: Union[str, "_models.ModelType"] = rest_field()
+    model_type: Union[str, "_models.ModelType"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The type of the AI model. A Unique identifier for the profile. Required. Known values are:
      \"embeddings\", \"image_generation\", \"text_generation\", \"image_embeddings\",
      \"audio_generation\", and \"chat_completion\"."""
-    model_provider_name: str = rest_field()
+    model_provider_name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The model provider name. For example: ``Microsoft Research``. Required."""
 
     @overload
@@ -1255,12 +1603,16 @@ class StreamingChatChoiceUpdate(_model_base.Model):
     :vartype delta: ~azure.ai.inference.models.StreamingChatResponseMessageUpdate
     """
 
-    index: int = rest_field()
+    index: int = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The ordered index associated with this chat completions choice. Required."""
-    finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field()
+    finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The reason that this chat completions choice completed its generated. Required. Known values
      are: \"stop\", \"length\", \"content_filter\", and \"tool_calls\"."""
-    delta: "_models.StreamingChatResponseMessageUpdate" = rest_field()
+    delta: "_models.StreamingChatResponseMessageUpdate" = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """An update to the chat message for a given chat completions prompt. Required."""
 
     @overload
@@ -1310,18 +1662,22 @@ class StreamingChatCompletionsUpdate(_model_base.Model):
     :vartype usage: ~azure.ai.inference.models.CompletionsUsage
     """
 
-    id: str = rest_field()
+    id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """A unique identifier associated with this chat completions response. Required."""
-    created: datetime.datetime = rest_field(format="unix-timestamp")
+    created: datetime.datetime = rest_field(
+        visibility=["read", "create", "update", "delete", "query"], format="unix-timestamp"
+    )
     """The first timestamp associated with generation activity for this completions response,
      represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required."""
-    model: str = rest_field()
+    model: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The model used for the chat completion. Required."""
-    choices: List["_models.StreamingChatChoiceUpdate"] = rest_field()
+    choices: List["_models.StreamingChatChoiceUpdate"] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """An update to the collection of completion choices associated with this completions response.
      Generally, ``n`` choices are generated per provided prompt with a default value of 1.
      Token limits and other settings may limit the number of choices generated. Required."""
-    usage: Optional["_models.CompletionsUsage"] = rest_field()
+    usage: Optional["_models.CompletionsUsage"] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Usage information for tokens processed and generated as part of this completions operation."""
 
     @overload
@@ -1360,12 +1716,16 @@ class StreamingChatResponseMessageUpdate(_model_base.Model):
     :vartype tool_calls: list[~azure.ai.inference.models.StreamingChatResponseToolCallUpdate]
     """
 
-    role: Optional[Union[str, "_models.ChatRole"]] = rest_field()
+    role: Optional[Union[str, "_models.ChatRole"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The chat role associated with the message. If present, should always be 'assistant'. Known
      values are: \"system\", \"user\", \"assistant\", \"tool\", and \"developer\"."""
-    content: Optional[str] = rest_field()
+    content: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The content of the message."""
-    tool_calls: Optional[List["_models.StreamingChatResponseToolCallUpdate"]] = rest_field()
+    tool_calls: Optional[List["_models.StreamingChatResponseToolCallUpdate"]] = rest_field(
+        visibility=["read", "create", "update", "delete", "query"]
+    )
     """The tool calls that must be resolved and have their outputs appended to subsequent input
      messages for the chat
      completions request to resolve as configured."""
@@ -1400,9 +1760,9 @@ class StreamingChatResponseToolCallUpdate(_model_base.Model):
     :vartype function: ~azure.ai.inference.models.FunctionCall
     """
 
-    id: str = rest_field()
+    id: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The ID of the tool call. Required."""
-    function: "_models.FunctionCall" = rest_field()
+    function: "_models.FunctionCall" = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """Updates to the function call requested by the AI model. Required."""
 
     @overload
@@ -1434,10 +1794,10 @@ class TextContentItem(ContentItem, discriminator="text"):
     :vartype text: str
     """
 
-    type: Literal["text"] = rest_discriminator(name="type")  # type: ignore
+    type: Literal["text"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
     """The discriminated object type: always 'text' for this type. Required. Default value is
      \"text\"."""
-    text: str = rest_field()
+    text: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The content of the message. Required."""
 
     @overload
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py
index d682662e7b01..bc6d7a73f54f 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py
index f7a0c21d8bb8..2d6400ee1e28 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py
index de3c570e5c89..d1f742a1ffa6 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
index 14ad4f62b4c1..66429f2b2c00 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
index 5ea38bda6229..ad728b806214 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py b/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py
index f7937a99074a..0aeb13853f6e 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py
index b908cea2c5ae..53ccd05053e1 100644
--- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py
+++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py
index 5035bc652d8d..974f921e982c 100644
--- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py
+++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py
index cfc9a4372222..b8f846541cb1 100644
--- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py
+++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py
index 311837814607..b975822f1ed6 100644
--- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py
+++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py
index e39c2adba790..34689ac27b3f 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py
index 0bbdea862ab3..6e2c5c4ca8d1 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py
index f1c44431c523..ad78561bcc3e 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py
index e1ee22f32a9c..78a2315a3384 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py
index a6042089946c..3c7b0c7f8279 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py
index 6083dd8b9ba2..d229672876ce 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py
index 18c3925d0326..c148d8d72f30 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py
index 50b07d63a841..06859cff8beb 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py
index e2cba755e8aa..fa58b961e307 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py
index dfe5fd048b51..dfa1ab3eb739 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py b/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py
index 923e6410565c..41a606719bb3 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py
index c3d3b34a4406..62dd824a0039 100644
--- a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py
+++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py
index eab1fb7418de..9619a2b15cc5 100644
--- a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py
+++ b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
index b347f1d285b1..051a5372cf28 100644
--- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
+++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
index dc13fc1eba4a..89f06d6da0e5 100644
--- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
+++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_client_tracing.py b/sdk/ai/azure-ai-inference/tests/test_client_tracing.py
index 997dd117cae9..8b98a5e356ae 100644
--- a/sdk/ai/azure-ai-inference/tests/test_client_tracing.py
+++ b/sdk/ai/azure-ai-inference/tests/test_client_tracing.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-lines
+# pylint: disable=too-many-lines,line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py b/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py
index 3f1c5ade0057..aff721431109 100644
--- a/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py
+++ b/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py b/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py
index 58b48c143f59..3553e2863b36 100644
--- a/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py
+++ b/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py
index 0168fbeb8c01..8f3d76ce4aab 100644
--- a/sdk/ai/azure-ai-inference/tests/test_prompts.py
+++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tests/test_unit_tests.py b/sdk/ai/azure-ai-inference/tests/test_unit_tests.py
index 14f1f74dcfbe..d572c32deb54 100644
--- a/sdk/ai/azure-ai-inference/tests/test_unit_tests.py
+++ b/sdk/ai/azure-ai-inference/tests/test_unit_tests.py
@@ -1,3 +1,4 @@
+# pylint: disable=line-too-long,useless-suppression
 # ------------------------------------
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
diff --git a/sdk/ai/azure-ai-inference/tsp-location.yaml b/sdk/ai/azure-ai-inference/tsp-location.yaml
index b107d6f6ece8..c79d02351f73 100644
--- a/sdk/ai/azure-ai-inference/tsp-location.yaml
+++ b/sdk/ai/azure-ai-inference/tsp-location.yaml
@@ -1,4 +1,4 @@
 directory: specification/ai/ModelClient
-commit: a7a977a1666ad293769bc17fb80309be390b2ba9
+commit: 91fa01cca22d82bb2823e9238650ebf70e4a83a3
 repo: Azure/azure-rest-api-specs
 additionalDirectories:

From ce8dd0f88e5120bf3bfc71b183a209eb81a93e4e Mon Sep 17 00:00:00 2001
From: Travis Angevine <trangevi@microsoft.com>
Date: Fri, 14 Mar 2025 10:43:43 -0700
Subject: [PATCH 3/4] Changes for typespec branch merge 1 (#39810)

* Regen. Test for audio url handling

Signed-off-by: trangevi <trangevi@microsoft.com>

* Test fixes and sample added

Signed-off-by: trangevi <trangevi@microsoft.com>

* regen and format tool

Signed-off-by: trangevi <trangevi@microsoft.com>

* new test recordings

Signed-off-by: trangevi <trangevi@microsoft.com>

* fix "str" from generation. Update URL

Signed-off-by: trangevi <trangevi@microsoft.com>

* code comment

Signed-off-by: trangevi <trangevi@microsoft.com>

* Update to point back at Azure tsp branch

Signed-off-by: trangevi <trangevi@microsoft.com>

* Changelog

Signed-off-by: trangevi <trangevi@microsoft.com>

* Update test asset pointer after url change

Signed-off-by: trangevi <trangevi@microsoft.com>

---------

Signed-off-by: trangevi <trangevi@microsoft.com>
---
 sdk/ai/azure-ai-inference/CHANGELOG.md        | 11 +++
 sdk/ai/azure-ai-inference/assets.json         |  2 +-
 .../azure/ai/inference/_client.py             |  6 +-
 .../azure/ai/inference/_configuration.py      | 12 +--
 .../azure/ai/inference/aio/_client.py         |  3 -
 .../azure/ai/inference/aio/_configuration.py  |  9 --
 .../azure/ai/inference/models/__init__.py     |  8 +-
 .../azure/ai/inference/models/_models.py      | 80 +++++++++++++++--
 ...sample_chat_completions_with_audio_data.py |  4 +-
 .../sample_chat_completions_with_audio_url.py | 85 +++++++++++++++++++
 .../tests/model_inference_test_base.py        | 39 ++++++++-
 .../tests/test_chat_completions_client.py     | 33 ++++++-
 .../test_chat_completions_client_async.py     | 34 +++++++-
 sdk/ai/azure-ai-inference/tsp-location.yaml   |  2 +-
 14 files changed, 280 insertions(+), 48 deletions(-)
 create mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py

diff --git a/sdk/ai/azure-ai-inference/CHANGELOG.md b/sdk/ai/azure-ai-inference/CHANGELOG.md
index 958fa5d51d29..8115918449d0 100644
--- a/sdk/ai/azure-ai-inference/CHANGELOG.md
+++ b/sdk/ai/azure-ai-inference/CHANGELOG.md
@@ -4,10 +4,21 @@
 
 ### Features Added
 
+* Added support for Chat Completions with audio URL input. This change introduces a split between
+audio files that are passed as base64-encoded data (previously supported) and files that are passed
+by URL reference (new). See new sample `sample_chat_completions_with_audio_url.py`.
+
 ### Bugs Fixed
 
 ### Breaking Changes
 
+* `AudioContentItem` has been renamed to `AudioDataContentItem`
+
+### Other Notable Changes
+
+* `FunctionDefinition.parameters` has been clarified in typing to be of type `Dict[str, Any]`.
+This is not a change in functionality, just a clarification of what was already expected.
+
 ## 1.0.0b9 (2025-02-14)
 
 ### Features Added
diff --git a/sdk/ai/azure-ai-inference/assets.json b/sdk/ai/azure-ai-inference/assets.json
index ceebde8be0cf..2ec0f468b0b0 100644
--- a/sdk/ai/azure-ai-inference/assets.json
+++ b/sdk/ai/azure-ai-inference/assets.json
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/ai/azure-ai-inference",
-  "Tag": "python/ai/azure-ai-inference_3f06cee8a7"
+  "Tag": "python/ai/azure-ai-inference_473838145b"
 }
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py
index 0cde08ffa7cc..1fc2ee38dca8 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py
@@ -39,7 +39,7 @@ class ChatCompletionsClient(ChatCompletionsClientOperationsMixin):
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
+     ~azure.core.credentials.TokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
      behavior.
@@ -117,7 +117,7 @@ class EmbeddingsClient(EmbeddingsClientOperationsMixin):
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
+     ~azure.core.credentials.TokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
      behavior.
@@ -195,7 +195,7 @@ class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin):
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
+     ~azure.core.credentials.TokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
      behavior.
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py
index 894ec657140f..8fc56f572a89 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py
@@ -28,7 +28,7 @@ class ChatCompletionsClientConfiguration:  # pylint: disable=too-many-instance-a
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
+     ~azure.core.credentials.TokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
      behavior.
@@ -54,8 +54,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
@@ -85,7 +83,7 @@ class EmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attrib
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
+     ~azure.core.credentials.TokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
      behavior.
@@ -111,8 +109,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
@@ -142,7 +138,7 @@ class ImageEmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-a
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential
+     ~azure.core.credentials.TokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
      behavior.
@@ -168,8 +164,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py
index 88e6773bd8f1..212904c011cf 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py
@@ -39,7 +39,6 @@ class ChatCompletionsClient(ChatCompletionsClientOperationsMixin):
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -122,7 +121,6 @@ class EmbeddingsClient(EmbeddingsClientOperationsMixin):
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -205,7 +203,6 @@ class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin):
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py
index f60e112599d6..4fbe724f0326 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py
@@ -28,7 +28,6 @@ class ChatCompletionsClientConfiguration:  # pylint: disable=too-many-instance-a
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -57,8 +56,6 @@ def __init__(
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
@@ -88,7 +85,6 @@ class EmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attrib
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -117,8 +113,6 @@ def __init__(
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
@@ -148,7 +142,6 @@ class ImageEmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-a
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -177,8 +170,6 @@ def __init__(
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py
index 66e625705c58..5dfafa1a420d 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py
@@ -14,7 +14,8 @@
 
 
 from ._models import (  # type: ignore
-    AudioContentItem,
+    AudioDataContentItem,
+    AudioUrlContentItem,
     ChatChoice,
     ChatCompletions,
     ChatCompletionsNamedToolChoice,
@@ -33,6 +34,7 @@
     ImageEmbeddingInput,
     ImageUrl,
     InputAudio,
+    InputAudioUrl,
     JsonSchemaFormat,
     ModelInfo,
     StreamingChatChoiceUpdate,
@@ -57,7 +59,8 @@
 from ._patch import patch_sdk as _patch_sdk
 
 __all__ = [
-    "AudioContentItem",
+    "AudioDataContentItem",
+    "AudioUrlContentItem",
     "ChatChoice",
     "ChatCompletions",
     "ChatCompletionsNamedToolChoice",
@@ -76,6 +79,7 @@
     "ImageEmbeddingInput",
     "ImageUrl",
     "InputAudio",
+    "InputAudioUrl",
     "JsonSchemaFormat",
     "ModelInfo",
     "StreamingChatChoiceUpdate",
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
index 85598618489a..de1e78d089c9 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
@@ -23,7 +23,7 @@ class ContentItem(_model_base.Model):
     """An abstract representation of a structured content item within a chat message.
 
     You probably want to use the sub-classes and not this class directly. Known sub-classes are:
-    ImageContentItem, AudioContentItem, TextContentItem
+    AudioUrlContentItem, ImageContentItem, AudioDataContentItem, TextContentItem
 
     :ivar type: The discriminated object type. Required. Default value is None.
     :vartype type: str
@@ -51,13 +51,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
 
-class AudioContentItem(ContentItem, discriminator="input_audio"):
-    """A structured chat content item containing an audio content.
+class AudioDataContentItem(ContentItem, discriminator="input_audio"):
+    """A structured chat content item for audio content passed as base64 encoded data.
 
     :ivar type: The discriminated object type: always 'input_audio' for this type. Required.
      Default value is "input_audio".
     :vartype type: str
-    :ivar input_audio: The details of the input audio. Required.
+    :ivar input_audio: The details of the input audio data. Required.
     :vartype input_audio: ~azure.ai.inference.models.InputAudio
     """
 
@@ -65,7 +65,7 @@ class AudioContentItem(ContentItem, discriminator="input_audio"):
     """The discriminated object type: always 'input_audio' for this type. Required. Default value is
      \"input_audio\"."""
     input_audio: "_models.InputAudio" = rest_field(visibility=["read", "create", "update", "delete", "query"])
-    """The details of the input audio. Required."""
+    """The details of the input audio data. Required."""
 
     @overload
     def __init__(
@@ -85,6 +85,40 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, type="input_audio", **kwargs)
 
 
+class AudioUrlContentItem(ContentItem, discriminator="audio_url"):
+    """A structured chat content item for audio content passed as a url.
+
+    :ivar type: The discriminated object type: always 'audio_url' for this type. Required. Default
+     value is "audio_url".
+    :vartype type: str
+    :ivar audio_url: The details of the audio url. Required.
+    :vartype audio_url: ~azure.ai.inference.models.InputAudioUrl
+    """
+
+    type: Literal["audio_url"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"])  # type: ignore
+    """The discriminated object type: always 'audio_url' for this type. Required. Default value is
+     \"audio_url\"."""
+    audio_url: "_models.InputAudioUrl" = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The details of the audio url. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        audio_url: "_models.InputAudioUrl",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="audio_url", **kwargs)
+
+
 class ChatChoice(_model_base.Model):
     """The representation of a single prompt completion as part of an overall chat completions
     request.
@@ -1230,7 +1264,7 @@ class FunctionDefinition(_model_base.Model):
      interpreting its parameters.
     :vartype description: str
     :ivar parameters: The parameters the function accepts, described as a JSON Schema object.
-    :vartype parameters: any
+    :vartype parameters: dict[str, any]
     """
 
     name: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
@@ -1239,7 +1273,7 @@ class FunctionDefinition(_model_base.Model):
     """A description of what the function does. The model will use this description when selecting the
      function and
      interpreting its parameters."""
-    parameters: Optional[Any] = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    parameters: Optional[Dict[str, Any]] = rest_field(visibility=["read", "create", "update", "delete", "query"])
     """The parameters the function accepts, described as a JSON Schema object."""
 
     @overload
@@ -1248,7 +1282,7 @@ def __init__(
         *,
         name: str,
         description: Optional[str] = None,
-        parameters: Optional[Any] = None,
+        parameters: Optional[Dict[str, Any]] = None,
     ) -> None: ...
 
     @overload
@@ -1446,7 +1480,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
 
 
 class InputAudio(_model_base.Model):
-    """The details of an audio chat message content part.
+    """The details of the input audio data.
 
     :ivar data: Base64 encoded audio data. Required.
     :vartype data: str
@@ -1481,6 +1515,34 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
 
 
+class InputAudioUrl(_model_base.Model):
+    """The details of the audio url.
+
+    :ivar url: The URL of the audio content. Required.
+    :vartype url: str
+    """
+
+    url: str = rest_field(visibility=["read", "create", "update", "delete", "query"])
+    """The URL of the audio content. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        url: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
 class JsonSchemaFormat(_model_base.Model):
     """Defines the response format for chat completions as JSON with a given schema.
     The AI model will need to adhere to this schema when generating completions.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py
index 09c3c07a38f5..f791e55c8585 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py
@@ -37,7 +37,7 @@ def sample_chat_completions_with_audio_data():
         SystemMessage,
         UserMessage,
         TextContentItem,
-        AudioContentItem,
+        AudioDataContentItem,
         InputAudio,
         AudioContentFormat,
     )
@@ -69,7 +69,7 @@ def sample_chat_completions_with_audio_data():
             UserMessage(
                 [
                     TextContentItem(text="Please translate this audio snippet to spanish."),
-                    AudioContentItem(
+                    AudioDataContentItem(
                         input_audio=InputAudio.load(
                             audio_file="hello_how_are_you.mp3", audio_format=AudioContentFormat.MP3
                         )
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py
new file mode 100644
index 000000000000..ceae9095675e
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py
@@ -0,0 +1,85 @@
+# pylint: disable=line-too-long,useless-suppression
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""
+DESCRIPTION:
+    This sample demonstrates how to get a chat completions response from
+    the service using a synchronous client. The sample shows how to use a
+    url pointer to an audio file in the input chat messages.
+    This sample will only work on AI models that support audio input.
+    Only these AI models accept the array form of `content` in the
+    `UserMessage`, as shown here.
+
+    This sample assumes the AI model is hosted on a Serverless API or
+    Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints,
+    the client constructor needs to be modified. See package documentation:
+    https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts
+
+USAGE:
+    python sample_chat_completions_with_audio_url.py
+
+    Set these two or three environment variables before running the sample:
+    1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form
+        https://<your-deployment-name>.<your-azure-region>.models.ai.azure.com
+        where `your-deployment-name` is your unique AI Model deployment name, and
+        `your-azure-region` is the Azure region where your model is deployed.
+    2) AZURE_AI_CHAT_KEY - Your model key. Keep it secret.
+    3) AZURE_AI_CHAT_DEPLOYMENT_NAME - Optional. The value for the HTTP
+        request header `azureml-model-deployment`.
+"""
+
+
+def sample_chat_completions_with_audio_url():
+    import os
+    from azure.ai.inference import ChatCompletionsClient
+    from azure.ai.inference.models import (
+        SystemMessage,
+        UserMessage,
+        TextContentItem,
+        AudioUrlContentItem,
+        InputAudioUrl,
+    )
+    from azure.core.credentials import AzureKeyCredential
+
+    try:
+        endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"]
+        key = os.environ["AZURE_AI_CHAT_KEY"]
+    except KeyError:
+        print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'")
+        print("Set them before running this sample.")
+        exit()
+
+    try:
+        model_deployment = os.environ["AZURE_AI_CHAT_DEPLOYMENT_NAME"]
+    except KeyError:
+        print("Could not read optional environment variable `AZURE_AI_CHAT_DEPLOYMENT_NAME`.")
+        print("No specific model target will not be set.")
+        model_deployment = None
+
+    audio_url = "https://github.com/Azure/azure-sdk-for-python/raw/refs/heads/main/sdk/ai/azure-ai-inference/samples/hello_how_are_you.mp3"
+
+    client = ChatCompletionsClient(
+        endpoint=endpoint,
+        credential=AzureKeyCredential(key),
+    )
+
+    response = client.complete(
+        messages=[
+            SystemMessage("You are an AI assistant for translating and transcribing audio clips."),
+            UserMessage(
+                [
+                    TextContentItem(text="Please translate this audio snippet to spanish."),
+                    AudioUrlContentItem(audio_url=InputAudioUrl(url=audio_url)),
+                ],
+            ),
+        ],
+        model=model_deployment,
+    )
+
+    print(response.choices[0].message.content)
+
+
+if __name__ == "__main__":
+    sample_chat_completions_with_audio_url()
diff --git a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py
index 9619a2b15cc5..d16ad99b5d23 100644
--- a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py
+++ b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py
@@ -92,6 +92,13 @@
     azure_ai_image_embeddings_key="00000000000000000000000000000000",
 )
 
+ServicePreparerChatCompletionsWithAudio = functools.partial(
+    EnvironmentVariableLoader,
+    "azure_ai_chat_audio",
+    azure_ai_chat_audio_endpoint="https://your-deployment-name.eastus2.models.ai.azure.com",
+    azure_ai_chat_audio_key="00000000000000000000000000000000",
+)
+
 
 # The test class name needs to start with "Test" to get collected by pytest
 class ModelClientTestBase(AzureRecordedTestCase):
@@ -266,6 +273,12 @@ def _load_image_embeddings_credentials_entra_id(self, is_async: bool = False, **
         credential = self.get_credential(sdk.ImageEmbeddingsClient, is_async=is_async)
         return endpoint, credential
 
+    def _load_phi_audio_credentials(self, bad_key: bool, **kwargs):
+        endpoint = kwargs.pop("azure_ai_chat_audio_endpoint")
+        key = "00000000000000000000000000000000" if bad_key else kwargs.pop("azure_ai_chat_audio_key")
+        credential = AzureKeyCredential(key)
+        return endpoint, credential
+
     # **********************************************************************************
     #
     #     HELPER METHODS TO CREATE CLIENTS USING THE SDK's load_client() FUNCTION
@@ -444,6 +457,30 @@ def _create_embeddings_client_with_chat_completions_credentials(self, **kwargs)
         credential = AzureKeyCredential(key)
         return sdk.EmbeddingsClient(endpoint=endpoint, credential=credential, logging_enable=LOGGING_ENABLED)
 
+    def _create_phi_audio_chat_client(self, *, bad_key: bool = False, **kwargs) -> sdk.ChatCompletionsClient:
+        (
+            endpoint,
+            credential,
+        ) = self._load_phi_audio_credentials(bad_key=bad_key, **kwargs)
+        return sdk.ChatCompletionsClient(
+            endpoint=endpoint,
+            credential=credential,
+            logging_enable=LOGGING_ENABLED,
+        )
+
+    def _create_async_phi_audio_chat_client(
+        self, *, bad_key: bool = False, **kwargs
+    ) -> async_sdk.ChatCompletionsClient:
+        (
+            endpoint,
+            credential,
+        ) = self._load_phi_audio_credentials(bad_key=bad_key, **kwargs)
+        return async_sdk.ChatCompletionsClient(
+            endpoint=endpoint,
+            credential=credential,
+            logging_enable=LOGGING_ENABLED,
+        )
+
     # **********************************************************************************
     #
     #             HELPER METHODS TO VALIDATE TEST RESULTS
@@ -532,7 +569,7 @@ def _validate_chat_completions_result(
         if is_aoai:
             assert bool(ModelClientTestBase.REGEX_AOAI_RESULT_ID.match(response.id))
         else:
-            assert bool(ModelClientTestBase.REGEX_RESULT_ID.match(response.id))
+            assert response.id
         assert response.created is not None
         assert response.created != ""
         assert response.model is not None
diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
index 051a5372cf28..f796ead90b57 100644
--- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
+++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
@@ -11,6 +11,7 @@
     ModelClientTestBase,
     ServicePreparerChatCompletions,
     ServicePreparerAOAIChatCompletions,
+    ServicePreparerChatCompletionsWithAudio,
 )
 
 from devtools_testutils import recorded_by_proxy
@@ -559,11 +560,9 @@ def test_aoai_chat_completions_with_structured_output(self, **kwargs):
         )
         client.close()
 
-    # We use AOAI endpoint here because at the moment there is no MaaS model that supports
-    # input audio.
     @ServicePreparerAOAIChatCompletions()
     @recorded_by_proxy
-    def test_chat_completions_with_audio_input(self, **kwargs):
+    def test_chat_completions_with_audio_data_input(self, **kwargs):
         client = self._create_aoai_audio_chat_client(**kwargs)
 
         # Construct the full path to the image file
@@ -578,7 +577,7 @@ def test_chat_completions_with_audio_input(self, **kwargs):
                 sdk.models.UserMessage(
                     content=[
                         sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
-                        sdk.models.AudioContentItem(
+                        sdk.models.AudioDataContentItem(
                             input_audio=sdk.models.InputAudio.load(
                                 audio_file=audio_file_path, audio_format=sdk.models.AudioContentFormat.MP3
                             )
@@ -591,6 +590,32 @@ def test_chat_completions_with_audio_input(self, **kwargs):
         self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=True)
         client.close()
 
+    @ServicePreparerChatCompletionsWithAudio()
+    @recorded_by_proxy
+    def test_chat_completions_with_audio_url_input(self, **kwargs):
+        client = self._create_phi_audio_chat_client(**kwargs)
+
+        # The audio clip is passed by URL, so no local file path is needed (script_dir is unused)
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        audio_url = "https://github.com/Azure/azure-sdk-for-python/raw/refs/heads/main/sdk/ai/azure-ai-inference/samples/hello_how_are_you.mp3"
+
+        response = client.complete(
+            messages=[
+                sdk.models.SystemMessage(
+                    content="You are an AI assistant for translating and transcribing audio clips."
+                ),
+                sdk.models.UserMessage(
+                    content=[
+                        sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
+                        sdk.models.AudioUrlContentItem(audio_url=sdk.models.InputAudioUrl(url=audio_url)),
+                    ],
+                ),
+            ],
+        )
+        self._print_chat_completions_result(response)
+        self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=False)
+        client.close()
+
     # **********************************************************************************
     #
     #                            ERROR TESTS - CHAT COMPLETIONS
diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
index 89f06d6da0e5..25be9b738c6b 100644
--- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
+++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
@@ -12,6 +12,7 @@
     ModelClientTestBase,
     ServicePreparerChatCompletions,
     ServicePreparerAOAIChatCompletions,
+    ServicePreparerChatCompletionsWithAudio,
 )
 
 from devtools_testutils.aio import recorded_by_proxy_async
@@ -515,11 +516,9 @@ async def test_async_aoai_chat_completions_with_structured_output(self, **kwargs
         )
         await client.close()
 
-    # We use AOAI endpoint here because at the moment there is no MaaS model that supports
-    # input audio.
     @ServicePreparerAOAIChatCompletions()
     @recorded_by_proxy_async
-    async def test_chat_completions_with_audio_input(self, **kwargs):
+    async def test_chat_completions_with_audio_data_input(self, **kwargs):
         client = self._create_async_aoai_audio_chat_client(**kwargs)
 
         # Construct the full path to the image file
@@ -534,7 +533,7 @@ async def test_chat_completions_with_audio_input(self, **kwargs):
                 sdk.models.UserMessage(
                     content=[
                         sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
-                        sdk.models.AudioContentItem(
+                        sdk.models.AudioDataContentItem(
                             input_audio=sdk.models.InputAudio.load(
                                 audio_file=audio_file_path, audio_format=sdk.models.AudioContentFormat.MP3
                             )
@@ -546,3 +545,30 @@ async def test_chat_completions_with_audio_input(self, **kwargs):
         self._print_chat_completions_result(response)
         self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=True)
         await client.close()
+
+    @ServicePreparerChatCompletionsWithAudio()
+    @recorded_by_proxy_async
+    async def test_chat_completions_with_audio_url_input(self, **kwargs):
+        client = self._create_async_phi_audio_chat_client(**kwargs)
+
+        # The audio clip is passed by URL, so no local file path is needed (script_dir is unused)
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        audio_url = "https://github.com/Azure/azure-sdk-for-python/raw/refs/heads/main/sdk/ai/azure-ai-inference/samples/hello_how_are_you.mp3"
+
+        response = await client.complete(
+            messages=[
+                sdk.models.SystemMessage(
+                    content="You are an AI assistant for translating and transcribing audio clips."
+                ),
+                sdk.models.UserMessage(
+                    content=[
+                        sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
+                        sdk.models.AudioUrlContentItem(audio_url=sdk.models.InputAudioUrl(url=audio_url)),
+                    ],
+                ),
+            ],
+            model="phi-4-multimodal-instruct-1",
+        )
+        self._print_chat_completions_result(response)
+        self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=False)
+        await client.close()
diff --git a/sdk/ai/azure-ai-inference/tsp-location.yaml b/sdk/ai/azure-ai-inference/tsp-location.yaml
index c79d02351f73..bc1ef4b901e7 100644
--- a/sdk/ai/azure-ai-inference/tsp-location.yaml
+++ b/sdk/ai/azure-ai-inference/tsp-location.yaml
@@ -1,4 +1,4 @@
 directory: specification/ai/ModelClient
-commit: 91fa01cca22d82bb2823e9238650ebf70e4a83a3
+commit: d61efb1bdfdf159ec15e4144f41e2828513bcfa4
 repo: Azure/azure-rest-api-specs
 additionalDirectories:

From c07b4e2813ae928e104cb2439211951398887c34 Mon Sep 17 00:00:00 2001
From: David Wu <YusakuNo1@users.noreply.github.com>
Date: Thu, 1 May 2025 18:43:32 -0700
Subject: [PATCH 4/4] Prompty sample code (#40849)

* Prompty sample code

* Remove Prompty related unit tests

* Update change log and README
---
 sdk/ai/azure-ai-inference/CHANGELOG.md        |   1 +
 .../azure/ai/inference/prompts/__init__.py    |   8 -
 .../azure/ai/inference/prompts/_core.py       | 312 --------
 .../azure/ai/inference/prompts/_invoker.py    | 296 --------
 .../azure/ai/inference/prompts/_mustache.py   | 672 ------------------
 .../azure/ai/inference/prompts/_parsers.py    | 157 ----
 .../azure/ai/inference/prompts/_patch.py      | 125 ----
 .../ai/inference/prompts/_prompty_utils.py    | 416 -----------
 .../azure/ai/inference/prompts/_renderers.py  |  30 -
 .../azure/ai/inference/prompts/_tracer.py     | 316 --------
 .../azure/ai/inference/prompts/_utils.py      | 100 ---
 sdk/ai/azure-ai-inference/samples/README.md   |   3 +-
 .../samples/sample1.prompty                   |   1 +
 ...at_completions_from_input_prompt_string.py |  82 ---
 ...ple_chat_completions_with_prompty_file.py} |  36 +-
 .../azure-ai-inference/tests/sample1.prompty  |  30 -
 .../tests/sample1_with_secrets.prompty        |  34 -
 .../azure-ai-inference/tests/test_prompts.py  | 104 ---
 .../tests/test_prompts_utils.py               |  26 -
 19 files changed, 27 insertions(+), 2722 deletions(-)
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py
 delete mode 100644 sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py
 delete mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py
 rename sdk/ai/azure-ai-inference/samples/{sample_chat_completions_from_input_prompty.py => sample_chat_completions_with_prompty_file.py} (66%)
 delete mode 100644 sdk/ai/azure-ai-inference/tests/sample1.prompty
 delete mode 100644 sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty
 delete mode 100644 sdk/ai/azure-ai-inference/tests/test_prompts.py
 delete mode 100644 sdk/ai/azure-ai-inference/tests/test_prompts_utils.py

diff --git a/sdk/ai/azure-ai-inference/CHANGELOG.md b/sdk/ai/azure-ai-inference/CHANGELOG.md
index 2a390e77d0fc..f23b8539e097 100644
--- a/sdk/ai/azure-ai-inference/CHANGELOG.md
+++ b/sdk/ai/azure-ai-inference/CHANGELOG.md
@@ -13,6 +13,7 @@ by url reference (new). See new sample `sample_chat_completions_with_audio_url.p
 ### Breaking Changes
 
 * `AudioContentItem` has been renamed to `AudioDataContentItem`
+* Removed the `PromptTemplate` class (the `azure.ai.inference.prompts` module). Use the native Prompty APIs instead; see sample `sample_chat_completions_with_prompty_file.py`
 
 ### Other Noteable Changes
 
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py
deleted file mode 100644
index 2e11b31cb6a4..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# pylint: disable=unused-import
-from ._patch import patch_sdk as _patch_sdk, PromptTemplate
-
-_patch_sdk()
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py
deleted file mode 100644
index ec6702995149..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="assignment,attr-defined,index,arg-type"
-# pylint: disable=line-too-long,R,consider-iterating-dictionary,raise-missing-from,dangerous-default-value
-from __future__ import annotations
-import os
-from dataclasses import dataclass, field, asdict
-from pathlib import Path
-from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Union
-from ._tracer import Tracer, to_dict
-from ._utils import load_json
-
-
-@dataclass
-class ToolCall:
-    id: str
-    name: str
-    arguments: str
-
-
-@dataclass
-class PropertySettings:
-    """PropertySettings class to define the properties of the model
-
-    Attributes
-    ----------
-    type : str
-        The type of the property
-    default : Any
-        The default value of the property
-    description : str
-        The description of the property
-    """
-
-    type: Literal["string", "number", "array", "object", "boolean"]
-    default: Union[str, int, float, List, Dict, bool, None] = field(default=None)
-    description: str = field(default="")
-
-
-@dataclass
-class ModelSettings:
-    """ModelSettings class to define the model of the prompty
-
-    Attributes
-    ----------
-    api : str
-        The api of the model
-    configuration : Dict
-        The configuration of the model
-    parameters : Dict
-        The parameters of the model
-    response : Dict
-        The response of the model
-    """
-
-    api: str = field(default="")
-    configuration: Dict = field(default_factory=dict)
-    parameters: Dict = field(default_factory=dict)
-    response: Dict = field(default_factory=dict)
-
-
-@dataclass
-class TemplateSettings:
-    """TemplateSettings class to define the template of the prompty
-
-    Attributes
-    ----------
-    type : str
-        The type of the template
-    parser : str
-        The parser of the template
-    """
-
-    type: str = field(default="mustache")
-    parser: str = field(default="")
-
-
-@dataclass
-class Prompty:
-    """Prompty class to define the prompty
-
-    Attributes
-    ----------
-    name : str
-        The name of the prompty
-    description : str
-        The description of the prompty
-    authors : List[str]
-        The authors of the prompty
-    tags : List[str]
-        The tags of the prompty
-    version : str
-        The version of the prompty
-    base : str
-        The base of the prompty
-    basePrompty : Prompty
-        The base prompty
-    model : ModelSettings
-        The model of the prompty
-    sample : Dict
-        The sample of the prompty
-    inputs : Dict[str, PropertySettings]
-        The inputs of the prompty
-    outputs : Dict[str, PropertySettings]
-        The outputs of the prompty
-    template : TemplateSettings
-        The template of the prompty
-    file : FilePath
-        The file of the prompty
-    content : Union[str, List[str], Dict]
-        The content of the prompty
-    """
-
-    # metadata
-    name: str = field(default="")
-    description: str = field(default="")
-    authors: List[str] = field(default_factory=list)
-    tags: List[str] = field(default_factory=list)
-    version: str = field(default="")
-    base: str = field(default="")
-    basePrompty: Union[Prompty, None] = field(default=None)
-    # model
-    model: ModelSettings = field(default_factory=ModelSettings)
-
-    # sample
-    sample: Dict = field(default_factory=dict)
-
-    # input / output
-    inputs: Dict[str, PropertySettings] = field(default_factory=dict)
-    outputs: Dict[str, PropertySettings] = field(default_factory=dict)
-
-    # template
-    template: TemplateSettings = field(default_factory=TemplateSettings)
-
-    file: Union[Path, str] = field(default="")
-    content: Union[str, List[str], Dict] = field(default="")
-
-    def to_safe_dict(self) -> Dict[str, Any]:
-        d = {}
-        if self.model:
-            d["model"] = asdict(self.model)
-            _mask_secrets(d, ["model", "configuration"])
-        if self.template:
-            d["template"] = asdict(self.template)
-        if self.inputs:
-            d["inputs"] = {k: asdict(v) for k, v in self.inputs.items()}
-        if self.outputs:
-            d["outputs"] = {k: asdict(v) for k, v in self.outputs.items()}
-        if self.file:
-            d["file"] = str(self.file.as_posix()) if isinstance(self.file, Path) else self.file
-        return d
-
-    @staticmethod
-    def hoist_base_prompty(top: Prompty, base: Prompty) -> Prompty:
-        top.name = base.name if top.name == "" else top.name
-        top.description = base.description if top.description == "" else top.description
-        top.authors = list(set(base.authors + top.authors))
-        top.tags = list(set(base.tags + top.tags))
-        top.version = base.version if top.version == "" else top.version
-
-        top.model.api = base.model.api if top.model.api == "" else top.model.api
-        top.model.configuration = param_hoisting(top.model.configuration, base.model.configuration)
-        top.model.parameters = param_hoisting(top.model.parameters, base.model.parameters)
-        top.model.response = param_hoisting(top.model.response, base.model.response)
-
-        top.sample = param_hoisting(top.sample, base.sample)
-
-        top.basePrompty = base
-
-        return top
-
-    @staticmethod
-    def _process_file(file: str, parent: Path) -> Any:
-        file_path = Path(parent / Path(file)).resolve().absolute()
-        if file_path.exists():
-            items = load_json(file_path)
-            if isinstance(items, list):
-                return [Prompty.normalize(value, parent) for value in items]
-            elif isinstance(items, Dict):
-                return {key: Prompty.normalize(value, parent) for key, value in items.items()}
-            else:
-                return items
-        else:
-            raise FileNotFoundError(f"File {file} not found")
-
-    @staticmethod
-    def _process_env(variable: str, env_error=True, default: Union[str, None] = None) -> Any:
-        if variable in os.environ.keys():
-            return os.environ[variable]
-        else:
-            if default:
-                return default
-            if env_error:
-                raise ValueError(f"Variable {variable} not found in environment")
-
-            return ""
-
-    @staticmethod
-    def normalize(attribute: Any, parent: Path, env_error=True) -> Any:
-        if isinstance(attribute, str):
-            attribute = attribute.strip()
-            if attribute.startswith("${") and attribute.endswith("}"):
-                # check if env or file
-                variable = attribute[2:-1].split(":")
-                if variable[0] == "env" and len(variable) > 1:
-                    return Prompty._process_env(
-                        variable[1],
-                        env_error,
-                        variable[2] if len(variable) > 2 else None,
-                    )
-                elif variable[0] == "file" and len(variable) > 1:
-                    return Prompty._process_file(variable[1], parent)
-                else:
-                    raise ValueError(f"Invalid attribute format ({attribute})")
-            else:
-                return attribute
-        elif isinstance(attribute, list):
-            return [Prompty.normalize(value, parent) for value in attribute]
-        elif isinstance(attribute, Dict):
-            return {key: Prompty.normalize(value, parent) for key, value in attribute.items()}
-        else:
-            return attribute
-
-
-def param_hoisting(top: Dict[str, Any], bottom: Dict[str, Any], top_key: Union[str, None] = None) -> Dict[str, Any]:
-    if top_key:
-        new_dict = {**top[top_key]} if top_key in top else {}
-    else:
-        new_dict = {**top}
-    for key, value in bottom.items():
-        if not key in new_dict:
-            new_dict[key] = value
-    return new_dict
-
-
-class PromptyStream(Iterator):
-    """PromptyStream class to iterate over LLM stream.
-    Necessary for Prompty to handle streaming data when tracing."""
-
-    def __init__(self, name: str, iterator: Iterator):
-        self.name = name
-        self.iterator = iterator
-        self.items: List[Any] = []
-        self.__name__ = "PromptyStream"
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        try:
-            # enumerate but add to list
-            o = self.iterator.__next__()
-            self.items.append(o)
-            return o
-
-        except StopIteration:
-            # StopIteration is raised
-            # contents are exhausted
-            if len(self.items) > 0:
-                with Tracer.start("PromptyStream") as trace:
-                    trace("signature", f"{self.name}.PromptyStream")
-                    trace("inputs", "None")
-                    trace("result", [to_dict(s) for s in self.items])
-
-            raise StopIteration
-
-
-class AsyncPromptyStream(AsyncIterator):
-    """AsyncPromptyStream class to iterate over LLM stream.
-    Necessary for Prompty to handle streaming data when tracing."""
-
-    def __init__(self, name: str, iterator: AsyncIterator):
-        self.name = name
-        self.iterator = iterator
-        self.items: List[Any] = []
-        self.__name__ = "AsyncPromptyStream"
-
-    def __aiter__(self):
-        return self
-
-    async def __anext__(self):
-        try:
-            # enumerate but add to list
-            o = await self.iterator.__anext__()
-            self.items.append(o)
-            return o
-
-        except StopAsyncIteration:
-            # StopIteration is raised
-            # contents are exhausted
-            if len(self.items) > 0:
-                with Tracer.start("AsyncPromptyStream") as trace:
-                    trace("signature", f"{self.name}.AsyncPromptyStream")
-                    trace("inputs", "None")
-                    trace("result", [to_dict(s) for s in self.items])
-
-            raise StopAsyncIteration
-
-
-def _mask_secrets(d: Dict[str, Any], path: list[str], patterns: list[str] = ["key", "secret"]) -> bool:
-    sub_d = d
-    for key in path:
-        if key not in sub_d:
-            return False
-        sub_d = sub_d[key]
-
-    for k, v in sub_d.items():
-        if any([pattern in k.lower() for pattern in patterns]):
-            sub_d[k] = "*" * len(v)
-    return True
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py
deleted file mode 100644
index bc6d7a73f54f..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="return-value,operator"
-# pylint: disable=line-too-long,R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,unnecessary-pass
-import abc
-from typing import Any, Callable, Dict, Literal
-from ._tracer import trace
-from ._core import Prompty
-
-
-class Invoker(abc.ABC):
-    """Abstract class for Invoker
-
-    Attributes
-    ----------
-    prompty : Prompty
-        The prompty object
-    name : str
-        The name of the invoker
-
-    """
-
-    def __init__(self, prompty: Prompty) -> None:
-        self.prompty = prompty
-        self.name = self.__class__.__name__
-
-    @abc.abstractmethod
-    def invoke(self, data: Any) -> Any:
-        """Abstract method to invoke the invoker
-
-        Parameters
-        ----------
-        data : Any
-            The data to be invoked
-
-        Returns
-        -------
-        Any
-            The invoked
-        """
-        pass
-
-    @abc.abstractmethod
-    async def invoke_async(self, data: Any) -> Any:
-        """Abstract method to invoke the invoker asynchronously
-
-        Parameters
-        ----------
-        data : Any
-            The data to be invoked
-
-        Returns
-        -------
-        Any
-            The invoked
-        """
-        pass
-
-    @trace
-    def run(self, data: Any) -> Any:
-        """Method to run the invoker
-
-        Parameters
-        ----------
-        data : Any
-            The data to be invoked
-
-        Returns
-        -------
-        Any
-            The invoked
-        """
-        return self.invoke(data)
-
-    @trace
-    async def run_async(self, data: Any) -> Any:
-        """Method to run the invoker asynchronously
-
-        Parameters
-        ----------
-        data : Any
-            The data to be invoked
-
-        Returns
-        -------
-        Any
-            The invoked
-        """
-        return await self.invoke_async(data)
-
-
-class InvokerFactory:
-    """Factory class for Invoker"""
-
-    _renderers: Dict[str, Invoker] = {}
-    _parsers: Dict[str, Invoker] = {}
-    _executors: Dict[str, Invoker] = {}
-    _processors: Dict[str, Invoker] = {}
-
-    @classmethod
-    def add_renderer(cls, name: str, invoker: Invoker) -> None:
-        cls._renderers[name] = invoker
-
-    @classmethod
-    def add_parser(cls, name: str, invoker: Invoker) -> None:
-        cls._parsers[name] = invoker
-
-    @classmethod
-    def add_executor(cls, name: str, invoker: Invoker) -> None:
-        cls._executors[name] = invoker
-
-    @classmethod
-    def add_processor(cls, name: str, invoker: Invoker) -> None:
-        cls._processors[name] = invoker
-
-    @classmethod
-    def register_renderer(cls, name: str) -> Callable:
-        def inner_wrapper(wrapped_class: Invoker) -> Callable:
-            cls._renderers[name] = wrapped_class
-            return wrapped_class  # type: ignore
-
-        return inner_wrapper
-
-    @classmethod
-    def register_parser(cls, name: str) -> Callable:
-        def inner_wrapper(wrapped_class: Invoker) -> Callable:
-            cls._parsers[name] = wrapped_class
-            return wrapped_class  # type: ignore
-
-        return inner_wrapper
-
-    @classmethod
-    def register_executor(cls, name: str) -> Callable:
-        def inner_wrapper(wrapped_class: Invoker) -> Callable:
-            cls._executors[name] = wrapped_class
-            return wrapped_class  # type: ignore
-
-        return inner_wrapper
-
-    @classmethod
-    def register_processor(cls, name: str) -> Callable:
-        def inner_wrapper(wrapped_class: Invoker) -> Callable:
-            cls._processors[name] = wrapped_class
-            return wrapped_class  # type: ignore
-
-        return inner_wrapper
-
-    @classmethod
-    def _get_name(
-        cls,
-        type: Literal["renderer", "parser", "executor", "processor"],
-        prompty: Prompty,
-    ) -> str:
-        if type == "renderer":
-            return prompty.template.type
-        elif type == "parser":
-            return f"{prompty.template.parser}.{prompty.model.api}"
-        elif type == "executor":
-            return prompty.model.configuration["type"]
-        elif type == "processor":
-            return prompty.model.configuration["type"]
-        else:
-            raise ValueError(f"Type {type} not found")
-
-    @classmethod
-    def _get_invoker(
-        cls,
-        type: Literal["renderer", "parser", "executor", "processor"],
-        prompty: Prompty,
-    ) -> Invoker:
-        if type == "renderer":
-            name = prompty.template.type
-            if name not in cls._renderers:
-                raise ValueError(f"Renderer {name} not found")
-
-            return cls._renderers[name](prompty)  # type: ignore
-
-        elif type == "parser":
-            name = f"{prompty.template.parser}.{prompty.model.api}"
-            if name not in cls._parsers:
-                raise ValueError(f"Parser {name} not found")
-
-            return cls._parsers[name](prompty)  # type: ignore
-
-        elif type == "executor":
-            name = prompty.model.configuration["type"]
-            if name not in cls._executors:
-                raise ValueError(f"Executor {name} not found")
-
-            return cls._executors[name](prompty)  # type: ignore
-
-        elif type == "processor":
-            name = prompty.model.configuration["type"]
-            if name not in cls._processors:
-                raise ValueError(f"Processor {name} not found")
-
-            return cls._processors[name](prompty)  # type: ignore
-
-        else:
-            raise ValueError(f"Type {type} not found")
-
-    @classmethod
-    def run(
-        cls,
-        type: Literal["renderer", "parser", "executor", "processor"],
-        prompty: Prompty,
-        data: Any,
-        default: Any = None,
-    ):
-        name = cls._get_name(type, prompty)
-        if name.startswith("NOOP") and default is not None:
-            return default
-        elif name.startswith("NOOP"):
-            return data
-
-        invoker = cls._get_invoker(type, prompty)
-        value = invoker.run(data)
-        return value
-
-    @classmethod
-    async def run_async(
-        cls,
-        type: Literal["renderer", "parser", "executor", "processor"],
-        prompty: Prompty,
-        data: Any,
-        default: Any = None,
-    ):
-        name = cls._get_name(type, prompty)
-        if name.startswith("NOOP") and default is not None:
-            return default
-        elif name.startswith("NOOP"):
-            return data
-        invoker = cls._get_invoker(type, prompty)
-        value = await invoker.run_async(data)
-        return value
-
-    @classmethod
-    def run_renderer(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return cls.run("renderer", prompty, data, default)
-
-    @classmethod
-    async def run_renderer_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return await cls.run_async("renderer", prompty, data, default)
-
-    @classmethod
-    def run_parser(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return cls.run("parser", prompty, data, default)
-
-    @classmethod
-    async def run_parser_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return await cls.run_async("parser", prompty, data, default)
-
-    @classmethod
-    def run_executor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return cls.run("executor", prompty, data, default)
-
-    @classmethod
-    async def run_executor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return await cls.run_async("executor", prompty, data, default)
-
-    @classmethod
-    def run_processor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return cls.run("processor", prompty, data, default)
-
-    @classmethod
-    async def run_processor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
-        return await cls.run_async("processor", prompty, data, default)
-
-
-class InvokerException(Exception):
-    """Exception class for Invoker"""
-
-    def __init__(self, message: str, type: str) -> None:
-        super().__init__(message)
-        self.type = type
-
-    def __str__(self) -> str:
-        return f"{super().__str__()}. Make sure to pip install any necessary package extras (i.e. could be something like `pip install prompty[{self.type}]`) for {self.type} as well as import the appropriate invokers (i.e. could be something like `import prompty.{self.type}`)."
-
-
-@InvokerFactory.register_renderer("NOOP")
-@InvokerFactory.register_parser("NOOP")
-@InvokerFactory.register_executor("NOOP")
-@InvokerFactory.register_processor("NOOP")
-@InvokerFactory.register_parser("prompty.embedding")
-@InvokerFactory.register_parser("prompty.image")
-@InvokerFactory.register_parser("prompty.completion")
-class NoOp(Invoker):
-    def invoke(self, data: Any) -> Any:
-        return data
-
-    async def invoke_async(self, data: str) -> Any:
-        return self.invoke(data)
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py
deleted file mode 100644
index 2d6400ee1e28..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py
+++ /dev/null
@@ -1,672 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# pylint: disable=line-too-long,R,consider-using-dict-items,docstring-missing-return,docstring-missing-rtype,docstring-missing-param,global-statement,unused-argument,global-variable-not-assigned,protected-access,logging-fstring-interpolation,deprecated-method
-from __future__ import annotations
-import logging
-from collections.abc import Iterator, Sequence
-from types import MappingProxyType
-from typing import (
-    Any,
-    Dict,
-    List,
-    Literal,
-    Mapping,
-    Optional,
-    Union,
-    cast,
-)
-from typing_extensions import TypeAlias
-
-logger = logging.getLogger(__name__)
-
-
-Scopes: TypeAlias = List[Union[Literal[False, 0], Mapping[str, Any]]]
-
-
-# Globals
-_CURRENT_LINE = 1
-_LAST_TAG_LINE = None
-
-
-class ChevronError(SyntaxError):
-    """Custom exception for Chevron errors."""
-
-
-#
-# Helper functions
-#
-
-
-def grab_literal(template: str, l_del: str) -> tuple[str, str]:
-    """Parse a literal from the template.
-
-    Args:
-        template: The template to parse.
-        l_del: The left delimiter.
-
-    Returns:
-        Tuple[str, str]: The literal and the template.
-    """
-
-    global _CURRENT_LINE
-
-    try:
-        # Look for the next tag and move the template to it
-        literal, template = template.split(l_del, 1)
-        _CURRENT_LINE += literal.count("\n")
-        return (literal, template)
-
-    # There are no more tags in the template?
-    except ValueError:
-        # Then the rest of the template is a literal
-        return (template, "")
-
-
-def l_sa_check(template: str, literal: str, is_standalone: bool) -> bool:
-    """Do a preliminary check to see if a tag could be a standalone.
-
-    Args:
-        template: The template. (Not used.)
-        literal: The literal.
-        is_standalone: Whether the tag is standalone.
-
-    Returns:
-        bool: Whether the tag could be a standalone.
-    """
-
-    # If there is a newline, or the previous tag was a standalone
-    if literal.find("\n") != -1 or is_standalone:
-        padding = literal.split("\n")[-1]
-
-        # If all the characters since the last newline are spaces
-        # Then the next tag could be a standalone
-        # Otherwise it can't be
-        return padding.isspace() or padding == ""
-    else:
-        return False
-
-
-def r_sa_check(template: str, tag_type: str, is_standalone: bool) -> bool:
-    """Do a final check to see if a tag could be a standalone.
-
-    Args:
-        template: The template.
-        tag_type: The type of the tag.
-        is_standalone: Whether the tag is standalone.
-
-    Returns:
-        bool: Whether the tag could be a standalone.
-    """
-
-    # Check right side if we might be a standalone
-    if is_standalone and tag_type not in ["variable", "no escape"]:
-        on_newline = template.split("\n", 1)
-
-        # If the stuff to the right of us are spaces we're a standalone
-        return on_newline[0].isspace() or not on_newline[0]
-
-    # If we're a tag can't be a standalone
-    else:
-        return False
-
-
-def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]:
-    """Parse a tag from a template.
-
-    Args:
-        template: The template.
-        l_del: The left delimiter.
-        r_del: The right delimiter.
-
-    Returns:
-        Tuple[Tuple[str, str], str]: The tag and the template.
-
-    Raises:
-        ChevronError: If the tag is unclosed.
-        ChevronError: If the set delimiter tag is unclosed.
-    """
-    global _CURRENT_LINE
-    global _LAST_TAG_LINE
-
-    tag_types = {
-        "!": "comment",
-        "#": "section",
-        "^": "inverted section",
-        "/": "end",
-        ">": "partial",
-        "=": "set delimiter?",
-        "{": "no escape?",
-        "&": "no escape",
-    }
-
-    # Get the tag
-    try:
-        tag, template = template.split(r_del, 1)
-    except ValueError as e:
-        msg = "unclosed tag " f"at line {_CURRENT_LINE}"
-        raise ChevronError(msg) from e
-
-    # Find the type meaning of the first character
-    tag_type = tag_types.get(tag[0], "variable")
-
-    # If the type is not a variable
-    if tag_type != "variable":
-        # Then that first character is not needed
-        tag = tag[1:]
-
-    # If we might be a set delimiter tag
-    if tag_type == "set delimiter?":
-        # Double check to make sure we are
-        if tag.endswith("="):
-            tag_type = "set delimiter"
-            # Remove the equal sign
-            tag = tag[:-1]
-
-        # Otherwise we should complain
-        else:
-            msg = "unclosed set delimiter tag\n" f"at line {_CURRENT_LINE}"
-            raise ChevronError(msg)
-
-    elif (
-        # If we might be a no html escape tag
-        tag_type == "no escape?"
-        # And we have a third curly brace
-        # (And are using curly braces as delimiters)
-        and l_del == "{{"
-        and r_del == "}}"
-        and template.startswith("}")
-    ):
-        # Then we are a no html escape tag
-        template = template[1:]
-        tag_type = "no escape"
-
-    # Strip the whitespace off the key and return
-    return ((tag_type, tag.strip()), template)
-
-
-#
-# The main tokenizing function
-#
-
-
-def tokenize(template: str, def_ldel: str = "{{", def_rdel: str = "}}") -> Iterator[tuple[str, str]]:
-    """Tokenize a mustache template.
-
-    Tokenizes a mustache template in a generator fashion,
-    using file-like objects. It also accepts a string containing
-    the template.
-
-
-    Arguments:
-
-    template -- a file-like object, or a string of a mustache template
-
-    def_ldel -- The default left delimiter
-                ("{{" by default, as in spec compliant mustache)
-
-    def_rdel -- The default right delimiter
-                ("}}" by default, as in spec compliant mustache)
-
-
-    Returns:
-
-    A generator of mustache tags in the form of a tuple
-
-    -- (tag_type, tag_key)
-
-    Where tag_type is one of:
-     * literal
-     * section
-     * inverted section
-     * end
-     * partial
-     * no escape
-
-    And tag_key is either the key or in the case of a literal tag,
-    the literal itself.
-    """
-
-    global _CURRENT_LINE, _LAST_TAG_LINE
-    _CURRENT_LINE = 1
-    _LAST_TAG_LINE = None
-
-    is_standalone = True
-    open_sections = []
-    l_del = def_ldel
-    r_del = def_rdel
-
-    while template:
-        literal, template = grab_literal(template, l_del)
-
-        # If the template is completed
-        if not template:
-            # Then yield the literal and leave
-            yield ("literal", literal)
-            break
-
-        # Do the first check to see if we could be a standalone
-        is_standalone = l_sa_check(template, literal, is_standalone)
-
-        # Parse the tag
-        tag, template = parse_tag(template, l_del, r_del)
-        tag_type, tag_key = tag
-
-        # Special tag logic
-
-        # If we are a set delimiter tag
-        if tag_type == "set delimiter":
-            # Then get and set the delimiters
-            dels = tag_key.strip().split(" ")
-            l_del, r_del = dels[0], dels[-1]
-
-        # If we are a section tag
-        elif tag_type in ["section", "inverted section"]:
-            # Then open a new section
-            open_sections.append(tag_key)
-            _LAST_TAG_LINE = _CURRENT_LINE
-
-        # If we are an end tag
-        elif tag_type == "end":
-            # Then check to see if the last opened section
-            # is the same as us
-            try:
-                last_section = open_sections.pop()
-            except IndexError as e:
-                msg = f'Trying to close tag "{tag_key}"\n' "Looks like it was not opened.\n" f"line {_CURRENT_LINE + 1}"
-                raise ChevronError(msg) from e
-            if tag_key != last_section:
-                # Otherwise we need to complain
-                msg = (
-                    f'Trying to close tag "{tag_key}"\n'
-                    f'last open tag is "{last_section}"\n'
-                    f"line {_CURRENT_LINE + 1}"
-                )
-                raise ChevronError(msg)
-
-        # Do the second check to see if we're a standalone
-        is_standalone = r_sa_check(template, tag_type, is_standalone)
-
-        # Which if we are
-        if is_standalone:
-            # Remove the stuff before the newline
-            template = template.split("\n", 1)[-1]
-
-            # Partials need to keep the spaces on their left
-            if tag_type != "partial":
-                # But other tags don't
-                literal = literal.rstrip(" ")
-
-        # Start yielding
-        # Ignore literals that are empty
-        if literal != "":
-            yield ("literal", literal)
-
-        # Ignore comments and set delimiters
-        if tag_type not in ["comment", "set delimiter?"]:
-            yield (tag_type, tag_key)
-
-    # If there are any open sections when we're done
-    if open_sections:
-        # Then we need to complain
-        msg = (
-            "Unexpected EOF\n"
-            f'the tag "{open_sections[-1]}" was never closed\n'
-            f"was opened at line {_LAST_TAG_LINE}"
-        )
-        raise ChevronError(msg)
-
-
-#
-# Helper functions
-#
-
-
-def _html_escape(string: str) -> str:
-    """HTML escape all of these " & < >"""
-
-    html_codes = {
-        '"': "&quot;",
-        "<": "&lt;",
-        ">": "&gt;",
-    }
-
-    # & must be handled first
-    string = string.replace("&", "&amp;")
-    for char in html_codes:
-        string = string.replace(char, html_codes[char])
-    return string
-
-
-def _get_key(
-    key: str,
-    scopes: Scopes,
-    warn: bool,
-    keep: bool,
-    def_ldel: str,
-    def_rdel: str,
-) -> Any:
-    """Get a key from the current scope"""
-
-    # If the key is a dot
-    if key == ".":
-        # Then just return the current scope
-        return scopes[0]
-
-    # Loop through the scopes
-    for scope in scopes:
-        try:
-            # Return an empty string if falsy, with two exceptions
-            # 0 should return 0, and False should return False
-            if scope in (0, False):
-                return scope
-
-            # For every dot separated key
-            for child in key.split("."):
-                # Return an empty string if falsy, with two exceptions
-                # 0 should return 0, and False should return False
-                if scope in (0, False):
-                    return scope
-                # Move into the scope
-                try:
-                    # Try subscripting (Normal dictionaries)
-                    scope = cast(Dict[str, Any], scope)[child]
-                except (TypeError, AttributeError):
-                    try:
-                        scope = getattr(scope, child)
-                    except (TypeError, AttributeError):
-                        # Try as a list
-                        scope = scope[int(child)]  # type: ignore
-
-            try:
-                # This allows for custom falsy data types
-                # https://github.com/noahmorrison/chevron/issues/35
-                if scope._CHEVRON_return_scope_when_falsy:  # type: ignore
-                    return scope
-            except AttributeError:
-                if scope in (0, False):
-                    return scope
-                return scope or ""
-        except (AttributeError, KeyError, IndexError, ValueError):
-            # We couldn't find the key in the current scope
-            # We'll try again on the next pass
-            pass
-
-    # We couldn't find the key in any of the scopes
-
-    if warn:
-        logger.warn(f"Could not find key '{key}'")
-
-    if keep:
-        return f"{def_ldel} {key} {def_rdel}"
-
-    return ""
-
-
-def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str:
-    """Load a partial"""
-    try:
-        # Maybe the partial is in the dictionary
-        return partials_dict[name]
-    except KeyError:
-        return ""
-
-
-#
-# The main rendering function
-#
-g_token_cache: Dict[str, List[tuple[str, str]]] = {}
-
-EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({})
-
-
-def render(
-    template: Union[str, List[tuple[str, str]]] = "",
-    data: Mapping[str, Any] = EMPTY_DICT,
-    partials_dict: Mapping[str, str] = EMPTY_DICT,
-    padding: str = "",
-    def_ldel: str = "{{",
-    def_rdel: str = "}}",
-    scopes: Optional[Scopes] = None,
-    warn: bool = False,
-    keep: bool = False,
-) -> str:
-    """Render a mustache template.
-
-    Renders a mustache template with a data scope and inline partial capability.
-
-    Arguments:
-
-    template      -- A file-like object or a string containing the template.
-
-    data          -- A python dictionary with your data scope.
-
-    partials_path -- The path to where your partials are stored.
-                     If set to None, then partials won't be loaded from the file system
-                     (defaults to '.').
-
-    partials_ext  -- The extension that you want the parser to look for
-                     (defaults to 'mustache').
-
-    partials_dict -- A python dictionary which will be search for partials
-                     before the filesystem is. {'include': 'foo'} is the same
-                     as a file called include.mustache
-                     (defaults to {}).
-
-    padding       -- This is for padding partials, and shouldn't be used
-                     (but can be if you really want to).
-
-    def_ldel      -- The default left delimiter
-                     ("{{" by default, as in spec compliant mustache).
-
-    def_rdel      -- The default right delimiter
-                     ("}}" by default, as in spec compliant mustache).
-
-    scopes        -- The list of scopes that get_key will look through.
-
-    warn          -- Log a warning when a template substitution isn't found in the data
-
-    keep          -- Keep unreplaced tags when a substitution isn't found in the data.
-
-
-    Returns:
-
-    A string containing the rendered template.
-    """
-
-    # If the template is a sequence but not derived from a string
-    if isinstance(template, Sequence) and not isinstance(template, str):
-        # Then we don't need to tokenize it
-        # But it does need to be a generator
-        tokens: Iterator[tuple[str, str]] = (token for token in template)
-    else:
-        if template in g_token_cache:
-            tokens = (token for token in g_token_cache[template])
-        else:
-            # Otherwise make a generator
-            tokens = tokenize(template, def_ldel, def_rdel)
-
-    output = ""
-
-    if scopes is None:
-        scopes = [data]
-
-    # Run through the tokens
-    for tag, key in tokens:
-        # Set the current scope
-        current_scope = scopes[0]
-
-        # If we're an end tag
-        if tag == "end":
-            # Pop out of the latest scope
-            del scopes[0]
-
-        # If the current scope is falsy and not the only scope
-        elif not current_scope and len(scopes) != 1:
-            if tag in ["section", "inverted section"]:
-                # Set the most recent scope to a falsy value
-                scopes.insert(0, False)
-
-        # If we're a literal tag
-        elif tag == "literal":
-            # Add padding to the key and add it to the output
-            output += key.replace("\n", "\n" + padding)
-
-        # If we're a variable tag
-        elif tag == "variable":
-            # Add the html escaped key to the output
-            thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
-            if thing is True and key == ".":
-                # if we've coerced into a boolean by accident
-                # (inverted tags do this)
-                # then get the un-coerced object (next in the stack)
-                thing = scopes[1]
-            if not isinstance(thing, str):
-                thing = str(thing)
-            output += _html_escape(thing)
-
-        # If we're a no html escape tag
-        elif tag == "no escape":
-            # Just lookup the key and add it
-            thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
-            if not isinstance(thing, str):
-                thing = str(thing)
-            output += thing
-
-        # If we're a section tag
-        elif tag == "section":
-            # Get the sections scope
-            scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
-
-            # If the scope is a callable (as described in
-            # https://mustache.github.io/mustache.5.html)
-            if callable(scope):
-                # Generate template text from tags
-                text = ""
-                tags: List[tuple[str, str]] = []
-                for token in tokens:
-                    if token == ("end", key):
-                        break
-
-                    tags.append(token)
-                    tag_type, tag_key = token
-                    if tag_type == "literal":
-                        text += tag_key
-                    elif tag_type == "no escape":
-                        text += f"{def_ldel}& {tag_key} {def_rdel}"
-                    else:
-                        text += "{}{} {}{}".format(
-                            def_ldel,
-                            {
-                                "comment": "!",
-                                "section": "#",
-                                "inverted section": "^",
-                                "end": "/",
-                                "partial": ">",
-                                "set delimiter": "=",
-                                "no escape": "&",
-                                "variable": "",
-                            }[tag_type],
-                            tag_key,
-                            def_rdel,
-                        )
-
-                g_token_cache[text] = tags
-
-                rend = scope(
-                    text,
-                    lambda template, data=None: render(
-                        template,
-                        data={},
-                        partials_dict=partials_dict,
-                        padding=padding,
-                        def_ldel=def_ldel,
-                        def_rdel=def_rdel,
-                        scopes=data and [data] + scopes or scopes,
-                        warn=warn,
-                        keep=keep,
-                    ),
-                )
-
-                output += rend  # type: ignore[reportOperatorIssue]
-
-            # If the scope is a sequence, an iterator or generator but not
-            # derived from a string
-            elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str):
-                # Then we need to do some looping
-
-                # Gather up all the tags inside the section
-                # (And don't be tricked by nested end tags with the same key)
-                # TODO: This feels like it still has edge cases, no?
-                tags = []
-                tags_with_same_key = 0
-                for token in tokens:
-                    if token == ("section", key):
-                        tags_with_same_key += 1
-                    if token == ("end", key):
-                        tags_with_same_key -= 1
-                        if tags_with_same_key < 0:
-                            break
-                    tags.append(token)
-
-                # For every item in the scope
-                for thing in scope:
-                    # Append it as the most recent scope and render
-                    new_scope = [thing] + scopes
-                    rend = render(
-                        template=tags,
-                        scopes=new_scope,
-                        padding=padding,
-                        partials_dict=partials_dict,
-                        def_ldel=def_ldel,
-                        def_rdel=def_rdel,
-                        warn=warn,
-                        keep=keep,
-                    )
-
-                    output += rend
-
-            else:
-                # Otherwise we're just a scope section
-                scopes.insert(0, scope)  # type: ignore[reportArgumentType]
-
-        # If we're an inverted section
-        elif tag == "inverted section":
-            # Add the flipped scope to the scopes
-            scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
-            scopes.insert(0, cast(Literal[False], not scope))
-
-        # If we're a partial
-        elif tag == "partial":
-            # Load the partial
-            partial = _get_partial(key, partials_dict)
-
-            # Find what to pad the partial with
-            left = output.rpartition("\n")[2]
-            part_padding = padding
-            if left.isspace():
-                part_padding += left
-
-            # Render the partial
-            part_out = render(
-                template=partial,
-                partials_dict=partials_dict,
-                def_ldel=def_ldel,
-                def_rdel=def_rdel,
-                padding=part_padding,
-                scopes=scopes,
-                warn=warn,
-                keep=keep,
-            )
-
-            # If the partial was indented
-            if left.isspace():
-                # then remove the spaces from the end
-                part_out = part_out.rstrip(" \t")
-
-            # Add the partials output to the output
-            output += part_out
-
-    return output
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py
deleted file mode 100644
index d1f742a1ffa6..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="union-attr,return-value"
-# pylint: disable=line-too-long,R,consider-using-enumerate,docstring-missing-param,docstring-missing-return,docstring-missing-rtype
-import re
-import base64
-from pathlib import Path
-from typing import Any, Union
-from ._core import Prompty
-from ._invoker import Invoker, InvokerFactory
-
-
-ROLES = ["assistant", "function", "system", "user"]
-
-
-@InvokerFactory.register_parser("prompty.chat")
-class PromptyChatParser(Invoker):
-    """Prompty Chat Parser"""
-
-    def __init__(self, prompty: Prompty) -> None:
-        super().__init__(prompty)
-        self.path = Path(self.prompty.file).parent
-
-    def invoke(self, data: str) -> Any:
-        return invoke_parser(self.path, data)
-
-    async def invoke_async(self, data: str) -> Any:
-        """Invoke the Prompty Chat Parser (Async)
-
-        Parameters
-        ----------
-        data : str
-            The data to parse
-
-        Returns
-        -------
-        str
-            The parsed data
-        """
-        return self.invoke(data)
-
-
-def _inline_image(path: Union[Path, None], image_item: str) -> str:
-    """Inline Image
-
-    Parameters
-    ----------
-    image_item : str
-        The image item to inline
-
-    Returns
-    -------
-    str
-        The inlined image
-    """
-    # pass through if it's a url or base64 encoded or the path is None
-    if image_item.startswith("http") or image_item.startswith("data") or path is None:
-        return image_item
-    # otherwise, it's a local file - need to base64 encode it
-    else:
-        image_path = (path if path is not None else Path(".")) / image_item
-        with open(image_path, "rb") as f:
-            base64_image = base64.b64encode(f.read()).decode("utf-8")
-
-        if image_path.suffix == ".png":
-            return f"data:image/png;base64,{base64_image}"
-        elif image_path.suffix == ".jpg":
-            return f"data:image/jpeg;base64,{base64_image}"
-        elif image_path.suffix == ".jpeg":
-            return f"data:image/jpeg;base64,{base64_image}"
-        else:
-            raise ValueError(
-                f"Invalid image format {image_path.suffix} - currently only .png and .jpg / .jpeg are supported."
-            )
-
-
-def _parse_content(path: Union[Path, None], content: str):
-    """for parsing inline images
-
-    Parameters
-    ----------
-    content : str
-        The content to parse
-
-    Returns
-    -------
-    any
-        The parsed content
-    """
-    # regular expression to parse markdown images
-    image = r"(?P<alt>!\[[^\]]*\])\((?P<filename>.*?)(?=\"|\))\)"
-    matches = re.findall(image, content, flags=re.MULTILINE)
-    if len(matches) > 0:
-        content_items = []
-        content_chunks = re.split(image, content, flags=re.MULTILINE)
-        current_chunk = 0
-        for i in range(len(content_chunks)):
-            # image entry
-            if current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][0]:
-                content_items.append(
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": _inline_image(path, matches[current_chunk][1].split(" ")[0].strip())},
-                    }
-                )
-            # second part of image entry
-            elif current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][1]:
-                current_chunk += 1
-            # text entry
-            else:
-                if len(content_chunks[i].strip()) > 0:
-                    content_items.append({"type": "text", "text": content_chunks[i].strip()})
-        return content_items
-    else:
-        return content
-
-
-def invoke_parser(path: Union[Path, None], data: str) -> Any:
-    """Invoke the Prompty Chat Parser
-
-    Parameters
-    ----------
-    data : str
-        The data to parse
-
-    Returns
-    -------
-    str
-        The parsed data
-    """
-    messages = []
-    separator = r"(?i)^\s*#?\s*(" + "|".join(ROLES) + r")\s*:\s*\n"
-
-    # get valid chunks - remove empty items
-    chunks = [item for item in re.split(separator, data, flags=re.MULTILINE) if len(item.strip()) > 0]
-
-    # if no starter role, then inject system role
-    if not chunks[0].strip().lower() in ROLES:
-        chunks.insert(0, "system")
-
-    # if last chunk is role entry, then remove (no content?)
-    if chunks[-1].strip().lower() in ROLES:
-        chunks.pop()
-
-    if len(chunks) % 2 != 0:
-        raise ValueError("Invalid prompt format")
-
-    # create messages
-    for i in range(0, len(chunks), 2):
-        role = chunks[i].strip().lower()
-        content = chunks[i + 1].strip()
-        messages.append({"role": role, "content": _parse_content(path, content)})
-
-    return messages
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
deleted file mode 100644
index 66429f2b2c00..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# pylint: disable=line-too-long,R
-"""Customize generated code here.
-
-Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
-"""
-
-import traceback
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-from typing_extensions import Self
-from ._core import Prompty
-from ._mustache import render
-from ._parsers import invoke_parser
-from ._prompty_utils import load, prepare
-from ._utils import remove_leading_empty_space
-
-
-class PromptTemplate:
-    """The helper class which takes variant of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array."""
-
-    @classmethod
-    def from_prompty(cls, file_path: str) -> Self:
-        """Initialize a PromptTemplate object from a prompty file.
-
-        :param file_path: The path to the prompty file.
-        :type file_path: str
-        :return: The PromptTemplate object.
-        :rtype: PromptTemplate
-        """
-        if not file_path:
-            raise ValueError("Please provide file_path")
-
-        # Get the absolute path of the file by `traceback.extract_stack()`, it's "-2" because:
-        #  In the stack, the last function is the current function.
-        #  The second last function is the caller function, which is the root of the file_path.
-        stack = traceback.extract_stack()
-        caller = Path(stack[-2].filename)
-        abs_file_path = Path(caller.parent / Path(file_path)).resolve().absolute()
-
-        prompty = load(str(abs_file_path))
-        return cls(prompty=prompty)
-
-    @classmethod
-    def from_string(cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None) -> Self:
-        """Initialize a PromptTemplate object from a message template.
-
-        :param prompt_template: The prompt template string.
-        :type prompt_template: str
-        :param api: The API type, e.g. "chat" or "completion".
-        :type api: str
-        :param model_name: The model name, e.g. "gpt-4o-mini".
-        :type model_name: str
-        :return: The PromptTemplate object.
-        :rtype: PromptTemplate
-        """
-        return cls(
-            api=api,
-            prompt_template=prompt_template,
-            model_name=model_name,
-            prompty=None,
-        )
-
-    def __init__(
-        self,
-        *,
-        api: str = "chat",
-        prompty: Optional[Prompty] = None,
-        prompt_template: Optional[str] = None,
-        model_name: Optional[str] = None,
-    ) -> None:
-        self.prompty = prompty
-        if self.prompty is not None:
-            self.model_name = (
-                self.prompty.model.configuration["azure_deployment"]
-                if "azure_deployment" in self.prompty.model.configuration
-                else None
-            )
-            self.parameters = self.prompty.model.parameters
-            self._config = {}
-        elif prompt_template is not None:
-            self.model_name = model_name
-            self.parameters = {}
-            # _config is a dict to hold the internal configuration
-            self._config = {
-                "api": api if api is not None else "chat",
-                "prompt_template": prompt_template,
-            }
-        else:
-            raise ValueError("Please pass valid arguments for PromptTemplate")
-
-    def create_messages(self, data: Optional[Dict[str, Any]] = None, **kwargs) -> List[Dict[str, Any]]:
-        """Render the prompt template with the given data.
-
-        :param data: The data to render the prompt template with.
-        :type data: Optional[Dict[str, Any]]
-        :return: The rendered prompt template.
-        :rtype: List[Dict[str, Any]]
-        """
-        if data is None:
-            data = kwargs
-
-        if self.prompty is not None:
-            parsed = prepare(self.prompty, data)
-            return parsed
-        elif "prompt_template" in self._config:
-            prompt_template = remove_leading_empty_space(self._config["prompt_template"])
-            system_prompt_str = render(prompt_template, data)
-            parsed = invoke_parser(None, system_prompt_str)
-            return parsed
-        else:
-            raise ValueError("Please provide valid prompt template")
-
-
-def patch_sdk():
-    """Do not remove from this file.
-
-    `patch_sdk` is a last resort escape hatch that allows you to do customizations
-    you can't accomplish using the techniques described in
-    https://aka.ms/azsdk/python/dpcodegen/python/customize
-    """
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
deleted file mode 100644
index ad728b806214..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py
+++ /dev/null
@@ -1,416 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="assignment"
-# pylint: disable=R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,dangerous-default-value,redefined-outer-name,unused-wildcard-import,wildcard-import,raise-missing-from
-import traceback
-from pathlib import Path
-from typing import Any, Dict, List, Union
-from ._tracer import trace
-from ._invoker import InvokerFactory
-from ._core import (
-    ModelSettings,
-    Prompty,
-    PropertySettings,
-    TemplateSettings,
-    param_hoisting,
-)
-from ._utils import (
-    load_global_config,
-    load_prompty,
-)
-
-from ._renderers import *
-from ._parsers import *
-
-
-@trace(description="Create a headless prompty object for programmatic use.")
-def headless(
-    api: str,
-    content: Union[str, List[str], dict],
-    configuration: Dict[str, Any] = {},
-    parameters: Dict[str, Any] = {},
-    connection: str = "default",
-) -> Prompty:
-    """Create a headless prompty object for programmatic use.
-
-    Parameters
-    ----------
-    api : str
-        The API to use for the model
-    content : Union[str, List[str], dict]
-        The content to process
-    configuration : Dict[str, Any], optional
-        The configuration to use, by default {}
-    parameters : Dict[str, Any], optional
-        The parameters to use, by default {}
-    connection : str, optional
-        The connection to use, by default "default"
-
-    Returns
-    -------
-    Prompty
-        The headless prompty object
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.headless(
-            api="embedding",
-            configuration={"type": "azure", "azure_deployment": "text-embedding-ada-002"},
-            content="hello world",
-        )
-    >>> emb = prompty.execute(p)
-
-    """
-
-    # get caller's path (to get relative path for prompty.json)
-    caller = Path(traceback.extract_stack()[-2].filename)
-    templateSettings = TemplateSettings(type="NOOP", parser="NOOP")
-    modelSettings = ModelSettings(
-        api=api,
-        configuration=Prompty.normalize(
-            param_hoisting(configuration, load_global_config(caller.parent, connection)),
-            caller.parent,
-        ),
-        parameters=parameters,
-    )
-
-    return Prompty(model=modelSettings, template=templateSettings, content=content)
-
-
-def _load_raw_prompty(attributes: dict, content: str, p: Path, global_config: dict):
-    if "model" not in attributes:
-        attributes["model"] = {}
-
-    if "configuration" not in attributes["model"]:
-        attributes["model"]["configuration"] = global_config
-    else:
-        attributes["model"]["configuration"] = param_hoisting(
-            attributes["model"]["configuration"],
-            global_config,
-        )
-
-    # pull model settings out of attributes
-    try:
-        model = ModelSettings(**attributes.pop("model"))
-    except Exception as e:
-        raise ValueError(f"Error in model settings: {e}")
-
-    # pull template settings
-    try:
-        if "template" in attributes:
-            t = attributes.pop("template")
-            if isinstance(t, dict):
-                template = TemplateSettings(**t)
-            # has to be a string denoting the type
-            else:
-                template = TemplateSettings(type=t, parser="prompty")
-        else:
-            template = TemplateSettings(type="mustache", parser="prompty")
-    except Exception as e:
-        raise ValueError(f"Error in template loader: {e}")
-
-    # formalize inputs and outputs
-    if "inputs" in attributes:
-        try:
-            inputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("inputs").items()}
-        except Exception as e:
-            raise ValueError(f"Error in inputs: {e}")
-    else:
-        inputs = {}
-    if "outputs" in attributes:
-        try:
-            outputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("outputs").items()}
-        except Exception as e:
-            raise ValueError(f"Error in outputs: {e}")
-    else:
-        outputs = {}
-
-    prompty = Prompty(
-        **attributes,
-        model=model,
-        inputs=inputs,
-        outputs=outputs,
-        template=template,
-        content=content,
-        file=p,
-    )
-
-    return prompty
-
-
-@trace(description="Load a prompty file.")
-def load(prompty_file: Union[str, Path], configuration: str = "default") -> Prompty:
-    """Load a prompty file.
-
-    Parameters
-    ----------
-    prompty_file : Union[str, Path]
-        The path to the prompty file
-    configuration : str, optional
-        The configuration to use, by default "default"
-
-    Returns
-    -------
-    Prompty
-        The loaded prompty object
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.load("prompts/basic.prompty")
-    >>> print(p)
-    """
-
-    p = Path(prompty_file)
-    if not p.is_absolute():
-        # get caller's path (take into account trace frame)
-        caller = Path(traceback.extract_stack()[-3].filename)
-        p = Path(caller.parent / p).resolve().absolute()
-
-    # load dictionary from prompty file
-    matter = load_prompty(p)
-
-    attributes = matter["attributes"]
-    content = matter["body"]
-
-    # normalize attribute dictionary resolve keys and files
-    attributes = Prompty.normalize(attributes, p.parent)
-
-    # load global configuration
-    global_config = Prompty.normalize(load_global_config(p.parent, configuration), p.parent)
-
-    prompty = _load_raw_prompty(attributes, content, p, global_config)
-
-    # recursive loading of base prompty
-    if "base" in attributes:
-        # load the base prompty from the same directory as the current prompty
-        base = load(p.parent / attributes["base"])
-        prompty = Prompty.hoist_base_prompty(prompty, base)
-
-    return prompty
-
-
-@trace(description="Prepare the inputs for the prompt.")
-def prepare(
-    prompt: Prompty,
-    inputs: Dict[str, Any] = {},
-):
-    """Prepare the inputs for the prompt.
-
-    Parameters
-    ----------
-    prompt : Prompty
-        The prompty object
-    inputs : Dict[str, Any], optional
-        The inputs to the prompt, by default {}
-
-    Returns
-    -------
-    dict
-        The prepared and hidrated template shaped to the LLM model
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.load("prompts/basic.prompty")
-    >>> inputs = {"name": "John Doe"}
-    >>> content = prompty.prepare(p, inputs)
-    """
-    inputs = param_hoisting(inputs, prompt.sample)
-
-    render = InvokerFactory.run_renderer(prompt, inputs, prompt.content)
-    result = InvokerFactory.run_parser(prompt, render)
-
-    return result
-
-
-@trace(description="Prepare the inputs for the prompt.")
-async def prepare_async(
-    prompt: Prompty,
-    inputs: Dict[str, Any] = {},
-):
-    """Prepare the inputs for the prompt.
-
-    Parameters
-    ----------
-    prompt : Prompty
-        The prompty object
-    inputs : Dict[str, Any], optional
-        The inputs to the prompt, by default {}
-
-    Returns
-    -------
-    dict
-        The prepared and hidrated template shaped to the LLM model
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.load("prompts/basic.prompty")
-    >>> inputs = {"name": "John Doe"}
-    >>> content = await prompty.prepare_async(p, inputs)
-    """
-    inputs = param_hoisting(inputs, prompt.sample)
-
-    render = await InvokerFactory.run_renderer_async(prompt, inputs, prompt.content)
-    result = await InvokerFactory.run_parser_async(prompt, render)
-
-    return result
-
-
-@trace(description="Run the prepared Prompty content against the model.")
-def run(
-    prompt: Prompty,
-    content: Union[dict, list, str],
-    configuration: Dict[str, Any] = {},
-    parameters: Dict[str, Any] = {},
-    raw: bool = False,
-):
-    """Run the prepared Prompty content.
-
-    Parameters
-    ----------
-    prompt : Prompty
-        The prompty object
-    content : Union[dict, list, str]
-        The content to process
-    configuration : Dict[str, Any], optional
-        The configuration to use, by default {}
-    parameters : Dict[str, Any], optional
-        The parameters to use, by default {}
-    raw : bool, optional
-        Whether to skip processing, by default False
-
-    Returns
-    -------
-    Any
-        The result of the prompt
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.load("prompts/basic.prompty")
-    >>> inputs = {"name": "John Doe"}
-    >>> content = prompty.prepare(p, inputs)
-    >>> result = prompty.run(p, content)
-    """
-
-    if configuration != {}:
-        prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration)
-
-    if parameters != {}:
-        prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters)
-
-    result = InvokerFactory.run_executor(prompt, content)
-    if not raw:
-        result = InvokerFactory.run_processor(prompt, result)
-
-    return result
-
-
-@trace(description="Run the prepared Prompty content against the model.")
-async def run_async(
-    prompt: Prompty,
-    content: Union[dict, list, str],
-    configuration: Dict[str, Any] = {},
-    parameters: Dict[str, Any] = {},
-    raw: bool = False,
-):
-    """Run the prepared Prompty content.
-
-    Parameters
-    ----------
-    prompt : Prompty
-        The prompty object
-    content : Union[dict, list, str]
-        The content to process
-    configuration : Dict[str, Any], optional
-        The configuration to use, by default {}
-    parameters : Dict[str, Any], optional
-        The parameters to use, by default {}
-    raw : bool, optional
-        Whether to skip processing, by default False
-
-    Returns
-    -------
-    Any
-        The result of the prompt
-
-    Example
-    -------
-    >>> import prompty
-    >>> p = prompty.load("prompts/basic.prompty")
-    >>> inputs = {"name": "John Doe"}
-    >>> content = await prompty.prepare_async(p, inputs)
-    >>> result = await prompty.run_async(p, content)
-    """
-
-    if configuration != {}:
-        prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration)
-
-    if parameters != {}:
-        prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters)
-
-    result = await InvokerFactory.run_executor_async(prompt, content)
-    if not raw:
-        result = await InvokerFactory.run_processor_async(prompt, result)
-
-    return result
-
-
-@trace(description="Execute a prompty")
-def execute(
-    prompt: Union[str, Prompty],
-    configuration: Dict[str, Any] = {},
-    parameters: Dict[str, Any] = {},
-    inputs: Dict[str, Any] = {},
-    raw: bool = False,
-    config_name: str = "default",
-):
-    """Execute a prompty.
-
-    Parameters
-    ----------
-    prompt : Union[str, Prompty]
-        The prompty object or path to the prompty file
-    configuration : Dict[str, Any], optional
-        The configuration to use, by default {}
-    parameters : Dict[str, Any], optional
-        The parameters to use, by default {}
-    inputs : Dict[str, Any], optional
-        The inputs to the prompt, by default {}
-    raw : bool, optional
-        Whether to skip processing, by default False
-    connection : str, optional
-        The connection to use, by default "default"
-
-    Returns
-    -------
-    Any
-        The result of the prompt
-
-    Example
-    -------
-    >>> import prompty
-    >>> inputs = {"name": "John Doe"}
-    >>> result = prompty.execute("prompts/basic.prompty", inputs=inputs)
-    """
-    if isinstance(prompt, str):
-        path = Path(prompt)
-        if not path.is_absolute():
-            # get caller's path (take into account trace frame)
-            caller = Path(traceback.extract_stack()[-3].filename)
-            path = Path(caller.parent / path).resolve().absolute()
-        prompt = load(path, config_name)
-
-    # prepare content
-    content = prepare(prompt, inputs)
-
-    # run LLM model
-    result = run(prompt, content, configuration, parameters, raw)
-
-    return result
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py
deleted file mode 100644
index 0d682a7fe151..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="union-attr,assignment,arg-type"
-from pathlib import Path
-from ._core import Prompty
-from ._invoker import Invoker, InvokerFactory
-from ._mustache import render
-
-
-@InvokerFactory.register_renderer("mustache")
-class MustacheRenderer(Invoker):
-    """Render a mustache template."""
-
-    def __init__(self, prompty: Prompty) -> None:
-        super().__init__(prompty)
-        self.templates = {}
-        cur_prompt = self.prompty
-        while cur_prompt:
-            self.templates[Path(cur_prompt.file).name] = cur_prompt.content
-            cur_prompt = cur_prompt.basePrompty
-        self.name = Path(self.prompty.file).name
-
-    def invoke(self, data: str) -> str:
-        generated = render(self.prompty.content, data)  # type: ignore
-        return generated
-
-    async def invoke_async(self, data: str) -> str:
-        return self.invoke(data)
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py
deleted file mode 100644
index 24f800b465f4..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="union-attr,arg-type,misc,return-value,assignment,func-returns-value"
-# pylint: disable=R,redefined-outer-name,bare-except,unspecified-encoding
-import os
-import json
-import inspect
-import traceback
-import importlib
-import contextlib
-from pathlib import Path
-from numbers import Number
-from datetime import datetime
-from functools import wraps, partial
-from typing import Any, Callable, Dict, Iterator, List, Union
-
-
-# clean up key value pairs for sensitive values
-def sanitize(key: str, value: Any) -> Any:
-    if isinstance(value, str) and any([s in key.lower() for s in ["key", "token", "secret", "password", "credential"]]):
-        return len(str(value)) * "*"
-
-    if isinstance(value, dict):
-        return {k: sanitize(k, v) for k, v in value.items()}
-
-    return value
-
-
-class Tracer:
-    _tracers: Dict[str, Callable[[str], Iterator[Callable[[str, Any], None]]]] = {}
-
-    @classmethod
-    def add(cls, name: str, tracer: Callable[[str], Iterator[Callable[[str, Any], None]]]) -> None:
-        cls._tracers[name] = tracer
-
-    @classmethod
-    def clear(cls) -> None:
-        cls._tracers = {}
-
-    @classmethod
-    @contextlib.contextmanager
-    def start(cls, name: str) -> Iterator[Callable[[str, Any], None]]:
-        with contextlib.ExitStack() as stack:
-            traces: List[Any] = [stack.enter_context(tracer(name)) for tracer in cls._tracers.values()]  # type: ignore
-            yield lambda key, value: [  # type: ignore
-                # normalize and sanitize any trace values
-                trace(key, sanitize(key, to_dict(value)))
-                for trace in traces
-            ]
-
-
-def to_dict(obj: Any) -> Union[Dict[str, Any], List[Dict[str, Any]], str, Number, bool]:
-    # simple json types
-    if isinstance(obj, str) or isinstance(obj, Number) or isinstance(obj, bool):
-        return obj
-
-    # datetime
-    if isinstance(obj, datetime):
-        return obj.isoformat()
-
-    # safe Prompty obj serialization
-    if type(obj).__name__ == "Prompty":
-        return obj.to_safe_dict()
-
-    # safe PromptyStream obj serialization
-    if type(obj).__name__ == "PromptyStream":
-        return "PromptyStream"
-
-    if type(obj).__name__ == "AsyncPromptyStream":
-        return "AsyncPromptyStream"
-
-    # recursive list and dict
-    if isinstance(obj, List):
-        return [to_dict(item) for item in obj]  # type: ignore
-
-    if isinstance(obj, Dict):
-        return {k: v if isinstance(v, str) else to_dict(v) for k, v in obj.items()}
-
-    if isinstance(obj, Path):
-        return str(obj)
-
-    # cast to string otherwise...
-    return str(obj)
-
-
-def _name(func: Callable, args):
-    if hasattr(func, "__qualname__"):
-        signature = f"{func.__module__}.{func.__qualname__}"
-    else:
-        signature = f"{func.__module__}.{func.__name__}"
-
-    # core invoker gets special treatment prompty.invoker.Invoker
-    core_invoker = signature.startswith("prompty.invoker.Invoker.run")
-    if core_invoker:
-        name = type(args[0]).__name__
-        if signature.endswith("async"):
-            signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke_async"
-        else:
-            signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke"
-    else:
-        name = func.__name__
-
-    return name, signature
-
-
-def _inputs(func: Callable, args, kwargs) -> dict:
-    ba = inspect.signature(func).bind(*args, **kwargs)
-    ba.apply_defaults()
-
-    inputs = {k: to_dict(v) for k, v in ba.arguments.items() if k != "self"}
-
-    return inputs
-
-
-def _results(result: Any) -> Union[Dict, List[Dict], str, Number, bool]:
-    return to_dict(result) if result is not None else "None"
-
-
-def _trace_sync(func: Union[Callable, None] = None, **okwargs: Any) -> Callable:
-
-    @wraps(func)  # type: ignore
-    def wrapper(*args, **kwargs):
-        name, signature = _name(func, args)  # type: ignore
-        with Tracer.start(name) as trace:
-            trace("signature", signature)
-
-            # support arbitrary keyword
-            # arguments for trace decorator
-            for k, v in okwargs.items():
-                trace(k, to_dict(v))
-
-            inputs = _inputs(func, args, kwargs)  # type: ignore
-            trace("inputs", inputs)
-
-            try:
-                result = func(*args, **kwargs)  # type: ignore
-                trace("result", _results(result))
-            except Exception as e:
-                trace(
-                    "result",
-                    {
-                        "exception": {
-                            "type": type(e),
-                            "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None),
-                            "message": str(e),
-                            "args": to_dict(e.args),
-                        }
-                    },
-                )
-                raise e
-
-            return result
-
-    return wrapper
-
-
-def _trace_async(func: Union[Callable, None] = None, **okwargs: Any) -> Callable:
-
-    @wraps(func)  # type: ignore
-    async def wrapper(*args, **kwargs):
-        name, signature = _name(func, args)  # type: ignore
-        with Tracer.start(name) as trace:
-            trace("signature", signature)
-
-            # support arbitrary keyword
-            # arguments for trace decorator
-            for k, v in okwargs.items():
-                trace(k, to_dict(v))
-
-            inputs = _inputs(func, args, kwargs)  # type: ignore
-            trace("inputs", inputs)
-            try:
-                result = await func(*args, **kwargs)  # type: ignore
-                trace("result", _results(result))
-            except Exception as e:
-                trace(
-                    "result",
-                    {
-                        "exception": {
-                            "type": type(e),
-                            "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None),
-                            "message": str(e),
-                            "args": to_dict(e.args),
-                        }
-                    },
-                )
-                raise e
-
-            return result
-
-    return wrapper
-
-
-def trace(func: Union[Callable, None] = None, **kwargs: Any) -> Callable:
-    if func is None:
-        return partial(trace, **kwargs)
-    wrapped_method = _trace_async if inspect.iscoroutinefunction(func) else _trace_sync
-    return wrapped_method(func, **kwargs)
-
-
-class PromptyTracer:
-    def __init__(self, output_dir: Union[str, None] = None) -> None:
-        if output_dir:
-            self.output = Path(output_dir).resolve().absolute()
-        else:
-            self.output = Path(Path(os.getcwd()) / ".runs").resolve().absolute()
-
-        if not self.output.exists():
-            self.output.mkdir(parents=True, exist_ok=True)
-
-        self.stack: List[Dict[str, Any]] = []
-
-    @contextlib.contextmanager
-    def tracer(self, name: str) -> Iterator[Callable[[str, Any], None]]:
-        try:
-            self.stack.append({"name": name})
-            frame = self.stack[-1]
-            frame["__time"] = {
-                "start": datetime.now(),
-            }
-
-            def add(key: str, value: Any) -> None:
-                if key not in frame:
-                    frame[key] = value
-                # multiple values creates list
-                else:
-                    if isinstance(frame[key], list):
-                        frame[key].append(value)
-                    else:
-                        frame[key] = [frame[key], value]
-
-            yield add
-        finally:
-            frame = self.stack.pop()
-            start: datetime = frame["__time"]["start"]
-            end: datetime = datetime.now()
-
-            # add duration to frame
-            frame["__time"] = {
-                "start": start.strftime("%Y-%m-%dT%H:%M:%S.%f"),
-                "end": end.strftime("%Y-%m-%dT%H:%M:%S.%f"),
-                "duration": int((end - start).total_seconds() * 1000),
-            }
-
-            # hoist usage to parent frame
-            if "result" in frame and isinstance(frame["result"], dict):
-                if "usage" in frame["result"]:
-                    frame["__usage"] = self.hoist_item(
-                        frame["result"]["usage"],
-                        frame["__usage"] if "__usage" in frame else {},
-                    )
-
-            # streamed results may have usage as well
-            if "result" in frame and isinstance(frame["result"], list):
-                for result in frame["result"]:
-                    if isinstance(result, dict) and "usage" in result and isinstance(result["usage"], dict):
-                        frame["__usage"] = self.hoist_item(
-                            result["usage"],
-                            frame["__usage"] if "__usage" in frame else {},
-                        )
-
-            # add any usage frames from below
-            if "__frames" in frame:
-                for child in frame["__frames"]:
-                    if "__usage" in child:
-                        frame["__usage"] = self.hoist_item(
-                            child["__usage"],
-                            frame["__usage"] if "__usage" in frame else {},
-                        )
-
-            # if stack is empty, dump the frame
-            if len(self.stack) == 0:
-                self.write_trace(frame)
-            # otherwise, append the frame to the parent
-            else:
-                if "__frames" not in self.stack[-1]:
-                    self.stack[-1]["__frames"] = []
-                self.stack[-1]["__frames"].append(frame)
-
-    def hoist_item(self, src: Dict[str, Any], cur: Dict[str, Any]) -> Dict[str, Any]:
-        for key, value in src.items():
-            if value is None or isinstance(value, list) or isinstance(value, dict):
-                continue
-            try:
-                if key not in cur:
-                    cur[key] = value
-                else:
-                    cur[key] += value
-            except:
-                continue
-
-        return cur
-
-    def write_trace(self, frame: Dict[str, Any]) -> None:
-        trace_file = self.output / f"{frame['name']}.{datetime.now().strftime('%Y%m%d.%H%M%S')}.tracy"
-
-        v = importlib.metadata.version("prompty")  # type: ignore
-        enriched_frame = {
-            "runtime": "python",
-            "version": v,
-            "trace": frame,
-        }
-
-        with open(trace_file, "w") as f:
-            json.dump(enriched_frame, f, indent=4)
-
-
-@contextlib.contextmanager
-def console_tracer(name: str) -> Iterator[Callable[[str, Any], None]]:
-    try:
-        print(f"Starting {name}")
-        yield lambda key, value: print(f"{key}:\n{json.dumps(to_dict(value), indent=4)}")
-    finally:
-        print(f"Ending {name}")
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py
deleted file mode 100644
index 22f284180ee1..000000000000
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-# mypy: disable-error-code="import-untyped,return-value"
-# pylint: disable=line-too-long,R,wrong-import-order,global-variable-not-assigned)
-import json
-import os
-import re
-import sys
-from typing import Any, Dict
-from pathlib import Path
-
-
-_yaml_regex = re.compile(
-    r"^\s*" + r"(?:---|\+\+\+)" + r"(.*?)" + r"(?:---|\+\+\+)" + r"\s*(.+)$",
-    re.S | re.M,
-)
-
-
-def load_text(file_path, encoding="utf-8"):
-    with open(file_path, "r", encoding=encoding) as file:
-        return file.read()
-
-
-def load_json(file_path, encoding="utf-8"):
-    return json.loads(load_text(file_path, encoding=encoding))
-
-
-def load_global_config(prompty_path: Path = Path.cwd(), configuration: str = "default") -> Dict[str, Any]:
-    prompty_config_path = prompty_path.joinpath("prompty.json")
-    if os.path.exists(prompty_config_path):
-        c = load_json(prompty_config_path)
-        if configuration in c:
-            return c[configuration]
-        else:
-            raise ValueError(f'Item "{configuration}" not found in "{prompty_config_path}"')
-    else:
-        return {}
-
-
-def load_prompty(file_path, encoding="utf-8") -> Dict[str, Any]:
-    contents = load_text(file_path, encoding=encoding)
-    return parse(contents)
-
-
-def parse(contents):
-    try:
-        import yaml  # type: ignore
-    except ImportError as exc:
-        raise ImportError("Please install pyyaml to use this function. Run `pip install pyyaml`.") from exc
-
-    global _yaml_regex
-
-    fmatter = ""
-    body = ""
-    result = _yaml_regex.search(contents)
-
-    if result:
-        fmatter = result.group(1)
-        body = result.group(2)
-    return {
-        "attributes": yaml.load(fmatter, Loader=yaml.SafeLoader),
-        "body": body,
-        "frontmatter": fmatter,
-    }
-
-
-def remove_leading_empty_space(multiline_str: str) -> str:
-    """
-    Processes a multiline string by:
-    1. Removing empty lines
-    2. Finding the minimum leading spaces
-    3. Indenting all lines to the minimum level
-
-    :param multiline_str: The input multiline string.
-    :type multiline_str: str
-    :return: The processed multiline string.
-    :rtype: str
-    """
-    lines = multiline_str.splitlines()
-    start_index = 0
-    while start_index < len(lines) and lines[start_index].strip() == "":
-        start_index += 1
-
-    # Find the minimum number of leading spaces
-    min_spaces = sys.maxsize
-    for line in lines[start_index:]:
-        if len(line.strip()) == 0:
-            continue
-        spaces = len(line) - len(line.lstrip())
-        spaces += line.lstrip().count("\t") * 2  # Count tabs as 2 spaces
-        min_spaces = min(min_spaces, spaces)
-
-    # Remove leading spaces and indent to the minimum level
-    processed_lines = []
-    for line in lines[start_index:]:
-        processed_lines.append(line[min_spaces:])
-
-    return "\n".join(processed_lines)
diff --git a/sdk/ai/azure-ai-inference/samples/README.md b/sdk/ai/azure-ai-inference/samples/README.md
index c99e20d9b179..c8b9aa3f01f8 100644
--- a/sdk/ai/azure-ai-inference/samples/README.md
+++ b/sdk/ai/azure-ai-inference/samples/README.md
@@ -101,13 +101,12 @@ similarly for the other samples.
 | [sample_chat_completions_from_input_bytes.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_bytes.py)                               | One chat completion operation using a synchronous client, with input messages provided as `IO[bytes]`. |
 | [sample_chat_completions_from_input_dict.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py)                                 | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`) |
 | [sample_chat_completions_from_input_dict_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py)   | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image URL. |
-| [sample_chat_completions_from_input_prompt_string.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py)               | One chat completion operation using a synchronous client, with input message template in string format. |
-| [sample_chat_completions_from_input_prompty.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py)                           | One chat completion operation using a synchronous client, with the input in Prompty format from a Prompty file. Prompty website: https://prompty.ai |
 | [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py)                                           | Shows how do use a tool (function) in chat completions, for an AI model that supports tools |
 | [sample_chat_completions_streaming_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py)                       | Shows how do use a tool (function) in chat completions, with streaming response, for an AI model that supports tools |
 | [sample_load_client.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_load_client.py)                                                                           | Shows how do use the function `load_client` to create the appropriate synchronous client based on the provided endpoint URL. In this example, it creates a synchronous `ChatCompletionsClient`. |
 | [sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py)                                                                     | Get AI model information using the chat completions client. Similarly can be done with all other clients. |
 | [sample_chat_completions_with_model_extras.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_model_extras.py)                             | Chat completions with additional model-specific parameters. |
+| [sample_chat_completions_with_prompty_file.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_prompty_file.py)                           | One chat completion operation using a synchronous client, with the input from a Prompty file. Prompty website: https://prompty.ai |
 | [sample_chat_completions_azure_openai.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py)                                       | Chat completions against Azure OpenAI endpoint. |
 | [sample_chat_completions_with_tracing.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tracing.py)                                       | Chat completions with traces enabled. Uses function call tool to demonstrates how to add traces to client code so that they will get included as part of the traces that are emitted. |
 
diff --git a/sdk/ai/azure-ai-inference/samples/sample1.prompty b/sdk/ai/azure-ai-inference/samples/sample1.prompty
index 6dbcbf40bc6f..5e7752b4ed07 100644
--- a/sdk/ai/azure-ai-inference/samples/sample1.prompty
+++ b/sdk/ai/azure-ai-inference/samples/sample1.prompty
@@ -12,6 +12,7 @@ model:
     temperature: 1
     frequency_penalty: 0.5
     presence_penalty: 0.5
+template: mustache
 ---
 system:
 You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py
deleted file mode 100644
index 78a2315a3384..000000000000
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-"""
-DESCRIPTION:
-    This sample demonstrates how to get a chat completions response from
-    the service using a synchronous client, with input message template
-    in string format.
-
-    This sample assumes the AI model is hosted on a Serverless API or
-    Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints,
-    the client constructor needs to be modified. See package documentation:
-    https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts
-
-USAGE:
-    python sample_chat_completions_from_input_prompt_string.py
-
-    Set these two environment variables before running the sample:
-    1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form 
-        https://<your-deployment-name>.<your-azure-region>.models.ai.azure.com
-        where `your-deployment-name` is your unique AI Model deployment name, and
-        `your-azure-region` is the Azure region where your model is deployed.
-    2) AZURE_AI_CHAT_KEY - Your model key. Keep it secret.
-"""
-# pyright: reportAttributeAccessIssue=false
-
-
-def sample_chat_completions_from_input_prompt_string():
-    import os
-    from azure.ai.inference import ChatCompletionsClient
-    from azure.ai.inference.prompts import PromptTemplate
-    from azure.core.credentials import AzureKeyCredential
-
-    try:
-        endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"]
-        key = os.environ["AZURE_AI_CHAT_KEY"]
-    except KeyError:
-        print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'")
-        print("Set them before running this sample.")
-        exit()
-
-    prompt_template_str = """
-        system:
-        You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services.
-
-        # context
-        {{#rules}}
-        {{rule}}
-        {{/rules}}
-
-        {{#chat_history}}
-        {{role}}:
-        {{content}}
-        {{/chat_history}}
-
-        user:
-        {{input}}
-    """
-    prompt_template = PromptTemplate.from_string(api="chat", prompt_template=prompt_template_str)
-
-    input = "When I arrived, can I still have breakfast?"
-    rules = [
-        {"rule": "The check-in time is 3pm"},
-        {"rule": "The check-out time is 11am"},
-        {"rule": "Breakfast is served from 7am to 10am"},
-    ]
-    chat_history = [
-        {"role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?"},
-        {"role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM."},
-    ]
-    messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history)
-
-    client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
-    response = client.complete(messages=messages)
-
-    print(response.choices[0].message.content)
-
-
-if __name__ == "__main__":
-    sample_chat_completions_from_input_prompt_string()
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_prompty_file.py
similarity index 66%
rename from sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py
rename to sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_prompty_file.py
index 0e43465e5e4c..d891b5c4ecd3 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompty.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_prompty_file.py
@@ -4,9 +4,9 @@
 # ------------------------------------
 """
 DESCRIPTION:
-    This sample demonstrates how to get a chat completions response from
-    the service using a synchronous client, with the input in Prompty format
-    from a Prompty file. Prompty website: https://prompty.ai
+    This sample demonstrates how to use a Prompty file (https://prompty.ai) for both
+    the model configuration and the prompt template, and then get a chat completions
+    response from the service using a synchronous client.
 
     This sample assumes the AI model is hosted on a Serverless API or
     Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints,
@@ -14,7 +14,7 @@
     https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts
 
 USAGE:
-    python sample_chat_completions_from_input_prompty.py
+    python sample_chat_completions_with_prompty_file.py
 
     Set these two environment variables before running the sample:
     1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form 
@@ -25,11 +25,15 @@
 """
 # pyright: reportAttributeAccessIssue=false
 
+try:
+    from prompty import load, prepare
+except ImportError as exc:
+    MISSING_PROMPTY_PACKAGE_MESSAGE = "Please install the 'prompty' package by running 'pip install prompty'."
+    raise ImportError(MISSING_PROMPTY_PACKAGE_MESSAGE) from exc
 
-def sample_chat_completions_from_input_prompty():
+def sample_chat_completions_with_prompty_file():
     import os
     from azure.ai.inference import ChatCompletionsClient
-    from azure.ai.inference.prompts import PromptTemplate
     from azure.core.credentials import AzureKeyCredential
 
     try:
@@ -40,8 +44,8 @@ def sample_chat_completions_from_input_prompty():
         print("Set them before running this sample.")
         exit()
 
-    path = "./sample1.prompty"
-    prompt_template = PromptTemplate.from_prompty(file_path=path)
+    # Load Prompty file
+    prompty = load("./sample1.prompty")
 
     input = "When I arrived, can I still have breakfast?"
     rules = [
@@ -53,18 +57,26 @@ def sample_chat_completions_from_input_prompty():
         {"role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?"},
         {"role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM."},
     ]
-    messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history)
 
     client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
 
+    # Build the chat messages from the Prompty template and input data, and read the model name from the Prompty file
+    data = {
+        "input": input,
+        "rules": rules,
+        "chat_history": chat_history,
+    }
+    messages = prepare(prompty, data)
+    model_name = prompty.model.configuration["azure_deployment"] if "azure_deployment" in prompty.model.configuration else None
+
     response = client.complete(
         messages=messages,
-        model=prompt_template.model_name,
-        **prompt_template.parameters,
+        model=model_name,
+        **prompty.model.parameters,
     )
 
     print(response.choices[0].message.content)
 
 
 if __name__ == "__main__":
-    sample_chat_completions_from_input_prompty()
+    sample_chat_completions_with_prompty_file()
diff --git a/sdk/ai/azure-ai-inference/tests/sample1.prompty b/sdk/ai/azure-ai-inference/tests/sample1.prompty
deleted file mode 100644
index 6dbcbf40bc6f..000000000000
--- a/sdk/ai/azure-ai-inference/tests/sample1.prompty
+++ /dev/null
@@ -1,30 +0,0 @@
----
-name: Basic Prompt
-description: A basic prompt that uses the GPT-3 chat API to answer questions
-authors:
-  - author_1
-  - author_2
-model:
-  api: chat
-  configuration:
-    azure_deployment: gpt-4o-mini
-  parameters:
-    temperature: 1
-    frequency_penalty: 0.5
-    presence_penalty: 0.5
----
-system:
-You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services.
-
-# context
-{{#rules}}
-{{rule}}
-{{/rules}}
-
-{{#chat_history}}
-{{role}}:
-{{content}}
-{{/chat_history}}
-
-user:
-{{input}}
diff --git a/sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty b/sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty
deleted file mode 100644
index 8451c02b942e..000000000000
--- a/sdk/ai/azure-ai-inference/tests/sample1_with_secrets.prompty
+++ /dev/null
@@ -1,34 +0,0 @@
----
-name: Basic Prompt
-description: A basic prompt that uses the GPT-3 chat API to answer questions
-authors:
-  - author_1
-  - author_2
-model:
-  api: chat
-  configuration:
-    azure_deployment: gpt-4o-mini
-    type: azure_openai
-    api_version: test_version
-    api_key: test_key
-    api_secret: test_secret
-  parameters:
-    temperature: 1
-    frequency_penalty: 0.5
-    presence_penalty: 0.5
----
-system:
-You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services.
-
-# context
-{{#rules}}
-{{rule}}
-{{/rules}}
-
-{{#chat_history}}
-{{role}}:
-{{content}}
-{{/chat_history}}
-
-user:
-{{input}}
diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py
deleted file mode 100644
index 8f3d76ce4aab..000000000000
--- a/sdk/ai/azure-ai-inference/tests/test_prompts.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# pylint: disable=line-too-long,useless-suppression
-# ------------------------------------
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-# ------------------------------------
-import os
-from azure.ai.inference.prompts import PromptTemplate
-
-
-class TestPrompts:
-
-    # **********************************************************************************
-    #
-    #                               UNIT TESTS
-    #
-    # **********************************************************************************
-
-    def test_prompt_template_from_prompty(self, **kwargs):
-        script_dir = os.path.dirname(os.path.abspath(__file__))
-        prompty_file_path = os.path.join(script_dir, "sample1.prompty")
-        prompt_template = PromptTemplate.from_prompty(prompty_file_path)
-        assert prompt_template.model_name == "gpt-4o-mini"
-        assert prompt_template.prompty.model.configuration["api_version"] == "mock-api-version"
-        assert prompt_template.parameters["temperature"] == 1
-        assert prompt_template.parameters["frequency_penalty"] == 0.5
-        assert prompt_template.parameters["presence_penalty"] == 0.5
-
-        input = "What's the check-in and check-out time?"
-        rules = [
-            {"rule": "The check-in time is 3pm"},
-            {"rule": "The check-out time is 11am"},
-            {"rule": "Breakfast is served from 7am to 10am"},
-            {"rule": 'The hotel website is https://www.myhotel.com?key1=param1&key2=param"2&key3=param<3>'},
-        ]
-        messages = prompt_template.create_messages(input=input, rules=rules)
-        assert len(messages) == 2
-        assert messages[0]["role"] == "system"
-        assert "Breakfast is served from 7am to 10am" in messages[0]["content"]
-        assert (
-            "The hotel website is https://www.myhotel.com?key1=param1&amp;key2=param&quot;2&amp;key3=param&lt;3&gt;"
-            in messages[0]["content"]
-        )
-        assert messages[1]["role"] == "user"
-        assert messages[1]["content"] == "What's the check-in and check-out time?"
-
-    def test_prompt_template_from_prompty_with_masked_secrets(self, **kwargs):
-        script_dir = os.path.dirname(os.path.abspath(__file__))
-        prompty_file_path = os.path.join(script_dir, "sample1_with_secrets.prompty")
-        prompt_template = PromptTemplate.from_prompty(prompty_file_path)
-        assert prompt_template.prompty.model.configuration["api_key"] == "test_key"
-        assert prompt_template.prompty.model.configuration["api_secret"] == "test_secret"
-        telemetry_dict = prompt_template.prompty.to_safe_dict()
-        assert telemetry_dict["model"]["configuration"]["api_key"] == "********"
-        assert telemetry_dict["model"]["configuration"]["api_secret"] == "***********"
-
-    def test_prompt_template_from_string(self, **kwargs):
-        prompt_template_str = "system prompt template text\nuser:\n{{input}}"
-        prompt_template = PromptTemplate.from_string(api="chat", prompt_template=prompt_template_str)
-        input = "user question input text"
-        messages = prompt_template.create_messages(input=input)
-        assert len(messages) == 2
-        assert messages[0]["role"] == "system"
-        assert "system prompt template text" == messages[0]["content"]
-        assert "user question input text" == messages[1]["content"]
-
-    def test_prompt_template_from_string_with_tags(self, **kwargs):
-        prompt_template_str = """
-            system:
-            You are an AI assistant in a hotel. You help guests with their requests and provide information about the hotel and its services.
-
-            # context
-            {{#rules}}
-            {{rule}}
-            {{/rules}}
-
-            {{#chat_history}}
-            {{role}}:
-            {{content}}
-            {{/chat_history}}
-
-            user:
-            {{input}}
-        """
-        prompt_template = PromptTemplate.from_string(api="chat", prompt_template=prompt_template_str)
-        input = "When I arrived, can I still have breakfast?"
-        rules = [
-            {"rule": "The check-in time is 3pm"},
-            {"rule": "The check-out time is 11am"},
-            {"rule": "Breakfast is served from 7am to 10am"},
-        ]
-        chat_history = [
-            {"role": "user", "content": "I'll arrive at 2pm. What's the check-in and check-out time?"},
-            {"role": "system", "content": "The check-in time is 3 PM, and the check-out time is 11 AM."},
-        ]
-        messages = prompt_template.create_messages(input=input, rules=rules, chat_history=chat_history)
-        assert len(messages) == 4
-        assert messages[0]["role"] == "system"
-        assert "You are an AI assistant in a hotel." in messages[0]["content"]
-        assert messages[1]["role"] == "user"
-        assert "I'll arrive at 2pm. What's the check-in and check-out time?" == messages[1]["content"]
-        assert messages[2]["role"] == "system"
-        assert "The check-in time is 3 PM, and the check-out time is 11 AM." == messages[2]["content"]
-        assert messages[3]["role"] == "user"
-        assert "When I arrived, can I still have breakfast?" == messages[3]["content"]
diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts_utils.py b/sdk/ai/azure-ai-inference/tests/test_prompts_utils.py
deleted file mode 100644
index b0483cb6ae4a..000000000000
--- a/sdk/ai/azure-ai-inference/tests/test_prompts_utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from azure.ai.inference.prompts._utils import remove_leading_empty_space
-
-
-def test_success_with_no_changes():
-    prompt_str = """First line
-Second line"""
-    result = remove_leading_empty_space(prompt_str)
-    assert result == prompt_str
-
-
-def test_success_by_remove_leading_empty_space():
-    prompt_str = """
-    
-    First line
-
-      Second line
-        Third line
-"""
-    result = remove_leading_empty_space(prompt_str)
-    assert (
-        result
-        == """First line
-
-  Second line
-    Third line"""
-    )