From 34b4ad77e01d61c96f9586819af806bcc0408389 Mon Sep 17 00:00:00 2001 From: Darren Cohen <39422044+dargilco@users.noreply.github.com> Date: Fri, 14 Feb 2025 11:21:07 -0800 Subject: [PATCH 1/3] Update version to beta 10 --- sdk/ai/azure-ai-inference/CHANGELOG.md | 8 ++++++++ sdk/ai/azure-ai-inference/azure/ai/inference/_version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-inference/CHANGELOG.md b/sdk/ai/azure-ai-inference/CHANGELOG.md index 2d32b28ae69c..958fa5d51d29 100644 --- a/sdk/ai/azure-ai-inference/CHANGELOG.md +++ b/sdk/ai/azure-ai-inference/CHANGELOG.md @@ -1,5 +1,13 @@ # Release History +## 1.0.0b10 (Unreleased) + +### Features Added + +### Bugs Fixed + +### Breaking Changes + ## 1.0.0b9 (2025-02-14) ### Features Added diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py index b1c2836b6921..9ab0a006e0d0 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_version.py @@ -6,4 +6,4 @@ # Changes may cause incorrect behavior and will be lost if the code is regenerated. # -------------------------------------------------------------------------- -VERSION = "1.0.0b9" +VERSION = "1.0.0b10" From aedffe70619daf4dba7a3c2d624b8e3e53712f68 Mon Sep 17 00:00:00 2001 From: Travis Angevine Date: Thu, 20 Feb 2025 10:37:39 -0800 Subject: [PATCH 2/3] Generation with new body parameter (#39803) * Changes after generation Signed-off-by: trangevi * pylint fix Signed-off-by: trangevi --------- Signed-off-by: trangevi --- .../azure/ai/inference/_model_base.py | 2 +- .../ai/inference/_operations/_operations.py | 242 +------- .../azure/ai/inference/_serialization.py | 4 +- .../inference/aio/_operations/_operations.py | 243 +------- .../azure/ai/inference/aio/_patch.py | 2 +- .../azure/ai/inference/models/_models.py | 536 +++++++++++++++--- .../azure/ai/inference/prompts/_invoker.py | 1 + .../azure/ai/inference/prompts/_mustache.py | 1 + .../azure/ai/inference/prompts/_parsers.py | 1 + .../azure/ai/inference/prompts/_patch.py | 1 + .../ai/inference/prompts/_prompty_utils.py | 1 + .../azure/ai/inference/tracing.py | 1 + ..._chat_completions_from_input_dict_async.py | 1 + ...ompletions_streaming_azure_openai_async.py | 1 + .../async_samples/sample_embeddings_async.py | 1 + .../async_samples/sample_load_client_async.py | 1 + .../sample_chat_completions_azure_openai.py | 1 + ...sample_chat_completions_from_input_dict.py | 1 + ...pletions_from_input_dict_with_image_url.py | 1 + ...at_completions_from_input_prompt_string.py | 1 + ...e_chat_completions_streaming_with_tools.py | 1 + .../sample_chat_completions_with_history.py | 1 + .../sample_chat_completions_with_image_url.py | 1 + ...chat_completions_with_structured_output.py | 1 + ...letions_with_structured_output_pydantic.py | 1 + .../sample_chat_completions_with_tools.py | 1 + .../samples/sample_embeddings_azure_openai.py | 1 + .../tests/gen_ai_trace_verifier.py | 1 + .../tests/model_inference_test_base.py | 1 + .../tests/test_chat_completions_client.py | 1 + .../test_chat_completions_client_async.py | 1 + .../tests/test_client_tracing.py | 2 +- .../tests/test_embeddings_client_async.py | 1 + .../test_image_embeddings_client_async.py | 1 + .../azure-ai-inference/tests/test_prompts.py | 1 + .../tests/test_unit_tests.py | 1 + sdk/ai/azure-ai-inference/tsp-location.yaml | 2 +- 37 files changed, 516 insertions(+), 546 deletions(-) diff --git 
a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py index 359ecebe23f7..3072ee252ed9 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-lines,arguments-differ,signature-differs,no-member +# pylint: disable=too-many-lines # coding=utf-8 # -------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py index 78e5ee353228..b48a0dc52af5 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py @@ -1,4 +1,3 @@ -# pylint: disable=too-many-locals # coding=utf-8 # -------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. @@ -9,7 +8,7 @@ from io import IOBase import json import sys -from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload +from typing import Any, Callable, Dict, IO, Optional, TypeVar, Union, overload from azure.core.exceptions import ( ClientAuthenticationError, @@ -36,7 +35,6 @@ else: from typing import MutableMapping # type: ignore JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object -_Unset: Any = object() T = TypeVar("T") ClsType = Optional[Callable[[PipelineResponse[HttpRequest, HttpResponse], T, Dict[str, Any]], Any]] @@ -184,24 +182,10 @@ class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC): @overload def _complete( self, + body: _models._models.ChatCompletionsOptions, *, - messages: List[_models._models.ChatRequestMessage], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", - frequency_penalty: Optional[float] = None, - stream_parameter: Optional[bool] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.ChatCompletions: ... 
@overload @@ -226,24 +210,9 @@ def _complete( @distributed_trace def _complete( self, - body: Union[JSON, IO[bytes]] = _Unset, + body: Union[_models._models.ChatCompletionsOptions, JSON, IO[bytes]], *, - messages: List[_models._models.ChatRequestMessage] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, - frequency_penalty: Optional[float] = None, - stream_parameter: Optional[bool] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.ChatCompletions: """Gets chat completions for the provided chat messages. @@ -252,93 +221,14 @@ def _complete( provided prompt data. The method makes a REST API call to the ``/chat/completions`` route on the given endpoint. - :param body: Is either a JSON type or a IO[bytes] type. Required. - :type body: JSON or IO[bytes] - :keyword messages: The collection of context messages associated with this chat completions - request. - Typical usage begins with a chat message for the System role that provides instructions for - the behavior of the assistant, followed by alternating messages between the User and - Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage] + :param body: The options for chat completions. Is one of the following types: + ChatCompletionsOptions, JSON, IO[bytes] Required. + :type body: ~azure.ai.inference.models._models.ChatCompletionsOptions or JSON or IO[bytes] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and "pass-through". Default value is None. :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative - frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. Default value is None. - :paramtype frequency_penalty: float - :keyword stream_parameter: A value indicating whether chat completions should be streamed for - this request. Default value is None. - :paramtype stream_parameter: bool - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the - model's likelihood to output new topics. - Supported range is [-2, 2]. Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. 
- It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: An object specifying the format that the model must output. - - Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs - which ensures the model will match your supplied JSON schema. - - Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the - model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to produce JSON - yourself via a system or user message. Without this, the model may generate an unending stream - of whitespace until the generation reaches the token limit, resulting in a long-running and - seemingly "stuck" request. Also note that the message content may be partially cut off if - ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the - conversation exceeded the max context length. Default value is None. - :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: A list of tools the model may request to call. Currently, only functions are - supported as a tool. The model - may response with a function call request and provide the input arguments in JSON format for - that function. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. Default - value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str :return: ChatCompletions. 
The ChatCompletions is compatible with MutableMapping :rtype: ~azure.ai.inference.models.ChatCompletions :raises ~azure.core.exceptions.HttpResponseError: @@ -357,25 +247,6 @@ def _complete( content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None) - if body is _Unset: - if messages is _Unset: - raise TypeError("missing required argument: messages") - body = { - "frequency_penalty": frequency_penalty, - "max_tokens": max_tokens, - "messages": messages, - "model": model, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream_parameter, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_p": top_p, - } - body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): @@ -488,14 +359,10 @@ class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC): @overload def _embed( self, + body: _models._models.EmbeddingsOptions, *, - input: List[str], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: ... @overload @@ -520,46 +387,22 @@ def _embed( @distributed_trace def _embed( self, - body: Union[JSON, IO[bytes]] = _Unset, + body: Union[_models._models.EmbeddingsOptions, JSON, IO[bytes]], *, - input: List[str] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: """Return the embedding vectors for given text prompts. The method makes a REST API call to the ``/embeddings`` route on the given endpoint. - :param body: Is either a JSON type or a IO[bytes] type. Required. - :type body: JSON or IO[bytes] - :keyword input: Input text to embed, encoded as a string or array of tokens. - To embed multiple inputs in a single request, pass an array - of strings or array of token arrays. Required. - :paramtype input: list[str] + :param body: The body of the request containing the options for generating embeddings. Is one + of the following types: EmbeddingsOptions, JSON, IO[bytes] Required. + :type body: ~azure.ai.inference.models._models.EmbeddingsOptions or JSON or IO[bytes] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and "pass-through". Default value is None. :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. - Passing null causes the model to use its default value. - Returns a 422 error if the model doesn't support the value or parameter. Default value is - None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. 
Known - values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. - Returns a 422 error if the model doesn't support the value or parameter. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: @@ -578,17 +421,6 @@ def _embed( content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None) - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "dimensions": dimensions, - "encoding_format": encoding_format, - "input": input, - "input_type": input_type, - "model": model, - } - body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): @@ -701,14 +533,10 @@ class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC): @overload def _embed( self, + body: _models._models.ImageEmbeddingsOptions, *, - input: List[_models.ImageEmbeddingInput], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: ... @overload @@ -733,49 +561,22 @@ def _embed( @distributed_trace def _embed( self, - body: Union[JSON, IO[bytes]] = _Unset, + body: Union[_models._models.ImageEmbeddingsOptions, JSON, IO[bytes]], *, - input: List[_models.ImageEmbeddingInput] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: """Return the embedding vectors for given images. The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint. - :param body: Is either a JSON type or a IO[bytes] type. Required. - :type body: JSON or IO[bytes] - :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an - array. - The input must not exceed the max input tokens for the model. Required. - :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] + :param body: The body of the request containing options for image embeddings. Is one of the + following types: ImageEmbeddingsOptions, JSON, IO[bytes] Required. + :type body: ~azure.ai.inference.models._models.ImageEmbeddingsOptions or JSON or IO[bytes] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. 
Known values are: "error", "drop", and "pass-through". Default value is None. :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. - Passing null causes the model to use its default value. - Returns a 422 error if the model doesn't support the value or parameter. Default value is - None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The number of dimensions the resulting output embeddings - should have. - Passing null causes the model to use its default value. - Returns a 422 error if the model doesn't support the value or parameter. Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. - Returns a 422 error if the model doesn't support the value or parameter. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: @@ -794,17 +595,6 @@ def _embed( content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None) - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "dimensions": dimensions, - "encoding_format": encoding_format, - "input": input, - "input_type": input_type, - "model": model, - } - body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py index a066e16a64dd..e2a20b1d534c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-lines +# pylint: disable=line-too-long,useless-suppression,too-many-lines # -------------------------------------------------------------------------- # # Copyright (c) Microsoft Corporation. All rights reserved. @@ -1361,7 +1361,7 @@ def xml_key_extractor(attr, attr_desc, data): # pylint: disable=unused-argument # Iter and wrapped, should have found one node only (the wrap one) if len(children) != 1: raise DeserializationError( - "Tried to deserialize an array not wrapped, and found several nodes '{}'. Maybe you should declare this array as wrapped?".format( # pylint: disable=line-too-long + "Tried to deserialize an array not wrapped, and found several nodes '{}'. 
Maybe you should declare this array as wrapped?".format( xml_name ) ) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py index 62ec772f6dae..c481e4719835 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_operations/_operations.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-locals +# pylint: disable=line-too-long,useless-suppression # coding=utf-8 # -------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. @@ -9,7 +9,7 @@ from io import IOBase import json import sys -from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload +from typing import Any, Callable, Dict, IO, Optional, TypeVar, Union, overload from azure.core.exceptions import ( ClientAuthenticationError, @@ -43,7 +43,6 @@ else: from typing import MutableMapping # type: ignore JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object -_Unset: Any = object() T = TypeVar("T") ClsType = Optional[Callable[[PipelineResponse[HttpRequest, AsyncHttpResponse], T, Dict[str, Any]], Any]] @@ -53,24 +52,10 @@ class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC): @overload async def _complete( self, + body: _models._models.ChatCompletionsOptions, *, - messages: List[_models._models.ChatRequestMessage], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", - frequency_penalty: Optional[float] = None, - stream_parameter: Optional[bool] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.ChatCompletions: ... @overload @@ -95,24 +80,9 @@ async def _complete( @distributed_trace_async async def _complete( self, - body: Union[JSON, IO[bytes]] = _Unset, + body: Union[_models._models.ChatCompletionsOptions, JSON, IO[bytes]], *, - messages: List[_models._models.ChatRequestMessage] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, - frequency_penalty: Optional[float] = None, - stream_parameter: Optional[bool] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.ChatCompletions: """Gets chat completions for the provided chat messages. @@ -121,93 +91,14 @@ async def _complete( provided prompt data. The method makes a REST API call to the ``/chat/completions`` route on the given endpoint. 
- :param body: Is either a JSON type or a IO[bytes] type. Required. - :type body: JSON or IO[bytes] - :keyword messages: The collection of context messages associated with this chat completions - request. - Typical usage begins with a chat message for the System role that provides instructions for - the behavior of the assistant, followed by alternating messages between the User and - Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage] + :param body: The options for chat completions. Is one of the following types: + ChatCompletionsOptions, JSON, IO[bytes] Required. + :type body: ~azure.ai.inference.models._models.ChatCompletionsOptions or JSON or IO[bytes] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and "pass-through". Default value is None. :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative - frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. Default value is None. - :paramtype frequency_penalty: float - :keyword stream_parameter: A value indicating whether chat completions should be streamed for - this request. Default value is None. - :paramtype stream_parameter: bool - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the - model's likelihood to output new topics. - Supported range is [-2, 2]. Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: An object specifying the format that the model must output. - - Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs - which ensures the model will match your supplied JSON schema. 
- - Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the - model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to produce JSON - yourself via a system or user message. Without this, the model may generate an unending stream - of whitespace until the generation reaches the token limit, resulting in a long-running and - seemingly "stuck" request. Also note that the message content may be partially cut off if - ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the - conversation exceeded the max context length. Default value is None. - :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: A list of tools the model may request to call. Currently, only functions are - supported as a tool. The model - may response with a function call request and provide the input arguments in JSON format for - that function. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. Default - value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str :return: ChatCompletions. 
The ChatCompletions is compatible with MutableMapping :rtype: ~azure.ai.inference.models.ChatCompletions :raises ~azure.core.exceptions.HttpResponseError: @@ -226,25 +117,6 @@ async def _complete( content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None) - if body is _Unset: - if messages is _Unset: - raise TypeError("missing required argument: messages") - body = { - "frequency_penalty": frequency_penalty, - "max_tokens": max_tokens, - "messages": messages, - "model": model, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream_parameter, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_p": top_p, - } - body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): @@ -357,14 +229,10 @@ class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC): @overload async def _embed( self, + body: _models._models.EmbeddingsOptions, *, - input: List[str], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: ... @overload @@ -389,46 +257,22 @@ async def _embed( @distributed_trace_async async def _embed( self, - body: Union[JSON, IO[bytes]] = _Unset, + body: Union[_models._models.EmbeddingsOptions, JSON, IO[bytes]], *, - input: List[str] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: """Return the embedding vectors for given text prompts. The method makes a REST API call to the ``/embeddings`` route on the given endpoint. - :param body: Is either a JSON type or a IO[bytes] type. Required. - :type body: JSON or IO[bytes] - :keyword input: Input text to embed, encoded as a string or array of tokens. - To embed multiple inputs in a single request, pass an array - of strings or array of token arrays. Required. - :paramtype input: list[str] + :param body: The body of the request containing the options for generating embeddings. Is one + of the following types: EmbeddingsOptions, JSON, IO[bytes] Required. + :type body: ~azure.ai.inference.models._models.EmbeddingsOptions or JSON or IO[bytes] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and "pass-through". Default value is None. :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. - Passing null causes the model to use its default value. - Returns a 422 error if the model doesn't support the value or parameter. Default value is - None. - :paramtype dimensions: int - :keyword encoding_format: Optional. 
The desired format for the returned embeddings. Known - values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. - Returns a 422 error if the model doesn't support the value or parameter. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: @@ -447,17 +291,6 @@ async def _embed( content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None) - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "dimensions": dimensions, - "encoding_format": encoding_format, - "input": input, - "input_type": input_type, - "model": model, - } - body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): @@ -570,14 +403,10 @@ class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC): @overload async def _embed( self, + body: _models._models.ImageEmbeddingsOptions, *, - input: List[_models.ImageEmbeddingInput], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: ... @overload @@ -602,49 +431,22 @@ async def _embed( @distributed_trace_async async def _embed( self, - body: Union[JSON, IO[bytes]] = _Unset, + body: Union[_models._models.ImageEmbeddingsOptions, JSON, IO[bytes]], *, - input: List[_models.ImageEmbeddingInput] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, **kwargs: Any ) -> _models.EmbeddingsResult: """Return the embedding vectors for given images. The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint. - :param body: Is either a JSON type or a IO[bytes] type. Required. - :type body: JSON or IO[bytes] - :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an - array. - The input must not exceed the max input tokens for the model. Required. - :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] + :param body: The body of the request containing options for image embeddings. Is one of the + following types: ImageEmbeddingsOptions, JSON, IO[bytes] Required. 
+ :type body: ~azure.ai.inference.models._models.ImageEmbeddingsOptions or JSON or IO[bytes] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and "pass-through". Default value is None. :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. - Passing null causes the model to use its default value. - Returns a 422 error if the model doesn't support the value or parameter. Default value is - None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The number of dimensions the resulting output embeddings - should have. - Passing null causes the model to use its default value. - Returns a 422 error if the model doesn't support the value or parameter. Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. - Returns a 422 error if the model doesn't support the value or parameter. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: @@ -663,17 +465,6 @@ async def _embed( content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None) - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "dimensions": dimensions, - "encoding_format": encoding_format, - "input": input, - "input_type": input_type, - "model": model, - } - body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py index 2f9873805aa6..9e084d8dd6d4 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_patch.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-lines +# pylint: disable=too-many-lines,line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py index 53934528434f..85598618489a 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-lines +# pylint: disable=line-too-long,useless-suppression,too-many-lines # coding=utf-8 # -------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
@@ -30,7 +30,7 @@ class ContentItem(_model_base.Model): """ __mapping__: Dict[str, _model_base.Model] = {} - type: str = rest_discriminator(name="type") + type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) """The discriminated object type. Required. Default value is None.""" @overload @@ -61,10 +61,10 @@ class AudioContentItem(ContentItem, discriminator="input_audio"): :vartype input_audio: ~azure.ai.inference.models.InputAudio """ - type: Literal["input_audio"] = rest_discriminator(name="type") # type: ignore + type: Literal["input_audio"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The discriminated object type: always 'input_audio' for this type. Required. Default value is \"input_audio\".""" - input_audio: "_models.InputAudio" = rest_field() + input_audio: "_models.InputAudio" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The details of the input audio. Required.""" @overload @@ -101,12 +101,14 @@ class ChatChoice(_model_base.Model): :vartype message: ~azure.ai.inference.models.ChatResponseMessage """ - index: int = rest_field() + index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The ordered index associated with this chat completions choice. Required.""" - finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field() + finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The reason that this chat completions choice completed its generated. Required. Known values are: \"stop\", \"length\", \"content_filter\", and \"tool_calls\".""" - message: "_models.ChatResponseMessage" = rest_field() + message: "_models.ChatResponseMessage" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The chat message for a given chat completions prompt. Required.""" @overload @@ -153,18 +155,20 @@ class ChatCompletions(_model_base.Model): :vartype usage: ~azure.ai.inference.models.CompletionsUsage """ - id: str = rest_field() + id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """A unique identifier associated with this chat completions response. Required.""" - created: datetime.datetime = rest_field(format="unix-timestamp") + created: datetime.datetime = rest_field( + visibility=["read", "create", "update", "delete", "query"], format="unix-timestamp" + ) """The first timestamp associated with generation activity for this completions response, represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.""" - model: str = rest_field() + model: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The model used for the chat completion. Required.""" - choices: List["_models.ChatChoice"] = rest_field() + choices: List["_models.ChatChoice"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The collection of completions choices associated with this completions response. Generally, ``n`` choices are generated per provided prompt with a default value of 1. Token limits and other settings may limit the number of choices generated. 
Required.""" - usage: "_models.CompletionsUsage" = rest_field() + usage: "_models.CompletionsUsage" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Usage information for tokens processed and generated as part of this completions operation. Required.""" @@ -201,10 +205,12 @@ class ChatCompletionsNamedToolChoice(_model_base.Model): :vartype function: ~azure.ai.inference.models.ChatCompletionsNamedToolChoiceFunction """ - type: Literal["function"] = rest_field() + type: Literal["function"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The type of the tool. Currently, only ``function`` is supported. Required. Default value is \"function\".""" - function: "_models.ChatCompletionsNamedToolChoiceFunction" = rest_field() + function: "_models.ChatCompletionsNamedToolChoiceFunction" = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The function that should be called. Required.""" @overload @@ -234,7 +240,7 @@ class ChatCompletionsNamedToolChoiceFunction(_model_base.Model): :vartype name: str """ - name: str = rest_field() + name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The name of the function that should be called. Required.""" @overload @@ -255,6 +261,198 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class ChatCompletionsOptions(_model_base.Model): + """The configuration information for a chat completions request. + Completions support a wide variety of tasks and generate text that continues from or + "completes" + provided prompt data. + + :ivar messages: The collection of context messages associated with this chat completions + request. + Typical usage begins with a chat message for the System role that provides instructions for + the behavior of the assistant, followed by alternating messages between the User and + Assistant roles. Required. + :vartype messages: list[~azure.ai.inference.models._models.ChatRequestMessage] + :ivar frequency_penalty: A value that influences the probability of generated tokens appearing + based on their cumulative + frequency in generated text. + Positive values will make tokens less likely to appear as their frequency increases and + decrease the likelihood of the model repeating the same statements verbatim. + Supported range is [-2, 2]. + :vartype frequency_penalty: float + :ivar stream: A value indicating whether chat completions should be streamed for this request. + :vartype stream: bool + :ivar presence_penalty: A value that influences the probability of generated tokens appearing + based on their existing + presence in generated text. + Positive values will make tokens less likely to appear when they already exist and increase the + model's likelihood to output new topics. + Supported range is [-2, 2]. + :vartype presence_penalty: float + :ivar temperature: The sampling temperature to use that controls the apparent creativity of + generated completions. + Higher values will make output more random while lower values will make results more focused + and deterministic. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + :vartype temperature: float + :ivar top_p: An alternative to sampling with temperature called nucleus sampling. This value + causes the + model to consider the results of tokens with the provided probability mass. 
As an example, a + value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be + considered. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + :vartype top_p: float + :ivar max_tokens: The maximum number of tokens to generate. + :vartype max_tokens: int + :ivar response_format: An object specifying the format that the model must output. + + Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs which + ensures the model will match your supplied JSON schema. + + Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the model + generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON + yourself via a system or user message. Without this, the model may generate an unending stream + of whitespace until the generation reaches the token limit, resulting in a long-running and + seemingly "stuck" request. Also note that the message content may be partially cut off if + ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the + conversation exceeded the max context length. + :vartype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat + :ivar stop: A collection of textual sequences that will end completions generation. + :vartype stop: list[str] + :ivar tools: A list of tools the model may request to call. Currently, only functions are + supported as a tool. The model + may respond with a function call request and provide the input arguments in JSON format for + that function. + :vartype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] + :ivar tool_choice: If specified, the model will configure which of the provided tools it can + use for the chat completions response. Is either a Union[str, + "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. + :vartype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or + ~azure.ai.inference.models.ChatCompletionsNamedToolChoice + :ivar seed: If specified, the system will make a best effort to sample deterministically such + that repeated requests with the + same seed and parameters should return the same result. Determinism is not guaranteed. + :vartype seed: int + :ivar model: ID of the specific AI model to use, if more than one model is available on the + endpoint. + :vartype model: str + """ + + messages: List["_models._models.ChatRequestMessage"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """The collection of context messages associated with this chat completions request. + Typical usage begins with a chat message for the System role that provides instructions for + the behavior of the assistant, followed by alternating messages between the User and + Assistant roles. Required.""" + frequency_penalty: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """A value that influences the probability of generated tokens appearing based on their cumulative + frequency in generated text. + Positive values will make tokens less likely to appear as their frequency increases and + decrease the likelihood of the model repeating the same statements verbatim. 
+ Supported range is [-2, 2].""" + stream: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """A value indicating whether chat completions should be streamed for this request.""" + presence_penalty: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """A value that influences the probability of generated tokens appearing based on their existing + presence in generated text. + Positive values will make tokens less likely to appear when they already exist and increase the + model's likelihood to output new topics. + Supported range is [-2, 2].""" + temperature: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sampling temperature to use that controls the apparent creativity of generated completions. + Higher values will make output more random while lower values will make results more focused + and deterministic. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1].""" + top_p: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """An alternative to sampling with temperature called nucleus sampling. This value causes the + model to consider the results of tokens with the provided probability mass. As an example, a + value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be + considered. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1].""" + max_tokens: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The maximum number of tokens to generate.""" + response_format: Optional["_models._models.ChatCompletionsResponseFormat"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """An object specifying the format that the model must output. + + Setting to ``{ \"type\": \"json_schema\", \"json_schema\": {...} }`` enables Structured Outputs + which ensures the model will match your supplied JSON schema. + + Setting to ``{ \"type\": \"json_object\" }`` enables JSON mode, which ensures the message the + model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON + yourself via a system or user message. Without this, the model may generate an unending stream + of whitespace until the generation reaches the token limit, resulting in a long-running and + seemingly \"stuck\" request. Also note that the message content may be partially cut off if + ``finish_reason=\"length\"``\\ , which indicates the generation exceeded ``max_tokens`` or the + conversation exceeded the max context length.""" + stop: Optional[List[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """A collection of textual sequences that will end completions generation.""" + tools: Optional[List["_models.ChatCompletionsToolDefinition"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """A list of tools the model may request to call. Currently, only functions are supported as a + tool. 
The model + may respond with a function call request and provide the input arguments in JSON format for + that function.""" + tool_choice: Optional[ + Union[str, "_models.ChatCompletionsToolChoicePreset", "_models.ChatCompletionsNamedToolChoice"] + ] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """If specified, the model will configure which of the provided tools it can use for the chat + completions response. Is either a Union[str, \"_models.ChatCompletionsToolChoicePreset\"] type + or a ChatCompletionsNamedToolChoice type.""" + seed: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """If specified, the system will make a best effort to sample deterministically such that repeated + requests with the + same seed and parameters should return the same result. Determinism is not guaranteed.""" + model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """ID of the specific AI model to use, if more than one model is available on the endpoint.""" + + @overload + def __init__( + self, + *, + messages: List["_models._models.ChatRequestMessage"], + frequency_penalty: Optional[float] = None, + stream: Optional[bool] = None, + presence_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional["_models._models.ChatCompletionsResponseFormat"] = None, + stop: Optional[List[str]] = None, + tools: Optional[List["_models.ChatCompletionsToolDefinition"]] = None, + tool_choice: Optional[ + Union[str, "_models.ChatCompletionsToolChoicePreset", "_models.ChatCompletionsNamedToolChoice"] + ] = None, + seed: Optional[int] = None, + model: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + class ChatCompletionsResponseFormat(_model_base.Model): """Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode. @@ -272,7 +470,7 @@ class ChatCompletionsResponseFormat(_model_base.Model): """ __mapping__: Dict[str, _model_base.Model] = {} - type: str = rest_discriminator(name="type") + type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) """The response format type to use for chat completions. Required. Default value is None.""" @overload @@ -304,7 +502,7 @@ class ChatCompletionsResponseFormatJsonObject(ChatCompletionsResponseFormat, dis :vartype type: str """ - type: Literal["json_object"] = rest_discriminator(name="type") # type: ignore + type: Literal["json_object"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """Response format type: always 'json_object' for this object. Required. Default value is \"json_object\".""" @@ -337,10 +535,10 @@ class ChatCompletionsResponseFormatJsonSchema(ChatCompletionsResponseFormat, dis :vartype json_schema: ~azure.ai.inference.models.JsonSchemaFormat """ - type: Literal["json_schema"] = rest_discriminator(name="type") # type: ignore + type: Literal["json_schema"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The type of response format being defined: ``json_schema``. Required. 
Default value is \"json_schema\".""" - json_schema: "_models.JsonSchemaFormat" = rest_field() + json_schema: "_models.JsonSchemaFormat" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The definition of the required JSON schema in the response, and associated metadata. Required.""" @overload @@ -370,7 +568,7 @@ class ChatCompletionsResponseFormatText(ChatCompletionsResponseFormat, discrimin :vartype type: str """ - type: Literal["text"] = rest_discriminator(name="type") # type: ignore + type: Literal["text"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """Response format type: always 'text' for this object. Required. Default value is \"text\".""" @overload @@ -392,6 +590,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class ChatCompletionsToolCall(_model_base.Model): """A function tool call requested by the AI model. + :ivar id: The ID of the tool call. Required. :vartype id: str :ivar type: The type of tool call. Currently, only ``function`` is supported. Required. Default @@ -401,12 +600,12 @@ class ChatCompletionsToolCall(_model_base.Model): :vartype function: ~azure.ai.inference.models.FunctionCall """ - id: str = rest_field() + id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The ID of the tool call. Required.""" - type: Literal["function"] = rest_field() + type: Literal["function"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The type of tool call. Currently, only ``function`` is supported. Required. Default value is \"function\".""" - function: "_models.FunctionCall" = rest_field() + function: "_models.FunctionCall" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The details of the function call requested by the AI model. Required.""" @overload @@ -439,10 +638,10 @@ class ChatCompletionsToolDefinition(_model_base.Model): :vartype function: ~azure.ai.inference.models.FunctionDefinition """ - type: Literal["function"] = rest_field() + type: Literal["function"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The type of the tool. Currently, only ``function`` is supported. Required. Default value is \"function\".""" - function: "_models.FunctionDefinition" = rest_field() + function: "_models.FunctionDefinition" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The function definition details for the function tool. Required.""" @overload @@ -477,7 +676,7 @@ class ChatRequestMessage(_model_base.Model): """ __mapping__: Dict[str, _model_base.Model] = {} - role: str = rest_discriminator(name="role") + role: str = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"]) """The chat role associated with this message. Required. Known values are: \"system\", \"user\", \"assistant\", \"tool\", and \"developer\".""" @@ -514,12 +713,14 @@ class ChatRequestAssistantMessage(ChatRequestMessage, discriminator="assistant") :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall] """ - role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role") # type: ignore + role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The chat role associated with this message, which is always 'assistant' for assistant messages. Required. 
The role that provides responses to system-instructed, user-prompted input.""" - content: Optional[str] = rest_field() + content: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The content of the message.""" - tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field() + tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat completions request to resolve as configured.""" @@ -544,7 +745,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class ChatRequestDeveloperMessage(ChatRequestMessage, discriminator="developer"): - """A request chat message containing system instructions that influence how the model will + """A request chat message containing developer instructions that influence how the model will generate a chat completions response. Some AI models support a developer message instead of a system message. @@ -556,11 +757,11 @@ class ChatRequestDeveloperMessage(ChatRequestMessage, discriminator="developer") :vartype content: str """ - role: Literal[ChatRole.DEVELOPER] = rest_discriminator(name="role") # type: ignore + role: Literal[ChatRole.DEVELOPER] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The chat role associated with this message, which is always 'developer' for developer messages. Required. The role that instructs or sets the behavior of the assistant. Some AI models support this role instead of the 'system' role.""" - content: str = rest_field() + content: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The contents of the developer message. Required.""" @overload @@ -593,10 +794,10 @@ class ChatRequestSystemMessage(ChatRequestMessage, discriminator="system"): :vartype content: str """ - role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role") # type: ignore + role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The chat role associated with this message, which is always 'system' for system messages. Required. The role that instructs or sets the behavior of the assistant.""" - content: str = rest_field() + content: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The contents of the system message. Required.""" @overload @@ -630,12 +831,12 @@ class ChatRequestToolMessage(ChatRequestMessage, discriminator="tool"): :vartype tool_call_id: str """ - role: Literal[ChatRole.TOOL] = rest_discriminator(name="role") # type: ignore + role: Literal[ChatRole.TOOL] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The chat role associated with this message, which is always 'tool' for tool messages. Required. The role that represents extension tool activity within a chat completions operation.""" - content: Optional[str] = rest_field() + content: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The content of the message.""" - tool_call_id: str = rest_field() + tool_call_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The ID of the tool call resolved by the provided content. 
Required.""" @overload @@ -668,10 +869,12 @@ class ChatRequestUserMessage(ChatRequestMessage, discriminator="user"): :vartype content: str or list[~azure.ai.inference.models.ContentItem] """ - role: Literal[ChatRole.USER] = rest_discriminator(name="role") # type: ignore + role: Literal[ChatRole.USER] = rest_discriminator(name="role", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The chat role associated with this message, which is always 'user' for user messages. Required. The role that provides input for chat completions.""" - content: Union["str", List["_models.ContentItem"]] = rest_field() + content: Union["str", List["_models.ContentItem"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The contents of the user message, with available input types varying by selected model. Required. Is either a str type or a [ContentItem] type.""" @@ -708,12 +911,14 @@ class ChatResponseMessage(_model_base.Model): :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall] """ - role: Union[str, "_models.ChatRole"] = rest_field() + role: Union[str, "_models.ChatRole"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The chat role associated with the message. Required. Known values are: \"system\", \"user\", \"assistant\", \"tool\", and \"developer\".""" - content: str = rest_field() + content: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The content of the message. Required.""" - tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field() + tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat completions request to resolve as configured.""" @@ -755,11 +960,11 @@ class CompletionsUsage(_model_base.Model): :vartype total_tokens: int """ - completion_tokens: int = rest_field() + completion_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The number of tokens generated across all completions emissions. Required.""" - prompt_tokens: int = rest_field() + prompt_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The number of tokens in the provided prompts for the completions request. Required.""" - total_tokens: int = rest_field() + total_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The total number of tokens processed for the completions request and response. Required.""" @overload @@ -795,11 +1000,11 @@ class EmbeddingItem(_model_base.Model): :vartype index: int """ - embedding: Union["str", List[float]] = rest_field() + embedding: Union["str", List[float]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """List of embedding values for the input prompt. These represent a measurement of the vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector. Required. Is either a str type or a [float] type.""" - index: int = rest_field() + index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Index of the prompt to which the EmbeddingItem corresponds. 
Required.""" @overload @@ -821,6 +1026,74 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class EmbeddingsOptions(_model_base.Model): + """The configuration information for an embeddings request. + + :ivar input: Input text to embed, encoded as a string or array of tokens. + To embed multiple inputs in a single request, pass an array + of strings or array of token arrays. Required. + :vartype input: list[str] + :ivar dimensions: Optional. The number of dimensions the resulting output embeddings should + have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. + :vartype dimensions: int + :ivar encoding_format: Optional. The desired format for the returned embeddings. Known values + are: "base64", "binary", "float", "int8", "ubinary", and "uint8". + :vartype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :ivar input_type: Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + "text", "query", and "document". + :vartype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :ivar model: ID of the specific AI model to use, if more than one model is available on the + endpoint. + :vartype model: str + """ + + input: List[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Input text to embed, encoded as a string or array of tokens. + To embed multiple inputs in a single request, pass an array + of strings or array of token arrays. Required.""" + dimensions: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Optional. The number of dimensions the resulting output embeddings should have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter.""" + encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """Optional. The desired format for the returned embeddings. Known values are: \"base64\", + \"binary\", \"float\", \"int8\", \"ubinary\", and \"uint8\".""" + input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + \"text\", \"query\", and \"document\".""" + model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """ID of the specific AI model to use, if more than one model is available on the endpoint.""" + + @overload + def __init__( + self, + *, + input: List[str], + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = None, + input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = None, + model: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + class EmbeddingsResult(_model_base.Model): """Representation of the response data from an embeddings request. 
Embeddings measure the relatedness of text strings and are commonly used for search, @@ -838,13 +1111,13 @@ class EmbeddingsResult(_model_base.Model): :vartype model: str """ - id: str = rest_field() + id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Unique identifier for the embeddings result. Required.""" - data: List["_models.EmbeddingItem"] = rest_field() + data: List["_models.EmbeddingItem"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Embedding values for the prompts submitted in the request. Required.""" - usage: "_models.EmbeddingsUsage" = rest_field() + usage: "_models.EmbeddingsUsage" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Usage counts for tokens input using the embeddings API. Required.""" - model: str = rest_field() + model: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The model ID used to generate this result. Required.""" @overload @@ -880,9 +1153,9 @@ class EmbeddingsUsage(_model_base.Model): :vartype total_tokens: int """ - prompt_tokens: int = rest_field() + prompt_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Number of tokens in the request. Required.""" - total_tokens: int = rest_field() + total_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Total number of tokens transacted in this request/response. Should equal the number of tokens in the request. Required.""" @@ -919,9 +1192,9 @@ class FunctionCall(_model_base.Model): :vartype arguments: str """ - name: str = rest_field() + name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The name of the function to call. Required.""" - arguments: str = rest_field() + arguments: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling @@ -960,13 +1233,13 @@ class FunctionDefinition(_model_base.Model): :vartype parameters: any """ - name: str = rest_field() + name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The name of the function to be called. Required.""" - description: Optional[str] = rest_field() + description: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """A description of what the function does. The model will use this description when selecting the function and interpreting its parameters.""" - parameters: Optional[Any] = rest_field() + parameters: Optional[Any] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The parameters the function accepts, described as a JSON Schema object.""" @overload @@ -1000,10 +1273,10 @@ class ImageContentItem(ContentItem, discriminator="image_url"): :vartype image_url: ~azure.ai.inference.models.ImageUrl """ - type: Literal["image_url"] = rest_discriminator(name="type") # type: ignore + type: Literal["image_url"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The discriminated object type: always 'image_url' for this type. Required. 
Default value is \"image_url\".""" - image_url: "_models.ImageUrl" = rest_field() + image_url: "_models.ImageUrl" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """An internet location, which must be accessible to the model, from which the image may be retrieved. Required.""" @@ -1036,10 +1309,10 @@ class ImageEmbeddingInput(_model_base.Model): :vartype text: str """ - image: str = rest_field() + image: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The input image encoded in base64 string as a data URL. Example: ``data:image/{format};base64,{data}``. Required.""" - text: Optional[str] = rest_field() + text: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Optional. The text input to feed into the model (like DINO, CLIP). Returns a 422 error if the model doesn't support the value or parameter.""" @@ -1062,6 +1335,77 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) + +class ImageEmbeddingsOptions(_model_base.Model): + """The configuration information for an image embeddings request. + + :ivar input: Input image to embed. To embed multiple inputs in a single request, pass an array. + The input must not exceed the max input tokens for the model. Required. + :vartype input: list[~azure.ai.inference.models.ImageEmbeddingInput] + :ivar dimensions: Optional. The number of dimensions the resulting output embeddings should + have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. + :vartype dimensions: int + :ivar encoding_format: Optional. The desired format for the returned embeddings. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". + :vartype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :ivar input_type: Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + "text", "query", and "document". + :vartype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :ivar model: ID of the specific AI model to use, if more than one model is available on the + endpoint. + :vartype model: str + """ + + input: List["_models.ImageEmbeddingInput"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Input image to embed. To embed multiple inputs in a single request, pass an array. + The input must not exceed the max input tokens for the model. Required.""" + dimensions: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Optional. The number of dimensions the resulting output embeddings should have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter.""" + encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """Optional. The desired format for the returned embeddings. + Returns a 422 error if the model doesn't support the value or parameter.
Known values are: + \"base64\", \"binary\", \"float\", \"int8\", \"ubinary\", and \"uint8\".""" + input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + \"text\", \"query\", and \"document\".""" + model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """ID of the specific AI model to use, if more than one model is available on the endpoint.""" + + @overload + def __init__( + self, + *, + input: List["_models.ImageEmbeddingInput"], + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, "_models.EmbeddingEncodingFormat"]] = None, + input_type: Optional[Union[str, "_models.EmbeddingInputType"]] = None, + model: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + class ImageUrl(_model_base.Model): """An internet location from which the model may retrieve an image. @@ -1073,9 +1417,11 @@ class ImageUrl(_model_base.Model): :vartype detail: str or ~azure.ai.inference.models.ImageDetailLevel """ - url: str = rest_field() + url: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The URL of the image. Required.""" - detail: Optional[Union[str, "_models.ImageDetailLevel"]] = rest_field() + detail: Optional[Union[str, "_models.ImageDetailLevel"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and accuracy. Known values are: \"auto\", \"low\", and \"high\".""" @@ -1109,9 +1455,11 @@ class InputAudio(_model_base.Model): :vartype format: str or ~azure.ai.inference.models.AudioContentFormat """ - data: str = rest_field() + data: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Base64 encoded audio data. Required.""" - format: Union[str, "_models.AudioContentFormat"] = rest_field() + format: Union[str, "_models.AudioContentFormat"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The audio format of the audio content. Required. Known values are: \"wav\" and \"mp3\".""" @overload @@ -1157,17 +1505,17 @@ class JsonSchemaFormat(_model_base.Model): :vartype strict: bool """ - name: str = rest_field() + name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. Required.""" - schema: Dict[str, Any] = rest_field() + schema: Dict[str, Any] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema. Note that AI models usually only support a subset of the keywords defined by JSON schema. Consult your AI model documentation to determine what is supported. 
Required.""" - description: Optional[str] = rest_field() + description: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """A description of the response format, used by the AI model to determine how to generate responses in this format.""" - strict: Optional[bool] = rest_field() + strict: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """If set to true, the service will error out if the provided JSON schema contains keywords not supported by the AI model. An example of such a keyword may be ``maxLength`` for JSON type ``string``. @@ -1210,13 +1558,13 @@ class ModelInfo(_model_base.Model): :vartype model_provider_name: str """ - model_name: str = rest_field() + model_name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The name of the AI model. For example: ``Phi21``. Required.""" - model_type: Union[str, "_models.ModelType"] = rest_field() + model_type: Union[str, "_models.ModelType"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The type of the AI model. A unique identifier for the profile. Required. Known values are: \"embeddings\", \"image_generation\", \"text_generation\", \"image_embeddings\", \"audio_generation\", and \"chat_completion\".""" - model_provider_name: str = rest_field() + model_provider_name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The model provider name. For example: ``Microsoft Research``. Required.""" @overload @@ -1255,12 +1603,16 @@ class StreamingChatChoiceUpdate(_model_base.Model): :vartype delta: ~azure.ai.inference.models.StreamingChatResponseMessageUpdate """ - index: int = rest_field() + index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The ordered index associated with this chat completions choice. Required.""" - finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field() + finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The reason that this chat completions choice completed its generation. Required. Known values are: \"stop\", \"length\", \"content_filter\", and \"tool_calls\".""" - delta: "_models.StreamingChatResponseMessageUpdate" = rest_field() + delta: "_models.StreamingChatResponseMessageUpdate" = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """An update to the chat message for a given chat completions prompt. Required.""" @overload @@ -1310,18 +1662,22 @@ class StreamingChatCompletionsUpdate(_model_base.Model): :vartype usage: ~azure.ai.inference.models.CompletionsUsage """ - id: str = rest_field() + id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """A unique identifier associated with this chat completions response. Required.""" - created: datetime.datetime = rest_field(format="unix-timestamp") + created: datetime.datetime = rest_field( + visibility=["read", "create", "update", "delete", "query"], format="unix-timestamp" + ) """The first timestamp associated with generation activity for this completions response, represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.""" - model: str = rest_field() + model: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The model used for the chat completion.
Required.""" - choices: List["_models.StreamingChatChoiceUpdate"] = rest_field() + choices: List["_models.StreamingChatChoiceUpdate"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """An update to the collection of completion choices associated with this completions response. Generally, ``n`` choices are generated per provided prompt with a default value of 1. Token limits and other settings may limit the number of choices generated. Required.""" - usage: Optional["_models.CompletionsUsage"] = rest_field() + usage: Optional["_models.CompletionsUsage"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Usage information for tokens processed and generated as part of this completions operation.""" @overload @@ -1360,12 +1716,16 @@ class StreamingChatResponseMessageUpdate(_model_base.Model): :vartype tool_calls: list[~azure.ai.inference.models.StreamingChatResponseToolCallUpdate] """ - role: Optional[Union[str, "_models.ChatRole"]] = rest_field() + role: Optional[Union[str, "_models.ChatRole"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The chat role associated with the message. If present, should always be 'assistant'. Known values are: \"system\", \"user\", \"assistant\", \"tool\", and \"developer\".""" - content: Optional[str] = rest_field() + content: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The content of the message.""" - tool_calls: Optional[List["_models.StreamingChatResponseToolCallUpdate"]] = rest_field() + tool_calls: Optional[List["_models.StreamingChatResponseToolCallUpdate"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) """The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat completions request to resolve as configured.""" @@ -1400,9 +1760,9 @@ class StreamingChatResponseToolCallUpdate(_model_base.Model): :vartype function: ~azure.ai.inference.models.FunctionCall """ - id: str = rest_field() + id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The ID of the tool call. Required.""" - function: "_models.FunctionCall" = rest_field() + function: "_models.FunctionCall" = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Updates to the function call requested by the AI model. Required.""" @overload @@ -1434,10 +1794,10 @@ class TextContentItem(ContentItem, discriminator="text"): :vartype text: str """ - type: Literal["text"] = rest_discriminator(name="type") # type: ignore + type: Literal["text"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """The discriminated object type: always 'text' for this type. Required. Default value is \"text\".""" - text: str = rest_field() + text: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The content of the message. Required.""" @overload diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py index d682662e7b01..bc6d7a73f54f 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
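For orientation, here is a minimal usage sketch (illustrative only, not part of the patch) of how the request fields defined by the generated ChatCompletionsOptions body above surface through the public client; the endpoint, key environment variables, and parameter values are placeholders:

import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage

# Placeholder endpoint and key; any Azure AI model inference endpoint works here.
client = ChatCompletionsClient(
    endpoint=os.environ["AZURE_AI_CHAT_ENDPOINT"],
    credential=AzureKeyCredential(os.environ["AZURE_AI_CHAT_KEY"]),
)

# Each keyword argument below maps onto a field of the ChatCompletionsOptions
# body model generated above (messages, temperature, top_p, max_tokens, ...).
response = client.complete(
    messages=[
        SystemMessage(content="You are a helpful assistant."),
        UserMessage(content="How many feet are in a mile?"),
    ],
    temperature=0.7,
    top_p=0.95,
    max_tokens=256,
)
print(response.choices[0].message.content)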
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py index f7a0c21d8bb8..2d6400ee1e28 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py index de3c570e5c89..d1f742a1ffa6 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py index 14ad4f62b4c1..66429f2b2c00 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py index 5ea38bda6229..ad728b806214 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py b/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py index f7937a99074a..0aeb13853f6e 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py index b908cea2c5ae..53ccd05053e1 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_from_input_dict_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
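The JSON-schema response format classes in the generated models above are typically wired up as sketched below. The schema contents and names are hypothetical, the `client` is the one constructed in the previous sketch, and, per the `response_format` docstring, JSON output must also be requested in a system or user message:

from azure.ai.inference.models import (
    ChatCompletionsResponseFormatJsonSchema,
    JsonSchemaFormat,
    SystemMessage,
    UserMessage,
)

# Hypothetical schema; strict=True asks the service to reject schema keywords
# the model does not support instead of silently ignoring them.
weather_schema = JsonSchemaFormat(
    name="weather_report",
    schema={
        "type": "object",
        "properties": {
            "city": {"type": "string"},
            "temperature_f": {"type": "number"},
        },
        "required": ["city", "temperature_f"],
        "additionalProperties": False,
    },
    strict=True,
)

response = client.complete(
    messages=[
        SystemMessage(content="Reply only with JSON matching the requested schema."),
        UserMessage(content="What is the weather in Seattle?"),
    ],
    response_format=ChatCompletionsResponseFormatJsonSchema(json_schema=weather_schema),
)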
diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py index 5035bc652d8d..974f921e982c 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_chat_completions_streaming_azure_openai_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py index cfc9a4372222..b8f846541cb1 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_embeddings_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py b/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py index 311837814607..b975822f1ed6 100644 --- a/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py +++ b/sdk/ai/azure-ai-inference/samples/async_samples/sample_load_client_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py index e39c2adba790..34689ac27b3f 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_azure_openai.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py index 0bbdea862ab3..6e2c5c4ca8d1 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py index f1c44431c523..ad78561bcc3e 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_dict_with_image_url.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
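Similarly, a minimal sketch (not part of the patch) of an embeddings call whose keyword arguments mirror the generated EmbeddingsOptions fields; the endpoint, key, and dimension value are placeholders, and the service returns a 422 if the model rejects an option:

import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.inference import EmbeddingsClient

embeddings_client = EmbeddingsClient(
    endpoint=os.environ["AZURE_AI_EMBEDDINGS_ENDPOINT"],  # placeholder
    credential=AzureKeyCredential(os.environ["AZURE_AI_EMBEDDINGS_KEY"]),
)

# input, dimensions, and input_type map directly onto EmbeddingsOptions above.
result = embeddings_client.embed(
    input=["first phrase", "second phrase"],
    dimensions=256,
    input_type="text",
)
for item in result.data:
    print(f"index={item.index}, embedding length={len(item.embedding)}")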
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py index e1ee22f32a9c..78a2315a3384 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_prompt_string.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py index a6042089946c..3c7b0c7f8279 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_tools.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py index 6083dd8b9ba2..d229672876ce 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py index 18c3925d0326..c148d8d72f30 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py index 50b07d63a841..06859cff8beb 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py index e2cba755e8aa..fa58b961e307 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_structured_output_pydantic.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
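And a matching sketch for the generated ImageEmbeddingsOptions, built from ImageEmbeddingInput items; the data URL below is a truncated placeholder, not a real image:

import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.inference import ImageEmbeddingsClient
from azure.ai.inference.models import ImageEmbeddingInput

image_client = ImageEmbeddingsClient(
    endpoint=os.environ["AZURE_AI_IMAGE_EMBEDDINGS_ENDPOINT"],  # placeholder
    credential=AzureKeyCredential(os.environ["AZURE_AI_IMAGE_EMBEDDINGS_KEY"]),
)

result = image_client.embed(
    input=[
        ImageEmbeddingInput(
            image="data:image/png;base64,iVBORw0KGgo...",  # placeholder data URL
            text="optional companion text for models like CLIP",
        )
    ],
)
print(result.data[0].index)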
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py index dfe5fd048b51..dfa1ab3eb739 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py +++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py b/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py index 923e6410565c..41a606719bb3 100644 --- a/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py +++ b/sdk/ai/azure-ai-inference/samples/sample_embeddings_azure_openai.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py index c3d3b34a4406..62dd824a0039 100644 --- a/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py +++ b/sdk/ai/azure-ai-inference/tests/gen_ai_trace_verifier.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py index eab1fb7418de..9619a2b15cc5 100644 --- a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py +++ b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py index b347f1d285b1..051a5372cf28 100644 --- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py +++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py index dc13fc1eba4a..89f06d6da0e5 100644 --- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py +++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_client_tracing.py b/sdk/ai/azure-ai-inference/tests/test_client_tracing.py index 997dd117cae9..8b98a5e356ae 100644 --- a/sdk/ai/azure-ai-inference/tests/test_client_tracing.py +++ b/sdk/ai/azure-ai-inference/tests/test_client_tracing.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-lines +# pylint: disable=too-many-lines,line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. 
# Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py b/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py index 3f1c5ade0057..aff721431109 100644 --- a/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py +++ b/sdk/ai/azure-ai-inference/tests/test_embeddings_client_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py b/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py index 58b48c143f59..3553e2863b36 100644 --- a/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py +++ b/sdk/ai/azure-ai-inference/tests/test_image_embeddings_client_async.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_prompts.py b/sdk/ai/azure-ai-inference/tests/test_prompts.py index 0168fbeb8c01..8f3d76ce4aab 100644 --- a/sdk/ai/azure-ai-inference/tests/test_prompts.py +++ b/sdk/ai/azure-ai-inference/tests/test_prompts.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tests/test_unit_tests.py b/sdk/ai/azure-ai-inference/tests/test_unit_tests.py index 14f1f74dcfbe..d572c32deb54 100644 --- a/sdk/ai/azure-ai-inference/tests/test_unit_tests.py +++ b/sdk/ai/azure-ai-inference/tests/test_unit_tests.py @@ -1,3 +1,4 @@ +# pylint: disable=line-too-long,useless-suppression # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. diff --git a/sdk/ai/azure-ai-inference/tsp-location.yaml b/sdk/ai/azure-ai-inference/tsp-location.yaml index b107d6f6ece8..c79d02351f73 100644 --- a/sdk/ai/azure-ai-inference/tsp-location.yaml +++ b/sdk/ai/azure-ai-inference/tsp-location.yaml @@ -1,4 +1,4 @@ directory: specification/ai/ModelClient -commit: a7a977a1666ad293769bc17fb80309be390b2ba9 +commit: 91fa01cca22d82bb2823e9238650ebf70e4a83a3 repo: Azure/azure-rest-api-specs additionalDirectories: From ce8dd0f88e5120bf3bfc71b183a209eb81a93e4e Mon Sep 17 00:00:00 2001 From: Travis Angevine Date: Fri, 14 Mar 2025 10:43:43 -0700 Subject: [PATCH 3/3] Changes for typespec branch merge 1 (#39810) * Regen. Test for audio url handling Signed-off-by: trangevi * Test fixes and sample added Signed-off-by: trangevi * regen and format tool Signed-off-by: trangevi * new test recordings Signed-off-by: trangevi * fix "str" from generation. 
Update URL Signed-off-by: trangevi * code comment Signed-off-by: trangevi * Update to point back at Azure tsp branch Signed-off-by: trangevi * Changelog Signed-off-by: trangevi * Update test asset pointer after url change Signed-off-by: trangevi --------- Signed-off-by: trangevi --- sdk/ai/azure-ai-inference/CHANGELOG.md | 11 +++ sdk/ai/azure-ai-inference/assets.json | 2 +- .../azure/ai/inference/_client.py | 6 +- .../azure/ai/inference/_configuration.py | 12 +-- .../azure/ai/inference/aio/_client.py | 3 - .../azure/ai/inference/aio/_configuration.py | 9 -- .../azure/ai/inference/models/__init__.py | 8 +- .../azure/ai/inference/models/_models.py | 80 +++++++++++++++-- ...sample_chat_completions_with_audio_data.py | 4 +- .../sample_chat_completions_with_audio_url.py | 85 +++++++++++++++++++ .../tests/model_inference_test_base.py | 39 ++++++++- .../tests/test_chat_completions_client.py | 33 ++++++- .../test_chat_completions_client_async.py | 34 +++++++- sdk/ai/azure-ai-inference/tsp-location.yaml | 2 +- 14 files changed, 280 insertions(+), 48 deletions(-) create mode 100644 sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py diff --git a/sdk/ai/azure-ai-inference/CHANGELOG.md b/sdk/ai/azure-ai-inference/CHANGELOG.md index 958fa5d51d29..8115918449d0 100644 --- a/sdk/ai/azure-ai-inference/CHANGELOG.md +++ b/sdk/ai/azure-ai-inference/CHANGELOG.md @@ -4,10 +4,21 @@ ### Features Added +* Added support for Chat Completions with audio URL input. This change introduces a split between +audio files which are passed as base64 encoded data (previously supported) and files which are passed +by URL reference (new). See new sample `sample_chat_completions_with_audio_url.py`. + ### Bugs Fixed ### Breaking Changes +* `AudioContentItem` has been renamed to `AudioDataContentItem`. + +### Other Notable Changes + +* `FunctionDefinition.parameters` has been clarified in typing to be of type `Dict[str, Any]`. +This is not a change in functionality, just a clarification of what was already expected. + ## 1.0.0b9 (2025-02-14) ### Features Added diff --git a/sdk/ai/azure-ai-inference/assets.json b/sdk/ai/azure-ai-inference/assets.json index ceebde8be0cf..2ec0f468b0b0 100644 --- a/sdk/ai/azure-ai-inference/assets.json +++ b/sdk/ai/azure-ai-inference/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-inference", - "Tag": "python/ai/azure-ai-inference_3f06cee8a7" + "Tag": "python/ai/azure-ai-inference_473838145b" } diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py index 0cde08ffa7cc..1fc2ee38dca8 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py @@ -39,7 +39,7 @@ class ChatCompletionsClient(ChatCompletionsClientOperationsMixin): :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential + ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior.
@@ -117,7 +117,7 @@ class EmbeddingsClient(EmbeddingsClientOperationsMixin): :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential + ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. @@ -195,7 +195,7 @@ class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin): :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential + ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py index 894ec657140f..8fc56f572a89 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py @@ -28,7 +28,7 @@ class ChatCompletionsClientConfiguration: # pylint: disable=too-many-instance-a :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential + ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. @@ -54,8 +54,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr def _infer_policy(self, **kwargs): if isinstance(self.credential, AzureKeyCredential): return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs) - if isinstance(self.credential, AzureKeyCredential): - return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs) if hasattr(self.credential, "get_token"): return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs) raise TypeError(f"Unsupported credential: {self.credential}") @@ -85,7 +83,7 @@ class EmbeddingsClientConfiguration: # pylint: disable=too-many-instance-attrib :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential + ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. 
@@ -111,8 +109,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr def _infer_policy(self, **kwargs): if isinstance(self.credential, AzureKeyCredential): return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs) - if isinstance(self.credential, AzureKeyCredential): - return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs) if hasattr(self.credential, "get_token"): return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs) raise TypeError(f"Unsupported credential: {self.credential}") @@ -142,7 +138,7 @@ class ImageEmbeddingsClientConfiguration: # pylint: disable=too-many-instance-a :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential + ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. @@ -168,8 +164,6 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr def _infer_policy(self, **kwargs): if isinstance(self.credential, AzureKeyCredential): return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs) - if isinstance(self.credential, AzureKeyCredential): - return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs) if hasattr(self.credential, "get_token"): return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs) raise TypeError(f"Unsupported credential: {self.credential}") diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py index 88e6773bd8f1..212904c011cf 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_client.py @@ -39,7 +39,6 @@ class ChatCompletionsClient(ChatCompletionsClientOperationsMixin): :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials_async.AsyncTokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported @@ -122,7 +121,6 @@ class EmbeddingsClient(EmbeddingsClientOperationsMixin): :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials_async.AsyncTokenCredential :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported @@ -205,7 +203,6 @@ class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin): :param credential: Credential used to authenticate requests to the service. Is either a key credential type or a token credential type. Required. 
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py
index f60e112599d6..4fbe724f0326 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/aio/_configuration.py
@@ -28,7 +28,6 @@ class ChatCompletionsClientConfiguration:  # pylint: disable=too-many-instance-a
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -57,8 +56,6 @@ def __init__(
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
@@ -88,7 +85,6 @@ class EmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attrib
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -117,8 +113,6 @@ def __init__(
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
@@ -148,7 +142,6 @@ class ImageEmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-a
     :param credential: Credential used to authenticate requests to the service. Is either a key
      credential type or a token credential type. Required.
     :type credential: ~azure.core.credentials.AzureKeyCredential or
-     ~azure.core.credentials.AzureKeyCredential or
      ~azure.core.credentials_async.AsyncTokenCredential
     :keyword api_version: The API version to use for this operation. Default value is
      "2024-05-01-preview". Note that overriding this default value may result in unsupported
@@ -177,8 +170,6 @@ def __init__(
     def _infer_policy(self, **kwargs):
         if isinstance(self.credential, AzureKeyCredential):
             return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
-        if isinstance(self.credential, AzureKeyCredential):
-            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
         if hasattr(self.credential, "get_token"):
             return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
         raise TypeError(f"Unsupported credential: {self.credential}")
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py
index 66e625705c58..5dfafa1a420d 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py
@@ -14,7 +14,8 @@


 from ._models import (  # type: ignore
-    AudioContentItem,
+    AudioDataContentItem,
+    AudioUrlContentItem,
     ChatChoice,
     ChatCompletions,
     ChatCompletionsNamedToolChoice,
@@ -33,6 +34,7 @@
     ImageEmbeddingInput,
     ImageUrl,
     InputAudio,
+    InputAudioUrl,
     JsonSchemaFormat,
     ModelInfo,
     StreamingChatChoiceUpdate,
@@ -57,7 +59,8 @@
 from ._patch import patch_sdk as _patch_sdk

 __all__ = [
-    "AudioContentItem",
+    "AudioDataContentItem",
+    "AudioUrlContentItem",
     "ChatChoice",
     "ChatCompletions",
     "ChatCompletionsNamedToolChoice",
@@ -76,6 +79,7 @@
     "ImageEmbeddingInput",
     "ImageUrl",
     "InputAudio",
+    "InputAudioUrl",
     "JsonSchemaFormat",
     "ModelInfo",
     "StreamingChatChoiceUpdate",
diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
index 85598618489a..de1e78d089c9 100644
--- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
+++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py
@@ -23,7 +23,7 @@ class ContentItem(_model_base.Model):
     """An abstract representation of a structured content item within a chat message.

     You probably want to use the sub-classes and not this class directly. Known sub-classes are:
-    ImageContentItem, AudioContentItem, TextContentItem
+    AudioUrlContentItem, ImageContentItem, AudioDataContentItem, TextContentItem

     :ivar type: The discriminated object type. Required. Default value is None.
     :vartype type: str
@@ -51,13 +51,13 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)


-class AudioContentItem(ContentItem, discriminator="input_audio"):
-    """A structured chat content item containing an audio content.
+class AudioDataContentItem(ContentItem, discriminator="input_audio"):
+    """A structured chat content item for audio content passed as base64 encoded data.

     :ivar type: The discriminated object type: always 'input_audio' for this type. Required.
      Default value is "input_audio".
     :vartype type: str
-    :ivar input_audio: The details of the input audio. Required.
+    :ivar input_audio: The details of the input audio data. Required.
     :vartype input_audio: ~azure.ai.inference.models.InputAudio
     """

@@ -65,7 +65,7 @@ class AudioContentItem(ContentItem, discriminator="input_audio"):
     """The discriminated object type: always 'input_audio' for this type. Required. Default value
      is \"input_audio\"."""
     input_audio: "_models.InputAudio" = rest_field(visibility=["read", "create", "update", "delete", "query"])
-    """The details of the input audio. Required."""
Required.""" @overload def __init__( @@ -85,6 +85,40 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, type="input_audio", **kwargs) +class AudioUrlContentItem(ContentItem, discriminator="audio_url"): + """A structured chat content item for audio content passed as a url. + + :ivar type: The discriminated object type: always 'audio_url' for this type. Required. Default + value is "audio_url". + :vartype type: str + :ivar audio_url: The details of the audio url. Required. + :vartype audio_url: ~azure.ai.inference.models.InputAudioUrl + """ + + type: Literal["audio_url"] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The discriminated object type: always 'audio_url' for this type. Required. Default value is + \"audio_url\".""" + audio_url: "_models.InputAudioUrl" = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The details of the audio url. Required.""" + + @overload + def __init__( + self, + *, + audio_url: "_models.InputAudioUrl", + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, type="audio_url", **kwargs) + + class ChatChoice(_model_base.Model): """The representation of a single prompt completion as part of an overall chat completions request. @@ -1230,7 +1264,7 @@ class FunctionDefinition(_model_base.Model): interpreting its parameters. :vartype description: str :ivar parameters: The parameters the function accepts, described as a JSON Schema object. - :vartype parameters: any + :vartype parameters: dict[str, any] """ name: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) @@ -1239,7 +1273,7 @@ class FunctionDefinition(_model_base.Model): """A description of what the function does. The model will use this description when selecting the function and interpreting its parameters.""" - parameters: Optional[Any] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + parameters: Optional[Dict[str, Any]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The parameters the function accepts, described as a JSON Schema object.""" @overload @@ -1248,7 +1282,7 @@ def __init__( *, name: str, description: Optional[str] = None, - parameters: Optional[Any] = None, + parameters: Optional[Dict[str, Any]] = None, ) -> None: ... @overload @@ -1446,7 +1480,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class InputAudio(_model_base.Model): - """The details of an audio chat message content part. + """The details of the input audio data. :ivar data: Base64 encoded audio data. Required. :vartype data: str @@ -1481,6 +1515,34 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class InputAudioUrl(_model_base.Model): + """The details of the audio url. + + :ivar url: The URL of the audio content. Required. + :vartype url: str + """ + + url: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The URL of the audio content. Required.""" + + @overload + def __init__( + self, + *, + url: str, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. 
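With the InputAudioUrl model above and the AudioDataContentItem/AudioUrlContentItem split, callers can pass audio either inline or by reference. A minimal sketch contrasting the two content items (the file name and URL are illustrative only):

    from azure.ai.inference.models import (
        AudioContentFormat,
        AudioDataContentItem,  # renamed from AudioContentItem: base64 encoded audio data
        AudioUrlContentItem,   # new in this patch: audio referenced by URL
        InputAudio,
        InputAudioUrl,
        TextContentItem,
        UserMessage,
    )

    # Audio passed inline, loaded from a local file and base64 encoded:
    data_item = AudioDataContentItem(
        input_audio=InputAudio.load(audio_file="hello_how_are_you.mp3", audio_format=AudioContentFormat.MP3)
    )

    # The same audio referenced by a publicly accessible URL instead:
    url_item = AudioUrlContentItem(audio_url=InputAudioUrl(url="https://example.com/hello_how_are_you.mp3"))

    # Either item can appear in the array form of `content` on a user message:
    message = UserMessage([TextContentItem(text="Please translate this audio snippet to spanish."), url_item])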
 class JsonSchemaFormat(_model_base.Model):
     """Defines the response format for chat completions as JSON with a given schema.
     The AI model will need to adhere to this schema when generating completions.
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py
index 09c3c07a38f5..f791e55c8585 100644
--- a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_data.py
@@ -37,7 +37,7 @@ def sample_chat_completions_with_audio_data():
         SystemMessage,
         UserMessage,
         TextContentItem,
-        AudioContentItem,
+        AudioDataContentItem,
         InputAudio,
         AudioContentFormat,
     )
@@ -69,7 +69,7 @@ def sample_chat_completions_with_audio_data():
         UserMessage(
             [
                 TextContentItem(text="Please translate this audio snippet to spanish."),
-                AudioContentItem(
+                AudioDataContentItem(
                     input_audio=InputAudio.load(
                         audio_file="hello_how_are_you.mp3", audio_format=AudioContentFormat.MP3
                     )
diff --git a/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py
new file mode 100644
index 000000000000..ceae9095675e
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_audio_url.py
@@ -0,0 +1,85 @@
+# pylint: disable=line-too-long,useless-suppression
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""
+DESCRIPTION:
+    This sample demonstrates how to get a chat completions response from
+    the service using a synchronous client. The sample shows how to use a
+    URL pointing to an audio file in the input chat messages.
+    This sample will only work on AI models that support audio input.
+    Only these AI models accept the array form of `content` in the
+    `UserMessage`, as shown here.
+
+    This sample assumes the AI model is hosted on a Serverless API or
+    Managed Compute endpoint. For GitHub Models or Azure OpenAI endpoints,
+    the client constructor needs to be modified. See package documentation:
+    https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/README.md#key-concepts
+
+USAGE:
+    python sample_chat_completions_with_audio_url.py
+
+    Set these two or three environment variables before running the sample:
+    1) AZURE_AI_CHAT_ENDPOINT - Your endpoint URL, in the form
+        https://<your-deployment-name>.<your-azure-region>.models.ai.azure.com
+        where `your-deployment-name` is your unique AI Model deployment name, and
+        `your-azure-region` is the Azure region where your model is deployed.
+    2) AZURE_AI_CHAT_KEY - Your model key. Keep it secret.
+    3) AZURE_AI_CHAT_DEPLOYMENT_NAME - Optional. The value for the HTTP
+        request header `azureml-model-deployment`.
+""" + + +def sample_chat_completions_with_audio_url(): + import os + from azure.ai.inference import ChatCompletionsClient + from azure.ai.inference.models import ( + SystemMessage, + UserMessage, + TextContentItem, + AudioUrlContentItem, + InputAudioUrl, + ) + from azure.core.credentials import AzureKeyCredential + + try: + endpoint = os.environ["AZURE_AI_CHAT_ENDPOINT"] + key = os.environ["AZURE_AI_CHAT_KEY"] + except KeyError: + print("Missing environment variable 'AZURE_AI_CHAT_ENDPOINT' or 'AZURE_AI_CHAT_KEY'") + print("Set them before running this sample.") + exit() + + try: + model_deployment = os.environ["AZURE_AI_CHAT_DEPLOYMENT_NAME"] + except KeyError: + print("Could not read optional environment variable `AZURE_AI_CHAT_DEPLOYMENT_NAME`.") + print("No specific model target will not be set.") + model_deployment = None + + audio_url = "https://github.com/Azure/azure-sdk-for-python/raw/refs/heads/main/sdk/ai/azure-ai-inference/samples/hello_how_are_you.mp3" + + client = ChatCompletionsClient( + endpoint=endpoint, + credential=AzureKeyCredential(key), + ) + + response = client.complete( + messages=[ + SystemMessage("You are an AI assistant for translating and transcribing audio clips."), + UserMessage( + [ + TextContentItem(text="Please translate this audio snippet to spanish."), + AudioUrlContentItem(audio_url=InputAudioUrl(url=audio_url)), + ], + ), + ], + model=model_deployment, + ) + + print(response.choices[0].message.content) + + +if __name__ == "__main__": + sample_chat_completions_with_audio_url() diff --git a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py index 9619a2b15cc5..d16ad99b5d23 100644 --- a/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py +++ b/sdk/ai/azure-ai-inference/tests/model_inference_test_base.py @@ -92,6 +92,13 @@ azure_ai_image_embeddings_key="00000000000000000000000000000000", ) +ServicePreparerChatCompletionsWithAudio = functools.partial( + EnvironmentVariableLoader, + "azure_ai_chat_audio", + azure_ai_chat_audio_endpoint="https://your-deployment-name.eastus2.models.ai.azure.com", + azure_ai_chat_audio_key="00000000000000000000000000000000", +) + # The test class name needs to start with "Test" to get collected by pytest class ModelClientTestBase(AzureRecordedTestCase): @@ -266,6 +273,12 @@ def _load_image_embeddings_credentials_entra_id(self, is_async: bool = False, ** credential = self.get_credential(sdk.ImageEmbeddingsClient, is_async=is_async) return endpoint, credential + def _load_phi_audio_credentials(self, bad_key: bool, **kwargs): + endpoint = kwargs.pop("azure_ai_chat_audio_endpoint") + key = "00000000000000000000000000000000" if bad_key else kwargs.pop("azure_ai_chat_audio_key") + credential = AzureKeyCredential(key) + return endpoint, credential + # ********************************************************************************** # # HELPER METHODS TO CREATE CLIENTS USING THE SDK's load_client() FUNCTION @@ -444,6 +457,30 @@ def _create_embeddings_client_with_chat_completions_credentials(self, **kwargs) credential = AzureKeyCredential(key) return sdk.EmbeddingsClient(endpoint=endpoint, credential=credential, logging_enable=LOGGING_ENABLED) + def _create_phi_audio_chat_client(self, *, bad_key: bool = False, **kwargs) -> sdk.ChatCompletionsClient: + ( + endpoint, + credential, + ) = self._load_phi_audio_credentials(bad_key=bad_key, **kwargs) + return sdk.ChatCompletionsClient( + endpoint=endpoint, + credential=credential, + 
+
+    def _create_async_phi_audio_chat_client(
+        self, *, bad_key: bool = False, **kwargs
+    ) -> async_sdk.ChatCompletionsClient:
+        (
+            endpoint,
+            credential,
+        ) = self._load_phi_audio_credentials(bad_key=bad_key, **kwargs)
+        return async_sdk.ChatCompletionsClient(
+            endpoint=endpoint,
+            credential=credential,
+            logging_enable=LOGGING_ENABLED,
+        )
+
     # **********************************************************************************
     #
     #                      HELPER METHODS TO VALIDATE TEST RESULTS
@@ -532,7 +569,7 @@ def _validate_chat_completions_result(
         if is_aoai:
             assert bool(ModelClientTestBase.REGEX_AOAI_RESULT_ID.match(response.id))
         else:
-            assert bool(ModelClientTestBase.REGEX_RESULT_ID.match(response.id))
+            assert response.id
         assert response.created is not None
         assert response.created != ""
         assert response.model is not None
diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
index 051a5372cf28..f796ead90b57 100644
--- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
+++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client.py
@@ -11,6 +11,7 @@
     ModelClientTestBase,
     ServicePreparerChatCompletions,
     ServicePreparerAOAIChatCompletions,
+    ServicePreparerChatCompletionsWithAudio,
 )
 from devtools_testutils import recorded_by_proxy

@@ -559,11 +560,9 @@ def test_aoai_chat_completions_with_structured_output(self, **kwargs):
         )
         client.close()

-    # We use AOAI endpoint here because at the moment there is no MaaS model that supports
-    # input audio.
     @ServicePreparerAOAIChatCompletions()
     @recorded_by_proxy
-    def test_chat_completions_with_audio_input(self, **kwargs):
+    def test_chat_completions_with_audio_data_input(self, **kwargs):
         client = self._create_aoai_audio_chat_client(**kwargs)

         # Construct the full path to the image file
@@ -578,7 +577,7 @@ def test_chat_completions_with_audio_input(self, **kwargs):
             sdk.models.UserMessage(
                 content=[
                     sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
-                    sdk.models.AudioContentItem(
+                    sdk.models.AudioDataContentItem(
                         input_audio=sdk.models.InputAudio.load(
                             audio_file=audio_file_path, audio_format=sdk.models.AudioContentFormat.MP3
                         )
@@ -591,6 +590,31 @@ def test_chat_completions_with_audio_input(self, **kwargs):
         self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=True)
         client.close()

+    @ServicePreparerChatCompletionsWithAudio()
+    @recorded_by_proxy
+    def test_chat_completions_with_audio_url_input(self, **kwargs):
+        client = self._create_phi_audio_chat_client(**kwargs)
+
+        # Publicly accessible URL of the audio file used by this test
+        audio_url = "https://github.com/Azure/azure-sdk-for-python/raw/refs/heads/main/sdk/ai/azure-ai-inference/samples/hello_how_are_you.mp3"
+
+        response = client.complete(
+            messages=[
+                sdk.models.SystemMessage(
+                    content="You are an AI assistant for translating and transcribing audio clips."
+                ),
+                sdk.models.UserMessage(
+                    content=[
+                        sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
+                        sdk.models.AudioUrlContentItem(audio_url=sdk.models.InputAudioUrl(url=audio_url)),
+                    ],
+                ),
+            ],
+        )
+        self._print_chat_completions_result(response)
+        self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=False)
+        client.close()
+
     # **********************************************************************************
     #
     #                         ERROR TESTS - CHAT COMPLETIONS
diff --git a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
index 89f06d6da0e5..25be9b738c6b 100644
--- a/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
+++ b/sdk/ai/azure-ai-inference/tests/test_chat_completions_client_async.py
@@ -12,6 +12,7 @@
     ModelClientTestBase,
     ServicePreparerChatCompletions,
     ServicePreparerAOAIChatCompletions,
+    ServicePreparerChatCompletionsWithAudio,
 )
 from devtools_testutils.aio import recorded_by_proxy_async

@@ -515,11 +516,9 @@ async def test_async_aoai_chat_completions_with_structured_output(self, **kwargs
         )
         await client.close()

-    # We use AOAI endpoint here because at the moment there is no MaaS model that supports
-    # input audio.
     @ServicePreparerAOAIChatCompletions()
     @recorded_by_proxy_async
-    async def test_chat_completions_with_audio_input(self, **kwargs):
+    async def test_chat_completions_with_audio_data_input(self, **kwargs):
         client = self._create_async_aoai_audio_chat_client(**kwargs)

         # Construct the full path to the image file
@@ -534,7 +533,7 @@ async def test_chat_completions_with_audio_input(self, **kwargs):
             sdk.models.UserMessage(
                 content=[
                     sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
-                    sdk.models.AudioContentItem(
+                    sdk.models.AudioDataContentItem(
                         input_audio=sdk.models.InputAudio.load(
                             audio_file=audio_file_path, audio_format=sdk.models.AudioContentFormat.MP3
                         )
@@ -546,3 +545,29 @@ async def test_chat_completions_with_audio_input(self, **kwargs):
         self._print_chat_completions_result(response)
         self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=True)
         await client.close()
+
+    @ServicePreparerChatCompletionsWithAudio()
+    @recorded_by_proxy_async
+    async def test_chat_completions_with_audio_url_input(self, **kwargs):
+        client = self._create_async_phi_audio_chat_client(**kwargs)
+
+        # Publicly accessible URL of the audio file used by this test
+        audio_url = "https://github.com/Azure/azure-sdk-for-python/raw/refs/heads/main/sdk/ai/azure-ai-inference/samples/hello_how_are_you.mp3"
+
+        response = await client.complete(
+            messages=[
+                sdk.models.SystemMessage(
+                    content="You are an AI assistant for translating and transcribing audio clips."
+                ),
+                sdk.models.UserMessage(
+                    content=[
+                        sdk.models.TextContentItem(text="Please translate this audio snippet to spanish."),
+                        sdk.models.AudioUrlContentItem(audio_url=sdk.models.InputAudioUrl(url=audio_url)),
+                    ],
+                ),
+            ],
+            model="phi-4-multimodal-instruct-1",
+        )
+        self._print_chat_completions_result(response)
+        self._validate_chat_completions_result(response, ["Hola", "cómo", "estás"], is_aoai=False)
+        await client.close()
diff --git a/sdk/ai/azure-ai-inference/tsp-location.yaml b/sdk/ai/azure-ai-inference/tsp-location.yaml
index c79d02351f73..bc1ef4b901e7 100644
--- a/sdk/ai/azure-ai-inference/tsp-location.yaml
+++ b/sdk/ai/azure-ai-inference/tsp-location.yaml
@@ -1,4 +1,4 @@
 directory: specification/ai/ModelClient
-commit: 91fa01cca22d82bb2823e9238650ebf70e4a83a3
+commit: d61efb1bdfdf159ec15e4144f41e2828513bcfa4
 repo: Azure/azure-rest-api-specs
 additionalDirectories:
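A side effect of the `FunctionDefinition` change earlier in this patch is that `parameters` is now typed as `Dict[str, Any]` (a JSON Schema object) rather than `Any`. A minimal sketch of a tool definition under the new signature (the function name and schema are illustrative, not taken from this patch):

    from typing import Any, Dict
    from azure.ai.inference.models import FunctionDefinition

    # `parameters` must now be a JSON Schema object (a dict), not an arbitrary value.
    weather_schema: Dict[str, Any] = {
        "type": "object",
        "properties": {"city": {"type": "string", "description": "The name of the city"}},
        "required": ["city"],
    }

    get_weather = FunctionDefinition(
        name="get_weather",
        description="Returns the current weather for the named city.",
        parameters=weather_schema,
    )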