# pylint: disable=too-many-lines
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------
"""Customize generated code here.

Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
"""
import json
import logging
import sys

from io import IOBase
from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, AsyncIterable

from azure.core.pipeline import PipelineResponse
from azure.core.credentials import AzureKeyCredential
from azure.core.tracing.decorator_async import distributed_trace_async
from azure.core.utils import case_insensitive_dict
from azure.core.exceptions import (
    ClientAuthenticationError,
    HttpResponseError,
    map_error,
    ResourceExistsError,
    ResourceNotFoundError,
    ResourceNotModifiedError,
)
from .. import models as _models
from .._model_base import SdkJSONEncoder, _deserialize
from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated
from ._client import EmbeddingsClient as EmbeddingsClientGenerated
from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated
from .._operations._operations import (
    build_chat_completions_complete_request,
    build_embeddings_embed_request,
    build_image_embeddings_embed_request,
)
from .._patch import _get_internal_response_format

if TYPE_CHECKING:
    # pylint: disable=unused-import,ungrouped-imports
    from azure.core.credentials_async import AsyncTokenCredential

if sys.version_info >= (3, 9):
    from collections.abc import MutableMapping
else:
    from typing import MutableMapping  # type: ignore  # pylint: disable=ungrouped-imports

JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
_Unset: Any = object()
_LOGGER = logging.getLogger(__name__)


async def load_client(
    endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]:
    """
    Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route
    on the given endpoint, to determine the model type and therefore which client to instantiate.
    This method will only work when using Serverless API or Managed Compute endpoint.
    It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
    Keyword arguments are passed through to the client constructor (you can set keywords such as
    `api_version`, `user_agent`, `logging_enable` etc. on the client constructor).

    :param endpoint: Service endpoint URL for AI model inference. Required.
    :type endpoint: str
    :param credential: Credential used to authenticate requests to the service. Is either an
     AzureKeyCredential type or an AsyncTokenCredential type. Required.
    :type credential: ~azure.core.credentials.AzureKeyCredential or
     ~azure.core.credentials_async.AsyncTokenCredential
    :return: The appropriate asynchronous client associated with the given endpoint
    :rtype: ~azure.ai.inference.aio.ChatCompletionsClient or ~azure.ai.inference.aio.EmbeddingsClient
     or ~azure.ai.inference.aio.ImageEmbeddingsClient
    :raises ~azure.core.exceptions.HttpResponseError:
    """
    async with ChatCompletionsClient(
        endpoint, credential, **kwargs
    ) as client:  # Pick any of the clients, it does not matter.
        try:
            model_info = await client.get_model_info()  # type: ignore
        except ResourceNotFoundError as error:
            error.message = (
                "`load_client` function does not work on this endpoint (`/info` route not supported). "
                "Please construct one of the clients (e.g. `ChatCompletionsClient`) directly."
            )
            raise error

    _LOGGER.info("model_info=%s", model_info)
    if not model_info.model_type:
        raise ValueError(
            "The AI model information is missing a value for `model type`. Cannot create an appropriate client."
        )

    # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type
    if model_info.model_type in (
        _models.ModelType.CHAT_COMPLETION,
        "chat_completions",
        "chat",
        "completion",
        "chat-completion",
        "chat-completions",
        "chat completion",
        "chat completions",
    ):
        chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs)
        chat_completion_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
            model_info
        )
        return chat_completion_client

    if model_info.model_type in (
        _models.ModelType.EMBEDDINGS,
        "embedding",
        "text_embedding",
        "text-embeddings",
        "text embedding",
        "text embeddings",
    ):
        embedding_client = EmbeddingsClient(endpoint, credential, **kwargs)
        embedding_client._model_info = model_info  # pylint: disable=protected-access,attribute-defined-outside-init
        return embedding_client

    if model_info.model_type in (
        _models.ModelType.IMAGE_EMBEDDINGS,
        "image_embedding",
        "image-embeddings",
        "image-embedding",
        "image embedding",
        "image embeddings",
    ):
        image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs)
        image_embedding_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
            model_info
        )
        return image_embedding_client

    raise ValueError(f"No client available to support AI model type `{model_info.model_type}`")


class ChatCompletionsClient(ChatCompletionsClientGenerated):  # pylint: disable=too-many-instance-attributes
    """ChatCompletionsClient.

    :param endpoint: Service endpoint URL for AI model inference. Required.
    :type endpoint: str
    :param credential: Credential used to authenticate requests to the service. Is either an
     AzureKeyCredential type or an AsyncTokenCredential type. Required.
    :type credential: ~azure.core.credentials.AzureKeyCredential or
     ~azure.core.credentials_async.AsyncTokenCredential
    :keyword frequency_penalty: A value that influences the probability of generated tokens
     appearing based on their cumulative frequency in generated text.
     Positive values will make tokens less likely to appear as their frequency increases and
     decrease the likelihood of the model repeating the same statements verbatim.
     Supported range is [-2, 2].
     Default value is None.
    :paramtype frequency_penalty: float
    :keyword presence_penalty: A value that influences the probability of generated tokens
     appearing based on their existing presence in generated text.
     Positive values will make tokens less likely to appear when they already exist and increase
     the model's likelihood to output new topics.
     Supported range is [-2, 2].
     Default value is None.
    :paramtype presence_penalty: float
    :keyword temperature: The sampling temperature to use that controls the apparent creativity of
     generated completions.
     Higher values will make output more random while lower values will make results more focused
     and deterministic.
     It is not recommended to modify temperature and top_p for the same completions request as the
     interaction of these two settings is difficult to predict.
     Supported range is [0, 1].
     Default value is None.
    :paramtype temperature: float
    :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
     causes the model to consider the results of tokens with the provided probability mass.
As an example, a value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be considered. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Default value is None. :paramtype top_p: float :keyword max_tokens: The maximum number of tokens to generate. Default value is None. :paramtype max_tokens: int :keyword response_format: The format that the AI model must output. AI chat completions models typically output unformatted text by default. This is equivalent to setting "text" as the response_format. To output JSON format, without adhering to any schema, set to "json_object". To output JSON format adhering to a provided schema, set this to an object of the class ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] :keyword stop: A collection of textual sequences that will end completions generation. Default value is None. :paramtype stop: list[str] :keyword tools: The available tool definitions that the chat completions request can use, including caller-defined functions. Default value is None. :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] :keyword tool_choice: If specified, the model will configure which of the provided tools it can use for the chat completions response. Is either a Union[str, "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. Default value is None. :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or ~azure.ai.inference.models.ChatCompletionsNamedToolChoice :keyword seed: If specified, the system will make a best effort to sample deterministically such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Default value is None. :paramtype seed: int :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. 
:paramtype api_version: str """ def __init__( self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], *, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] ] = None, seed: Optional[int] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> None: self._model_info: Optional[_models.ModelInfo] = None # Store default chat completions settings, to be applied in all future service calls # unless overridden by arguments in the `complete` method. self._frequency_penalty = frequency_penalty self._presence_penalty = presence_penalty self._temperature = temperature self._top_p = top_p self._max_tokens = max_tokens self._internal_response_format = _get_internal_response_format(response_format) self._stop = stop self._tools = tools self._tool_choice = tool_choice self._seed = seed self._model = model self._model_extras = model_extras # For Key auth, we need to send these two auth HTTP request headers simultaneously: # 1. "Authorization: Bearer <key>" # 2. "api-key: <key>" # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, # and Azure OpenAI and the new Unified Inference endpoints support the second header. # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): headers = kwargs.pop("headers", {}) if "api-key" not in headers: headers["api-key"] = credential.key kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @overload async def complete( self, *, messages: List[_models.ChatRequestMessage], stream: Literal[False] = False, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] ] = None, seed: Optional[int] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> _models.ChatCompletions: ... 
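    # Illustrative only, not part of the generated client: a minimal construction sketch, assuming
    # key-based authentication (the endpoint URL and key below are placeholders).
    #
    #     client = ChatCompletionsClient(
    #         endpoint="https://<your-endpoint>",
    #         credential=AzureKeyCredential("<your-key>"),
    #         temperature=0.7,  # optional client-level default applied to every `complete` call
    #     )
    #
    # As described in `__init__` above, an AzureKeyCredential is sent both as
    # "Authorization: Bearer <key>" and as an "api-key" header.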
@overload async def complete( self, *, messages: List[_models.ChatRequestMessage], stream: Literal[True], frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] ] = None, seed: Optional[int] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> AsyncIterable[_models.StreamingChatCompletionsUpdate]: ... @overload async def complete( self, *, messages: List[_models.ChatRequestMessage], stream: Optional[bool] = None, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] ] = None, seed: Optional[int] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: # pylint: disable=line-too-long """Gets chat completions for the provided chat messages. Completions support a wide variety of tasks and generate text that continues from or "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route on the given endpoint. When using this method with `stream=True`, the response is streamed back to the client. Iterate over the resulting StreamingChatCompletions object to get content updates as they arrive. By default, the response is a ChatCompletions object (non-streaming). :keyword messages: The collection of context messages associated with this chat completions request. Typical usage begins with a chat message for the System role that provides instructions for the behavior of the assistant, followed by alternating messages between the User and Assistant roles. Required. :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] :keyword stream: A value indicating whether chat completions should be streamed for this request. Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions. Otherwise the response will be a ChatCompletions. :paramtype stream: bool :keyword frequency_penalty: A value that influences the probability of generated tokens appearing based on their cumulative frequency in generated text. Positive values will make tokens less likely to appear as their frequency increases and decrease the likelihood of the model repeating the same statements verbatim. Supported range is [-2, 2]. Default value is None. :paramtype frequency_penalty: float :keyword presence_penalty: A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new topics. 
Supported range is [-2, 2]. Default value is None. :paramtype presence_penalty: float :keyword temperature: The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Default value is None. :paramtype temperature: float :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. As an example, a value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be considered. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Default value is None. :paramtype top_p: float :keyword max_tokens: The maximum number of tokens to generate. Default value is None. :paramtype max_tokens: int :keyword response_format: The format that the AI model must output. AI chat completions models typically output unformatted text by default. This is equivalent to setting "text" as the response_format. To output JSON format, without adhering to any schema, set to "json_object". To output JSON format adhering to a provided schema, set this to an object of the class ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] :keyword stop: A collection of textual sequences that will end completions generation. Default value is None. :paramtype stop: list[str] :keyword tools: The available tool definitions that the chat completions request can use, including caller-defined functions. Default value is None. :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] :keyword tool_choice: If specified, the model will configure which of the provided tools it can use for the chat completions response. Is either a Union[str, "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. Default value is None. :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or ~azure.ai.inference.models.ChatCompletionsNamedToolChoice :keyword seed: If specified, the system will make a best effort to sample deterministically such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Default value is None. :paramtype seed: int :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. 
:rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions :raises ~azure.core.exceptions.HttpResponseError: """ @overload async def complete( self, body: JSON, *, content_type: str = "application/json", **kwargs: Any, ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: # pylint: disable=line-too-long """Gets chat completions for the provided chat messages. Completions support a wide variety of tasks and generate text that continues from or "completes" provided prompt data. :param body: An object of type MutableMapping[str, Any], such as a dictionary, that specifies the full request payload. Required. :type body: JSON :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. Default value is "application/json". :paramtype content_type: str :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions :raises ~azure.core.exceptions.HttpResponseError: """ @overload async def complete( self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any, ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: # pylint: disable=line-too-long """Gets chat completions for the provided chat messages. Completions support a wide variety of tasks and generate text that continues from or "completes" provided prompt data. :param body: Specifies the full request payload. Required. :type body: IO[bytes] :keyword content_type: Body Parameter content-type. Content type parameter for binary body. Default value is "application/json". :paramtype content_type: str :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions :raises ~azure.core.exceptions.HttpResponseError: """ # pylint:disable=client-method-missing-tracing-decorator-async async def complete( self, body: Union[JSON, IO[bytes]] = _Unset, *, messages: List[_models.ChatRequestMessage] = _Unset, stream: Optional[bool] = None, frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] ] = None, seed: Optional[int] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: # pylint: disable=line-too-long # pylint: disable=too-many-locals """Gets chat completions for the provided chat messages. Completions support a wide variety of tasks and generate text that continues from or "completes" provided prompt data. When using this method with `stream=True`, the response is streamed back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.StreamingChatCompletions` object to get content updates as they arrive. 
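        A minimal streaming sketch for illustration (the message text and printing logic are
        placeholders, not prescriptive):

        .. code-block:: python

            from azure.ai.inference.models import UserMessage

            response = await client.complete(
                messages=[UserMessage(content="How many feet are in a mile?")],
                stream=True,
            )
            async for update in response:
                if update.choices:
                    print(update.choices[0].delta.content or "", end="")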
:param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type that specifies the full request payload. Required. :type body: JSON or IO[bytes] :keyword messages: The collection of context messages associated with this chat completions request. Typical usage begins with a chat message for the System role that provides instructions for the behavior of the assistant, followed by alternating messages between the User and Assistant roles. Required. :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] :keyword stream: A value indicating whether chat completions should be streamed for this request. Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions. Otherwise the response will be a ChatCompletions. :paramtype stream: bool :keyword frequency_penalty: A value that influences the probability of generated tokens appearing based on their cumulative frequency in generated text. Positive values will make tokens less likely to appear as their frequency increases and decrease the likelihood of the model repeating the same statements verbatim. Supported range is [-2, 2]. Default value is None. :paramtype frequency_penalty: float :keyword presence_penalty: A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new topics. Supported range is [-2, 2]. Default value is None. :paramtype presence_penalty: float :keyword temperature: The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Default value is None. :paramtype temperature: float :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. As an example, a value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be considered. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. Supported range is [0, 1]. Default value is None. :paramtype top_p: float :keyword max_tokens: The maximum number of tokens to generate. Default value is None. :paramtype max_tokens: int :keyword response_format: The format that the AI model must output. AI chat completions models typically output unformatted text by default. This is equivalent to setting "text" as the response_format. To output JSON format, without adhering to any schema, set to "json_object". To output JSON format adhering to a provided schema, set this to an object of the class ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] :keyword stop: A collection of textual sequences that will end completions generation. Default value is None. :paramtype stop: list[str] :keyword tools: The available tool definitions that the chat completions request can use, including caller-defined functions. Default value is None. 
:paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] :keyword tool_choice: If specified, the model will configure which of the provided tools it can use for the chat completions response. Is either a Union[str, "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. Default value is None. :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or ~azure.ai.inference.models.ChatCompletionsNamedToolChoice :keyword seed: If specified, the system will make a best effort to sample deterministically such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Default value is None. :paramtype seed: int :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions :raises ~azure.core.exceptions.HttpResponseError: """ error_map = { 401: ClientAuthenticationError, 404: ResourceNotFoundError, 409: ResourceExistsError, 304: ResourceNotModifiedError, } error_map.update(kwargs.pop("error_map", {}) or {}) _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) _params = kwargs.pop("params", {}) or {} _extra_parameters: Union[_models._enums.ExtraParameters, None] = None content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) internal_response_format = _get_internal_response_format(response_format) if body is _Unset: if messages is _Unset: raise TypeError("missing required argument: messages") body = { "messages": messages, "stream": stream, "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty, "max_tokens": max_tokens if max_tokens is not None else self._max_tokens, "model": model if model is not None else self._model, "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty, "response_format": ( internal_response_format if internal_response_format is not None else self._internal_response_format ), "seed": seed if seed is not None else self._seed, "stop": stop if stop is not None else self._stop, "temperature": temperature if temperature is not None else self._temperature, "tool_choice": tool_choice if tool_choice is not None else self._tool_choice, "tools": tools if tools is not None else self._tools, "top_p": top_p if top_p is not None else self._top_p, } if model_extras is not None and bool(model_extras): body.update(model_extras) _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access elif self._model_extras is not None and bool(self._model_extras): body.update(self._model_extras) _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access body = {k: v for k, v in body.items() if v is not None} elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool): 
stream = body["stream"] content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): _content = body else: _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore _request = build_chat_completions_complete_request( extra_params=_extra_parameters, content_type=content_type, api_version=self._config.api_version, content=_content, headers=_headers, params=_params, ) path_format_arguments = { "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), } _request.url = self._client.format_url(_request.url, **path_format_arguments) _stream = stream or False pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access _request, stream=_stream, **kwargs ) response = pipeline_response.http_response if response.status_code not in [200]: if _stream: await response.read() # Load the body in memory and close the socket map_error(status_code=response.status_code, response=response, error_map=error_map) raise HttpResponseError(response=response) if _stream: return _models.AsyncStreamingChatCompletions(response) return _deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access @distributed_trace_async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long """Returns information about the AI model. The method makes a REST API call to the ``/info`` route on the given endpoint. This method will only work when using Serverless API or Managed Compute endpoint. It will not work for GitHub Models endpoint or Azure OpenAI endpoint. :return: ModelInfo. The ModelInfo is compatible with MutableMapping :rtype: ~azure.ai.inference.models.ModelInfo :raises ~azure.core.exceptions.HttpResponseError: """ if not self._model_info: try: self._model_info = await self._get_model_info( **kwargs ) # pylint: disable=attribute-defined-outside-init except ResourceNotFoundError as error: error.message = "Model information is not available on this endpoint (`/info` route not supported)." raise error return self._model_info def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() class EmbeddingsClient(EmbeddingsClientGenerated): """EmbeddingsClient. :param endpoint: Service endpoint URL for AI model inference. Required. :type endpoint: str :param credential: Credential used to authenticate requests to the service. Is either a AzureKeyCredential type or a AsyncTokenCredential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials_async.AsyncTokenCredential :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should have. Default value is None. :paramtype dimensions: int :keyword encoding_format: Optional. The desired format for the returned embeddings. Known values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat :keyword input_type: Optional. The type of the input. Known values are: "text", "query", and "document". Default value is None. :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. 
:paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. :paramtype api_version: str """ def __init__( self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], *, dimensions: Optional[int] = None, encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> None: self._model_info: Optional[_models.ModelInfo] = None # Store default embeddings settings, to be applied in all future service calls # unless overridden by arguments in the `embed` method. self._dimensions = dimensions self._encoding_format = encoding_format self._input_type = input_type self._model = model self._model_extras = model_extras # For Key auth, we need to send these two auth HTTP request headers simultaneously: # 1. "Authorization: Bearer <key>" # 2. "api-key: <key>" # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, # and Azure OpenAI and the new Unified Inference endpoints support the second header. # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): headers = kwargs.pop("headers", {}) if "api-key" not in headers: headers["api-key"] = credential.key kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @overload async def embed( self, *, input: List[str], dimensions: Optional[int] = None, encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> _models.EmbeddingsResult: """Return the embedding vectors for given text prompts. The method makes a REST API call to the `/embeddings` route on the given endpoint. :keyword input: Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Required. :paramtype input: list[str] :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should have. Default value is None. :paramtype dimensions: int :keyword encoding_format: Optional. The desired format for the returned embeddings. Known values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat :keyword input_type: Optional. The type of the input. Known values are: "text", "query", and "document". Default value is None. :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. 
They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ @overload async def embed( self, body: JSON, *, content_type: str = "application/json", **kwargs: Any, ) -> _models.EmbeddingsResult: """Return the embedding vectors for given text prompts. The method makes a REST API call to the `/embeddings` route on the given endpoint. :param body: An object of type MutableMapping[str, Any], such as a dictionary, that specifies the full request payload. Required. :type body: JSON :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. Default value is "application/json". :paramtype content_type: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ @overload async def embed( self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any, ) -> _models.EmbeddingsResult: """Return the embedding vectors for given text prompts. The method makes a REST API call to the `/embeddings` route on the given endpoint. :param body: Specifies the full request payload. Required. :type body: IO[bytes] :keyword content_type: Body Parameter content-type. Content type parameter for binary body. Default value is "application/json". :paramtype content_type: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ @distributed_trace_async async def embed( self, body: Union[JSON, IO[bytes]] = _Unset, *, input: List[str] = _Unset, dimensions: Optional[int] = None, encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> _models.EmbeddingsResult: # pylint: disable=line-too-long """Return the embedding vectors for given text prompts. The method makes a REST API call to the `/embeddings` route on the given endpoint. :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type that specifies the full request payload. Required. :type body: JSON or IO[bytes] :keyword input: Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Required. :paramtype input: list[str] :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should have. Default value is None. :paramtype dimensions: int :keyword encoding_format: Optional. The desired format for the returned embeddings. Known values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat :keyword input_type: Optional. The type of the input. Known values are: "text", "query", and "document". Default value is None. 
:paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ error_map: MutableMapping[int, Type[HttpResponseError]] = { 401: ClientAuthenticationError, 404: ResourceNotFoundError, 409: ResourceExistsError, 304: ResourceNotModifiedError, } error_map.update(kwargs.pop("error_map", {}) or {}) _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) _params = kwargs.pop("params", {}) or {} _extra_parameters: Union[_models._enums.ExtraParameters, None] = None content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) if body is _Unset: if input is _Unset: raise TypeError("missing required argument: input") body = { "input": input, "dimensions": dimensions if dimensions is not None else self._dimensions, "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, "input_type": input_type if input_type is not None else self._input_type, "model": model if model is not None else self._model, } if model_extras is not None and bool(model_extras): body.update(model_extras) _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access elif self._model_extras is not None and bool(self._model_extras): body.update(self._model_extras) _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): _content = body else: _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore _request = build_embeddings_embed_request( extra_params=_extra_parameters, content_type=content_type, api_version=self._config.api_version, content=_content, headers=_headers, params=_params, ) path_format_arguments = { "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), } _request.url = self._client.format_url(_request.url, **path_format_arguments) _stream = kwargs.pop("stream", False) pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access _request, stream=_stream, **kwargs ) response = pipeline_response.http_response if response.status_code not in [200]: if _stream: await response.read() # Load the body in memory and close the socket map_error(status_code=response.status_code, response=response, error_map=error_map) raise HttpResponseError(response=response) if _stream: deserialized = response.iter_bytes() else: deserialized = _deserialize( _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access ) return deserialized # type: ignore @distributed_trace_async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long """Returns 
information about the AI model. The method makes a REST API call to the ``/info`` route on the given endpoint. This method will only work when using Serverless API or Managed Compute endpoint. It will not work for GitHub Models endpoint or Azure OpenAI endpoint. :return: ModelInfo. The ModelInfo is compatible with MutableMapping :rtype: ~azure.ai.inference.models.ModelInfo :raises ~azure.core.exceptions.HttpResponseError: """ if not self._model_info: try: self._model_info = await self._get_model_info( **kwargs ) # pylint: disable=attribute-defined-outside-init except ResourceNotFoundError as error: error.message = "Model information is not available on this endpoint (`/info` route not supported)." raise error return self._model_info def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated): """ImageEmbeddingsClient. :param endpoint: Service endpoint URL for AI model inference. Required. :type endpoint: str :param credential: Credential used to authenticate requests to the service. Is either a AzureKeyCredential type or a AsyncTokenCredential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials_async.AsyncTokenCredential :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should have. Default value is None. :paramtype dimensions: int :keyword encoding_format: Optional. The desired format for the returned embeddings. Known values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat :keyword input_type: Optional. The type of the input. Known values are: "text", "query", and "document". Default value is None. :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :keyword api_version: The API version to use for this operation. Default value is "2024-05-01-preview". Note that overriding this default value may result in unsupported behavior. :paramtype api_version: str """ def __init__( self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], *, dimensions: Optional[int] = None, encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> None: self._model_info: Optional[_models.ModelInfo] = None # Store default embeddings settings, to be applied in all future service calls # unless overridden by arguments in the `embed` method. self._dimensions = dimensions self._encoding_format = encoding_format self._input_type = input_type self._model = model self._model_extras = model_extras # For Key auth, we need to send these two auth HTTP request headers simultaneously: # 1. 
"Authorization: Bearer <key>" # 2. "api-key: <key>" # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, # and Azure OpenAI and the new Unified Inference endpoints support the second header. # The first header will be taken care of by auto-generated code. # The second one is added here. if isinstance(credential, AzureKeyCredential): headers = kwargs.pop("headers", {}) if "api-key" not in headers: headers["api-key"] = credential.key kwargs["headers"] = headers super().__init__(endpoint, credential, **kwargs) @overload async def embed( self, *, input: List[_models.ImageEmbeddingInput], dimensions: Optional[int] = None, encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> _models.EmbeddingsResult: """Return the embedding vectors for given images. The method makes a REST API call to the `/images/embeddings` route on the given endpoint. :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an array. The input must not exceed the max input tokens for the model. Required. :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should have. Default value is None. :paramtype dimensions: int :keyword encoding_format: Optional. The desired format for the returned embeddings. Known values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat :keyword input_type: Optional. Known values are: "text", "query", and "document". Default value is None. :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ @overload async def embed( self, body: JSON, *, content_type: str = "application/json", **kwargs: Any, ) -> _models.EmbeddingsResult: """Return the embedding vectors for given images. The method makes a REST API call to the `/images/embeddings` route on the given endpoint. :param body: An object of type MutableMapping[str, Any], such as a dictionary, that specifies the full request payload. Required. :type body: JSON :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. Default value is "application/json". :paramtype content_type: str :return: EmbeddingsResult. 
The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ @overload async def embed( self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any, ) -> _models.EmbeddingsResult: """Return the embedding vectors for given images. The method makes a REST API call to the `/images/embeddings` route on the given endpoint. :param body: Specifies the full request payload. Required. :type body: IO[bytes] :keyword content_type: Body Parameter content-type. Content type parameter for binary body. Default value is "application/json". :paramtype content_type: str :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ @distributed_trace_async async def embed( self, body: Union[JSON, IO[bytes]] = _Unset, *, input: List[_models.ImageEmbeddingInput] = _Unset, dimensions: Optional[int] = None, encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, model: Optional[str] = None, model_extras: Optional[Dict[str, Any]] = None, **kwargs: Any, ) -> _models.EmbeddingsResult: # pylint: disable=line-too-long """Return the embedding vectors for given images. The method makes a REST API call to the `/images/embeddings` route on the given endpoint. :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type that specifies the full request payload. Required. :type body: JSON or IO[bytes] :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an array. The input must not exceed the max input tokens for the model. Required. :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should have. Default value is None. :paramtype dimensions: int :keyword encoding_format: Optional. The desired format for the returned embeddings. Known values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat :keyword input_type: Optional. The type of the input. Known values are: "text", "query", and "document". Default value is None. :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType :keyword model: ID of the specific AI model to use, if more than one model is available on the endpoint. Default value is None. :paramtype model: str :keyword model_extras: Additional, model-specific parameters that are not in the standard request payload. They will be added as-is to the root of the JSON in the request body. How the service handles these extra parameters depends on the value of the ``extra-parameters`` request header. Default value is None. :paramtype model_extras: dict[str, Any] :return: EmbeddingsResult. 
The EmbeddingsResult is compatible with MutableMapping :rtype: ~azure.ai.inference.models.EmbeddingsResult :raises ~azure.core.exceptions.HttpResponseError: """ error_map: MutableMapping[int, Type[HttpResponseError]] = { 401: ClientAuthenticationError, 404: ResourceNotFoundError, 409: ResourceExistsError, 304: ResourceNotModifiedError, } error_map.update(kwargs.pop("error_map", {}) or {}) _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) _params = kwargs.pop("params", {}) or {} _extra_parameters: Union[_models._enums.ExtraParameters, None] = None content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) if body is _Unset: if input is _Unset: raise TypeError("missing required argument: input") body = { "input": input, "dimensions": dimensions if dimensions is not None else self._dimensions, "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, "input_type": input_type if input_type is not None else self._input_type, "model": model if model is not None else self._model, } if model_extras is not None and bool(model_extras): body.update(model_extras) _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access elif self._model_extras is not None and bool(self._model_extras): body.update(self._model_extras) _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access body = {k: v for k, v in body.items() if v is not None} content_type = content_type or "application/json" _content = None if isinstance(body, (IOBase, bytes)): _content = body else: _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore _request = build_image_embeddings_embed_request( extra_params=_extra_parameters, content_type=content_type, api_version=self._config.api_version, content=_content, headers=_headers, params=_params, ) path_format_arguments = { "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), } _request.url = self._client.format_url(_request.url, **path_format_arguments) _stream = kwargs.pop("stream", False) pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access _request, stream=_stream, **kwargs ) response = pipeline_response.http_response if response.status_code not in [200]: if _stream: await response.read() # Load the body in memory and close the socket map_error(status_code=response.status_code, response=response, error_map=error_map) raise HttpResponseError(response=response) if _stream: deserialized = response.iter_bytes() else: deserialized = _deserialize( _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access ) return deserialized # type: ignore @distributed_trace_async async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: # pylint: disable=line-too-long """Returns information about the AI model. The method makes a REST API call to the ``/info`` route on the given endpoint. This method will only work when using Serverless API or Managed Compute endpoint. It will not work for GitHub Models endpoint or Azure OpenAI endpoint. :return: ModelInfo. 
The ModelInfo is compatible with MutableMapping :rtype: ~azure.ai.inference.models.ModelInfo :raises ~azure.core.exceptions.HttpResponseError: """ if not self._model_info: try: self._model_info = await self._get_model_info( **kwargs ) # pylint: disable=attribute-defined-outside-init except ResourceNotFoundError as error: error.message = "Model information is not available on this endpoint (`/info` route not supported)." raise error return self._model_info def __str__(self) -> str: # pylint: disable=client-method-name-no-double-underscore return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() __all__: List[str] = [ "load_client", "ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient", ] # Add all objects you want publicly available to users at this package level def patch_sdk(): """Do not remove from this file. `patch_sdk` is a last resort escape hatch that allows you to do customizations you can't accomplish using the techniques described in https://aka.ms/azsdk/python/dpcodegen/python/customize """
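# Illustrative only: a minimal `load_client` usage sketch, kept as a comment. The endpoint URL and
# key are placeholders; `load_client` probes the `/info` route, so it works on Serverless API or
# Managed Compute endpoints only.
#
#     import asyncio
#     from azure.core.credentials import AzureKeyCredential
#     from azure.ai.inference.aio import load_client
#
#     async def main():
#         client = await load_client(
#             endpoint="https://<your-endpoint>",
#             credential=AzureKeyCredential("<your-key>"),
#         )
#         print(f"Loaded client of type {type(client).__name__}")
#         await client.close()
#
#     asyncio.run(main())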