path: root/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py
author: S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py  1387
1 file changed, 1387 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py
new file mode 100644
index 00000000..da95cf93
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py
@@ -0,0 +1,1387 @@
+# pylint: disable=too-many-lines
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+
+Why do we patch auto-generated code? Below is a summary of the changes made in all _patch files (not just this one):
+1. Add support for input argument `model_extras` (all clients)
+2. Add support for function load_client
+3. Add support for setting sticky chat completions/embeddings input arguments in the client constructor
+4. Add support for get_model_info, while caching the result (all clients)
+5. Add support for chat completion streaming (ChatCompletionsClient client only)
+6. Add support for friendly print of result objects (__str__ method) (all clients)
+7. Add support for load() method in ImageUrl class (see /models/_patch.py)
+8. Add support for sending two auth headers for api-key auth (all clients)
+9. Simplify how chat completions "response_format" is set. Define "response_format" as a flat Union of strings and
+ JsonSchemaFormat object, instead of using auto-generated base/derived classes named
+ ChatCompletionsResponseFormatXxxInternal.
+10. Allow UserMessage("my message") in addition to UserMessage(content="my message"). Same applies to
+    AssistantMessage, SystemMessage, DeveloperMessage and ToolMessage.
+
+"""
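+# A minimal usage sketch of items 9 and 10 above (the flat `response_format` union and the
+# positional-content message shorthand); the endpoint and key are placeholders:
+#
+#     from azure.ai.inference import ChatCompletionsClient
+#     from azure.ai.inference.models import UserMessage
+#     from azure.core.credentials import AzureKeyCredential
+#
+#     client = ChatCompletionsClient("https://<endpoint>", AzureKeyCredential("<api-key>"))
+#     response = client.complete(
+#         messages=[UserMessage("List three colors as a JSON array")],
+#         response_format="json_object",
+#     )
+#     print(response.choices[0].message.content)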
+import json
+import logging
+import sys
+
+from io import IOBase
+from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, Iterable
+
+from azure.core.pipeline import PipelineResponse
+from azure.core.credentials import AzureKeyCredential
+from azure.core.tracing.decorator import distributed_trace
+from azure.core.utils import case_insensitive_dict
+from azure.core.exceptions import (
+ ClientAuthenticationError,
+ HttpResponseError,
+ map_error,
+ ResourceExistsError,
+ ResourceNotFoundError,
+ ResourceNotModifiedError,
+)
+from . import models as _models
+from ._model_base import SdkJSONEncoder, _deserialize
+from ._serialization import Serializer
+from ._operations._operations import (
+ build_chat_completions_complete_request,
+ build_embeddings_embed_request,
+ build_image_embeddings_embed_request,
+)
+from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated
+from ._client import EmbeddingsClient as EmbeddingsClientGenerated
+from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated
+
+if sys.version_info >= (3, 9):
+ from collections.abc import MutableMapping
+else:
+ from typing import MutableMapping # type: ignore # pylint: disable=ungrouped-imports
+
+if TYPE_CHECKING:
+ # pylint: disable=unused-import,ungrouped-imports
+ from azure.core.credentials import TokenCredential
+
+JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+
+_SERIALIZER = Serializer()
+_SERIALIZER.client_side_validation = False
+
+_LOGGER = logging.getLogger(__name__)
+
+
+def _get_internal_response_format(
+ response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]]
+) -> Optional[_models._models.ChatCompletionsResponseFormat]:
+ """
+    Internal helper to convert the public response format type accepted by the `complete` method
+    into the internal response format type used by the generated code.
+
+    :param response_format: Response format. Optional.
+    :type response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]]
+    :return: The corresponding internal response format, or None if `response_format` is None.
+    :rtype: Optional[~azure.ai.inference._models._models.ChatCompletionsResponseFormat]
+ """
+ if response_format is not None:
+
+        # To keep the mypy type checker happy, start by declaring the type as the base class
+ internal_response_format: _models._models.ChatCompletionsResponseFormat
+
+ if isinstance(response_format, str) and response_format == "text":
+ internal_response_format = (
+ _models._models.ChatCompletionsResponseFormatText() # pylint: disable=protected-access
+ )
+ elif isinstance(response_format, str) and response_format == "json_object":
+ internal_response_format = (
+ _models._models.ChatCompletionsResponseFormatJsonObject() # pylint: disable=protected-access
+ )
+ elif isinstance(response_format, _models.JsonSchemaFormat):
+ internal_response_format = (
+ _models._models.ChatCompletionsResponseFormatJsonSchema( # pylint: disable=protected-access
+ json_schema=response_format
+ )
+ )
+ else:
+ raise ValueError(f"Unsupported `response_format` {response_format}")
+
+ return internal_response_format
+
+ return None
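+# For example, a caller-facing JsonSchemaFormat would be wrapped as follows (a sketch; the
+# schema shown is arbitrary):
+#
+#     fmt = _models.JsonSchemaFormat(
+#         name="color_list",
+#         schema={"type": "object", "properties": {"colors": {"type": "array", "items": {"type": "string"}}}},
+#     )
+#     internal = _get_internal_response_format(fmt)
+#     # -> _models._models.ChatCompletionsResponseFormatJsonSchema(json_schema=fmt)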
+
+
+def load_client(
+ endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any
+) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]:
+ """
+ Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route
+ on the given endpoint, to determine the model type and therefore which client to instantiate.
+    Keyword arguments are passed through to the appropriate client's constructor, so settings such as
+    `api_version`, `logging_enable` and `user_agent` can be supplied here.
+    This method only works with Serverless API or Managed Compute endpoints.
+    It does not work with GitHub Models or Azure OpenAI endpoints.
+
+ :param endpoint: Service endpoint URL for AI model inference. Required.
+ :type endpoint: str
+ :param credential: Credential used to authenticate requests to the service. Is either a
+ AzureKeyCredential type or a TokenCredential type. Required.
+ :type credential: ~azure.core.credentials.AzureKeyCredential or
+ ~azure.core.credentials.TokenCredential
+ :return: The appropriate synchronous client associated with the given endpoint
+ :rtype: ~azure.ai.inference.ChatCompletionsClient or ~azure.ai.inference.EmbeddingsClient
+ or ~azure.ai.inference.ImageEmbeddingsClient
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ with ChatCompletionsClient(
+ endpoint, credential, **kwargs
+    ) as client:  # Pick any of the clients; it does not matter which.
+ try:
+ model_info = client.get_model_info() # type: ignore
+ except ResourceNotFoundError as error:
+ error.message = (
+ "`load_client` function does not work on this endpoint (`/info` route not supported). "
+ "Please construct one of the clients (e.g. `ChatCompletionsClient`) directly."
+ )
+ raise error
+
+ _LOGGER.info("model_info=%s", model_info)
+ if not model_info.model_type:
+ raise ValueError(
+ "The AI model information is missing a value for `model type`. Cannot create an appropriate client."
+ )
+
+    # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type
+ if model_info.model_type in (
+ _models.ModelType.CHAT_COMPLETION,
+ "chat_completions",
+ "chat",
+ "completion",
+ "chat-completion",
+ "chat-completions",
+ "chat completion",
+ "chat completions",
+ ):
+ chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs)
+ chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init
+ model_info
+ )
+ return chat_completion_client
+
+ if model_info.model_type in (
+ _models.ModelType.EMBEDDINGS,
+ "embedding",
+ "text_embedding",
+ "text-embeddings",
+ "text embedding",
+ "text embeddings",
+ ):
+ embedding_client = EmbeddingsClient(endpoint, credential, **kwargs)
+ embedding_client._model_info = model_info # pylint: disable=protected-access,attribute-defined-outside-init
+ return embedding_client
+
+ if model_info.model_type in (
+ _models.ModelType.IMAGE_EMBEDDINGS,
+ "image_embedding",
+ "image-embeddings",
+ "image-embedding",
+ "image embedding",
+ "image embeddings",
+ ):
+ image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs)
+ image_embedding_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init
+ model_info
+ )
+ return image_embedding_client
+
+ raise ValueError(f"No client available to support AI model type `{model_info.model_type}`")
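+# A minimal sketch of `load_client` usage (assumes a key-authenticated Serverless API or
+# Managed Compute endpoint that exposes the `/info` route):
+#
+#     from azure.ai.inference import load_client, ChatCompletionsClient
+#     from azure.core.credentials import AzureKeyCredential
+#
+#     client = load_client("https://<endpoint>", AzureKeyCredential("<api-key>"))
+#     if isinstance(client, ChatCompletionsClient):
+#         response = client.complete(messages=[{"role": "user", "content": "Hello"}])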
+
+
+class ChatCompletionsClient(ChatCompletionsClientGenerated): # pylint: disable=too-many-instance-attributes
+ """ChatCompletionsClient.
+
+ :param endpoint: Service endpoint URL for AI model inference. Required.
+ :type endpoint: str
+ :param credential: Credential used to authenticate requests to the service. Is either a
+ AzureKeyCredential type or a TokenCredential type. Required.
+ :type credential: ~azure.core.credentials.AzureKeyCredential or
+ ~azure.core.credentials.TokenCredential
+ :keyword frequency_penalty: A value that influences the probability of generated tokens
+ appearing based on their cumulative frequency in generated text.
+ Positive values will make tokens less likely to appear as their frequency increases and
+ decrease the likelihood of the model repeating the same statements verbatim.
+ Supported range is [-2, 2].
+ Default value is None.
+ :paramtype frequency_penalty: float
+ :keyword presence_penalty: A value that influences the probability of generated tokens
+ appearing based on their existing
+ presence in generated text.
+ Positive values will make tokens less likely to appear when they already exist and increase
+ the model's likelihood to output new topics.
+ Supported range is [-2, 2].
+ Default value is None.
+ :paramtype presence_penalty: float
+ :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+ generated completions.
+ Higher values will make output more random while lower values will make results more focused
+ and deterministic.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1].
+ Default value is None.
+ :paramtype temperature: float
+ :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+ causes the
+ model to consider the results of tokens with the provided probability mass. As an example, a
+ value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ considered.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1].
+ Default value is None.
+ :paramtype top_p: float
+ :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+ :paramtype max_tokens: int
+ :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+ unformatted text by default. This is equivalent to setting "text" as the response_format.
+ To output JSON format, without adhering to any schema, set to "json_object".
+ To output JSON format adhering to a provided schema, set this to an object of the class
+ ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+ :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+ :keyword stop: A collection of textual sequences that will end completions generation. Default
+ value is None.
+ :paramtype stop: list[str]
+ :keyword tools: The available tool definitions that the chat completions request can use,
+ including caller-defined functions. Default value is None.
+ :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+ :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+ use for the chat completions response. Is either a Union[str,
+ "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+ Default value is None.
+ :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+ ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+ :keyword seed: If specified, the system will make a best effort to sample deterministically
+ such that repeated requests with the
+ same seed and parameters should return the same result. Determinism is not guaranteed.
+ Default value is None.
+ :paramtype seed: int
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :keyword api_version: The API version to use for this operation. Default value is
+ "2024-05-01-preview". Note that overriding this default value may result in unsupported
+ behavior.
+ :paramtype api_version: str
+ """
+
+ def __init__(
+ self,
+ endpoint: str,
+ credential: Union[AzureKeyCredential, "TokenCredential"],
+ *,
+ frequency_penalty: Optional[float] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> None:
+
+ self._model_info: Optional[_models.ModelInfo] = None
+
+ # Store default chat completions settings, to be applied in all future service calls
+ # unless overridden by arguments in the `complete` method.
+ self._frequency_penalty = frequency_penalty
+ self._presence_penalty = presence_penalty
+ self._temperature = temperature
+ self._top_p = top_p
+ self._max_tokens = max_tokens
+ self._internal_response_format = _get_internal_response_format(response_format)
+ self._stop = stop
+ self._tools = tools
+ self._tool_choice = tool_choice
+ self._seed = seed
+ self._model = model
+ self._model_extras = model_extras
+
+ # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+ # 1. "Authorization: Bearer <key>"
+ # 2. "api-key: <key>"
+ # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+ # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+ # The first header will be taken care of by auto-generated code.
+ # The second one is added here.
+ if isinstance(credential, AzureKeyCredential):
+ headers = kwargs.pop("headers", {})
+ if "api-key" not in headers:
+ headers["api-key"] = credential.key
+ kwargs["headers"] = headers
+
+ super().__init__(endpoint, credential, **kwargs)
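+    # A minimal sketch of "sticky" defaults set in the constructor and applied to every
+    # `complete` call unless overridden per call (endpoint, key and values are placeholders):
+    #
+    #     client = ChatCompletionsClient(
+    #         "https://<endpoint>",
+    #         AzureKeyCredential("<api-key>"),
+    #         temperature=0.5,
+    #         max_tokens=1000,
+    #     )
+    #     response = client.complete(messages=[UserMessage("Hello")])  # uses the defaults above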
+
+ @overload
+ def complete(
+ self,
+ *,
+ messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]],
+ stream: Literal[False] = False,
+ frequency_penalty: Optional[float] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> _models.ChatCompletions: ...
+
+ @overload
+ def complete(
+ self,
+ *,
+ messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]],
+ stream: Literal[True],
+ frequency_penalty: Optional[float] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> Iterable[_models.StreamingChatCompletionsUpdate]: ...
+
+ @overload
+ def complete(
+ self,
+ *,
+ messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]],
+ stream: Optional[bool] = None,
+ frequency_penalty: Optional[float] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+ # pylint: disable=line-too-long
+ """Gets chat completions for the provided chat messages.
+ Completions support a wide variety of tasks and generate text that continues from or
+ "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route
+ on the given endpoint.
+ When using this method with `stream=True`, the response is streamed
+ back to the client. Iterate over the resulting StreamingChatCompletions
+ object to get content updates as they arrive. By default, the response is a ChatCompletions object
+ (non-streaming).
+
+ :keyword messages: The collection of context messages associated with this chat completions
+ request.
+ Typical usage begins with a chat message for the System role that provides instructions for
+ the behavior of the assistant, followed by alternating messages between the User and
+ Assistant roles. Required.
+ :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] or list[dict[str, Any]]
+ :keyword stream: A value indicating whether chat completions should be streamed for this request.
+ Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions.
+ Otherwise the response will be a ChatCompletions.
+ :paramtype stream: bool
+ :keyword frequency_penalty: A value that influences the probability of generated tokens
+ appearing based on their cumulative frequency in generated text.
+ Positive values will make tokens less likely to appear as their frequency increases and
+ decrease the likelihood of the model repeating the same statements verbatim.
+ Supported range is [-2, 2].
+ Default value is None.
+ :paramtype frequency_penalty: float
+ :keyword presence_penalty: A value that influences the probability of generated tokens
+ appearing based on their existing
+ presence in generated text.
+ Positive values will make tokens less likely to appear when they already exist and increase
+ the model's likelihood to output new topics.
+ Supported range is [-2, 2].
+ Default value is None.
+ :paramtype presence_penalty: float
+ :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+ generated completions.
+ Higher values will make output more random while lower values will make results more focused
+ and deterministic.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1].
+ Default value is None.
+ :paramtype temperature: float
+ :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+ causes the
+ model to consider the results of tokens with the provided probability mass. As an example, a
+ value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ considered.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1].
+ Default value is None.
+ :paramtype top_p: float
+ :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+ :paramtype max_tokens: int
+ :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+ unformatted text by default. This is equivalent to setting "text" as the response_format.
+ To output JSON format, without adhering to any schema, set to "json_object".
+ To output JSON format adhering to a provided schema, set this to an object of the class
+ ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+ :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+ :keyword stop: A collection of textual sequences that will end completions generation. Default
+ value is None.
+ :paramtype stop: list[str]
+ :keyword tools: The available tool definitions that the chat completions request can use,
+ including caller-defined functions. Default value is None.
+ :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+ :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+ use for the chat completions response. Is either a Union[str,
+ "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+ Default value is None.
+ :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+ ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+ :keyword seed: If specified, the system will make a best effort to sample deterministically
+ such that repeated requests with the
+ same seed and parameters should return the same result. Determinism is not guaranteed.
+ Default value is None.
+ :paramtype seed: int
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+ :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ @overload
+ def complete(
+ self,
+ body: JSON,
+ *,
+ content_type: str = "application/json",
+ **kwargs: Any,
+ ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+ # pylint: disable=line-too-long
+ """Gets chat completions for the provided chat messages.
+ Completions support a wide variety of tasks and generate text that continues from or
+ "completes" provided prompt data.
+
+ :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+ specifies the full request payload. Required.
+ :type body: JSON
+ :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+ Default value is "application/json".
+ :paramtype content_type: str
+ :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+ :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ @overload
+ def complete(
+ self,
+ body: IO[bytes],
+ *,
+ content_type: str = "application/json",
+ **kwargs: Any,
+ ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+ # pylint: disable=line-too-long
+ # pylint: disable=too-many-locals
+ """Gets chat completions for the provided chat messages.
+ Completions support a wide variety of tasks and generate text that continues from or
+ "completes" provided prompt data.
+
+ :param body: Specifies the full request payload. Required.
+ :type body: IO[bytes]
+ :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+ Default value is "application/json".
+ :paramtype content_type: str
+ :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+ :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ # pylint:disable=client-method-missing-tracing-decorator
+ def complete(
+ self,
+ body: Union[JSON, IO[bytes]] = _Unset,
+ *,
+ messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]] = _Unset,
+ stream: Optional[bool] = None,
+ frequency_penalty: Optional[float] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+ # pylint: disable=line-too-long
+ # pylint: disable=too-many-locals
+ """Gets chat completions for the provided chat messages.
+ Completions support a wide variety of tasks and generate text that continues from or
+ "completes" provided prompt data. When using this method with `stream=True`, the response is streamed
+ back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.StreamingChatCompletions`
+ object to get content updates as they arrive.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+ that specifies the full request payload. Required.
+ :type body: JSON or IO[bytes]
+ :keyword messages: The collection of context messages associated with this chat completions
+ request.
+ Typical usage begins with a chat message for the System role that provides instructions for
+ the behavior of the assistant, followed by alternating messages between the User and
+ Assistant roles. Required.
+ :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] or list[dict[str, Any]]
+ :keyword stream: A value indicating whether chat completions should be streamed for this request.
+ Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions.
+ Otherwise the response will be a ChatCompletions.
+ :paramtype stream: bool
+ :keyword frequency_penalty: A value that influences the probability of generated tokens
+ appearing based on their cumulative frequency in generated text.
+ Positive values will make tokens less likely to appear as their frequency increases and
+ decrease the likelihood of the model repeating the same statements verbatim.
+ Supported range is [-2, 2].
+ Default value is None.
+ :paramtype frequency_penalty: float
+ :keyword presence_penalty: A value that influences the probability of generated tokens
+ appearing based on their existing
+ presence in generated text.
+ Positive values will make tokens less likely to appear when they already exist and increase
+ the model's likelihood to output new topics.
+ Supported range is [-2, 2].
+ Default value is None.
+ :paramtype presence_penalty: float
+ :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+ generated completions.
+ Higher values will make output more random while lower values will make results more focused
+ and deterministic.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1].
+ Default value is None.
+ :paramtype temperature: float
+ :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+ causes the
+ model to consider the results of tokens with the provided probability mass. As an example, a
+ value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ considered.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1].
+ Default value is None.
+ :paramtype top_p: float
+ :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+ :paramtype max_tokens: int
+ :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+ unformatted text by default. This is equivalent to setting "text" as the response_format.
+ To output JSON format, without adhering to any schema, set to "json_object".
+ To output JSON format adhering to a provided schema, set this to an object of the class
+ ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+ :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+ :keyword stop: A collection of textual sequences that will end completions generation. Default
+ value is None.
+ :paramtype stop: list[str]
+ :keyword tools: The available tool definitions that the chat completions request can use,
+ including caller-defined functions. Default value is None.
+ :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+ :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+ use for the chat completions response. Is either a Union[str,
+ "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+ Default value is None.
+ :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+ ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+ :keyword seed: If specified, the system will make a best effort to sample deterministically
+ such that repeated requests with the
+ same seed and parameters should return the same result. Determinism is not guaranteed.
+ Default value is None.
+ :paramtype seed: int
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+ :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+ _params = kwargs.pop("params", {}) or {}
+ _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+ content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+ internal_response_format = _get_internal_response_format(response_format)
+
+ if body is _Unset:
+ if messages is _Unset:
+ raise TypeError("missing required argument: messages")
+ body = {
+ "messages": messages,
+ "stream": stream,
+ "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty,
+ "max_tokens": max_tokens if max_tokens is not None else self._max_tokens,
+ "model": model if model is not None else self._model,
+ "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty,
+ "response_format": (
+ internal_response_format if internal_response_format is not None else self._internal_response_format
+ ),
+ "seed": seed if seed is not None else self._seed,
+ "stop": stop if stop is not None else self._stop,
+ "temperature": temperature if temperature is not None else self._temperature,
+ "tool_choice": tool_choice if tool_choice is not None else self._tool_choice,
+ "tools": tools if tools is not None else self._tools,
+ "top_p": top_p if top_p is not None else self._top_p,
+ }
+ if model_extras is not None and bool(model_extras):
+ body.update(model_extras)
+ _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access
+ elif self._model_extras is not None and bool(self._model_extras):
+ body.update(self._model_extras)
+ _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access
+ body = {k: v for k, v in body.items() if v is not None}
+ elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool):
+ stream = body["stream"]
+ content_type = content_type or "application/json"
+ _content = None
+ if isinstance(body, (IOBase, bytes)):
+ _content = body
+ else:
+ _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
+
+ _request = build_chat_completions_complete_request(
+ extra_params=_extra_parameters,
+ content_type=content_type,
+ api_version=self._config.api_version,
+ content=_content,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = stream or False
+ pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ response.read() # Load the body in memory and close the socket
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ return _models.StreamingChatCompletions(response)
+
+ return _deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access
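+    # A minimal streaming sketch (client construction as above); each update may carry an
+    # empty `choices` list, so it is checked before reading the delta:
+    #
+    #     response = client.complete(
+    #         messages=[UserMessage("Write a haiku about the sea")],
+    #         stream=True,
+    #     )
+    #     for update in response:
+    #         if update.choices:
+    #             print(update.choices[0].delta.content or "", end="")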
+
+ @distributed_trace
+ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+ # pylint: disable=line-too-long
+ """Returns information about the AI model.
+ The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with Serverless API or Managed Compute endpoints.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+ :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ModelInfo
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ if not self._model_info:
+ try:
+ self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
+ except ResourceNotFoundError as error:
+ error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+ raise error
+
+ return self._model_info
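+    # For example (the result is cached, so only the first call hits the `/info` route):
+    #
+    #     info = client.get_model_info()
+    #     print(info.model_name, info.model_provider_name, info.model_type)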
+
+ def __str__(self) -> str:
+ # pylint: disable=client-method-name-no-double-underscore
+ return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()
+
+
+class EmbeddingsClient(EmbeddingsClientGenerated):
+ """EmbeddingsClient.
+
+ :param endpoint: Service endpoint URL for AI model inference. Required.
+ :type endpoint: str
+ :param credential: Credential used to authenticate requests to the service. Is either a
+ AzureKeyCredential type or a TokenCredential type. Required.
+ :type credential: ~azure.core.credentials.AzureKeyCredential or
+ ~azure.core.credentials.TokenCredential
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have. Default value is None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings.
+ Known values are:
+ "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :keyword api_version: The API version to use for this operation. Default value is
+ "2024-05-01-preview". Note that overriding this default value may result in unsupported
+ behavior.
+ :paramtype api_version: str
+ """
+
+ def __init__(
+ self,
+ endpoint: str,
+ credential: Union[AzureKeyCredential, "TokenCredential"],
+ *,
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> None:
+
+ self._model_info: Optional[_models.ModelInfo] = None
+
+ # Store default embeddings settings, to be applied in all future service calls
+ # unless overridden by arguments in the `embed` method.
+ self._dimensions = dimensions
+ self._encoding_format = encoding_format
+ self._input_type = input_type
+ self._model = model
+ self._model_extras = model_extras
+
+ # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+ # 1. "Authorization: Bearer <key>"
+ # 2. "api-key: <key>"
+ # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+ # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+ # The first header will be taken care of by auto-generated code.
+ # The second one is added here.
+ if isinstance(credential, AzureKeyCredential):
+ headers = kwargs.pop("headers", {})
+ if "api-key" not in headers:
+ headers["api-key"] = credential.key
+ kwargs["headers"] = headers
+
+ super().__init__(endpoint, credential, **kwargs)
+
+ @overload
+ def embed(
+ self,
+ *,
+ input: List[str],
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given text prompts.
+ The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+ :keyword input: Input text to embed, encoded as a string or array of tokens.
+ To embed multiple inputs in a single request, pass an array
+ of strings or array of token arrays. Required.
+ :paramtype input: list[str]
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have. Default value is None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings.
+ Known values are:
+ "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ @overload
+ def embed(
+ self,
+ body: JSON,
+ *,
+ content_type: str = "application/json",
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given text prompts.
+ The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+ :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+ specifies the full request payload. Required.
+ :type body: JSON
+ :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+ Default value is "application/json".
+ :paramtype content_type: str
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ @overload
+ def embed(
+ self,
+ body: IO[bytes],
+ *,
+ content_type: str = "application/json",
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given text prompts.
+ The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+ :param body: Specifies the full request payload. Required.
+ :type body: IO[bytes]
+ :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+ Default value is "application/json".
+ :paramtype content_type: str
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ @distributed_trace
+ def embed(
+ self,
+ body: Union[JSON, IO[bytes]] = _Unset,
+ *,
+ input: List[str] = _Unset,
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ # pylint: disable=line-too-long
+ """Return the embedding vectors for given text prompts.
+ The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+ that specifies the full request payload. Required.
+ :type body: JSON or IO[bytes]
+ :keyword input: Input text to embed, encoded as a string or array of tokens.
+ To embed multiple inputs in a single request, pass an array
+ of strings or array of token arrays. Required.
+ :paramtype input: list[str]
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have. Default value is None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings.
+ Known values are:
+ "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping[int, Type[HttpResponseError]] = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+ _params = kwargs.pop("params", {}) or {}
+ _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+ content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+ if body is _Unset:
+ if input is _Unset:
+ raise TypeError("missing required argument: input")
+ body = {
+ "input": input,
+ "dimensions": dimensions if dimensions is not None else self._dimensions,
+ "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+ "input_type": input_type if input_type is not None else self._input_type,
+ "model": model if model is not None else self._model,
+ }
+ if model_extras is not None and bool(model_extras):
+ body.update(model_extras)
+ _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access
+ elif self._model_extras is not None and bool(self._model_extras):
+ body.update(self._model_extras)
+ _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access
+ body = {k: v for k, v in body.items() if v is not None}
+ content_type = content_type or "application/json"
+ _content = None
+ if isinstance(body, (IOBase, bytes)):
+ _content = body
+ else:
+ _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
+
+ _request = build_embeddings_embed_request(
+ extra_params=_extra_parameters,
+ content_type=content_type,
+ api_version=self._config.api_version,
+ content=_content,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ response.read() # Load the body in memory and close the socket
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(
+ _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access
+ )
+
+ return deserialized # type: ignore
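+    # A minimal sketch (endpoint and key are placeholders):
+    #
+    #     client = EmbeddingsClient("https://<endpoint>", AzureKeyCredential("<api-key>"))
+    #     result = client.embed(input=["first phrase", "second phrase"])
+    #     for item in result.data:
+    #         print(f"index={item.index}, vector length={len(item.embedding)}")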
+
+ @distributed_trace
+ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+ # pylint: disable=line-too-long
+ """Returns information about the AI model.
+ The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with Serverless API or Managed Compute endpoints.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+ :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ModelInfo
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ if not self._model_info:
+ try:
+ self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
+ except ResourceNotFoundError as error:
+ error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+ raise error
+
+ return self._model_info
+
+ def __str__(self) -> str:
+ # pylint: disable=client-method-name-no-double-underscore
+ return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()
+
+
+class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated):
+ """ImageEmbeddingsClient.
+
+ :param endpoint: Service endpoint URL for AI model inference. Required.
+ :type endpoint: str
+ :param credential: Credential used to authenticate requests to the service. Is either a
+ AzureKeyCredential type or a TokenCredential type. Required.
+ :type credential: ~azure.core.credentials.AzureKeyCredential or
+ ~azure.core.credentials.TokenCredential
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have. Default value is None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings.
+ Known values are:
+ "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :keyword api_version: The API version to use for this operation. Default value is
+ "2024-05-01-preview". Note that overriding this default value may result in unsupported
+ behavior.
+ :paramtype api_version: str
+ """
+
+ def __init__(
+ self,
+ endpoint: str,
+ credential: Union[AzureKeyCredential, "TokenCredential"],
+ *,
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> None:
+
+ self._model_info: Optional[_models.ModelInfo] = None
+
+ # Store default embeddings settings, to be applied in all future service calls
+ # unless overridden by arguments in the `embed` method.
+ self._dimensions = dimensions
+ self._encoding_format = encoding_format
+ self._input_type = input_type
+ self._model = model
+ self._model_extras = model_extras
+
+ # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+ # 1. "Authorization: Bearer <key>"
+ # 2. "api-key: <key>"
+ # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+ # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+ # The first header will be taken care of by auto-generated code.
+ # The second one is added here.
+ if isinstance(credential, AzureKeyCredential):
+ headers = kwargs.pop("headers", {})
+ if "api-key" not in headers:
+ headers["api-key"] = credential.key
+ kwargs["headers"] = headers
+
+ super().__init__(endpoint, credential, **kwargs)
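+    # A minimal sketch, assuming the image is supplied to ImageEmbeddingInput as a base64
+    # data URL string on its `image` field (endpoint, key and file name are placeholders):
+    #
+    #     import base64
+    #     from azure.ai.inference.models import ImageEmbeddingInput
+    #
+    #     with open("sample.png", "rb") as f:
+    #         data = base64.b64encode(f.read()).decode("utf-8")
+    #     client = ImageEmbeddingsClient("https://<endpoint>", AzureKeyCredential("<api-key>"))
+    #     result = client.embed(input=[ImageEmbeddingInput(image=f"data:image/png;base64,{data}")])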
+
+ @overload
+ def embed(
+ self,
+ *,
+ input: List[_models.ImageEmbeddingInput],
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given images.
+ The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+ :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+ array.
+ The input must not exceed the max input tokens for the model. Required.
+ :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have. Default value is None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings.
+ Known values are:
+ "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+
+ @overload
+ def embed(
+ self,
+ body: JSON,
+ *,
+ content_type: str = "application/json",
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given images.
+ The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+ :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+ specifies the full request payload. Required.
+ :type body: JSON
+ :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+ Default value is "application/json".
+ :paramtype content_type: str
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
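+
+ Example (an illustrative sketch only; ``client`` is assumed to be a constructed
+ ``ImageEmbeddingsClient``, and the payload field names are assumed to mirror the keyword
+ arguments of the other ``embed`` overload):
+
+ .. code-block:: python
+
+ # "image" and "model" field names are assumptions for illustration; values are placeholders.
+ result = client.embed(body={"input": [{"image": "<base64-encoded image data URL>"}], "model": "<model-deployment-name>"})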
+ """
+
+ @overload
+ def embed(
+ self,
+ body: IO[bytes],
+ *,
+ content_type: str = "application/json",
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given images.
+ The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+ :param body: Specifies the full request payload. Required.
+ :type body: IO[bytes]
+ :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+ Default value is "application/json".
+ :paramtype content_type: str
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
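+
+ Example (an illustrative sketch only; ``client`` is assumed to be a constructed
+ ``ImageEmbeddingsClient``, and the file name is a placeholder for a file containing the same
+ JSON payload shown in the JSON overload above):
+
+ .. code-block:: python
+
+ # Hypothetical file holding the full request payload as JSON bytes.
+ result = client.embed(body=open("image_embeddings_request.json", "rb"))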
+ """
+
+ @distributed_trace
+ def embed(
+ self,
+ body: Union[JSON, IO[bytes]] = _Unset,
+ *,
+ input: List[_models.ImageEmbeddingInput] = _Unset,
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ model_extras: Optional[Dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> _models.EmbeddingsResult:
+ # pylint: disable=line-too-long
+ """Return the embedding vectors for given images.
+ The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+ :param body: Either a MutableMapping[str, Any] type (such as a dictionary) or an IO[bytes] type
+ that specifies the full request payload. Required.
+ :type body: JSON or IO[bytes]
+ :keyword input: Input images to embed. To embed multiple images in a single request, pass a
+ list. The input must not exceed the maximum input tokens for the model. Required.
+ :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have. Default value is None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings.
+ Known values are:
+ "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :keyword model_extras: Additional, model-specific parameters that are not in the
+ standard request payload. They will be added as-is to the root of the JSON in the request body.
+ How the service handles these extra parameters depends on the value of the
+ ``extra-parameters`` request header. Default value is None.
+ :paramtype model_extras: dict[str, Any]
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping[int, Type[HttpResponseError]] = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+ _params = kwargs.pop("params", {}) or {}
+ _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+ content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+ if body is _Unset:
+ if input is _Unset:
+ raise TypeError("missing required argument: input")
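+ # Build the JSON payload from the keyword arguments; per-call values take precedence over
+ # the client-level defaults captured in the constructor.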
+ body = {
+ "input": input,
+ "dimensions": dimensions if dimensions is not None else self._dimensions,
+ "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+ "input_type": input_type if input_type is not None else self._input_type,
+ "model": model if model is not None else self._model,
+ }
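+ # Merge model-specific extras into the root of the payload, and set the `extra-parameters`
+ # request header to "pass-through" so unknown parameters are forwarded rather than rejected.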
+ if model_extras is not None and bool(model_extras):
+ body.update(model_extras)
+ _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access
+ elif self._model_extras is not None and bool(self._model_extras):
+ body.update(self._model_extras)
+ _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access
+ body = {k: v for k, v in body.items() if v is not None}
+ content_type = content_type or "application/json"
+ _content = None
+ if isinstance(body, (IOBase, bytes)):
+ _content = body
+ else:
+ _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
+
+ _request = build_image_embeddings_embed_request(
+ extra_params=_extra_parameters,
+ content_type=content_type,
+ api_version=self._config.api_version,
+ content=_content,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
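+ # If the caller passed stream=True, the raw response bytes are returned further below
+ # instead of a deserialized EmbeddingsResult.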
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ response.read()  # Load the body into memory and close the socket
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(
+ _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access
+ )
+
+ return deserialized # type: ignore
+
+ @distributed_trace
+ def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+ # pylint: disable=line-too-long
+ """Returns information about the AI model.
+ The method makes a REST API call to the ``/info`` route on the given endpoint.
+ This method only works with Serverless API or Managed Compute endpoints.
+ It does not work with GitHub Models or Azure OpenAI endpoints.
+
+ :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ModelInfo
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ if not self._model_info:
+ try:
+ self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init
+ except ResourceNotFoundError as error:
+ error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+ raise error
+
+ return self._model_info
+
+ def __str__(self) -> str:
+ # pylint: disable=client-method-name-no-double-underscore
+ return (super().__str__() + f"\n{self._model_info}") if self._model_info else super().__str__()
+
+
+__all__: List[str] = [
+ "load_client",
+ "ChatCompletionsClient",
+ "EmbeddingsClient",
+ "ImageEmbeddingsClient",
+] # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+ """Do not remove from this file.
+
+ `patch_sdk` is a last resort escape hatch that allows you to do customizations
+ you can't accomplish using the techniques described in
+ https://aka.ms/azsdk/python/dpcodegen/python/customize
+ """