path: root/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py  781
1 file changed, 781 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
new file mode 100644
index 00000000..62ec772f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
@@ -0,0 +1,781 @@
+# pylint: disable=too-many-locals
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+from io import IOBase
+import json
+import sys
+from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload
+
+from azure.core.exceptions import (
+ ClientAuthenticationError,
+ HttpResponseError,
+ ResourceExistsError,
+ ResourceNotFoundError,
+ ResourceNotModifiedError,
+ StreamClosedError,
+ StreamConsumedError,
+ map_error,
+)
+from azure.core.pipeline import PipelineResponse
+from azure.core.rest import AsyncHttpResponse, HttpRequest
+from azure.core.tracing.decorator_async import distributed_trace_async
+from azure.core.utils import case_insensitive_dict
+
+from ... import models as _models
+from ..._model_base import SdkJSONEncoder, _deserialize
+from ..._operations._operations import (
+ build_chat_completions_complete_request,
+ build_chat_completions_get_model_info_request,
+ build_embeddings_embed_request,
+ build_embeddings_get_model_info_request,
+ build_image_embeddings_embed_request,
+ build_image_embeddings_get_model_info_request,
+)
+from .._vendor import ChatCompletionsClientMixinABC, EmbeddingsClientMixinABC, ImageEmbeddingsClientMixinABC
+
+if sys.version_info >= (3, 9):
+ from collections.abc import MutableMapping
+else:
+ from typing import MutableMapping # type: ignore
+JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+T = TypeVar("T")
+ClsType = Optional[Callable[[PipelineResponse[HttpRequest, AsyncHttpResponse], T, Dict[str, Any]], Any]]
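+
+# Editorial note (illustrative, not part of the generated code): `cls` (typed as ClsType above) is
+# the standard azure-core response hook. Every operation in this module accepts an optional
+# callable `cls(pipeline_response, deserialized, response_headers)` and returns its result instead
+# of the deserialized model, e.g. to surface the raw HTTP response alongside the typed result:
+#
+#     result, raw = await client._complete(
+#         messages=messages,
+#         cls=lambda pipeline_response, deserialized, _: (deserialized, pipeline_response.http_response),
+#     )
+#
+# `_Unset` is a sentinel used to distinguish "argument not provided" from an explicit None.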
+
+
+class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):
+
+ @overload
+ async def _complete(
+ self,
+ *,
+ messages: List[_models._models.ChatRequestMessage],
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ frequency_penalty: Optional[float] = None,
+ stream_parameter: Optional[bool] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ **kwargs: Any
+ ) -> _models.ChatCompletions: ...
+ @overload
+ async def _complete(
+ self,
+ body: JSON,
+ *,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ **kwargs: Any
+ ) -> _models.ChatCompletions: ...
+ @overload
+ async def _complete(
+ self,
+ body: IO[bytes],
+ *,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ **kwargs: Any
+ ) -> _models.ChatCompletions: ...
+
+ @distributed_trace_async
+ async def _complete(
+ self,
+ body: Union[JSON, IO[bytes]] = _Unset,
+ *,
+ messages: List[_models._models.ChatRequestMessage] = _Unset,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ frequency_penalty: Optional[float] = None,
+ stream_parameter: Optional[bool] = None,
+ presence_penalty: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ max_tokens: Optional[int] = None,
+ response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+ stop: Optional[List[str]] = None,
+ tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+ tool_choice: Optional[
+ Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+ ] = None,
+ seed: Optional[int] = None,
+ model: Optional[str] = None,
+ **kwargs: Any
+ ) -> _models.ChatCompletions:
+ """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. The method makes a REST API call to the
+        ``/chat/completions`` route on the given endpoint.
+
+ :param body: Is either a JSON type or a IO[bytes] type. Required.
+ :type body: JSON or IO[bytes]
+ :keyword messages: The collection of context messages associated with this chat completions
+ request.
+ Typical usage begins with a chat message for the System role that provides instructions for
+ the behavior of the assistant, followed by alternating messages between the User and
+ Assistant roles. Required.
+ :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+ :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+ are passed in the JSON request payload.
+ This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+ "pass-through". Default value is None.
+ :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+ :keyword frequency_penalty: A value that influences the probability of generated tokens
+ appearing based on their cumulative
+ frequency in generated text.
+ Positive values will make tokens less likely to appear as their frequency increases and
+ decrease the likelihood of the model repeating the same statements verbatim.
+ Supported range is [-2, 2]. Default value is None.
+ :paramtype frequency_penalty: float
+ :keyword stream_parameter: A value indicating whether chat completions should be streamed for
+ this request. Default value is None.
+ :paramtype stream_parameter: bool
+ :keyword presence_penalty: A value that influences the probability of generated tokens
+ appearing based on their existing
+ presence in generated text.
+        Positive values will make tokens less likely to appear when they already exist and
+        increase the model's likelihood to output new topics.
+ Supported range is [-2, 2]. Default value is None.
+ :paramtype presence_penalty: float
+ :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+ generated completions.
+ Higher values will make output more random while lower values will make results more focused
+ and deterministic.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1]. Default value is None.
+ :paramtype temperature: float
+ :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+ causes the
+ model to consider the results of tokens with the provided probability mass. As an example, a
+ value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ considered.
+ It is not recommended to modify temperature and top_p for the same completions request as the
+ interaction of these two settings is difficult to predict.
+ Supported range is [0, 1]. Default value is None.
+ :paramtype top_p: float
+ :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+ :paramtype max_tokens: int
+ :keyword response_format: An object specifying the format that the model must output.
+
+ Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs
+ which ensures the model will match your supplied JSON schema.
+
+ Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the
+ model generates is valid JSON.
+
+ **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
+ yourself via a system or user message. Without this, the model may generate an unending stream
+ of whitespace until the generation reaches the token limit, resulting in a long-running and
+ seemingly "stuck" request. Also note that the message content may be partially cut off if
+ ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
+ conversation exceeded the max context length. Default value is None.
+ :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
+ :keyword stop: A collection of textual sequences that will end completions generation. Default
+ value is None.
+ :paramtype stop: list[str]
+ :keyword tools: A list of tools the model may request to call. Currently, only functions are
+ supported as a tool. The model
+         may respond with a function call request and provide the input arguments in JSON format for
+ that function. Default value is None.
+ :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, configures which of the provided tools the model may
+         use for the chat completions response. Is either a Union[str,
+ "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+ Default value is None.
+ :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+ ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+ :keyword seed: If specified, the system will make a best effort to sample deterministically
+ such that repeated requests with the
+ same seed and parameters should return the same result. Determinism is not guaranteed. Default
+ value is None.
+ :paramtype seed: int
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :return: ChatCompletions. The ChatCompletions is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ChatCompletions
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+ _params = kwargs.pop("params", {}) or {}
+
+ content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+ cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None)
+
+ if body is _Unset:
+ if messages is _Unset:
+ raise TypeError("missing required argument: messages")
+ body = {
+ "frequency_penalty": frequency_penalty,
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "presence_penalty": presence_penalty,
+ "response_format": response_format,
+ "seed": seed,
+ "stop": stop,
+ "stream": stream_parameter,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_p": top_p,
+ }
+ body = {k: v for k, v in body.items() if v is not None}
+ content_type = content_type or "application/json"
+ _content = None
+ if isinstance(body, (IOBase, bytes)):
+ _content = body
+ else:
+ _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
+
+ _request = build_chat_completions_complete_request(
+ extra_params=extra_params,
+ content_type=content_type,
+ api_version=self._config.api_version,
+ content=_content,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ try:
+ await response.read() # Load the body in memory and close the socket
+ except (StreamConsumedError, StreamClosedError):
+ pass
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(_models.ChatCompletions, response.json())
+
+ if cls:
+ return cls(pipeline_response, deserialized, {}) # type: ignore
+
+ return deserialized # type: ignore
+
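+    # Editorial note (illustrative, not generated code): a minimal sketch of how the public async
+    # ChatCompletionsClient, whose `complete` method delegates to the internal `_complete` above,
+    # might be used. The endpoint URL and API key are placeholders; error handling is omitted.
+    #
+    #     from azure.core.credentials import AzureKeyCredential
+    #     from azure.ai.inference.aio import ChatCompletionsClient
+    #     from azure.ai.inference.models import SystemMessage, UserMessage
+    #
+    #     async def ask(question: str) -> str:
+    #         client = ChatCompletionsClient(
+    #             endpoint="https://<your-endpoint>",               # placeholder
+    #             credential=AzureKeyCredential("<your-api-key>"),  # placeholder
+    #         )
+    #         async with client:
+    #             response = await client.complete(
+    #                 messages=[
+    #                     SystemMessage(content="You are a helpful assistant."),
+    #                     UserMessage(content=question),
+    #                 ],
+    #                 temperature=0.7,
+    #                 max_tokens=256,
+    #             )
+    #         return response.choices[0].message.content
+    #
+    # Passing stream=True to the internal method returns the raw byte stream
+    # (response.iter_bytes() above); the public client wraps it into typed streaming updates.
+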
+ @distributed_trace_async
+ async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+ """Returns information about the AI model.
+ The method makes a REST API call to the ``/info`` route on the given endpoint.
+ This method will only work when using Serverless API or Managed Compute endpoint.
+ It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+ :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ModelInfo
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = kwargs.pop("headers", {}) or {}
+ _params = kwargs.pop("params", {}) or {}
+
+ cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+ _request = build_chat_completions_get_model_info_request(
+ api_version=self._config.api_version,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ try:
+ await response.read() # Load the body in memory and close the socket
+ except (StreamConsumedError, StreamClosedError):
+ pass
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(_models.ModelInfo, response.json())
+
+ if cls:
+ return cls(pipeline_response, deserialized, {}) # type: ignore
+
+ return deserialized # type: ignore
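+
+    # Editorial note (illustrative, not generated code): a brief sketch of querying model metadata
+    # through the public `get_model_info` method, which delegates to `_get_model_info` above. As
+    # the docstring notes, this only works for Serverless API or Managed Compute endpoints.
+    # Client construction as in the sketch following `_complete` above.
+    #
+    #     info = await client.get_model_info()
+    #     print(info.model_name, info.model_type, info.model_provider_name)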
+
+
+class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
+
+ @overload
+ async def _embed(
+ self,
+ *,
+ input: List[str],
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult: ...
+ @overload
+ async def _embed(
+ self,
+ body: JSON,
+ *,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult: ...
+ @overload
+ async def _embed(
+ self,
+ body: IO[bytes],
+ *,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult: ...
+
+ @distributed_trace_async
+ async def _embed(
+ self,
+ body: Union[JSON, IO[bytes]] = _Unset,
+ *,
+ input: List[str] = _Unset,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given text prompts.
+ The method makes a REST API call to the ``/embeddings`` route on the given endpoint.
+
+ :param body: Is either a JSON type or a IO[bytes] type. Required.
+ :type body: JSON or IO[bytes]
+ :keyword input: Input text to embed, encoded as a string or array of tokens.
+ To embed multiple inputs in a single request, pass an array
+ of strings or array of token arrays. Required.
+ :paramtype input: list[str]
+ :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+ are passed in the JSON request payload.
+ This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+ "pass-through". Default value is None.
+ :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have.
+ Passing null causes the model to use its default value.
+ Returns a 422 error if the model doesn't support the value or parameter. Default value is
+ None.
+ :paramtype dimensions: int
+ :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
+ values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input.
+ Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+ _params = kwargs.pop("params", {}) or {}
+
+ content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+ cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+ if body is _Unset:
+ if input is _Unset:
+ raise TypeError("missing required argument: input")
+ body = {
+ "dimensions": dimensions,
+ "encoding_format": encoding_format,
+ "input": input,
+ "input_type": input_type,
+ "model": model,
+ }
+ body = {k: v for k, v in body.items() if v is not None}
+ content_type = content_type or "application/json"
+ _content = None
+ if isinstance(body, (IOBase, bytes)):
+ _content = body
+ else:
+ _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
+
+ _request = build_embeddings_embed_request(
+ extra_params=extra_params,
+ content_type=content_type,
+ api_version=self._config.api_version,
+ content=_content,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ try:
+ await response.read() # Load the body in memory and close the socket
+ except (StreamConsumedError, StreamClosedError):
+ pass
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+ if cls:
+ return cls(pipeline_response, deserialized, {}) # type: ignore
+
+ return deserialized # type: ignore
+
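+    # Editorial note (illustrative, not generated code): a minimal sketch of text embeddings via
+    # the public async EmbeddingsClient, whose `embed` method delegates to `_embed` above. The
+    # endpoint URL and API key are placeholders; the default (float) encoding format is assumed.
+    #
+    #     from azure.core.credentials import AzureKeyCredential
+    #     from azure.ai.inference.aio import EmbeddingsClient
+    #
+    #     async def embed_texts(texts):
+    #         client = EmbeddingsClient(
+    #             endpoint="https://<your-endpoint>",               # placeholder
+    #             credential=AzureKeyCredential("<your-api-key>"),  # placeholder
+    #         )
+    #         async with client:
+    #             result = await client.embed(input=texts)
+    #         return [item.embedding for item in result.data]
+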
+ @distributed_trace_async
+ async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+ """Returns information about the AI model.
+ The method makes a REST API call to the ``/info`` route on the given endpoint.
+ This method will only work when using Serverless API or Managed Compute endpoint.
+ It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+ :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ModelInfo
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = kwargs.pop("headers", {}) or {}
+ _params = kwargs.pop("params", {}) or {}
+
+ cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+ _request = build_embeddings_get_model_info_request(
+ api_version=self._config.api_version,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ try:
+ await response.read() # Load the body in memory and close the socket
+ except (StreamConsumedError, StreamClosedError):
+ pass
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(_models.ModelInfo, response.json())
+
+ if cls:
+ return cls(pipeline_response, deserialized, {}) # type: ignore
+
+ return deserialized # type: ignore
+
+
+class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
+
+ @overload
+ async def _embed(
+ self,
+ *,
+ input: List[_models.ImageEmbeddingInput],
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult: ...
+ @overload
+ async def _embed(
+ self,
+ body: JSON,
+ *,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult: ...
+ @overload
+ async def _embed(
+ self,
+ body: IO[bytes],
+ *,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ content_type: str = "application/json",
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult: ...
+
+ @distributed_trace_async
+ async def _embed(
+ self,
+ body: Union[JSON, IO[bytes]] = _Unset,
+ *,
+ input: List[_models.ImageEmbeddingInput] = _Unset,
+ extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+ dimensions: Optional[int] = None,
+ encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+ input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+ model: Optional[str] = None,
+ **kwargs: Any
+ ) -> _models.EmbeddingsResult:
+ """Return the embedding vectors for given images.
+ The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+ :param body: Is either a JSON type or a IO[bytes] type. Required.
+ :type body: JSON or IO[bytes]
+ :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+ array.
+ The input must not exceed the max input tokens for the model. Required.
+ :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+ :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+ are passed in the JSON request payload.
+ This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+ "pass-through". Default value is None.
+ :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+ :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+ have.
+ Passing null causes the model to use its default value.
+ Returns a 422 error if the model doesn't support the value or parameter. Default value is
+ None.
+ :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+ :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+ :keyword input_type: Optional. The type of the input.
+ Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+ "text", "query", and "document". Default value is None.
+ :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+ :keyword model: ID of the specific AI model to use, if more than one model is available on the
+ endpoint. Default value is None.
+ :paramtype model: str
+ :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.EmbeddingsResult
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+ _params = kwargs.pop("params", {}) or {}
+
+ content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+ cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+ if body is _Unset:
+ if input is _Unset:
+ raise TypeError("missing required argument: input")
+ body = {
+ "dimensions": dimensions,
+ "encoding_format": encoding_format,
+ "input": input,
+ "input_type": input_type,
+ "model": model,
+ }
+ body = {k: v for k, v in body.items() if v is not None}
+ content_type = content_type or "application/json"
+ _content = None
+ if isinstance(body, (IOBase, bytes)):
+ _content = body
+ else:
+ _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore
+
+ _request = build_image_embeddings_embed_request(
+ extra_params=extra_params,
+ content_type=content_type,
+ api_version=self._config.api_version,
+ content=_content,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ try:
+ await response.read() # Load the body in memory and close the socket
+ except (StreamConsumedError, StreamClosedError):
+ pass
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+ if cls:
+ return cls(pipeline_response, deserialized, {}) # type: ignore
+
+ return deserialized # type: ignore
+
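+    # Editorial note (illustrative, not generated code): a minimal sketch of image embeddings via
+    # the public async ImageEmbeddingsClient, whose `embed` method delegates to `_embed` above.
+    # ImageEmbeddingInput.load reads and base64-encodes a local image file; the file path,
+    # endpoint URL, and API key are placeholders.
+    #
+    #     from azure.core.credentials import AzureKeyCredential
+    #     from azure.ai.inference.aio import ImageEmbeddingsClient
+    #     from azure.ai.inference.models import ImageEmbeddingInput
+    #
+    #     async def embed_image(path: str):
+    #         client = ImageEmbeddingsClient(
+    #             endpoint="https://<your-endpoint>",               # placeholder
+    #             credential=AzureKeyCredential("<your-api-key>"),  # placeholder
+    #         )
+    #         async with client:
+    #             result = await client.embed(
+    #                 input=[ImageEmbeddingInput.load(image_file=path, image_format="png")]
+    #             )
+    #         return result.data[0].embedding
+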
+ @distributed_trace_async
+ async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+ """Returns information about the AI model.
+ The method makes a REST API call to the ``/info`` route on the given endpoint.
+ This method will only work when using Serverless API or Managed Compute endpoint.
+ It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+ :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+ :rtype: ~azure.ai.inference.models.ModelInfo
+ :raises ~azure.core.exceptions.HttpResponseError:
+ """
+ error_map: MutableMapping = {
+ 401: ClientAuthenticationError,
+ 404: ResourceNotFoundError,
+ 409: ResourceExistsError,
+ 304: ResourceNotModifiedError,
+ }
+ error_map.update(kwargs.pop("error_map", {}) or {})
+
+ _headers = kwargs.pop("headers", {}) or {}
+ _params = kwargs.pop("params", {}) or {}
+
+ cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+ _request = build_image_embeddings_get_model_info_request(
+ api_version=self._config.api_version,
+ headers=_headers,
+ params=_params,
+ )
+ path_format_arguments = {
+ "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+ }
+ _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+ _stream = kwargs.pop("stream", False)
+ pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access
+ _request, stream=_stream, **kwargs
+ )
+
+ response = pipeline_response.http_response
+
+ if response.status_code not in [200]:
+ if _stream:
+ try:
+ await response.read() # Load the body in memory and close the socket
+ except (StreamConsumedError, StreamClosedError):
+ pass
+ map_error(status_code=response.status_code, response=response, error_map=error_map)
+ raise HttpResponseError(response=response)
+
+ if _stream:
+ deserialized = response.iter_bytes()
+ else:
+ deserialized = _deserialize(_models.ModelInfo, response.json())
+
+ if cls:
+ return cls(pipeline_response, deserialized, {}) # type: ignore
+
+ return deserialized # type: ignore