Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/inference/aio')
8 files changed, 2718 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py new file mode 100644 index 00000000..668f989a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py @@ -0,0 +1,33 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- +# pylint: disable=wrong-import-position + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._patch import * # pylint: disable=unused-wildcard-import + +from ._client import ChatCompletionsClient # type: ignore +from ._client import EmbeddingsClient # type: ignore +from ._client import ImageEmbeddingsClient # type: ignore + +try: + from ._patch import __all__ as _patch_all + from ._patch import * +except ImportError: + _patch_all = [] +from ._patch import patch_sdk as _patch_sdk + +__all__ = [ + "ChatCompletionsClient", + "EmbeddingsClient", + "ImageEmbeddingsClient", +] +__all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore + +_patch_sdk() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py new file mode 100644 index 00000000..88e6773b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py @@ -0,0 +1,280 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- + +from copy import deepcopy +from typing import Any, Awaitable, TYPE_CHECKING, Union +from typing_extensions import Self + +from azure.core import AsyncPipelineClient +from azure.core.credentials import AzureKeyCredential +from azure.core.pipeline import policies +from azure.core.rest import AsyncHttpResponse, HttpRequest + +from .._serialization import Deserializer, Serializer +from ._configuration import ( + ChatCompletionsClientConfiguration, + EmbeddingsClientConfiguration, + ImageEmbeddingsClientConfiguration, +) +from ._operations import ( + ChatCompletionsClientOperationsMixin, + EmbeddingsClientOperationsMixin, + ImageEmbeddingsClientOperationsMixin, +) + +if TYPE_CHECKING: + from azure.core.credentials_async import AsyncTokenCredential + + +class ChatCompletionsClient(ChatCompletionsClientOperationsMixin): + """ChatCompletionsClient. + + :param endpoint: Service host. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword api_version: The API version to use for this operation. 
Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. + :paramtype api_version: str + """ + + def __init__( + self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any + ) -> None: + _endpoint = "{endpoint}" + self._config = ChatCompletionsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs) + _policies = kwargs.pop("policies", None) + if _policies is None: + _policies = [ + policies.RequestIdPolicy(**kwargs), + self._config.headers_policy, + self._config.user_agent_policy, + self._config.proxy_policy, + policies.ContentDecodePolicy(**kwargs), + self._config.redirect_policy, + self._config.retry_policy, + self._config.authentication_policy, + self._config.custom_hook_policy, + self._config.logging_policy, + policies.DistributedTracingPolicy(**kwargs), + policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None, + self._config.http_logging_policy, + ] + self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs) + + self._serialize = Serializer() + self._deserialize = Deserializer() + self._serialize.client_side_validation = False + + def send_request( + self, request: HttpRequest, *, stream: bool = False, **kwargs: Any + ) -> Awaitable[AsyncHttpResponse]: + """Runs the network request through the client's chained policies. + + >>> from azure.core.rest import HttpRequest + >>> request = HttpRequest("GET", "https://www.example.org/") + <HttpRequest [GET], url: 'https://www.example.org/'> + >>> response = await client.send_request(request) + <AsyncHttpResponse: 200 OK> + + For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request + + :param request: The network request you want to make. Required. + :type request: ~azure.core.rest.HttpRequest + :keyword bool stream: Whether the response payload will be streamed. Defaults to False. + :return: The response of your network call. Does not do error handling on your response. + :rtype: ~azure.core.rest.AsyncHttpResponse + """ + + request_copy = deepcopy(request) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + + request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments) + return self._client.send_request(request_copy, stream=stream, **kwargs) # type: ignore + + async def close(self) -> None: + await self._client.close() + + async def __aenter__(self) -> Self: + await self._client.__aenter__() + return self + + async def __aexit__(self, *exc_details: Any) -> None: + await self._client.__aexit__(*exc_details) + + +class EmbeddingsClient(EmbeddingsClientOperationsMixin): + """EmbeddingsClient. + + :param endpoint: Service host. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. 
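# A minimal sketch of driving the ChatCompletionsClient defined above,
# assuming a hypothetical endpoint URL and API key (both placeholders).
# send_request is the raw escape hatch shown in its docstring; relative URLs
# are resolved against the formatted "{endpoint}" base.
import asyncio
from azure.ai.inference.aio import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
from azure.core.rest import HttpRequest

async def main() -> None:
    async with ChatCompletionsClient(
        endpoint="https://example.inference.ai.azure.com",  # placeholder
        credential=AzureKeyCredential("my-api-key"),        # placeholder
    ) as client:
        request = HttpRequest("GET", "/info")  # routed through the pipeline policies
        response = await client.send_request(request)
        print(response.status_code)  # send_request does no error handling for you

asyncio.run(main())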
+ :paramtype api_version: str + """ + + def __init__( + self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any + ) -> None: + _endpoint = "{endpoint}" + self._config = EmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs) + _policies = kwargs.pop("policies", None) + if _policies is None: + _policies = [ + policies.RequestIdPolicy(**kwargs), + self._config.headers_policy, + self._config.user_agent_policy, + self._config.proxy_policy, + policies.ContentDecodePolicy(**kwargs), + self._config.redirect_policy, + self._config.retry_policy, + self._config.authentication_policy, + self._config.custom_hook_policy, + self._config.logging_policy, + policies.DistributedTracingPolicy(**kwargs), + policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None, + self._config.http_logging_policy, + ] + self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs) + + self._serialize = Serializer() + self._deserialize = Deserializer() + self._serialize.client_side_validation = False + + def send_request( + self, request: HttpRequest, *, stream: bool = False, **kwargs: Any + ) -> Awaitable[AsyncHttpResponse]: + """Runs the network request through the client's chained policies. + + >>> from azure.core.rest import HttpRequest + >>> request = HttpRequest("GET", "https://www.example.org/") + <HttpRequest [GET], url: 'https://www.example.org/'> + >>> response = await client.send_request(request) + <AsyncHttpResponse: 200 OK> + + For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request + + :param request: The network request you want to make. Required. + :type request: ~azure.core.rest.HttpRequest + :keyword bool stream: Whether the response payload will be streamed. Defaults to False. + :return: The response of your network call. Does not do error handling on your response. + :rtype: ~azure.core.rest.AsyncHttpResponse + """ + + request_copy = deepcopy(request) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + + request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments) + return self._client.send_request(request_copy, stream=stream, **kwargs) # type: ignore + + async def close(self) -> None: + await self._client.close() + + async def __aenter__(self) -> Self: + await self._client.__aenter__() + return self + + async def __aexit__(self, *exc_details: Any) -> None: + await self._client.__aexit__(*exc_details) + + +class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin): + """ImageEmbeddingsClient. + + :param endpoint: Service host. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. 
+ :paramtype api_version: str + """ + + def __init__( + self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any + ) -> None: + _endpoint = "{endpoint}" + self._config = ImageEmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs) + _policies = kwargs.pop("policies", None) + if _policies is None: + _policies = [ + policies.RequestIdPolicy(**kwargs), + self._config.headers_policy, + self._config.user_agent_policy, + self._config.proxy_policy, + policies.ContentDecodePolicy(**kwargs), + self._config.redirect_policy, + self._config.retry_policy, + self._config.authentication_policy, + self._config.custom_hook_policy, + self._config.logging_policy, + policies.DistributedTracingPolicy(**kwargs), + policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None, + self._config.http_logging_policy, + ] + self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs) + + self._serialize = Serializer() + self._deserialize = Deserializer() + self._serialize.client_side_validation = False + + def send_request( + self, request: HttpRequest, *, stream: bool = False, **kwargs: Any + ) -> Awaitable[AsyncHttpResponse]: + """Runs the network request through the client's chained policies. + + >>> from azure.core.rest import HttpRequest + >>> request = HttpRequest("GET", "https://www.example.org/") + <HttpRequest [GET], url: 'https://www.example.org/'> + >>> response = await client.send_request(request) + <AsyncHttpResponse: 200 OK> + + For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request + + :param request: The network request you want to make. Required. + :type request: ~azure.core.rest.HttpRequest + :keyword bool stream: Whether the response payload will be streamed. Defaults to False. + :return: The response of your network call. Does not do error handling on your response. + :rtype: ~azure.core.rest.AsyncHttpResponse + """ + + request_copy = deepcopy(request) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + + request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments) + return self._client.send_request(request_copy, stream=stream, **kwargs) # type: ignore + + async def close(self) -> None: + await self._client.close() + + async def __aenter__(self) -> Self: + await self._client.__aenter__() + return self + + async def __aexit__(self, *exc_details: Any) -> None: + await self._client.__aexit__(*exc_details) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py new file mode 100644 index 00000000..f60e1125 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py @@ -0,0 +1,197 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. 
+# -------------------------------------------------------------------------- + +from typing import Any, TYPE_CHECKING, Union + +from azure.core.credentials import AzureKeyCredential +from azure.core.pipeline import policies + +from .._version import VERSION + +if TYPE_CHECKING: + from azure.core.credentials_async import AsyncTokenCredential + + +class ChatCompletionsClientConfiguration: # pylint: disable=too-many-instance-attributes + """Configuration for ChatCompletionsClient. + + Note that all parameters used to create this instance are saved as instance + attributes. + + :param endpoint: Service host. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. + :paramtype api_version: str + """ + + def __init__( + self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any + ) -> None: + api_version: str = kwargs.pop("api_version", "2024-05-01-preview") + + if endpoint is None: + raise ValueError("Parameter 'endpoint' must not be None.") + if credential is None: + raise ValueError("Parameter 'credential' must not be None.") + + self.endpoint = endpoint + self.credential = credential + self.api_version = api_version + self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"]) + kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION)) + self.polling_interval = kwargs.get("polling_interval", 30) + self._configure(**kwargs) + + def _infer_policy(self, **kwargs): + if isinstance(self.credential, AzureKeyCredential): + return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs) + if isinstance(self.credential, AzureKeyCredential): + return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs) + if hasattr(self.credential, "get_token"): + return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs) + raise TypeError(f"Unsupported credential: {self.credential}") + + def _configure(self, **kwargs: Any) -> None: + self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs) + self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs) + self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs) + self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs) + self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs) + self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs) + self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs) + self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs) + self.authentication_policy = kwargs.get("authentication_policy") + if self.credential and not self.authentication_policy: + self.authentication_policy = self._infer_policy(**kwargs) + + +class EmbeddingsClientConfiguration: # pylint: disable=too-many-instance-attributes + """Configuration for 
EmbeddingsClient. + + Note that all parameters used to create this instance are saved as instance + attributes. + + :param endpoint: Service host. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. + :paramtype api_version: str + """ + + def __init__( + self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any + ) -> None: + api_version: str = kwargs.pop("api_version", "2024-05-01-preview") + + if endpoint is None: + raise ValueError("Parameter 'endpoint' must not be None.") + if credential is None: + raise ValueError("Parameter 'credential' must not be None.") + + self.endpoint = endpoint + self.credential = credential + self.api_version = api_version + self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"]) + kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION)) + self.polling_interval = kwargs.get("polling_interval", 30) + self._configure(**kwargs) + + def _infer_policy(self, **kwargs): + if isinstance(self.credential, AzureKeyCredential): + return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs) + if isinstance(self.credential, AzureKeyCredential): + return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs) + if hasattr(self.credential, "get_token"): + return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs) + raise TypeError(f"Unsupported credential: {self.credential}") + + def _configure(self, **kwargs: Any) -> None: + self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs) + self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs) + self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs) + self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs) + self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs) + self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs) + self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs) + self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs) + self.authentication_policy = kwargs.get("authentication_policy") + if self.credential and not self.authentication_policy: + self.authentication_policy = self._infer_policy(**kwargs) + + +class ImageEmbeddingsClientConfiguration: # pylint: disable=too-many-instance-attributes + """Configuration for ImageEmbeddingsClient. + + Note that all parameters used to create this instance are saved as instance + attributes. + + :param endpoint: Service host. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. 
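# Each configuration class above infers the authentication policy the same
# way: an AzureKeyCredential is sent as a Bearer "Authorization" header, and
# any credential exposing get_token() is wrapped in
# AsyncBearerTokenCredentialPolicy with the https://ml.azure.com/.default
# scope. A sketch of overriding that default scope (endpoint is a
# placeholder; assumes the azure-identity package is installed):
from azure.ai.inference.aio import EmbeddingsClient
from azure.identity.aio import DefaultAzureCredential

client = EmbeddingsClient(
    endpoint="https://example.inference.ai.azure.com",  # placeholder
    credential=DefaultAzureCredential(),
    credential_scopes=["https://cognitiveservices.azure.com/.default"],
)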
+ :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. + :paramtype api_version: str + """ + + def __init__( + self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any + ) -> None: + api_version: str = kwargs.pop("api_version", "2024-05-01-preview") + + if endpoint is None: + raise ValueError("Parameter 'endpoint' must not be None.") + if credential is None: + raise ValueError("Parameter 'credential' must not be None.") + + self.endpoint = endpoint + self.credential = credential + self.api_version = api_version + self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"]) + kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION)) + self.polling_interval = kwargs.get("polling_interval", 30) + self._configure(**kwargs) + + def _infer_policy(self, **kwargs): + if isinstance(self.credential, AzureKeyCredential): + return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs) + if isinstance(self.credential, AzureKeyCredential): + return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs) + if hasattr(self.credential, "get_token"): + return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs) + raise TypeError(f"Unsupported credential: {self.credential}") + + def _configure(self, **kwargs: Any) -> None: + self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs) + self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs) + self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs) + self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs) + self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs) + self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs) + self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs) + self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs) + self.authentication_policy = kwargs.get("authentication_policy") + if self.credential and not self.authentication_policy: + self.authentication_policy = self._infer_policy(**kwargs) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py new file mode 100644 index 00000000..ab870887 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py @@ -0,0 +1,29 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. 
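# The _configure methods above also accept per-policy overrides as keyword
# arguments; anything not supplied falls back to the defaults built there.
# A sketch with placeholder endpoint/key values:
from azure.ai.inference.aio import ImageEmbeddingsClient
from azure.core.credentials import AzureKeyCredential
from azure.core.pipeline import policies

client = ImageEmbeddingsClient(
    endpoint="https://example.inference.ai.azure.com",  # placeholder
    credential=AzureKeyCredential("my-api-key"),        # placeholder
    retry_policy=policies.AsyncRetryPolicy(retry_total=3, retry_backoff_factor=0.5),
    logging_enable=True,  # picked up by NetworkTraceLoggingPolicy(**kwargs)
)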
+# -------------------------------------------------------------------------- +# pylint: disable=wrong-import-position + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._patch import * # pylint: disable=unused-wildcard-import + +from ._operations import ChatCompletionsClientOperationsMixin # type: ignore +from ._operations import EmbeddingsClientOperationsMixin # type: ignore +from ._operations import ImageEmbeddingsClientOperationsMixin # type: ignore + +from ._patch import __all__ as _patch_all +from ._patch import * +from ._patch import patch_sdk as _patch_sdk + +__all__ = [ + "ChatCompletionsClientOperationsMixin", + "EmbeddingsClientOperationsMixin", + "ImageEmbeddingsClientOperationsMixin", +] +__all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore +_patch_sdk() diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py new file mode 100644 index 00000000..62ec772f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py @@ -0,0 +1,781 @@ +# pylint: disable=too-many-locals +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- +from io import IOBase +import json +import sys +from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload + +from azure.core.exceptions import ( + ClientAuthenticationError, + HttpResponseError, + ResourceExistsError, + ResourceNotFoundError, + ResourceNotModifiedError, + StreamClosedError, + StreamConsumedError, + map_error, +) +from azure.core.pipeline import PipelineResponse +from azure.core.rest import AsyncHttpResponse, HttpRequest +from azure.core.tracing.decorator_async import distributed_trace_async +from azure.core.utils import case_insensitive_dict + +from ... 
import models as _models +from ..._model_base import SdkJSONEncoder, _deserialize +from ..._operations._operations import ( + build_chat_completions_complete_request, + build_chat_completions_get_model_info_request, + build_embeddings_embed_request, + build_embeddings_get_model_info_request, + build_image_embeddings_embed_request, + build_image_embeddings_get_model_info_request, +) +from .._vendor import ChatCompletionsClientMixinABC, EmbeddingsClientMixinABC, ImageEmbeddingsClientMixinABC + +if sys.version_info >= (3, 9): + from collections.abc import MutableMapping +else: + from typing import MutableMapping # type: ignore +JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object +_Unset: Any = object() +T = TypeVar("T") +ClsType = Optional[Callable[[PipelineResponse[HttpRequest, AsyncHttpResponse], T, Dict[str, Any]], Any]] + + +class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC): + + @overload + async def _complete( + self, + *, + messages: List[_models._models.ChatRequestMessage], + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + frequency_penalty: Optional[float] = None, + stream_parameter: Optional[bool] = None, + presence_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, + tool_choice: Optional[ + Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] + ] = None, + seed: Optional[int] = None, + model: Optional[str] = None, + **kwargs: Any + ) -> _models.ChatCompletions: ... + @overload + async def _complete( + self, + body: JSON, + *, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + **kwargs: Any + ) -> _models.ChatCompletions: ... + @overload + async def _complete( + self, + body: IO[bytes], + *, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + **kwargs: Any + ) -> _models.ChatCompletions: ... + + @distributed_trace_async + async def _complete( + self, + body: Union[JSON, IO[bytes]] = _Unset, + *, + messages: List[_models._models.ChatRequestMessage] = _Unset, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + frequency_penalty: Optional[float] = None, + stream_parameter: Optional[bool] = None, + presence_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, + tool_choice: Optional[ + Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] + ] = None, + seed: Optional[int] = None, + model: Optional[str] = None, + **kwargs: Any + ) -> _models.ChatCompletions: + """Gets chat completions for the provided chat messages. + Completions support a wide variety of tasks and generate text that continues from or + "completes" + provided prompt data. The method makes a REST API call to the ``/chat/completions`` route + on the given endpoint. + + :param body: Is either a JSON type or a IO[bytes] type. 
Required. + :type body: JSON or IO[bytes] + :keyword messages: The collection of context messages associated with this chat completions + request. + Typical usage begins with a chat message for the System role that provides instructions for + the behavior of the assistant, followed by alternating messages between the User and + Assistant roles. Required. + :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage] + :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, + are passed in the JSON request payload. + This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and + "pass-through". Default value is None. + :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters + :keyword frequency_penalty: A value that influences the probability of generated tokens + appearing based on their cumulative + frequency in generated text. + Positive values will make tokens less likely to appear as their frequency increases and + decrease the likelihood of the model repeating the same statements verbatim. + Supported range is [-2, 2]. Default value is None. + :paramtype frequency_penalty: float + :keyword stream_parameter: A value indicating whether chat completions should be streamed for + this request. Default value is None. + :paramtype stream_parameter: bool + :keyword presence_penalty: A value that influences the probability of generated tokens + appearing based on their existing + presence in generated text. + Positive values will make tokens less likely to appear when they already exist and increase + the + model's likelihood to output new topics. + Supported range is [-2, 2]. Default value is None. + :paramtype presence_penalty: float + :keyword temperature: The sampling temperature to use that controls the apparent creativity of + generated completions. + Higher values will make output more random while lower values will make results more focused + and deterministic. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. Default value is None. + :paramtype temperature: float + :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value + causes the + model to consider the results of tokens with the provided probability mass. As an example, a + value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be + considered. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. Default value is None. + :paramtype top_p: float + :keyword max_tokens: The maximum number of tokens to generate. Default value is None. + :paramtype max_tokens: int + :keyword response_format: An object specifying the format that the model must output. + + Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs + which ensures the model will match your supplied JSON schema. + + Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the + model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON + yourself via a system or user message. 
Without this, the model may generate an unending stream + of whitespace until the generation reaches the token limit, resulting in a long-running and + seemingly "stuck" request. Also note that the message content may be partially cut off if + ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the + conversation exceeded the max context length. Default value is None. + :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat + :keyword stop: A collection of textual sequences that will end completions generation. Default + value is None. + :paramtype stop: list[str] + :keyword tools: A list of tools the model may request to call. Currently, only functions are + supported as a tool. The model + may response with a function call request and provide the input arguments in JSON format for + that function. Default value is None. + :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] + :keyword tool_choice: If specified, the model will configure which of the provided tools it can + use for the chat completions response. Is either a Union[str, + "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. + Default value is None. + :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or + ~azure.ai.inference.models.ChatCompletionsNamedToolChoice + :keyword seed: If specified, the system will make a best effort to sample deterministically + such that repeated requests with the + same seed and parameters should return the same result. Determinism is not guaranteed. Default + value is None. + :paramtype seed: int + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :return: ChatCompletions. 
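# The private _complete documented above backs the public complete() method
# that _patch.py adds; a typical call exercises the same keyword surface.
# Sketch with placeholder endpoint/key:
from azure.ai.inference.aio import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

async def ask() -> str:
    async with ChatCompletionsClient(
        endpoint="https://example.inference.ai.azure.com",  # placeholder
        credential=AzureKeyCredential("my-api-key"),        # placeholder
    ) as client:
        result = await client.complete(
            messages=[
                SystemMessage(content="You are a terse assistant."),
                UserMessage(content="How many feet are in a mile?"),
            ],
            temperature=0.2,
            max_tokens=128,
            seed=42,  # best-effort determinism, as the docstring notes
        )
        return result.choices[0].message.content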
The ChatCompletions is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ChatCompletions + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) + cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None) + + if body is _Unset: + if messages is _Unset: + raise TypeError("missing required argument: messages") + body = { + "frequency_penalty": frequency_penalty, + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "presence_penalty": presence_penalty, + "response_format": response_format, + "seed": seed, + "stop": stop, + "stream": stream_parameter, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + } + body = {k: v for k, v in body.items() if v is not None} + content_type = content_type or "application/json" + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore + + _request = build_chat_completions_complete_request( + extra_params=extra_params, + content_type=content_type, + api_version=self._config.api_version, + content=_content, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + try: + await response.read() # Load the body in memory and close the socket + except (StreamConsumedError, StreamClosedError): + pass + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize(_models.ChatCompletions, response.json()) + + if cls: + return cls(pipeline_response, deserialized, {}) # type: ignore + + return deserialized # type: ignore + + @distributed_trace_async + async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo: + """Returns information about the AI model. + The method makes a REST API call to the ``/info`` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + + :return: ModelInfo. 
The ModelInfo is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ModelInfo + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None) + + _request = build_chat_completions_get_model_info_request( + api_version=self._config.api_version, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + try: + await response.read() # Load the body in memory and close the socket + except (StreamConsumedError, StreamClosedError): + pass + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize(_models.ModelInfo, response.json()) + + if cls: + return cls(pipeline_response, deserialized, {}) # type: ignore + + return deserialized # type: ignore + + +class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC): + + @overload + async def _embed( + self, + *, + input: List[str], + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + **kwargs: Any + ) -> _models.EmbeddingsResult: ... + @overload + async def _embed( + self, + body: JSON, + *, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + **kwargs: Any + ) -> _models.EmbeddingsResult: ... + @overload + async def _embed( + self, + body: IO[bytes], + *, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + **kwargs: Any + ) -> _models.EmbeddingsResult: ... + + @distributed_trace_async + async def _embed( + self, + body: Union[JSON, IO[bytes]] = _Unset, + *, + input: List[str] = _Unset, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + **kwargs: Any + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given text prompts. + The method makes a REST API call to the ``/embeddings`` route on the given endpoint. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :keyword input: Input text to embed, encoded as a string or array of tokens. 
+ To embed multiple inputs in a single request, pass an array + of strings or array of token arrays. Required. + :paramtype input: list[str] + :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, + are passed in the JSON request payload. + This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and + "pass-through". Default value is None. + :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. Default value is + None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. Known + values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) + cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None) + + if body is _Unset: + if input is _Unset: + raise TypeError("missing required argument: input") + body = { + "dimensions": dimensions, + "encoding_format": encoding_format, + "input": input, + "input_type": input_type, + "model": model, + } + body = {k: v for k, v in body.items() if v is not None} + content_type = content_type or "application/json" + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore + + _request = build_embeddings_embed_request( + extra_params=extra_params, + content_type=content_type, + api_version=self._config.api_version, + content=_content, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + try: + await response.read() # Load the body in memory and close the socket + except 
(StreamConsumedError, StreamClosedError): + pass + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize(_models.EmbeddingsResult, response.json()) + + if cls: + return cls(pipeline_response, deserialized, {}) # type: ignore + + return deserialized # type: ignore + + @distributed_trace_async + async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo: + """Returns information about the AI model. + The method makes a REST API call to the ``/info`` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + + :return: ModelInfo. The ModelInfo is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ModelInfo + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None) + + _request = build_embeddings_get_model_info_request( + api_version=self._config.api_version, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + try: + await response.read() # Load the body in memory and close the socket + except (StreamConsumedError, StreamClosedError): + pass + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize(_models.ModelInfo, response.json()) + + if cls: + return cls(pipeline_response, deserialized, {}) # type: ignore + + return deserialized # type: ignore + + +class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC): + + @overload + async def _embed( + self, + *, + input: List[_models.ImageEmbeddingInput], + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + **kwargs: Any + ) -> _models.EmbeddingsResult: ... + @overload + async def _embed( + self, + body: JSON, + *, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + **kwargs: Any + ) -> _models.EmbeddingsResult: ... 
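# Same shape as the chat client: the public embed() added in _patch.py wraps
# the private _embed defined in EmbeddingsClientOperationsMixin above.
# Sketch with placeholder endpoint/key:
from azure.ai.inference.aio import EmbeddingsClient
from azure.core.credentials import AzureKeyCredential

async def embed_texts() -> None:
    async with EmbeddingsClient(
        endpoint="https://example.inference.ai.azure.com",  # placeholder
        credential=AzureKeyCredential("my-api-key"),        # placeholder
    ) as client:
        result = await client.embed(input=["first phrase", "second phrase"])
        for item in result.data:
            print(item.index, len(item.embedding))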
+ @overload + async def _embed( + self, + body: IO[bytes], + *, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + content_type: str = "application/json", + **kwargs: Any + ) -> _models.EmbeddingsResult: ... + + @distributed_trace_async + async def _embed( + self, + body: Union[JSON, IO[bytes]] = _Unset, + *, + input: List[_models.ImageEmbeddingInput] = _Unset, + extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + **kwargs: Any + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given images. + The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an + array. + The input must not exceed the max input tokens for the model. Required. + :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] + :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, + are passed in the JSON request payload. + This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and + "pass-through". Default value is None. + :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. Default value is + None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The number of dimensions the resulting output embeddings + should have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :return: EmbeddingsResult. 
The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) + cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None) + + if body is _Unset: + if input is _Unset: + raise TypeError("missing required argument: input") + body = { + "dimensions": dimensions, + "encoding_format": encoding_format, + "input": input, + "input_type": input_type, + "model": model, + } + body = {k: v for k, v in body.items() if v is not None} + content_type = content_type or "application/json" + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore + + _request = build_image_embeddings_embed_request( + extra_params=extra_params, + content_type=content_type, + api_version=self._config.api_version, + content=_content, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + try: + await response.read() # Load the body in memory and close the socket + except (StreamConsumedError, StreamClosedError): + pass + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize(_models.EmbeddingsResult, response.json()) + + if cls: + return cls(pipeline_response, deserialized, {}) # type: ignore + + return deserialized # type: ignore + + @distributed_trace_async + async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo: + """Returns information about the AI model. + The method makes a REST API call to the ``/info`` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + + :return: ModelInfo. 
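# Image variant of the same pattern. ImageEmbeddingInput.load reads a local
# file and base64-encodes it for the request payload; the file name below is
# a placeholder. Sketch:
from azure.ai.inference.aio import ImageEmbeddingsClient
from azure.ai.inference.models import ImageEmbeddingInput
from azure.core.credentials import AzureKeyCredential

async def embed_image() -> None:
    async with ImageEmbeddingsClient(
        endpoint="https://example.inference.ai.azure.com",  # placeholder
        credential=AzureKeyCredential("my-api-key"),        # placeholder
    ) as client:
        result = await client.embed(
            input=[ImageEmbeddingInput.load(image_file="sample.png", image_format="png")]
        )
        print(len(result.data[0].embedding))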
The ModelInfo is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ModelInfo + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None) + + _request = build_image_embeddings_get_model_info_request( + api_version=self._config.api_version, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + try: + await response.read() # Load the body in memory and close the socket + except (StreamConsumedError, StreamClosedError): + pass + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize(_models.ModelInfo, response.json()) + + if cls: + return cls(pipeline_response, deserialized, {}) # type: ignore + + return deserialized # type: ignore diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py new file mode 100644 index 00000000..f7dd3251 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py @@ -0,0 +1,20 @@ +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +"""Customize generated code here. + +Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize +""" +from typing import List + +__all__: List[str] = [] # Add all objects you want publicly available to users at this package level + + +def patch_sdk(): + """Do not remove from this file. + + `patch_sdk` is a last resort escape hatch that allows you to do customizations + you can't accomplish using the techniques described in + https://aka.ms/azsdk/python/dpcodegen/python/customize + """ diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py new file mode 100644 index 00000000..2f987380 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py @@ -0,0 +1,1331 @@ +# pylint: disable=too-many-lines +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ +"""Customize generated code here. 
+ +Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize +""" +import json +import logging +import sys + +from io import IOBase +from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, AsyncIterable + +from azure.core.pipeline import PipelineResponse +from azure.core.credentials import AzureKeyCredential +from azure.core.tracing.decorator_async import distributed_trace_async +from azure.core.utils import case_insensitive_dict +from azure.core.exceptions import ( + ClientAuthenticationError, + HttpResponseError, + map_error, + ResourceExistsError, + ResourceNotFoundError, + ResourceNotModifiedError, +) +from .. import models as _models +from .._model_base import SdkJSONEncoder, _deserialize +from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated +from ._client import EmbeddingsClient as EmbeddingsClientGenerated +from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated +from .._operations._operations import ( + build_chat_completions_complete_request, + build_embeddings_embed_request, + build_image_embeddings_embed_request, +) +from .._patch import _get_internal_response_format + +if TYPE_CHECKING: + # pylint: disable=unused-import,ungrouped-imports + from azure.core.credentials_async import AsyncTokenCredential + +if sys.version_info >= (3, 9): + from collections.abc import MutableMapping +else: + from typing import MutableMapping # type: ignore # pylint: disable=ungrouped-imports + +JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object +_Unset: Any = object() +_LOGGER = logging.getLogger(__name__) + + +async def load_client( + endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any +) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]: + """ + Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route + on the given endpoint, to determine the model type and therefore which client to instantiate. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + Keyword arguments are passed through to the client constructor (you can set keywords such as + `api_version`, `user_agent`, `logging_enable` etc. on the client constructor). + + :param endpoint: Service endpoint URL for AI model inference. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a + AzureKeyCredential type or a AsyncTokenCredential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :return: The appropriate asynchronous client associated with the given endpoint + :rtype: ~azure.ai.inference.aio.ChatCompletionsClient or ~azure.ai.inference.aio.EmbeddingsClient + or ~azure.ai.inference.aio.ImageEmbeddingsClient + :raises ~azure.core.exceptions.HttpResponseError: + """ + + async with ChatCompletionsClient( + endpoint, credential, **kwargs + ) as client: # Pick any of the clients, it does not matter. + try: + model_info = await client.get_model_info() # type: ignore + except ResourceNotFoundError as error: + error.message = ( + "`load_client` function does not work on this endpoint (`/info` route not supported). " + "Please construct one of the clients (e.g. `ChatCompletionsClient`) directly." 
+            )
+            raise error
+
+    _LOGGER.info("model_info=%s", model_info)
+    if not model_info.model_type:
+        raise ValueError(
+            "The AI model information is missing a value for `model type`. Cannot create an appropriate client."
+        )
+
+    # TODO: Remove "completions", "chat-completions", and "embedding" once Mistral Large and Cohere fix their model type
+    if model_info.model_type in (
+        _models.ModelType.CHAT_COMPLETION,
+        "chat_completions",
+        "chat",
+        "completion",
+        "chat-completion",
+        "chat-completions",
+        "chat completion",
+        "chat completions",
+    ):
+        chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs)
+        chat_completion_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return chat_completion_client
+
+    if model_info.model_type in (
+        _models.ModelType.EMBEDDINGS,
+        "embedding",
+        "text_embedding",
+        "text-embeddings",
+        "text embedding",
+        "text embeddings",
+    ):
+        embedding_client = EmbeddingsClient(endpoint, credential, **kwargs)
+        embedding_client._model_info = model_info  # pylint: disable=protected-access,attribute-defined-outside-init
+        return embedding_client
+
+    if model_info.model_type in (
+        _models.ModelType.IMAGE_EMBEDDINGS,
+        "image_embedding",
+        "image-embeddings",
+        "image-embedding",
+        "image embedding",
+        "image embeddings",
+    ):
+        image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs)
+        image_embedding_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return image_embedding_client
+
+    raise ValueError(f"No client available to support AI model type `{model_info.model_type}`")
+
+
+class ChatCompletionsClient(ChatCompletionsClientGenerated):  # pylint: disable=too-many-instance-attributes
+    """ChatCompletionsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword frequency_penalty: A value that influences the probability of generated tokens
+     appearing based on their cumulative frequency in generated text.
+     Positive values will make tokens less likely to appear as their frequency increases and
+     decrease the likelihood of the model repeating the same statements verbatim.
+     Supported range is [-2, 2].
+     Default value is None.
+    :paramtype frequency_penalty: float
+    :keyword presence_penalty: A value that influences the probability of generated tokens
+     appearing based on their existing presence in generated text.
+     Positive values will make tokens less likely to appear when they already exist and increase
+     the model's likelihood to output new topics.
+     Supported range is [-2, 2].
+     Default value is None.
+    :paramtype presence_penalty: float
+    :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+     generated completions.
+     Higher values will make output more random while lower values will make results more focused
+     and deterministic.
+     It is not recommended to modify temperature and top_p for the same completions request as the
+     interaction of these two settings is difficult to predict.
+     Supported range is [0, 1].
+     Default value is None.
+ :paramtype temperature: float + :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value + causes the + model to consider the results of tokens with the provided probability mass. As an example, a + value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be + considered. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + Default value is None. + :paramtype top_p: float + :keyword max_tokens: The maximum number of tokens to generate. Default value is None. + :paramtype max_tokens: int + :keyword response_format: The format that the AI model must output. AI chat completions models typically output + unformatted text by default. This is equivalent to setting "text" as the response_format. + To output JSON format, without adhering to any schema, set to "json_object". + To output JSON format adhering to a provided schema, set this to an object of the class + ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. + :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] + :keyword stop: A collection of textual sequences that will end completions generation. Default + value is None. + :paramtype stop: list[str] + :keyword tools: The available tool definitions that the chat completions request can use, + including caller-defined functions. Default value is None. + :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] + :keyword tool_choice: If specified, the model will configure which of the provided tools it can + use for the chat completions response. Is either a Union[str, + "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. + Default value is None. + :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or + ~azure.ai.inference.models.ChatCompletionsNamedToolChoice + :keyword seed: If specified, the system will make a best effort to sample deterministically + such that repeated requests with the + same seed and parameters should return the same result. Determinism is not guaranteed. + Default value is None. + :paramtype seed: int + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. 
+ :paramtype api_version: str + """ + + def __init__( + self, + endpoint: str, + credential: Union[AzureKeyCredential, "AsyncTokenCredential"], + *, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, + tool_choice: Optional[ + Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] + ] = None, + seed: Optional[int] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + + self._model_info: Optional[_models.ModelInfo] = None + + # Store default chat completions settings, to be applied in all future service calls + # unless overridden by arguments in the `complete` method. + self._frequency_penalty = frequency_penalty + self._presence_penalty = presence_penalty + self._temperature = temperature + self._top_p = top_p + self._max_tokens = max_tokens + self._internal_response_format = _get_internal_response_format(response_format) + self._stop = stop + self._tools = tools + self._tool_choice = tool_choice + self._seed = seed + self._model = model + self._model_extras = model_extras + + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer <key>" + # 2. "api-key: <key>" + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + + super().__init__(endpoint, credential, **kwargs) + + @overload + async def complete( + self, + *, + messages: List[_models.ChatRequestMessage], + stream: Literal[False] = False, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, + tool_choice: Optional[ + Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] + ] = None, + seed: Optional[int] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> _models.ChatCompletions: ... 
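For orientation, here is a minimal usage sketch of the non-streaming overload above. It assumes a reachable endpoint; the environment variable names are placeholders, and SystemMessage/UserMessage are the chat message types exported by azure.ai.inference.models (not shown in this diff).

import asyncio
import os

from azure.ai.inference.aio import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential


async def main() -> None:
    # Placeholder environment variables; substitute your own endpoint URL and key.
    endpoint = os.environ["AZURE_AI_ENDPOINT"]
    credential = AzureKeyCredential(os.environ["AZURE_AI_KEY"])

    # `async with` ensures the underlying pipeline transport is closed on exit.
    async with ChatCompletionsClient(endpoint, credential) as client:
        # Non-streaming call: awaits the complete ChatCompletions result.
        result = await client.complete(
            messages=[
                SystemMessage(content="You are a helpful assistant."),
                UserMessage(content="How many feet are in a mile?"),
            ],
            temperature=0.8,
            max_tokens=256,
        )
        print(result.choices[0].message.content)


asyncio.run(main())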
+
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Literal[True],
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> AsyncIterable[_models.StreamingChatCompletionsUpdate]: ...
+
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route
+        on the given endpoint.
+        When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting AsyncStreamingChatCompletions
+        object to get content updates as they arrive. By default, the response is a ChatCompletions object
+        (non-streaming).
+
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be an AsyncStreamingChatCompletions.
+         Otherwise the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing presence in generated text.
+ Positive values will make tokens less likely to appear when they already exist and increase + the model's likelihood to output new topics. + Supported range is [-2, 2]. + Default value is None. + :paramtype presence_penalty: float + :keyword temperature: The sampling temperature to use that controls the apparent creativity of + generated completions. + Higher values will make output more random while lower values will make results more focused + and deterministic. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + Default value is None. + :paramtype temperature: float + :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value + causes the + model to consider the results of tokens with the provided probability mass. As an example, a + value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be + considered. + It is not recommended to modify temperature and top_p for the same completions request as the + interaction of these two settings is difficult to predict. + Supported range is [0, 1]. + Default value is None. + :paramtype top_p: float + :keyword max_tokens: The maximum number of tokens to generate. Default value is None. + :paramtype max_tokens: int + :keyword response_format: The format that the AI model must output. AI chat completions models typically output + unformatted text by default. This is equivalent to setting "text" as the response_format. + To output JSON format, without adhering to any schema, set to "json_object". + To output JSON format adhering to a provided schema, set this to an object of the class + ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. + :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] + :keyword stop: A collection of textual sequences that will end completions generation. Default + value is None. + :paramtype stop: list[str] + :keyword tools: The available tool definitions that the chat completions request can use, + including caller-defined functions. Default value is None. + :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] + :keyword tool_choice: If specified, the model will configure which of the provided tools it can + use for the chat completions response. Is either a Union[str, + "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. + Default value is None. + :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or + ~azure.ai.inference.models.ChatCompletionsNamedToolChoice + :keyword seed: If specified, the system will make a best effort to sample deterministically + such that repeated requests with the + same seed and parameters should return the same result. Determinism is not guaranteed. + Default value is None. + :paramtype seed: int + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. 
+ :paramtype model_extras: dict[str, Any] + :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. + :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @overload + async def complete( + self, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: + # pylint: disable=line-too-long + """Gets chat completions for the provided chat messages. + Completions support a wide variety of tasks and generate text that continues from or + "completes" provided prompt data. + + :param body: An object of type MutableMapping[str, Any], such as a dictionary, that + specifies the full request payload. Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. + :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @overload + async def complete( + self, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: + # pylint: disable=line-too-long + """Gets chat completions for the provided chat messages. + Completions support a wide variety of tasks and generate text that continues from or + "completes" provided prompt data. + + :param body: Specifies the full request payload. Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. + :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions + :raises ~azure.core.exceptions.HttpResponseError: + """ + + # pylint:disable=client-method-missing-tracing-decorator-async + async def complete( + self, + body: Union[JSON, IO[bytes]] = _Unset, + *, + messages: List[_models.ChatRequestMessage] = _Unset, + stream: Optional[bool] = None, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, + tool_choice: Optional[ + Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] + ] = None, + seed: Optional[int] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: + # pylint: disable=line-too-long + # pylint: disable=too-many-locals + """Gets chat completions for the provided chat messages. 
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.AsyncStreamingChatCompletions`
+        object to get content updates as they arrive.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be an AsyncStreamingChatCompletions.
+         Otherwise the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+ To output JSON format adhering to a provided schema, set this to an object of the class + ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. + :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] + :keyword stop: A collection of textual sequences that will end completions generation. Default + value is None. + :paramtype stop: list[str] + :keyword tools: The available tool definitions that the chat completions request can use, + including caller-defined functions. Default value is None. + :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] + :keyword tool_choice: If specified, the model will configure which of the provided tools it can + use for the chat completions response. Is either a Union[str, + "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. + Default value is None. + :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or + ~azure.ai.inference.models.ChatCompletionsNamedToolChoice + :keyword seed: If specified, the system will make a best effort to sample deterministically + such that repeated requests with the + same seed and parameters should return the same result. Determinism is not guaranteed. + Default value is None. + :paramtype seed: int + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. 
+ :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + _extra_parameters: Union[_models._enums.ExtraParameters, None] = None + + content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) + + internal_response_format = _get_internal_response_format(response_format) + + if body is _Unset: + if messages is _Unset: + raise TypeError("missing required argument: messages") + body = { + "messages": messages, + "stream": stream, + "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty, + "max_tokens": max_tokens if max_tokens is not None else self._max_tokens, + "model": model if model is not None else self._model, + "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty, + "response_format": ( + internal_response_format if internal_response_format is not None else self._internal_response_format + ), + "seed": seed if seed is not None else self._seed, + "stop": stop if stop is not None else self._stop, + "temperature": temperature if temperature is not None else self._temperature, + "tool_choice": tool_choice if tool_choice is not None else self._tool_choice, + "tools": tools if tools is not None else self._tools, + "top_p": top_p if top_p is not None else self._top_p, + } + if model_extras is not None and bool(model_extras): + body.update(model_extras) + _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access + elif self._model_extras is not None and bool(self._model_extras): + body.update(self._model_extras) + _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access + body = {k: v for k, v in body.items() if v is not None} + elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool): + stream = body["stream"] + content_type = content_type or "application/json" + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore + + _request = build_chat_completions_complete_request( + extra_params=_extra_parameters, + content_type=content_type, + api_version=self._config.api_version, + content=_content, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = stream or False + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + return _models.AsyncStreamingChatCompletions(response) + + return 
_deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access + + @distributed_trace_async + async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: + # pylint: disable=line-too-long + """Returns information about the AI model. + The method makes a REST API call to the ``/info`` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + + :return: ModelInfo. The ModelInfo is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ModelInfo + :raises ~azure.core.exceptions.HttpResponseError: + """ + if not self._model_info: + try: + self._model_info = await self._get_model_info( + **kwargs + ) # pylint: disable=attribute-defined-outside-init + except ResourceNotFoundError as error: + error.message = "Model information is not available on this endpoint (`/info` route not supported)." + raise error + + return self._model_info + + def __str__(self) -> str: + # pylint: disable=client-method-name-no-double-underscore + return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() + + +class EmbeddingsClient(EmbeddingsClientGenerated): + """EmbeddingsClient. + + :param endpoint: Service endpoint URL for AI model inference. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a + AzureKeyCredential type or a AsyncTokenCredential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. Default value is None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. + Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. 
+ :paramtype api_version: str + """ + + def __init__( + self, + endpoint: str, + credential: Union[AzureKeyCredential, "AsyncTokenCredential"], + *, + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + + self._model_info: Optional[_models.ModelInfo] = None + + # Store default embeddings settings, to be applied in all future service calls + # unless overridden by arguments in the `embed` method. + self._dimensions = dimensions + self._encoding_format = encoding_format + self._input_type = input_type + self._model = model + self._model_extras = model_extras + + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer <key>" + # 2. "api-key: <key>" + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + + super().__init__(endpoint, credential, **kwargs) + + @overload + async def embed( + self, + *, + input: List[str], + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given text prompts. + The method makes a REST API call to the `/embeddings` route on the given endpoint. + + :keyword input: Input text to embed, encoded as a string or array of tokens. + To embed multiple inputs in a single request, pass an array + of strings or array of token arrays. Required. + :paramtype input: list[str] + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. Default value is None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. + Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :return: EmbeddingsResult. 
The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @overload + async def embed( + self, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given text prompts. + The method makes a REST API call to the `/embeddings` route on the given endpoint. + + :param body: An object of type MutableMapping[str, Any], such as a dictionary, that + specifies the full request payload. Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @overload + async def embed( + self, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given text prompts. + The method makes a REST API call to the `/embeddings` route on the given endpoint. + + :param body: Specifies the full request payload. Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @distributed_trace_async + async def embed( + self, + body: Union[JSON, IO[bytes]] = _Unset, + *, + input: List[str] = _Unset, + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> _models.EmbeddingsResult: + # pylint: disable=line-too-long + """Return the embedding vectors for given text prompts. + The method makes a REST API call to the `/embeddings` route on the given endpoint. + + :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type + that specifies the full request payload. Required. + :type body: JSON or IO[bytes] + :keyword input: Input text to embed, encoded as a string or array of tokens. + To embed multiple inputs in a single request, pass an array + of strings or array of token arrays. Required. + :paramtype input: list[str] + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. Default value is None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. + Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. 
+ :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + _extra_parameters: Union[_models._enums.ExtraParameters, None] = None + + content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) + + if body is _Unset: + if input is _Unset: + raise TypeError("missing required argument: input") + body = { + "input": input, + "dimensions": dimensions if dimensions is not None else self._dimensions, + "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, + "input_type": input_type if input_type is not None else self._input_type, + "model": model if model is not None else self._model, + } + if model_extras is not None and bool(model_extras): + body.update(model_extras) + _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access + elif self._model_extras is not None and bool(self._model_extras): + body.update(self._model_extras) + _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access + body = {k: v for k, v in body.items() if v is not None} + content_type = content_type or "application/json" + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore + + _request = build_embeddings_embed_request( + extra_params=_extra_parameters, + content_type=content_type, + api_version=self._config.api_version, + content=_content, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize( + _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access + ) + + return deserialized # type: ignore + + @distributed_trace_async + async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: + # pylint: disable=line-too-long + """Returns information about the AI 
model. + The method makes a REST API call to the ``/info`` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + + :return: ModelInfo. The ModelInfo is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ModelInfo + :raises ~azure.core.exceptions.HttpResponseError: + """ + if not self._model_info: + try: + self._model_info = await self._get_model_info( + **kwargs + ) # pylint: disable=attribute-defined-outside-init + except ResourceNotFoundError as error: + error.message = "Model information is not available on this endpoint (`/info` route not supported)." + raise error + + return self._model_info + + def __str__(self) -> str: + # pylint: disable=client-method-name-no-double-underscore + return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() + + +class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated): + """ImageEmbeddingsClient. + + :param endpoint: Service endpoint URL for AI model inference. Required. + :type endpoint: str + :param credential: Credential used to authenticate requests to the service. Is either a + AzureKeyCredential type or a AsyncTokenCredential type. Required. + :type credential: ~azure.core.credentials.AzureKeyCredential or + ~azure.core.credentials_async.AsyncTokenCredential + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. Default value is None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. + Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :keyword api_version: The API version to use for this operation. Default value is + "2024-05-01-preview". Note that overriding this default value may result in unsupported + behavior. + :paramtype api_version: str + """ + + def __init__( + self, + endpoint: str, + credential: Union[AzureKeyCredential, "AsyncTokenCredential"], + *, + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + + self._model_info: Optional[_models.ModelInfo] = None + + # Store default embeddings settings, to be applied in all future service calls + # unless overridden by arguments in the `embed` method. 
+ self._dimensions = dimensions + self._encoding_format = encoding_format + self._input_type = input_type + self._model = model + self._model_extras = model_extras + + # For Key auth, we need to send these two auth HTTP request headers simultaneously: + # 1. "Authorization: Bearer <key>" + # 2. "api-key: <key>" + # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, + # and Azure OpenAI and the new Unified Inference endpoints support the second header. + # The first header will be taken care of by auto-generated code. + # The second one is added here. + if isinstance(credential, AzureKeyCredential): + headers = kwargs.pop("headers", {}) + if "api-key" not in headers: + headers["api-key"] = credential.key + kwargs["headers"] = headers + + super().__init__(endpoint, credential, **kwargs) + + @overload + async def embed( + self, + *, + input: List[_models.ImageEmbeddingInput], + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + + :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an + array. + The input must not exceed the max input tokens for the model. Required. + :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. Default value is None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. + Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @overload + async def embed( + self, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + + :param body: An object of type MutableMapping[str, Any], such as a dictionary, that + specifies the full request payload. Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. 
+ Default value is "application/json". + :paramtype content_type: str + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @overload + async def embed( + self, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> _models.EmbeddingsResult: + """Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + + :param body: Specifies the full request payload. Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + + @distributed_trace_async + async def embed( + self, + body: Union[JSON, IO[bytes]] = _Unset, + *, + input: List[_models.ImageEmbeddingInput] = _Unset, + dimensions: Optional[int] = None, + encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, + input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, + model: Optional[str] = None, + model_extras: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> _models.EmbeddingsResult: + # pylint: disable=line-too-long + """Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + + :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type + that specifies the full request payload. Required. + :type body: JSON or IO[bytes] + :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an + array. + The input must not exceed the max input tokens for the model. Required. + :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] + :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should + have. Default value is None. + :paramtype dimensions: int + :keyword encoding_format: Optional. The desired format for the returned embeddings. + Known values are: + "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. + :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat + :keyword input_type: Optional. The type of the input. Known values are: + "text", "query", and "document". Default value is None. + :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType + :keyword model: ID of the specific AI model to use, if more than one model is available on the + endpoint. Default value is None. + :paramtype model: str + :keyword model_extras: Additional, model-specific parameters that are not in the + standard request payload. They will be added as-is to the root of the JSON in the request body. + How the service handles these extra parameters depends on the value of the + ``extra-parameters`` request header. Default value is None. + :paramtype model_extras: dict[str, Any] + :return: EmbeddingsResult. 
The EmbeddingsResult is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.EmbeddingsResult + :raises ~azure.core.exceptions.HttpResponseError: + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 401: ClientAuthenticationError, + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + _extra_parameters: Union[_models._enums.ExtraParameters, None] = None + + content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) + + if body is _Unset: + if input is _Unset: + raise TypeError("missing required argument: input") + body = { + "input": input, + "dimensions": dimensions if dimensions is not None else self._dimensions, + "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, + "input_type": input_type if input_type is not None else self._input_type, + "model": model if model is not None else self._model, + } + if model_extras is not None and bool(model_extras): + body.update(model_extras) + _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access + elif self._model_extras is not None and bool(self._model_extras): + body.update(self._model_extras) + _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access + body = {k: v for k, v in body.items() if v is not None} + content_type = content_type or "application/json" + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore + + _request = build_image_embeddings_embed_request( + extra_params=_extra_parameters, + content_type=content_type, + api_version=self._config.api_version, + content=_content, + headers=_headers, + params=_params, + ) + path_format_arguments = { + "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), + } + _request.url = self._client.format_url(_request.url, **path_format_arguments) + + _stream = kwargs.pop("stream", False) + pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) + raise HttpResponseError(response=response) + + if _stream: + deserialized = response.iter_bytes() + else: + deserialized = _deserialize( + _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access + ) + + return deserialized # type: ignore + + @distributed_trace_async + async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: + # pylint: disable=line-too-long + """Returns information about the AI model. + The method makes a REST API call to the ``/info`` route on the given endpoint. + This method will only work when using Serverless API or Managed Compute endpoint. + It will not work for GitHub Models endpoint or Azure OpenAI endpoint. + + :return: ModelInfo. 
The ModelInfo is compatible with MutableMapping + :rtype: ~azure.ai.inference.models.ModelInfo + :raises ~azure.core.exceptions.HttpResponseError: + """ + if not self._model_info: + try: + self._model_info = await self._get_model_info( + **kwargs + ) # pylint: disable=attribute-defined-outside-init + except ResourceNotFoundError as error: + error.message = "Model information is not available on this endpoint (`/info` route not supported)." + raise error + + return self._model_info + + def __str__(self) -> str: + # pylint: disable=client-method-name-no-double-underscore + return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() + + +__all__: List[str] = [ + "load_client", + "ChatCompletionsClient", + "EmbeddingsClient", + "ImageEmbeddingsClient", +] # Add all objects you want publicly available to users at this package level + + +def patch_sdk(): + """Do not remove from this file. + + `patch_sdk` is a last resort escape hatch that allows you to do customizations + you can't accomplish using the techniques described in + https://aka.ms/azsdk/python/dpcodegen/python/customize + """ diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py new file mode 100644 index 00000000..b430582c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py @@ -0,0 +1,47 @@ +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- + +from abc import ABC +from typing import TYPE_CHECKING + +from ._configuration import ( + ChatCompletionsClientConfiguration, + EmbeddingsClientConfiguration, + ImageEmbeddingsClientConfiguration, +) + +if TYPE_CHECKING: + from azure.core import AsyncPipelineClient + + from .._serialization import Deserializer, Serializer + + +class ChatCompletionsClientMixinABC(ABC): + """DO NOT use this class. It is for internal typing use only.""" + + _client: "AsyncPipelineClient" + _config: ChatCompletionsClientConfiguration + _serialize: "Serializer" + _deserialize: "Deserializer" + + +class EmbeddingsClientMixinABC(ABC): + """DO NOT use this class. It is for internal typing use only.""" + + _client: "AsyncPipelineClient" + _config: EmbeddingsClientConfiguration + _serialize: "Serializer" + _deserialize: "Deserializer" + + +class ImageEmbeddingsClientMixinABC(ABC): + """DO NOT use this class. It is for internal typing use only.""" + + _client: "AsyncPipelineClient" + _config: ImageEmbeddingsClientConfiguration + _serialize: "Serializer" + _deserialize: "Deserializer" |
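Two short usage sketches tie the pieces of this package together. First, `load_client` with a streaming chat completion: this assumes an endpoint whose `/info` route is reachable (Serverless API or Managed Compute, per the docstrings above); the environment variable names are placeholders, and UserMessage is assumed from azure.ai.inference.models.

import asyncio
import os

from azure.ai.inference.aio import ChatCompletionsClient, load_client
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential


async def main() -> None:
    endpoint = os.environ["AZURE_AI_ENDPOINT"]  # placeholder
    credential = AzureKeyCredential(os.environ["AZURE_AI_KEY"])  # placeholder

    # load_client probes the /info route and returns the client class matching
    # the reported model type (raising ValueError for unrecognized types).
    client = await load_client(endpoint, credential)
    print(await client.get_model_info())

    if isinstance(client, ChatCompletionsClient):
        # stream=True yields an AsyncStreamingChatCompletions; iterate it
        # asynchronously to print content updates as they arrive.
        response = await client.complete(
            messages=[UserMessage(content="Write a haiku about the sea.")],
            stream=True,
        )
        async for update in response:
            if update.choices:
                print(update.choices[0].delta.content or "", end="")

    await client.close()


asyncio.run(main())

Second, the two embeddings clients. This sketch assumes the `ImageEmbeddingInput.load` helper referenced in the docstrings above and the default float encoding for returned vectors; the file path and environment variable names are placeholders.

import asyncio
import os

from azure.ai.inference.aio import EmbeddingsClient, ImageEmbeddingsClient
from azure.ai.inference.models import ImageEmbeddingInput
from azure.core.credentials import AzureKeyCredential


async def embed_samples() -> None:
    endpoint = os.environ["AZURE_AI_ENDPOINT"]  # placeholder
    credential = AzureKeyCredential(os.environ["AZURE_AI_KEY"])  # placeholder

    async with EmbeddingsClient(endpoint, credential) as client:
        result = await client.embed(input=["first phrase", "second phrase"])
        for item in result.data:
            print(f"index={item.index}: {len(item.embedding)} dimensions")

    async with ImageEmbeddingsClient(endpoint, credential) as client:
        # ImageEmbeddingInput.load reads and base64-encodes a local image file;
        # "sample.png" is a placeholder path.
        result = await client.embed(
            input=[ImageEmbeddingInput.load(image_file="sample.png", image_format="png")]
        )
        print(len(result.data[0].embedding), "dimensions")


asyncio.run(embed_samples())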