Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/inference/aio')
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py | 33
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py | 280
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py | 197
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py | 29
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py | 781
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py | 20
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py | 1331
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py | 47
8 files changed, 2718 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py
new file mode 100644
index 00000000..668f989a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py
@@ -0,0 +1,33 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+from ._client import ChatCompletionsClient  # type: ignore
+from ._client import EmbeddingsClient  # type: ignore
+from ._client import ImageEmbeddingsClient  # type: ignore
+
+try:
+    from ._patch import __all__ as _patch_all
+    from ._patch import *
+except ImportError:
+    _patch_all = []
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "ChatCompletionsClient",
+    "EmbeddingsClient",
+    "ImageEmbeddingsClient",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+
+_patch_sdk()
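
The __init__.py above re-exports the three async clients and calls _patch_sdk() at import time,
so any hand-written customizations from _patch.py are applied before the module is used. A
minimal usage sketch (the endpoint and key below are placeholders, not values from this diff):

    import asyncio
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.inference.aio import ChatCompletionsClient

    async def main() -> None:
        # Placeholder endpoint and key; substitute your own deployment's values.
        async with ChatCompletionsClient(
            endpoint="https://<your-endpoint>.inference.ai.azure.com",
            credential=AzureKeyCredential("<your-key>"),
        ) as client:
            ...  # issue requests here; the context manager closes the pipeline

    asyncio.run(main())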
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py
new file mode 100644
index 00000000..88e6773b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py
@@ -0,0 +1,280 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from copy import deepcopy
+from typing import Any, Awaitable, TYPE_CHECKING, Union
+from typing_extensions import Self
+
+from azure.core import AsyncPipelineClient
+from azure.core.credentials import AzureKeyCredential
+from azure.core.pipeline import policies
+from azure.core.rest import AsyncHttpResponse, HttpRequest
+
+from .._serialization import Deserializer, Serializer
+from ._configuration import (
+    ChatCompletionsClientConfiguration,
+    EmbeddingsClientConfiguration,
+    ImageEmbeddingsClientConfiguration,
+)
+from ._operations import (
+    ChatCompletionsClientOperationsMixin,
+    EmbeddingsClientOperationsMixin,
+    ImageEmbeddingsClientOperationsMixin,
+)
+
+if TYPE_CHECKING:
+    from azure.core.credentials_async import AsyncTokenCredential
+
+
+class ChatCompletionsClient(ChatCompletionsClientOperationsMixin):
+    """ChatCompletionsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}"
+        self._config = ChatCompletionsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(
+        self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
+    ) -> Awaitable[AsyncHttpResponse]:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        >>> response = await client.send_request(request)
+        <AsyncHttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.AsyncHttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    async def close(self) -> None:
+        await self._client.close()
+
+    async def __aenter__(self) -> Self:
+        await self._client.__aenter__()
+        return self
+
+    async def __aexit__(self, *exc_details: Any) -> None:
+        await self._client.__aexit__(*exc_details)
+
+
+class EmbeddingsClient(EmbeddingsClientOperationsMixin):
+    """EmbeddingsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}"
+        self._config = EmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(
+        self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
+    ) -> Awaitable[AsyncHttpResponse]:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        >>> response = await client.send_request(request)
+        <AsyncHttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.AsyncHttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    async def close(self) -> None:
+        await self._client.close()
+
+    async def __aenter__(self) -> Self:
+        await self._client.__aenter__()
+        return self
+
+    async def __aexit__(self, *exc_details: Any) -> None:
+        await self._client.__aexit__(*exc_details)
+
+
+class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin):
+    """ImageEmbeddingsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}"
+        self._config = ImageEmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(
+        self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
+    ) -> Awaitable[AsyncHttpResponse]:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        >>> response = await client.send_request(request)
+        <AsyncHttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.AsyncHttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    async def close(self) -> None:
+        await self._client.close()
+
+    async def __aenter__(self) -> Self:
+        await self._client.__aenter__()
+        return self
+
+    async def __aexit__(self, *exc_details: Any) -> None:
+        await self._client.__aexit__(*exc_details)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py
new file mode 100644
index 00000000..f60e1125
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py
@@ -0,0 +1,197 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from typing import Any, TYPE_CHECKING, Union
+
+from azure.core.credentials import AzureKeyCredential
+from azure.core.pipeline import policies
+
+from .._version import VERSION
+
+if TYPE_CHECKING:
+    from azure.core.credentials_async import AsyncTokenCredential
+
+
+class ChatCompletionsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for ChatCompletionsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
+
+
+class EmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for EmbeddingsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
+
+
+class ImageEmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for ImageEmbeddingsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
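
Each configuration defaults credential_scopes to ["https://ml.azure.com/.default"] and infers
the authentication policy from the credential type: an AzureKeyCredential becomes a Bearer
Authorization header policy, while anything exposing get_token() becomes an
AsyncBearerTokenCredentialPolicy. Both the scopes and the api_version flow in through kwargs,
so they can be overridden at construction time. A hedged sketch (azure-identity is a separate
package, and the scope shown is illustrative, not a default from this diff):

    from azure.identity.aio import DefaultAzureCredential
    from azure.ai.inference.aio import ChatCompletionsClient

    client = ChatCompletionsClient(
        endpoint="https://<your-endpoint>.inference.ai.azure.com",
        credential=DefaultAzureCredential(),  # token credential -> async bearer policy
        credential_scopes=["https://cognitiveservices.azure.com/.default"],
        api_version="2024-05-01-preview",
    )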
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py
new file mode 100644
index 00000000..ab870887
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py
@@ -0,0 +1,29 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+from ._operations import ChatCompletionsClientOperationsMixin  # type: ignore
+from ._operations import EmbeddingsClientOperationsMixin  # type: ignore
+from ._operations import ImageEmbeddingsClientOperationsMixin  # type: ignore
+
+from ._patch import __all__ as _patch_all
+from ._patch import *
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "ChatCompletionsClientOperationsMixin",
+    "EmbeddingsClientOperationsMixin",
+    "ImageEmbeddingsClientOperationsMixin",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+_patch_sdk()
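
The operations module that follows is long, but its request-building convention is compact:
optional keyword arguments map one-to-one onto JSON body fields, and None values are filtered
out so the service applies its own defaults. A hypothetical helper distilling the pattern used
by _complete and _embed below (build_body is not part of this diff):

    def build_body(**options):
        # Mirror the generated code: named options become JSON fields...
        body = {
            "messages": options.get("messages"),
            "temperature": options.get("temperature"),
            "max_tokens": options.get("max_tokens"),
        }
        # ...and unset (None) fields are dropped rather than sent as nulls.
        return {k: v for k, v in body.items() if v is not None}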
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
new file mode 100644
index 00000000..62ec772f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
@@ -0,0 +1,781 @@
+# pylint: disable=too-many-locals
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+from io import IOBase
+import json
+import sys
+from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload
+
+from azure.core.exceptions import (
+    ClientAuthenticationError,
+    HttpResponseError,
+    ResourceExistsError,
+    ResourceNotFoundError,
+    ResourceNotModifiedError,
+    StreamClosedError,
+    StreamConsumedError,
+    map_error,
+)
+from azure.core.pipeline import PipelineResponse
+from azure.core.rest import AsyncHttpResponse, HttpRequest
+from azure.core.tracing.decorator_async import distributed_trace_async
+from azure.core.utils import case_insensitive_dict
+
+from ... import models as _models
+from ..._model_base import SdkJSONEncoder, _deserialize
+from ..._operations._operations import (
+    build_chat_completions_complete_request,
+    build_chat_completions_get_model_info_request,
+    build_embeddings_embed_request,
+    build_embeddings_get_model_info_request,
+    build_image_embeddings_embed_request,
+    build_image_embeddings_get_model_info_request,
+)
+from .._vendor import ChatCompletionsClientMixinABC, EmbeddingsClientMixinABC, ImageEmbeddingsClientMixinABC
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping  # type: ignore
+JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+T = TypeVar("T")
+ClsType = Optional[Callable[[PipelineResponse[HttpRequest, AsyncHttpResponse], T, Dict[str, Any]], Any]]
+
+
+class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):
+
+    @overload
+    async def _complete(
+        self,
+        *,
+        messages: List[_models._models.ChatRequestMessage],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        frequency_penalty: Optional[float] = None,
+        stream_parameter: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+    @overload
+    async def _complete(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+    @overload
+    async def _complete(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+
+    @distributed_trace_async
+    async def _complete(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        messages: List[_models._models.ChatRequestMessage] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        frequency_penalty: Optional[float] = None,
+        stream_parameter: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.ChatCompletions:
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. The method makes a REST API call to the
+        ``/chat/completions`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2]. Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword stream_parameter: A value indicating whether chat completions should be streamed for
+         this request. Default value is None.
+        :paramtype stream_parameter: bool
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and
+         increase the model's likelihood of introducing new topics.
+         Supported range is [-2, 2]. Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1]. Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature, called nucleus sampling. This
+         value causes the model to consider the results of tokens with the provided probability
+         mass. As an example, a value of 0.15 will cause only the tokens comprising the top 15% of
+         probability mass to be considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1]. Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: An object specifying the format that the model must output.
+
+         Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs
+         which ensures the model will match your supplied JSON schema.
+
+         Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the
+         model generates is valid JSON.
+
+         **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
+         yourself via a system or user message. Without this, the model may generate an unending stream
+         of whitespace until the generation reaches the token limit, resulting in a long-running and
+         seemingly "stuck" request. Also note that the message content may be partially cut off if
+         ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
+         conversation exceeded the max context length. Default value is None.
+        :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: A list of tools the model may request to call. Currently, only functions are
+         supported as a tool. The model may respond with a function call request and provide the
+         input arguments in JSON format for that function. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed. Default
+         value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: ChatCompletions. The ChatCompletions is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if messages is _Unset:
+                raise TypeError("missing required argument: messages")
+            body = {
+                "frequency_penalty": frequency_penalty,
+                "max_tokens": max_tokens,
+                "messages": messages,
+                "model": model,
+                "presence_penalty": presence_penalty,
+                "response_format": response_format,
+                "seed": seed,
+                "stop": stop,
+                "stream": stream_parameter,
+                "temperature": temperature,
+                "tool_choice": tool_choice,
+                "tools": tools,
+                "top_p": top_p,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_chat_completions_complete_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ChatCompletions, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace_async
+    async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with Serverless API or Managed Compute endpoints.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_chat_completions_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+
+class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
+
+    @overload
+    async def _embed(
+        self,
+        *,
+        input: List[str],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+
+    @distributed_trace_async
+    async def _embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[str] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the ``/embeddings`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Default value is
+         None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
+         values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "dimensions": dimensions,
+                "encoding_format": encoding_format,
+                "input": input,
+                "input_type": input_type,
+                "model": model,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_embeddings_embed_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace_async
+    async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with Serverless API or Managed Compute endpoints.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_embeddings_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+
+class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
+
+    @overload
+    async def _embed(
+        self,
+        *,
+        input: List[_models.ImageEmbeddingInput],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+
+    @distributed_trace_async
+    async def _embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[_models.ImageEmbeddingInput] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+         array.
+         The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Default value is
+         None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "dimensions": dimensions,
+                "encoding_format": encoding_format,
+                "input": input,
+                "input_type": input_type,
+                "model": model,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_image_embeddings_embed_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
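+    # Illustrative usage sketch (not generated code): `_embed` is the private operation
+    # behind the public `embed` wrapper added in `_patch.py`. The endpoint URL, key, and
+    # image file name below are hypothetical.
+    #
+    #     from azure.ai.inference.aio import ImageEmbeddingsClient
+    #     from azure.ai.inference.models import ImageEmbeddingInput
+    #     from azure.core.credentials import AzureKeyCredential
+    #
+    #     client = ImageEmbeddingsClient("https://<endpoint>", AzureKeyCredential("<key>"))
+    #     result = await client.embed(
+    #         input=[ImageEmbeddingInput.load(image_file="sample.png", image_format="png")]
+    #     )
+    #     print(result.data[0].embedding)
+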
+    @distributed_trace_async
+    async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_image_embeddings_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
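+
+# Illustrative sketch (not generated code): each operation above accepts an optional
+# `cls` callback, invoked as `cls(pipeline_response, deserialized, response_headers)`,
+# which can intercept the raw pipeline response. A minimal example, assuming an
+# already-constructed client:
+#
+#     captured = {}
+#
+#     def keep_response(pipeline_response, deserialized, headers):
+#         captured["response"] = pipeline_response.http_response
+#         return deserialized
+#
+#     info = await client._get_model_info(cls=keep_response)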
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py
new file mode 100644
index 00000000..f7dd3251
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py
@@ -0,0 +1,20 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+from typing import List
+
+__all__: List[str] = []  # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py
new file mode 100644
index 00000000..2f987380
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py
@@ -0,0 +1,1331 @@
+# pylint: disable=too-many-lines
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+import json
+import logging
+import sys
+
+from io import IOBase
+from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, AsyncIterable
+
+from azure.core.pipeline import PipelineResponse
+from azure.core.credentials import AzureKeyCredential
+from azure.core.tracing.decorator_async import distributed_trace_async
+from azure.core.utils import case_insensitive_dict
+from azure.core.exceptions import (
+    ClientAuthenticationError,
+    HttpResponseError,
+    map_error,
+    ResourceExistsError,
+    ResourceNotFoundError,
+    ResourceNotModifiedError,
+)
+from .. import models as _models
+from .._model_base import SdkJSONEncoder, _deserialize
+from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated
+from ._client import EmbeddingsClient as EmbeddingsClientGenerated
+from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated
+from .._operations._operations import (
+    build_chat_completions_complete_request,
+    build_embeddings_embed_request,
+    build_image_embeddings_embed_request,
+)
+from .._patch import _get_internal_response_format
+
+if TYPE_CHECKING:
+    # pylint: disable=unused-import,ungrouped-imports
+    from azure.core.credentials_async import AsyncTokenCredential
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping  # type: ignore  # pylint: disable=ungrouped-imports
+
+JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+_LOGGER = logging.getLogger(__name__)
+
+
+async def load_client(
+    endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]:
+    """
+    Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route
+    on the given endpoint, to determine the model type and therefore which client to instantiate.
+    This method will only work when using a Serverless API or Managed Compute endpoint.
+    It will not work for GitHub Models or Azure OpenAI endpoints.
+    Keyword arguments are passed through to the client constructor (you can set keywords such as
+    `api_version`, `user_agent`, and `logging_enable` there).
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :return: The appropriate asynchronous client associated with the given endpoint
+    :rtype: ~azure.ai.inference.aio.ChatCompletionsClient or ~azure.ai.inference.aio.EmbeddingsClient
+     or ~azure.ai.inference.aio.ImageEmbeddingsClient
+    :raises ~azure.core.exceptions.HttpResponseError:
+    """
+
+    async with ChatCompletionsClient(
+        endpoint, credential, **kwargs
+    ) as client:  # Pick any of the clients; it does not matter.
+        try:
+            model_info = await client.get_model_info()  # type: ignore
+        except ResourceNotFoundError as error:
+            error.message = (
+                "`load_client` function does not work on this endpoint (`/info` route not supported). "
+                "Please construct one of the clients (e.g. `ChatCompletionsClient`) directly."
+            )
+            raise error
+
+    _LOGGER.info("model_info=%s", model_info)
+    if not model_info.model_type:
+        raise ValueError(
+            "The AI model information is missing a value for `model type`. Cannot create an appropriate client."
+        )
+
+    # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type
+    if model_info.model_type in (
+        _models.ModelType.CHAT_COMPLETION,
+        "chat_completions",
+        "chat",
+        "completion",
+        "chat-completion",
+        "chat-completions",
+        "chat completion",
+        "chat completions",
+    ):
+        chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs)
+        chat_completion_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return chat_completion_client
+
+    if model_info.model_type in (
+        _models.ModelType.EMBEDDINGS,
+        "embedding",
+        "text_embedding",
+        "text-embeddings",
+        "text embedding",
+        "text embeddings",
+    ):
+        embedding_client = EmbeddingsClient(endpoint, credential, **kwargs)
+        embedding_client._model_info = model_info  # pylint: disable=protected-access,attribute-defined-outside-init
+        return embedding_client
+
+    if model_info.model_type in (
+        _models.ModelType.IMAGE_EMBEDDINGS,
+        "image_embedding",
+        "image-embeddings",
+        "image-embedding",
+        "image embedding",
+        "image embeddings",
+    ):
+        image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs)
+        image_embedding_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return image_embedding_client
+
+    raise ValueError(f"No client available to support AI model type `{model_info.model_type}`")
+
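+# Illustrative usage sketch (not shipped code): `load_client` inspects the `/info`
+# route and returns the matching client type. The endpoint URL and key below are
+# hypothetical.
+#
+#     from azure.ai.inference.aio import load_client
+#     from azure.core.credentials import AzureKeyCredential
+#
+#     client = await load_client("https://<endpoint>", AzureKeyCredential("<key>"))
+#     print(type(client).__name__)  # e.g. "ChatCompletionsClient"
+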
+
+class ChatCompletionsClient(ChatCompletionsClientGenerated):  # pylint: disable=too-many-instance-attributes
+    """ChatCompletionsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword frequency_penalty: A value that influences the probability of generated tokens
+        appearing based on their cumulative frequency in generated text.
+        Positive values will make tokens less likely to appear as their frequency increases and
+        decrease the likelihood of the model repeating the same statements verbatim.
+        Supported range is [-2, 2].
+        Default value is None.
+    :paramtype frequency_penalty: float
+    :keyword presence_penalty: A value that influences the probability of generated tokens
+        appearing based on their existing
+        presence in generated text.
+        Positive values will make tokens less likely to appear when they already exist and increase
+        the model's likelihood to output new topics.
+        Supported range is [-2, 2].
+        Default value is None.
+    :paramtype presence_penalty: float
+    :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+        generated completions.
+        Higher values will make output more random while lower values will make results more focused
+        and deterministic.
+        It is not recommended to modify temperature and top_p for the same completions request as the
+        interaction of these two settings is difficult to predict.
+        Supported range is [0, 1].
+        Default value is None.
+    :paramtype temperature: float
+    :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+        causes the
+        model to consider the results of tokens with the provided probability mass. As an example, a
+        value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        considered.
+        It is not recommended to modify temperature and top_p for the same completions request as the
+        interaction of these two settings is difficult to predict.
+        Supported range is [0, 1].
+        Default value is None.
+    :paramtype top_p: float
+    :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+    :paramtype max_tokens: int
+    :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+        unformatted text by default. This is equivalent to setting "text" as the response_format.
+        To output JSON format, without adhering to any schema, set to "json_object".
+        To output JSON format adhering to a provided schema, set this to an object of the class
+        ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+    :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+    :keyword stop: A collection of textual sequences that will end completions generation. Default
+        value is None.
+    :paramtype stop: list[str]
+    :keyword tools: The available tool definitions that the chat completions request can use,
+        including caller-defined functions. Default value is None.
+    :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+    :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+        use for the chat completions response. Is either a Union[str,
+        "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+        Default value is None.
+    :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+        ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+    :keyword seed: If specified, the system will make a best effort to sample deterministically
+        such that repeated requests with the
+        same seed and parameters should return the same result. Determinism is not guaranteed.
+        Default value is None.
+    :paramtype seed: int
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "AsyncTokenCredential"],
+        *,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default chat completions settings, to be applied in all future service calls
+        # unless overridden by arguments in the `complete` method.
+        self._frequency_penalty = frequency_penalty
+        self._presence_penalty = presence_penalty
+        self._temperature = temperature
+        self._top_p = top_p
+        self._max_tokens = max_tokens
+        self._internal_response_format = _get_internal_response_format(response_format)
+        self._stop = stop
+        self._tools = tools
+        self._tool_choice = tool_choice
+        self._seed = seed
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
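+    # Illustrative sketch (not generated code): constructor keywords become defaults that
+    # apply to every subsequent `complete` call unless overridden per call. Endpoint and
+    # key are hypothetical.
+    #
+    #     client = ChatCompletionsClient(
+    #         "https://<endpoint>",
+    #         AzureKeyCredential("<key>"),
+    #         temperature=0.0,
+    #         seed=42,
+    #     )
+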
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Literal[False] = False,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.ChatCompletions: ...
+
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Literal[True],
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> AsyncIterable[_models.StreamingChatCompletionsUpdate]: ...
+
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route
+        on the given endpoint.
+        When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting AsyncStreamingChatCompletions
+        object to get content updates as they arrive. By default, the response is a ChatCompletions object
+        (non-streaming).
+
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be an
+         AsyncStreamingChatCompletions. Otherwise the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+         To output JSON format adhering to a provided schema, set this to an object of the class
+         ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+        :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: The available tool definitions that the chat completions request can use,
+         including caller-defined functions. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed.
+         Default value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def complete(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def complete(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    # pylint:disable=client-method-missing-tracing-decorator-async
+    async def complete(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        messages: List[_models.ChatRequestMessage] = _Unset,
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        # pylint: disable=too-many-locals
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.AsyncStreamingChatCompletions`
+        object to get content updates as they arrive.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be an
+         AsyncStreamingChatCompletions. Otherwise the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+         To output JSON format adhering to a provided schema, set this to an object of the class
+         ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+        :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: The available tool definitions that the chat completions request can use,
+         including caller-defined functions. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed.
+         Default value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        internal_response_format = _get_internal_response_format(response_format)
+
+        if body is _Unset:
+            if messages is _Unset:
+                raise TypeError("missing required argument: messages")
+            body = {
+                "messages": messages,
+                "stream": stream,
+                "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty,
+                "max_tokens": max_tokens if max_tokens is not None else self._max_tokens,
+                "model": model if model is not None else self._model,
+                "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty,
+                "response_format": (
+                    internal_response_format if internal_response_format is not None else self._internal_response_format
+                ),
+                "seed": seed if seed is not None else self._seed,
+                "stop": stop if stop is not None else self._stop,
+                "temperature": temperature if temperature is not None else self._temperature,
+                "tool_choice": tool_choice if tool_choice is not None else self._tool_choice,
+                "tools": tools if tools is not None else self._tools,
+                "top_p": top_p if top_p is not None else self._top_p,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool):
+            stream = body["stream"]
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_chat_completions_complete_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = stream or False
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            return _models.AsyncStreamingChatCompletions(response)
+
+        return _deserialize(_models._patch.ChatCompletions, response.json())  # pylint: disable=protected-access
+
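+    # Illustrative sketch (not generated code): non-streaming vs. streaming calls.
+    # `UserMessage` comes from `azure.ai.inference.models`; the prompt text is
+    # hypothetical.
+    #
+    #     response = await client.complete(messages=[UserMessage(content="Hello")])
+    #     print(response.choices[0].message.content)
+    #
+    #     stream = await client.complete(messages=[UserMessage(content="Hello")], stream=True)
+    #     async for update in stream:
+    #         if update.choices:
+    #             print(update.choices[0].delta.content or "", end="")
+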
+    @distributed_trace_async
+    async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = await self._get_model_info(
+                    **kwargs
+                )  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
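+    # Illustrative sketch (not generated code): the result of `get_model_info` is cached
+    # on the client, so repeated calls hit the `/info` route only once.
+    #
+    #     info = await client.get_model_info()
+    #     print(info.model_name, info.model_type, info.model_provider_name)
+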
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()
+
+
+class EmbeddingsClient(EmbeddingsClientGenerated):
+    """EmbeddingsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+        have. Default value is None.
+    :paramtype dimensions: int
+    :keyword encoding_format: Optional. The desired format for the returned embeddings.
+        Known values are:
+        "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+    :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :keyword input_type: Optional. The type of the input. Known values are:
+        "text", "query", and "document". Default value is None.
+    :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "AsyncTokenCredential"],
+        *,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default embeddings settings, to be applied in all future service calls
+        # unless overridden by arguments in the `embed` method.
+        self._dimensions = dimensions
+        self._encoding_format = encoding_format
+        self._input_type = input_type
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
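+    # Illustrative sketch (not generated code): embedding a small batch of strings.
+    # Endpoint, key, and sample phrases are hypothetical.
+    #
+    #     client = EmbeddingsClient("https://<endpoint>", AzureKeyCredential("<key>"))
+    #     result = await client.embed(input=["first phrase", "second phrase"])
+    #     for item in result.data:
+    #         print(len(item.embedding))
+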
+    @overload
+    async def embed(
+        self,
+        *,
+        input: List[str],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @distributed_trace_async
+    async def embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[str] = _Unset,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        # pylint: disable=line-too-long
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "input": input,
+                "dimensions": dimensions if dimensions is not None else self._dimensions,
+                "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+                "input_type": input_type if input_type is not None else self._input_type,
+                "model": model if model is not None else self._model,
+            }
+            if model_extras:
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras:
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_embeddings_embed_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(
+                _models._patch.EmbeddingsResult, response.json()  # pylint: disable=protected-access
+            )
+
+        return deserialized  # type: ignore
+
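+    # A minimal usage sketch (illustrative only, not part of the generated client;
+    # the endpoint URL and key are hypothetical placeholders):
+    #
+    #     from azure.core.credentials import AzureKeyCredential
+    #     from azure.ai.inference.aio import EmbeddingsClient
+    #
+    #     async def example() -> None:
+    #         async with EmbeddingsClient(
+    #             endpoint="https://<your-endpoint>.inference.ai.azure.com",
+    #             credential=AzureKeyCredential("<your-key>"),
+    #         ) as client:
+    #             result = await client.embed(input=["first phrase", "second phrase"])
+    #             for item in result.data:
+    #                 print(item.index, len(item.embedding))
+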
+    @distributed_trace_async
+    async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with Serverless API or Managed Compute endpoints.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = await self._get_model_info(
+                    **kwargs
+                )  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
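+
+    # A possible usage sketch (illustrative only; assumes `client` is an
+    # EmbeddingsClient connected to a Serverless API or Managed Compute endpoint):
+    #
+    #     info = await client.get_model_info()
+    #     print(info.model_name, info.model_provider_name, info.model_type)
+    #
+    # The result is cached on the client, so later calls return the cached
+    # ModelInfo without another request to the ``/info`` route.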
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return super().__str__() + (f"\n{self._model_info}" if self._model_info else "")
+
+
+class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated):
+    """ImageEmbeddingsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+        have. Default value is None.
+    :paramtype dimensions: int
+    :keyword encoding_format: Optional. The desired format for the returned embeddings.
+        Known values are:
+        "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+    :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :keyword input_type: Optional. The type of the input. Known values are:
+        "text", "query", and "document". Default value is None.
+    :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "AsyncTokenCredential"],
+        *,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default embeddings settings, to be applied in all future service calls
+        # unless overridden by arguments in the `embed` method.
+        self._dimensions = dimensions
+        self._encoding_format = encoding_format
+        self._input_type = input_type
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute, and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
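+
+    # A construction sketch (illustrative only; the endpoint and key are
+    # hypothetical placeholders). With an AzureKeyCredential, the pipeline sends
+    # both "Authorization: Bearer <key>" and "api-key: <key>" as described above:
+    #
+    #     from azure.core.credentials import AzureKeyCredential
+    #
+    #     client = ImageEmbeddingsClient(
+    #         endpoint="https://<your-endpoint>.inference.ai.azure.com",
+    #         credential=AzureKeyCredential("<your-key>"),
+    #         dimensions=1024,  # stored default, applied to embed() calls unless overridden
+    #     )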
+
+    @overload
+    async def embed(
+        self,
+        *,
+        input: List[_models.ImageEmbeddingInput],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :keyword input: Input images to embed. To embed multiple images in a single request, pass
+         an array. The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Content type of the JSON request body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Content type of the binary request body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @distributed_trace_async
+    async def embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[_models.ImageEmbeddingInput] = _Unset,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        # pylint: disable=line-too-long
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes]
+         type that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input images to embed. To embed multiple images in a single request, pass
+         an array. The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "input": input,
+                "dimensions": dimensions if dimensions is not None else self._dimensions,
+                "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+                "input_type": input_type if input_type is not None else self._input_type,
+                "model": model if model is not None else self._model,
+            }
+            if model_extras:
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras:
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_image_embeddings_embed_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(
+                _models._patch.EmbeddingsResult, response.json()  # pylint: disable=protected-access
+            )
+
+        return deserialized  # type: ignore
+
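+    # A minimal usage sketch (illustrative only; assumes `client` is an open
+    # ImageEmbeddingsClient, "sample.png" is a hypothetical local file, and
+    # ImageEmbeddingInput.load is available in azure.ai.inference.models to
+    # base64-encode the image):
+    #
+    #     from azure.ai.inference.models import ImageEmbeddingInput
+    #
+    #     image = ImageEmbeddingInput.load(image_file="sample.png", image_format="png")
+    #     result = await client.embed(input=[image])
+    #     print(len(result.data[0].embedding))
+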
+    @distributed_trace_async
+    async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with Serverless API or Managed Compute endpoints.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = await self._get_model_info(
+                    **kwargs
+                )  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return super().__str__() + (f"\n{self._model_info}" if self._model_info else "")
+
+
+__all__: List[str] = [
+    "load_client",
+    "ChatCompletionsClient",
+    "EmbeddingsClient",
+    "ImageEmbeddingsClient",
+]  # Add all objects you want publicly available to users at this package level
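+
+# A possible usage sketch for `load_client` (illustrative only; `load_client` is
+# defined earlier in this file and picks the client type from the endpoint's
+# ``/info`` route, so it requires an endpoint that supports that route):
+#
+#     client = await load_client(endpoint=endpoint, credential=credential)
+#     print(type(client).__name__)  # e.g. "EmbeddingsClient"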
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py
new file mode 100644
index 00000000..b430582c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py
@@ -0,0 +1,47 @@
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from abc import ABC
+from typing import TYPE_CHECKING
+
+from ._configuration import (
+    ChatCompletionsClientConfiguration,
+    EmbeddingsClientConfiguration,
+    ImageEmbeddingsClientConfiguration,
+)
+
+if TYPE_CHECKING:
+    from azure.core import AsyncPipelineClient
+
+    from .._serialization import Deserializer, Serializer
+
+
+class ChatCompletionsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "AsyncPipelineClient"
+    _config: ChatCompletionsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
+
+
+class EmbeddingsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "AsyncPipelineClient"
+    _config: EmbeddingsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
+
+
+class ImageEmbeddingsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "AsyncPipelineClient"
+    _config: ImageEmbeddingsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
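+
+
+# These mixins exist so type checkers can resolve attributes such as
+# `self._config.api_version` inside the generated operation methods. A sketch of
+# the pattern (illustrative only, mirroring how the generated code uses them):
+#
+#     class _ExampleOperations(ChatCompletionsClientMixinABC):
+#         async def _example(self) -> None:
+#             # `_config` is typed via the mixin's annotation above
+#             print(self._config.api_version)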