Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/inference')
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/__init__.py  36
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_client.py  265
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_configuration.py  188
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_model_base.py  1235
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_operations/__init__.py  29
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_operations.py  912
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_patch.py  20
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py  1387
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_serialization.py  2050
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_vendor.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/_version.py  9
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py  280
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py  197
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py  29
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py  781
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py  20
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py  1331
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/models/__init__.py  96
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/models/_enums.py  146
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/models/_models.py  1458
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/models/_patch.py  576
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/__init__.py  8
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_core.py  312
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_invoker.py  295
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py  671
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_parsers.py  156
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_patch.py  124
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_prompty_utils.py  415
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_renderers.py  30
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_tracer.py  316
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_utils.py  100
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/py.typed  1
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/inference/tracing.py  850
35 files changed, 14450 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/__init__.py
new file mode 100644
index 00000000..b7537d16
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/__init__.py
@@ -0,0 +1,36 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+from ._client import ChatCompletionsClient  # type: ignore
+from ._client import EmbeddingsClient  # type: ignore
+from ._client import ImageEmbeddingsClient  # type: ignore
+from ._version import VERSION
+
+__version__ = VERSION
+
+try:
+    from ._patch import __all__ as _patch_all
+    from ._patch import *
+except ImportError:
+    _patch_all = []
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "ChatCompletionsClient",
+    "EmbeddingsClient",
+    "ImageEmbeddingsClient",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+
+_patch_sdk()
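
Taken together, this __init__.py re-exports ChatCompletionsClient, EmbeddingsClient, and ImageEmbeddingsClient at the package root and applies _patch.patch_sdk() last, so hand-written customizations in _patch.py can override the generated surface. As a minimal usage sketch (the endpoint and key below are placeholders, not values from this diff):

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.inference import ChatCompletionsClient

    # Placeholder endpoint and key; substitute your deployment's values.
    client = ChatCompletionsClient(
        endpoint="https://<your-host>.inference.ai.azure.com",
        credential=AzureKeyCredential("<your-api-key>"),
    )
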
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_client.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_client.py
new file mode 100644
index 00000000..0cde08ff
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_client.py
@@ -0,0 +1,265 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from copy import deepcopy
+from typing import Any, TYPE_CHECKING, Union
+from typing_extensions import Self
+
+from azure.core import PipelineClient
+from azure.core.credentials import AzureKeyCredential
+from azure.core.pipeline import policies
+from azure.core.rest import HttpRequest, HttpResponse
+
+from ._configuration import (
+    ChatCompletionsClientConfiguration,
+    EmbeddingsClientConfiguration,
+    ImageEmbeddingsClientConfiguration,
+)
+from ._operations import (
+    ChatCompletionsClientOperationsMixin,
+    EmbeddingsClientOperationsMixin,
+    ImageEmbeddingsClientOperationsMixin,
+)
+from ._serialization import Deserializer, Serializer
+
+if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+
+
+class ChatCompletionsClient(ChatCompletionsClientOperationsMixin):
+    """ChatCompletionsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
+        _endpoint = "{endpoint}"
+        self._config = ChatCompletionsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: PipelineClient = PipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(self, request: HttpRequest, *, stream: bool = False, **kwargs: Any) -> HttpResponse:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        <HttpRequest [GET], url: 'https://www.example.org/'>
+        >>> response = client.send_request(request)
+        <HttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.HttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    def close(self) -> None:
+        self._client.close()
+
+    def __enter__(self) -> Self:
+        self._client.__enter__()
+        return self
+
+    def __exit__(self, *exc_details: Any) -> None:
+        self._client.__exit__(*exc_details)
+
+
+class EmbeddingsClient(EmbeddingsClientOperationsMixin):
+    """EmbeddingsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
+        _endpoint = "{endpoint}"
+        self._config = EmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: PipelineClient = PipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(self, request: HttpRequest, *, stream: bool = False, **kwargs: Any) -> HttpResponse:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        <HttpRequest [GET], url: 'https://www.example.org/'>
+        >>> response = client.send_request(request)
+        <HttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.HttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    def close(self) -> None:
+        self._client.close()
+
+    def __enter__(self) -> Self:
+        self._client.__enter__()
+        return self
+
+    def __exit__(self, *exc_details: Any) -> None:
+        self._client.__exit__(*exc_details)
+
+
+class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin):
+    """ImageEmbeddingsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
+        _endpoint = "{endpoint}"
+        self._config = ImageEmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: PipelineClient = PipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(self, request: HttpRequest, *, stream: bool = False, **kwargs: Any) -> HttpResponse:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        <HttpRequest [GET], url: 'https://www.example.org/'>
+        >>> response = client.send_request(request)
+        <HttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.HttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    def close(self) -> None:
+        self._client.close()
+
+    def __enter__(self) -> Self:
+        self._client.__enter__()
+        return self
+
+    def __exit__(self, *exc_details: Any) -> None:
+        self._client.__exit__(*exc_details)
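
Each client also exposes send_request as an escape hatch: the request URL is formatted against the configured endpoint, run through the full policy pipeline, and returned without any error handling. A sketch of that flow, assuming a client constructed as above (the service path and payload are illustrative, not taken from this diff):

    from azure.core.rest import HttpRequest

    # Illustrative request; the path and JSON body are placeholders.
    request = HttpRequest("POST", "/chat/completions", json={"messages": []})

    # The clients are context managers (__enter__/__exit__ above), so the
    # underlying PipelineClient is closed deterministically.
    with client:
        response = client.send_request(request)
        response.raise_for_status()  # send_request itself raises nothing on 4xx/5xx
        print(response.json())
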
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_configuration.py
new file mode 100644
index 00000000..894ec657
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_configuration.py
@@ -0,0 +1,188 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from typing import Any, TYPE_CHECKING, Union
+
+from azure.core.credentials import AzureKeyCredential
+from azure.core.pipeline import policies
+
+from ._version import VERSION
+
+if TYPE_CHECKING:
+    from azure.core.credentials import TokenCredential
+
+
+class ChatCompletionsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for ChatCompletionsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.RedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.RetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
+
+
+class EmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for EmbeddingsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.RedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.RetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
+
+
+class ImageEmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for ImageEmbeddingsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if isinstance(self.credential, AzureKeyCredential):
+            return policies.AzureKeyCredentialPolicy(self.credential, "api-key", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.BearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.RedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.RetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
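
All three configuration classes are identical in shape: _infer_policy picks an authentication policy from the credential type, and _configure lets any pipeline policy be replaced via keyword arguments, falling back to azure-core defaults. Note that the second isinstance(self.credential, AzureKeyCredential) branch in _infer_policy is unreachable as written, so key credentials always authenticate with an "Authorization: Bearer" header rather than "api-key". A sketch of overriding the defaults at construction time (the endpoint and scope are placeholders; credential_scopes and retry_policy are the kwargs consumed above):

    from azure.identity import DefaultAzureCredential
    from azure.core.pipeline.policies import RetryPolicy
    from azure.ai.inference import ChatCompletionsClient

    # With a token credential, _infer_policy builds a
    # BearerTokenCredentialPolicy over the given scopes.
    client = ChatCompletionsClient(
        endpoint="https://<your-host>",
        credential=DefaultAzureCredential(),
        credential_scopes=["https://<your-scope>/.default"],  # default: https://ml.azure.com/.default
        retry_policy=RetryPolicy(retry_total=5),
    )
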
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_model_base.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_model_base.py
new file mode 100644
index 00000000..359ecebe
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_model_base.py
@@ -0,0 +1,1235 @@
+# pylint: disable=too-many-lines,arguments-differ,signature-differs,no-member
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+# pylint: disable=protected-access, broad-except
+
+import copy
+import calendar
+import decimal
+import functools
+import sys
+import logging
+import base64
+import re
+import typing
+import enum
+import email.utils
+from datetime import datetime, date, time, timedelta, timezone
+from json import JSONEncoder
+import xml.etree.ElementTree as ET
+from typing_extensions import Self
+import isodate
+from azure.core.exceptions import DeserializationError
+from azure.core import CaseInsensitiveEnumMeta
+from azure.core.pipeline import PipelineResponse
+from azure.core.serialization import _Null
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping
+
+_LOGGER = logging.getLogger(__name__)
+
+__all__ = ["SdkJSONEncoder", "Model", "rest_field", "rest_discriminator"]
+
+TZ_UTC = timezone.utc
+_T = typing.TypeVar("_T")
+
+
+def _timedelta_as_isostr(td: timedelta) -> str:
+    """Converts a datetime.timedelta object into an ISO 8601 formatted string, e.g. 'P4DT12H30M05S'
+
+    Function adapted from the Tin Can Python project: https://github.com/RusticiSoftware/TinCanPython
+
+    :param timedelta td: The timedelta to convert
+    :rtype: str
+    :return: ISO8601 version of this timedelta
+    """
+
+    # Split seconds to larger units
+    seconds = td.total_seconds()
+    minutes, seconds = divmod(seconds, 60)
+    hours, minutes = divmod(minutes, 60)
+    days, hours = divmod(hours, 24)
+
+    days, hours, minutes = list(map(int, (days, hours, minutes)))
+    seconds = round(seconds, 6)
+
+    # Build date
+    date_str = ""
+    if days:
+        date_str = "%sD" % days
+
+    if hours or minutes or seconds:
+        # Build time
+        time_str = "T"
+
+        # Hours
+        bigger_exists = date_str or hours
+        if bigger_exists:
+            time_str += "{:02}H".format(hours)
+
+        # Minutes
+        bigger_exists = bigger_exists or minutes
+        if bigger_exists:
+            time_str += "{:02}M".format(minutes)
+
+        # Seconds
+        try:
+            if seconds.is_integer():
+                seconds_string = "{:02}".format(int(seconds))
+            else:
+                # 9 chars long w/ leading 0, 6 digits after decimal
+                seconds_string = "%09.6f" % seconds
+                # Remove trailing zeros
+                seconds_string = seconds_string.rstrip("0")
+        except AttributeError:  # int.is_integer() raises
+            seconds_string = "{:02}".format(seconds)
+
+        time_str += "{}S".format(seconds_string)
+    else:
+        time_str = ""
+
+    return "P" + date_str + time_str
+
+
+def _serialize_bytes(o, format: typing.Optional[str] = None) -> str:
+    encoded = base64.b64encode(o).decode()
+    if format == "base64url":
+        return encoded.strip("=").replace("+", "-").replace("/", "_")
+    return encoded
+
+
+def _serialize_datetime(o, format: typing.Optional[str] = None):
+    if hasattr(o, "year") and hasattr(o, "hour"):
+        if format == "rfc7231":
+            return email.utils.format_datetime(o, usegmt=True)
+        if format == "unix-timestamp":
+            return int(calendar.timegm(o.utctimetuple()))
+
+        # astimezone() fails for naive times in Python 2.7, so make sure o is aware (tzinfo is set)
+        if not o.tzinfo:
+            iso_formatted = o.replace(tzinfo=TZ_UTC).isoformat()
+        else:
+            iso_formatted = o.astimezone(TZ_UTC).isoformat()
+        # Replace the trailing "+00:00" UTC offset with "Z" (RFC 3339: https://www.ietf.org/rfc/rfc3339.txt)
+        return iso_formatted.replace("+00:00", "Z")
+    # Next try datetime.date or datetime.time
+    return o.isoformat()
+
+
+def _is_readonly(p):
+    try:
+        return p._visibility == ["read"]
+    except AttributeError:
+        return False
+
+
+class SdkJSONEncoder(JSONEncoder):
+    """A JSON encoder that's capable of serializing datetime objects and bytes."""
+
+    def __init__(self, *args, exclude_readonly: bool = False, format: typing.Optional[str] = None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.exclude_readonly = exclude_readonly
+        self.format = format
+
+    def default(self, o):  # pylint: disable=too-many-return-statements
+        if _is_model(o):
+            if self.exclude_readonly:
+                readonly_props = [p._rest_name for p in o._attr_to_rest_field.values() if _is_readonly(p)]
+                return {k: v for k, v in o.items() if k not in readonly_props}
+            return dict(o.items())
+        try:
+            return super(SdkJSONEncoder, self).default(o)
+        except TypeError:
+            if isinstance(o, _Null):
+                return None
+            if isinstance(o, decimal.Decimal):
+                return float(o)
+            if isinstance(o, (bytes, bytearray)):
+                return _serialize_bytes(o, self.format)
+            try:
+                # First try datetime.datetime
+                return _serialize_datetime(o, self.format)
+            except AttributeError:
+                pass
+            # Last, try datetime.timedelta
+            try:
+                return _timedelta_as_isostr(o)
+            except AttributeError:
+                # This will be raised when it hits value.total_seconds in the method above
+                pass
+            return super(SdkJSONEncoder, self).default(o)
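+
+
+# Usage sketch (illustrative, not generated code): because Model is a
+# non-dict Mapping, json.dumps falls through to default(), so this encoder
+# plugs into the standard library directly, e.g.
+#
+#     import json
+#     payload = json.dumps(model, cls=SdkJSONEncoder, exclude_readonly=True)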
+
+
+_VALID_DATE = re.compile(r"\d{4}[-]\d{2}[-]\d{2}T\d{2}:\d{2}:\d{2}" + r"\.?\d*Z?[-+]?[\d{2}]?:?[\d{2}]?")
+_VALID_RFC7231 = re.compile(
+    r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s\d{2}\s"
+    r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{4}\s\d{2}:\d{2}:\d{2}\sGMT"
+)
+
+
+def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
+    """Deserialize ISO-8601 formatted string into Datetime object.
+
+    :param str attr: response string to be deserialized.
+    :rtype: ~datetime.datetime
+    :returns: The datetime object from that input
+    """
+    if isinstance(attr, datetime):
+        # already deserialized
+        return attr
+    attr = attr.upper()
+    match = _VALID_DATE.match(attr)
+    if not match:
+        raise ValueError("Invalid datetime string: " + attr)
+
+    check_decimal = attr.split(".")
+    if len(check_decimal) > 1:
+        decimal_str = ""
+        for digit in check_decimal[1]:
+            if digit.isdigit():
+                decimal_str += digit
+            else:
+                break
+        if len(decimal_str) > 6:
+            attr = attr.replace(decimal_str, decimal_str[0:6])
+
+    date_obj = isodate.parse_datetime(attr)
+    test_utc = date_obj.utctimetuple()
+    if test_utc.tm_year > 9999 or test_utc.tm_year < 1:
+        raise OverflowError("Hit max or min date")
+    return date_obj
+
+
+def _deserialize_datetime_rfc7231(attr: typing.Union[str, datetime]) -> datetime:
+    """Deserialize RFC7231 formatted string into Datetime object.
+
+    :param str attr: response string to be deserialized.
+    :rtype: ~datetime.datetime
+    :returns: The datetime object from that input
+    """
+    if isinstance(attr, datetime):
+        # already deserialized
+        return attr
+    match = _VALID_RFC7231.match(attr)
+    if not match:
+        raise ValueError("Invalid datetime string: " + attr)
+
+    return email.utils.parsedate_to_datetime(attr)
+
+
+def _deserialize_datetime_unix_timestamp(attr: typing.Union[float, datetime]) -> datetime:
+    """Deserialize unix timestamp into Datetime object.
+
+    :param float attr: unix timestamp to be deserialized.
+    :rtype: ~datetime.datetime
+    :returns: The datetime object from that input
+    """
+    if isinstance(attr, datetime):
+        # already deserialized
+        return attr
+    return datetime.fromtimestamp(attr, TZ_UTC)
+
+
+def _deserialize_date(attr: typing.Union[str, date]) -> date:
+    """Deserialize ISO-8601 formatted string into Date object.
+    :param str attr: response string to be deserialized.
+    :rtype: date
+    :returns: The date object from that input
+    """
+    # This must NOT use defaultmonth/defaultday. Using None ensures this raises an exception.
+    if isinstance(attr, date):
+        return attr
+    return isodate.parse_date(attr, defaultmonth=None, defaultday=None)  # type: ignore
+
+
+def _deserialize_time(attr: typing.Union[str, time]) -> time:
+    """Deserialize ISO-8601 formatted string into time object.
+
+    :param str attr: response string to be deserialized.
+    :rtype: datetime.time
+    :returns: The time object from that input
+    """
+    if isinstance(attr, time):
+        return attr
+    return isodate.parse_time(attr)
+
+
+def _deserialize_bytes(attr):
+    if isinstance(attr, (bytes, bytearray)):
+        return attr
+    return bytes(base64.b64decode(attr))
+
+
+def _deserialize_bytes_base64(attr):
+    if isinstance(attr, (bytes, bytearray)):
+        return attr
+    padding = "=" * (3 - (len(attr) + 3) % 4)  # type: ignore
+    attr = attr + padding  # type: ignore
+    encoded = attr.replace("-", "+").replace("_", "/")
+    return bytes(base64.b64decode(encoded))
+
+
+def _deserialize_duration(attr):
+    if isinstance(attr, timedelta):
+        return attr
+    return isodate.parse_duration(attr)
+
+
+def _deserialize_decimal(attr):
+    if isinstance(attr, decimal.Decimal):
+        return attr
+    return decimal.Decimal(str(attr))
+
+
+def _deserialize_int_as_str(attr):
+    if isinstance(attr, int):
+        return attr
+    return int(attr)
+
+
+_DESERIALIZE_MAPPING = {
+    datetime: _deserialize_datetime,
+    date: _deserialize_date,
+    time: _deserialize_time,
+    bytes: _deserialize_bytes,
+    bytearray: _deserialize_bytes,
+    timedelta: _deserialize_duration,
+    typing.Any: lambda x: x,
+    decimal.Decimal: _deserialize_decimal,
+}
+
+_DESERIALIZE_MAPPING_WITHFORMAT = {
+    "rfc3339": _deserialize_datetime,
+    "rfc7231": _deserialize_datetime_rfc7231,
+    "unix-timestamp": _deserialize_datetime_unix_timestamp,
+    "base64": _deserialize_bytes,
+    "base64url": _deserialize_bytes_base64,
+}
+
+
+def get_deserializer(annotation: typing.Any, rf: typing.Optional["_RestField"] = None):
+    if annotation is int and rf and rf._format == "str":
+        return _deserialize_int_as_str
+    if rf and rf._format:
+        return _DESERIALIZE_MAPPING_WITHFORMAT.get(rf._format)
+    return _DESERIALIZE_MAPPING.get(annotation)  # pyright: ignore
+
+
+def _get_type_alias_type(module_name: str, alias_name: str):
+    types = {
+        k: v
+        for k, v in sys.modules[module_name].__dict__.items()
+        if isinstance(v, typing._GenericAlias)  # type: ignore
+    }
+    if alias_name not in types:
+        return alias_name
+    return types[alias_name]
+
+
+def _get_model(module_name: str, model_name: str):
+    models = {k: v for k, v in sys.modules[module_name].__dict__.items() if isinstance(v, type)}
+    module_end = module_name.rsplit(".", 1)[0]
+    models.update({k: v for k, v in sys.modules[module_end].__dict__.items() if isinstance(v, type)})
+    if isinstance(model_name, str):
+        model_name = model_name.split(".")[-1]
+    if model_name not in models:
+        return model_name
+    return models[model_name]
+
+
+_UNSET = object()
+
+
+class _MyMutableMapping(MutableMapping[str, typing.Any]):  # pylint: disable=unsubscriptable-object
+    def __init__(self, data: typing.Dict[str, typing.Any]) -> None:
+        self._data = data
+
+    def __contains__(self, key: typing.Any) -> bool:
+        return key in self._data
+
+    def __getitem__(self, key: str) -> typing.Any:
+        return self._data.__getitem__(key)
+
+    def __setitem__(self, key: str, value: typing.Any) -> None:
+        self._data.__setitem__(key, value)
+
+    def __delitem__(self, key: str) -> None:
+        self._data.__delitem__(key)
+
+    def __iter__(self) -> typing.Iterator[typing.Any]:
+        return self._data.__iter__()
+
+    def __len__(self) -> int:
+        return self._data.__len__()
+
+    def __ne__(self, other: typing.Any) -> bool:
+        return not self.__eq__(other)
+
+    def keys(self) -> typing.KeysView[str]:
+        """
+        :returns: a set-like object providing a view on D's keys
+        :rtype: ~typing.KeysView
+        """
+        return self._data.keys()
+
+    def values(self) -> typing.ValuesView[typing.Any]:
+        """
+        :returns: an object providing a view on D's values
+        :rtype: ~typing.ValuesView
+        """
+        return self._data.values()
+
+    def items(self) -> typing.ItemsView[str, typing.Any]:
+        """
+        :returns: set-like object providing a view on D's items
+        :rtype: ~typing.ItemsView
+        """
+        return self._data.items()
+
+    def get(self, key: str, default: typing.Any = None) -> typing.Any:
+        """
+        Get the value for key if key is in the dictionary, else default.
+        :param str key: The key to look up.
+        :param any default: The value to return if key is not in the dictionary. Defaults to None
+        :returns: D[k] if k in D, else d.
+        :rtype: any
+        """
+        try:
+            return self[key]
+        except KeyError:
+            return default
+
+    @typing.overload
+    def pop(self, key: str) -> typing.Any: ...
+
+    @typing.overload
+    def pop(self, key: str, default: _T) -> _T: ...
+
+    @typing.overload
+    def pop(self, key: str, default: typing.Any) -> typing.Any: ...
+
+    def pop(self, key: str, default: typing.Any = _UNSET) -> typing.Any:
+        """
+        Removes specified key and return the corresponding value.
+        :param str key: The key to pop.
+        :param any default: The value to return if key is not in the dictionary
+        :returns: The value corresponding to the key.
+        :rtype: any
+        :raises KeyError: If key is not found and default is not given.
+        """
+        if default is _UNSET:
+            return self._data.pop(key)
+        return self._data.pop(key, default)
+
+    def popitem(self) -> typing.Tuple[str, typing.Any]:
+        """
+        Removes and returns some (key, value) pair
+        :returns: The (key, value) pair.
+        :rtype: tuple
+        :raises KeyError: if D is empty.
+        """
+        return self._data.popitem()
+
+    def clear(self) -> None:
+        """
+        Remove all items from D.
+        """
+        self._data.clear()
+
+    def update(self, *args: typing.Any, **kwargs: typing.Any) -> None:
+        """
+        Updates D from mapping/iterable E and F.
+        :param any args: Either a mapping object or an iterable of key-value pairs.
+        """
+        self._data.update(*args, **kwargs)
+
+    @typing.overload
+    def setdefault(self, key: str, default: None = None) -> None: ...
+
+    @typing.overload
+    def setdefault(self, key: str, default: typing.Any) -> typing.Any: ...
+
+    def setdefault(self, key: str, default: typing.Any = _UNSET) -> typing.Any:
+        """
+        Same as calling D.get(k, d), and setting D[k]=d if k not found
+        :param str key: The key to look up.
+        :param any default: The value to set if key is not in the dictionary
+        :returns: D[k] if k in D, else d.
+        :rtype: any
+        """
+        if default is _UNSET:
+            return self._data.setdefault(key)
+        return self._data.setdefault(key, default)
+
+    def __eq__(self, other: typing.Any) -> bool:
+        try:
+            other_model = self.__class__(other)
+        except Exception:
+            return False
+        return self._data == other_model._data
+
+    def __repr__(self) -> str:
+        return str(self._data)
+
+
+def _is_model(obj: typing.Any) -> bool:
+    return getattr(obj, "_is_model", False)
+
+
+def _serialize(o, format: typing.Optional[str] = None):  # pylint: disable=too-many-return-statements
+    if isinstance(o, list):
+        return [_serialize(x, format) for x in o]
+    if isinstance(o, dict):
+        return {k: _serialize(v, format) for k, v in o.items()}
+    if isinstance(o, set):
+        return {_serialize(x, format) for x in o}
+    if isinstance(o, tuple):
+        return tuple(_serialize(x, format) for x in o)
+    if isinstance(o, (bytes, bytearray)):
+        return _serialize_bytes(o, format)
+    if isinstance(o, decimal.Decimal):
+        return float(o)
+    if isinstance(o, enum.Enum):
+        return o.value
+    if isinstance(o, int):
+        if format == "str":
+            return str(o)
+        return o
+    try:
+        # First try datetime.datetime
+        return _serialize_datetime(o, format)
+    except AttributeError:
+        pass
+    # Last, try datetime.timedelta
+    try:
+        return _timedelta_as_isostr(o)
+    except AttributeError:
+        # This will be raised when it hits value.total_seconds in the method above
+        pass
+    return o
+
+
+def _get_rest_field(
+    attr_to_rest_field: typing.Dict[str, "_RestField"], rest_name: str
+) -> typing.Optional["_RestField"]:
+    try:
+        return next(rf for rf in attr_to_rest_field.values() if rf._rest_name == rest_name)
+    except StopIteration:
+        return None
+
+
+def _create_value(rf: typing.Optional["_RestField"], value: typing.Any) -> typing.Any:
+    if not rf:
+        return _serialize(value, None)
+    if rf._is_multipart_file_input:
+        return value
+    if rf._is_model:
+        return _deserialize(rf._type, value)
+    if isinstance(value, ET.Element):
+        value = _deserialize(rf._type, value)
+    return _serialize(value, rf._format)
+
+
+class Model(_MyMutableMapping):
+    _is_model = True
+    # label whether current class's _attr_to_rest_field has been calculated
+    # could not see _attr_to_rest_field directly because subclass inherits it from parent class
+    _calculated: typing.Set[str] = set()
+
+    def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
+        class_name = self.__class__.__name__
+        if len(args) > 1:
+            raise TypeError(f"{class_name}.__init__() takes 2 positional arguments but {len(args) + 1} were given")
+        dict_to_pass = {
+            rest_field._rest_name: rest_field._default
+            for rest_field in self._attr_to_rest_field.values()
+            if rest_field._default is not _UNSET
+        }
+        if args:  # pylint: disable=too-many-nested-blocks
+            if isinstance(args[0], ET.Element):
+                existed_attr_keys = []
+                model_meta = getattr(self, "_xml", {})
+
+                for rf in self._attr_to_rest_field.values():
+                    prop_meta = getattr(rf, "_xml", {})
+                    xml_name = prop_meta.get("name", rf._rest_name)
+                    xml_ns = prop_meta.get("ns", model_meta.get("ns", None))
+                    if xml_ns:
+                        xml_name = "{" + xml_ns + "}" + xml_name
+
+                    # attribute
+                    if prop_meta.get("attribute", False) and args[0].get(xml_name) is not None:
+                        existed_attr_keys.append(xml_name)
+                        dict_to_pass[rf._rest_name] = _deserialize(rf._type, args[0].get(xml_name))
+                        continue
+
+                    # unwrapped element is array
+                    if prop_meta.get("unwrapped", False):
+                        # unwrapped array could either use prop items meta/prop meta
+                        if prop_meta.get("itemsName"):
+                            xml_name = prop_meta.get("itemsName")
+                            xml_ns = prop_meta.get("itemNs")
+                            if xml_ns:
+                                xml_name = "{" + xml_ns + "}" + xml_name
+                        items = args[0].findall(xml_name)  # pyright: ignore
+                        if len(items) > 0:
+                            existed_attr_keys.append(xml_name)
+                            dict_to_pass[rf._rest_name] = _deserialize(rf._type, items)
+                        continue
+
+                    # text element is primitive type
+                    if prop_meta.get("text", False):
+                        if args[0].text is not None:
+                            dict_to_pass[rf._rest_name] = _deserialize(rf._type, args[0].text)
+                        continue
+
+                    # wrapped element could be normal property or array, it should only have one element
+                    item = args[0].find(xml_name)
+                    if item is not None:
+                        existed_attr_keys.append(xml_name)
+                        dict_to_pass[rf._rest_name] = _deserialize(rf._type, item)
+
+                # rest thing is additional properties
+                for e in args[0]:
+                    if e.tag not in existed_attr_keys:
+                        dict_to_pass[e.tag] = _convert_element(e)
+            else:
+                dict_to_pass.update(
+                    {k: _create_value(_get_rest_field(self._attr_to_rest_field, k), v) for k, v in args[0].items()}
+                )
+        else:
+            non_attr_kwargs = [k for k in kwargs if k not in self._attr_to_rest_field]
+            if non_attr_kwargs:
+                # actual type errors only throw the first wrong keyword arg they see, so following that.
+                raise TypeError(f"{class_name}.__init__() got an unexpected keyword argument '{non_attr_kwargs[0]}'")
+            dict_to_pass.update(
+                {
+                    self._attr_to_rest_field[k]._rest_name: _create_value(self._attr_to_rest_field[k], v)
+                    for k, v in kwargs.items()
+                    if v is not None
+                }
+            )
+        super().__init__(dict_to_pass)
+
+    def copy(self) -> "Model":
+        return Model(self.__dict__)
+
+    def __new__(cls, *args: typing.Any, **kwargs: typing.Any) -> Self:
+        if f"{cls.__module__}.{cls.__qualname__}" not in cls._calculated:
+            # we know the last nine classes in mro are going to be 'Model', '_MyMutableMapping', 'MutableMapping',
+            # 'Mapping', 'Collection', 'Sized', 'Iterable', 'Container' and 'object'
+            mros = cls.__mro__[:-9][::-1]  # ignore parents, and reverse the mro order
+            attr_to_rest_field: typing.Dict[str, _RestField] = {  # map attribute name to rest_field property
+                k: v for mro_class in mros for k, v in mro_class.__dict__.items() if k[0] != "_" and hasattr(v, "_type")
+            }
+            annotations = {
+                k: v
+                for mro_class in mros
+                if hasattr(mro_class, "__annotations__")
+                for k, v in mro_class.__annotations__.items()
+            }
+            for attr, rf in attr_to_rest_field.items():
+                rf._module = cls.__module__
+                if not rf._type:
+                    rf._type = rf._get_deserialize_callable_from_annotation(annotations.get(attr, None))
+                if not rf._rest_name_input:
+                    rf._rest_name_input = attr
+            cls._attr_to_rest_field: typing.Dict[str, _RestField] = dict(attr_to_rest_field.items())
+            cls._calculated.add(f"{cls.__module__}.{cls.__qualname__}")
+
+        return super().__new__(cls)  # pylint: disable=no-value-for-parameter
+
+    def __init_subclass__(cls, discriminator: typing.Optional[str] = None) -> None:
+        for base in cls.__bases__:
+            if hasattr(base, "__mapping__"):
+                base.__mapping__[discriminator or cls.__name__] = cls  # type: ignore
+
+    @classmethod
+    def _get_discriminator(cls, exist_discriminators) -> typing.Optional["_RestField"]:
+        for v in cls.__dict__.values():
+            if isinstance(v, _RestField) and v._is_discriminator and v._rest_name not in exist_discriminators:
+                return v
+        return None
+
+    @classmethod
+    def _deserialize(cls, data, exist_discriminators):
+        if not hasattr(cls, "__mapping__"):
+            return cls(data)
+        discriminator = cls._get_discriminator(exist_discriminators)
+        if discriminator is None:
+            return cls(data)
+        exist_discriminators.append(discriminator._rest_name)
+        if isinstance(data, ET.Element):
+            model_meta = getattr(cls, "_xml", {})
+            prop_meta = getattr(discriminator, "_xml", {})
+            xml_name = prop_meta.get("name", discriminator._rest_name)
+            xml_ns = prop_meta.get("ns", model_meta.get("ns", None))
+            if xml_ns:
+                xml_name = "{" + xml_ns + "}" + xml_name
+
+            if data.get(xml_name) is not None:
+                discriminator_value = data.get(xml_name)
+            else:
+                discriminator_value = data.find(xml_name).text  # pyright: ignore
+        else:
+            discriminator_value = data.get(discriminator._rest_name)
+        mapped_cls = cls.__mapping__.get(discriminator_value, cls)  # pyright: ignore
+        return mapped_cls._deserialize(data, exist_discriminators)
+
+    def as_dict(self, *, exclude_readonly: bool = False) -> typing.Dict[str, typing.Any]:
+        """Return a dict that can be turned into json using json.dump.
+
+        :keyword bool exclude_readonly: Whether to remove the readonly properties.
+        :returns: A dict JSON compatible object
+        :rtype: dict
+        """
+
+        result = {}
+        readonly_props = []
+        if exclude_readonly:
+            readonly_props = [p._rest_name for p in self._attr_to_rest_field.values() if _is_readonly(p)]
+        for k, v in self.items():
+            if exclude_readonly and k in readonly_props:  # pyright: ignore
+                continue
+            is_multipart_file_input = False
+            try:
+                is_multipart_file_input = next(
+                    rf for rf in self._attr_to_rest_field.values() if rf._rest_name == k
+                )._is_multipart_file_input
+            except StopIteration:
+                pass
+            result[k] = v if is_multipart_file_input else Model._as_dict_value(v, exclude_readonly=exclude_readonly)
+        return result
+
+    @staticmethod
+    def _as_dict_value(v: typing.Any, exclude_readonly: bool = False) -> typing.Any:
+        if v is None or isinstance(v, _Null):
+            return None
+        if isinstance(v, (list, tuple, set)):
+            return type(v)(Model._as_dict_value(x, exclude_readonly=exclude_readonly) for x in v)
+        if isinstance(v, dict):
+            return {dk: Model._as_dict_value(dv, exclude_readonly=exclude_readonly) for dk, dv in v.items()}
+        return v.as_dict(exclude_readonly=exclude_readonly) if hasattr(v, "as_dict") else v
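+
+
+# Usage sketch (illustrative, not generated code): the generated classes in
+# models/_models.py subclass Model and declare attributes with rest_field,
+# which is defined further down in this module (see __all__ above). E.g.:
+#
+#     class Example(Model):
+#         created_at: datetime = rest_field(name="createdAt", format="rfc3339")
+#
+#     e = Example({"createdAt": "2024-05-01T00:00:00Z"})
+#     e.created_at   # deserialized to a timezone-aware datetime on access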
+
+
+def _deserialize_model(model_deserializer: typing.Optional[typing.Callable], obj):
+    if _is_model(obj):
+        return obj
+    return _deserialize(model_deserializer, obj)
+
+
+def _deserialize_with_optional(if_obj_deserializer: typing.Optional[typing.Callable], obj):
+    if obj is None:
+        return obj
+    return _deserialize_with_callable(if_obj_deserializer, obj)
+
+
+def _deserialize_with_union(deserializers, obj):
+    for deserializer in deserializers:
+        try:
+            return _deserialize(deserializer, obj)
+        except DeserializationError:
+            pass
+    raise DeserializationError()
+
+
+def _deserialize_dict(
+    value_deserializer: typing.Optional[typing.Callable],
+    module: typing.Optional[str],
+    obj: typing.Dict[typing.Any, typing.Any],
+):
+    if obj is None:
+        return obj
+    if isinstance(obj, ET.Element):
+        obj = {child.tag: child for child in obj}
+    return {k: _deserialize(value_deserializer, v, module) for k, v in obj.items()}
+
+
+def _deserialize_multiple_sequence(
+    entry_deserializers: typing.List[typing.Optional[typing.Callable]],
+    module: typing.Optional[str],
+    obj,
+):
+    if obj is None:
+        return obj
+    return type(obj)(_deserialize(deserializer, entry, module) for entry, deserializer in zip(obj, entry_deserializers))
+
+
+def _deserialize_sequence(
+    deserializer: typing.Optional[typing.Callable],
+    module: typing.Optional[str],
+    obj,
+):
+    if obj is None:
+        return obj
+    if isinstance(obj, ET.Element):
+        obj = list(obj)
+    return type(obj)(_deserialize(deserializer, entry, module) for entry in obj)
+
+
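+# Sort a union's member types so the broad built-ins (str, float, int, bool) are
+# tried last during deserialization; sorted() is stable, so the remaining members
+# keep their relative order. Illustrative only:
+#     _sorted_annotations([str, SomeModel]) -> [SomeModel, str]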
+def _sorted_annotations(types: typing.List[typing.Any]) -> typing.List[typing.Any]:
+    return sorted(
+        types,
+        key=lambda x: hasattr(x, "__name__") and x.__name__.lower() in ("str", "float", "int", "bool"),
+    )
+
+
+def _get_deserialize_callable_from_annotation(  # pylint: disable=too-many-return-statements, too-many-branches
+    annotation: typing.Any,
+    module: typing.Optional[str],
+    rf: typing.Optional["_RestField"] = None,
+) -> typing.Optional[typing.Callable[[typing.Any], typing.Any]]:
+    if not annotation:
+        return None
+
+    # is it a type alias?
+    if isinstance(annotation, str):
+        if module is not None:
+            annotation = _get_type_alias_type(module, annotation)
+
+    # is it a forward ref / in quotes?
+    if isinstance(annotation, (str, typing.ForwardRef)):
+        try:
+            model_name = annotation.__forward_arg__  # type: ignore
+        except AttributeError:
+            model_name = annotation
+        if module is not None:
+            annotation = _get_model(module, model_name)  # type: ignore
+
+    try:
+        if module and _is_model(annotation):
+            if rf:
+                rf._is_model = True
+
+            return functools.partial(_deserialize_model, annotation)  # pyright: ignore
+    except Exception:
+        pass
+
+    # is it a literal?
+    try:
+        if annotation.__origin__ is typing.Literal:  # pyright: ignore
+            return None
+    except AttributeError:
+        pass
+
+    # is it optional?
+    try:
+        if any(a for a in annotation.__args__ if a == type(None)):  # pyright: ignore
+            if len(annotation.__args__) <= 2:  # pyright: ignore
+                if_obj_deserializer = _get_deserialize_callable_from_annotation(
+                    next(a for a in annotation.__args__ if a != type(None)), module, rf  # pyright: ignore
+                )
+
+                return functools.partial(_deserialize_with_optional, if_obj_deserializer)
+            # the type is Optional[Union[...]], we need to remove the None type from the Union
+            annotation_copy = copy.copy(annotation)
+            annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a != type(None)]  # pyright: ignore
+            return _get_deserialize_callable_from_annotation(annotation_copy, module, rf)
+    except AttributeError:
+        pass
+
+    # is it union?
+    if getattr(annotation, "__origin__", None) is typing.Union:
+        # order the union members so that `str` is the last deserialization option, because it is often the most generic
+        deserializers = [
+            _get_deserialize_callable_from_annotation(arg, module, rf)
+            for arg in _sorted_annotations(annotation.__args__)  # pyright: ignore
+        ]
+
+        return functools.partial(_deserialize_with_union, deserializers)
+
+    try:
+        if annotation._name == "Dict":  # pyright: ignore
+            value_deserializer = _get_deserialize_callable_from_annotation(
+                annotation.__args__[1], module, rf  # pyright: ignore
+            )
+
+            return functools.partial(
+                _deserialize_dict,
+                value_deserializer,
+                module,
+            )
+    except (AttributeError, IndexError):
+        pass
+    try:
+        if annotation._name in ["List", "Set", "Tuple", "Sequence"]:  # pyright: ignore
+            if len(annotation.__args__) > 1:  # pyright: ignore
+                entry_deserializers = [
+                    _get_deserialize_callable_from_annotation(dt, module, rf)
+                    for dt in annotation.__args__  # pyright: ignore
+                ]
+                return functools.partial(_deserialize_multiple_sequence, entry_deserializers, module)
+            deserializer = _get_deserialize_callable_from_annotation(
+                annotation.__args__[0], module, rf  # pyright: ignore
+            )
+
+            return functools.partial(_deserialize_sequence, deserializer, module)
+    except (TypeError, IndexError, AttributeError, SyntaxError):
+        pass
+
+    def _deserialize_default(
+        deserializer,
+        obj,
+    ):
+        if obj is None:
+            return obj
+        try:
+            return _deserialize_with_callable(deserializer, obj)
+        except Exception:
+            pass
+        return obj
+
+    default_deserializer = get_deserializer(annotation, rf)
+    if default_deserializer:
+        return functools.partial(_deserialize_default, default_deserializer)
+
+    return functools.partial(_deserialize_default, annotation)
+
+
+def _deserialize_with_callable(
+    deserializer: typing.Optional[typing.Callable[[typing.Any], typing.Any]],
+    value: typing.Any,
+):  # pylint: disable=too-many-return-statements
+    try:
+        if value is None or isinstance(value, _Null):
+            return None
+        if isinstance(value, ET.Element):
+            if deserializer is str:
+                return value.text or ""
+            if deserializer is int:
+                return int(value.text) if value.text else None
+            if deserializer is float:
+                return float(value.text) if value.text else None
+            if deserializer is bool:
+                return value.text == "true" if value.text else None
+        if deserializer is None:
+            return value
+        if deserializer in [int, float, bool]:
+            return deserializer(value)
+        if isinstance(deserializer, CaseInsensitiveEnumMeta):
+            try:
+                return deserializer(value)
+            except ValueError:
+                # for unknown value, return raw value
+                return value
+        if isinstance(deserializer, type) and issubclass(deserializer, Model):
+            return deserializer._deserialize(value, [])
+        return typing.cast(typing.Callable[[typing.Any], typing.Any], deserializer)(value)
+    except Exception as e:
+        raise DeserializationError() from e
+
+
+def _deserialize(
+    deserializer: typing.Any,
+    value: typing.Any,
+    module: typing.Optional[str] = None,
+    rf: typing.Optional["_RestField"] = None,
+    format: typing.Optional[str] = None,
+) -> typing.Any:
+    if isinstance(value, PipelineResponse):
+        value = value.http_response.json()
+    if rf is None and format:
+        rf = _RestField(format=format)
+    if not isinstance(deserializer, functools.partial):
+        deserializer = _get_deserialize_callable_from_annotation(deserializer, module, rf)
+    return _deserialize_with_callable(deserializer, value)
+
+
+def _failsafe_deserialize(
+    deserializer: typing.Any,
+    value: typing.Any,
+    module: typing.Optional[str] = None,
+    rf: typing.Optional["_RestField"] = None,
+    format: typing.Optional[str] = None,
+) -> typing.Any:
+    try:
+        return _deserialize(deserializer, value, module, rf, format)
+    except DeserializationError:
+        _LOGGER.warning(
+            "Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True
+        )
+        return None
+
+
+def _failsafe_deserialize_xml(
+    deserializer: typing.Any,
+    value: typing.Any,
+) -> typing.Any:
+    try:
+        return _deserialize_xml(deserializer, value)
+    except DeserializationError:
+        _LOGGER.warning(
+            "Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True
+        )
+        return None
+
+
+class _RestField:
+    def __init__(
+        self,
+        *,
+        name: typing.Optional[str] = None,
+        type: typing.Optional[typing.Callable] = None,  # pylint: disable=redefined-builtin
+        is_discriminator: bool = False,
+        visibility: typing.Optional[typing.List[str]] = None,
+        default: typing.Any = _UNSET,
+        format: typing.Optional[str] = None,
+        is_multipart_file_input: bool = False,
+        xml: typing.Optional[typing.Dict[str, typing.Any]] = None,
+    ):
+        self._type = type
+        self._rest_name_input = name
+        self._module: typing.Optional[str] = None
+        self._is_discriminator = is_discriminator
+        self._visibility = visibility
+        self._is_model = False
+        self._default = default
+        self._format = format
+        self._is_multipart_file_input = is_multipart_file_input
+        self._xml = xml if xml is not None else {}
+
+    @property
+    def _class_type(self) -> typing.Any:
+        return getattr(self._type, "args", [None])[0]
+
+    @property
+    def _rest_name(self) -> str:
+        if self._rest_name_input is None:
+            raise ValueError("Rest name was never set")
+        return self._rest_name_input
+
+    def __get__(self, obj: Model, type=None):  # pylint: disable=redefined-builtin
+        # by this point, type and rest_name will have a value because we default
+        # them in __new__ of the Model class
+        item = obj.get(self._rest_name)
+        if item is None:
+            return item
+        if self._is_model:
+            return item
+        return _deserialize(self._type, _serialize(item, self._format), rf=self)
+
+    def __set__(self, obj: Model, value) -> None:
+        if value is None:
+            # we want to wipe out entries if users set attr to None
+            try:
+                obj.__delitem__(self._rest_name)
+            except KeyError:
+                pass
+            return
+        if self._is_model:
+            if not _is_model(value):
+                value = _deserialize(self._type, value)
+            obj.__setitem__(self._rest_name, value)
+            return
+        obj.__setitem__(self._rest_name, _serialize(value, self._format))
+
+    def _get_deserialize_callable_from_annotation(
+        self, annotation: typing.Any
+    ) -> typing.Optional[typing.Callable[[typing.Any], typing.Any]]:
+        return _get_deserialize_callable_from_annotation(annotation, self._module, self)
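+
+# _RestField is a data descriptor: __set__ serializes assigned values into the
+# model's underlying dict under the wire name, and __get__ deserializes them back
+# into Python types on access. Illustrative round trip (hypothetical model with a
+# ``created_at: datetime.datetime`` field):
+#     pet.created_at = datetime.datetime.now()  # stored in wire (string) form
+#     pet.created_at                             # read back as a datetime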
+
+
+def rest_field(
+    *,
+    name: typing.Optional[str] = None,
+    type: typing.Optional[typing.Callable] = None,  # pylint: disable=redefined-builtin
+    visibility: typing.Optional[typing.List[str]] = None,
+    default: typing.Any = _UNSET,
+    format: typing.Optional[str] = None,
+    is_multipart_file_input: bool = False,
+    xml: typing.Optional[typing.Dict[str, typing.Any]] = None,
+) -> typing.Any:
+    return _RestField(
+        name=name,
+        type=type,
+        visibility=visibility,
+        default=default,
+        format=format,
+        is_multipart_file_input=is_multipart_file_input,
+        xml=xml,
+    )
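+
+# A minimal usage sketch (illustrative; ``Pet`` is a hypothetical model): declare
+# attributes whose Python names differ from their wire names, or that are
+# read-only (visibility limited to "read") on the service side.
+#
+#     class Pet(Model):
+#         pet_name: str = rest_field(name="petName")
+#         id: str = rest_field(visibility=["read"])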
+
+
+def rest_discriminator(
+    *,
+    name: typing.Optional[str] = None,
+    type: typing.Optional[typing.Callable] = None,  # pylint: disable=redefined-builtin
+    visibility: typing.Optional[typing.List[str]] = None,
+    xml: typing.Optional[typing.Dict[str, typing.Any]] = None,
+) -> typing.Any:
+    return _RestField(name=name, type=type, is_discriminator=True, visibility=visibility, xml=xml)
+
+
+def serialize_xml(model: Model, exclude_readonly: bool = False) -> str:
+    """Serialize a model to XML.
+
+    :param Model model: The model to serialize.
+    :param bool exclude_readonly: Whether to exclude readonly properties.
+    :returns: The XML representation of the model.
+    :rtype: str
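+
+    Example (an illustrative sketch only; assumes a model whose ``_xml`` metadata
+    names the root element)::
+
+        xml_payload = serialize_xml(my_model, exclude_readonly=True)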
+    """
+    return ET.tostring(_get_element(model, exclude_readonly), encoding="unicode")  # type: ignore
+
+
+def _get_element(
+    o: typing.Any,
+    exclude_readonly: bool = False,
+    parent_meta: typing.Optional[typing.Dict[str, typing.Any]] = None,
+    wrapped_element: typing.Optional[ET.Element] = None,
+) -> typing.Union[ET.Element, typing.List[ET.Element]]:
+    if _is_model(o):
+        model_meta = getattr(o, "_xml", {})
+
+        # if the property is itself a model, use its element directly; otherwise generate a wrapper element for the model
+        if wrapped_element is None:
+            wrapped_element = _create_xml_element(
+                model_meta.get("name", o.__class__.__name__),
+                model_meta.get("prefix"),
+                model_meta.get("ns"),
+            )
+
+        readonly_props = []
+        if exclude_readonly:
+            readonly_props = [p._rest_name for p in o._attr_to_rest_field.values() if _is_readonly(p)]
+
+        for k, v in o.items():
+            # do not serialize readonly properties
+            if exclude_readonly and k in readonly_props:
+                continue
+
+            prop_rest_field = _get_rest_field(o._attr_to_rest_field, k)
+            if prop_rest_field:
+                prop_meta = getattr(prop_rest_field, "_xml").copy()
+                # use the wire name as xml name if no specific name is set
+                if prop_meta.get("name") is None:
+                    prop_meta["name"] = k
+            else:
+                # additional properties will not have rest field, use the wire name as xml name
+                prop_meta = {"name": k}
+
+            # if no ns for prop, use model's
+            if prop_meta.get("ns") is None and model_meta.get("ns"):
+                prop_meta["ns"] = model_meta.get("ns")
+                prop_meta["prefix"] = model_meta.get("prefix")
+
+            if prop_meta.get("unwrapped", False):
+                # unwrapped can only be set on array properties
+                wrapped_element.extend(_get_element(v, exclude_readonly, prop_meta))
+            elif prop_meta.get("text", False):
+                # text can only be set on primitive types
+                wrapped_element.text = _get_primitive_type_value(v)
+            elif prop_meta.get("attribute", False):
+                xml_name = prop_meta.get("name", k)
+                if prop_meta.get("ns"):
+                    ET.register_namespace(prop_meta.get("prefix"), prop_meta.get("ns"))  # pyright: ignore
+                    xml_name = "{" + prop_meta.get("ns") + "}" + xml_name  # pyright: ignore
+                # attribute should be primitive type
+                wrapped_element.set(xml_name, _get_primitive_type_value(v))
+            else:
+                # other wrapped prop element
+                wrapped_element.append(_get_wrapped_element(v, exclude_readonly, prop_meta))
+        return wrapped_element
+    if isinstance(o, list):
+        return [_get_element(x, exclude_readonly, parent_meta) for x in o]  # type: ignore
+    if isinstance(o, dict):
+        result = []
+        for k, v in o.items():
+            result.append(
+                _get_wrapped_element(
+                    v,
+                    exclude_readonly,
+                    {
+                        "name": k,
+                        "ns": parent_meta.get("ns") if parent_meta else None,
+                        "prefix": parent_meta.get("prefix") if parent_meta else None,
+                    },
+                )
+            )
+        return result
+
+    # primitive case: create the element based on parent_meta
+    if parent_meta:
+        return _get_wrapped_element(
+            o,
+            exclude_readonly,
+            {
+                "name": parent_meta.get("itemsName", parent_meta.get("name")),
+                "prefix": parent_meta.get("itemsPrefix", parent_meta.get("prefix")),
+                "ns": parent_meta.get("itemsNs", parent_meta.get("ns")),
+            },
+        )
+
+    raise ValueError("Could not serialize value into xml: " + str(o))
+
+
+def _get_wrapped_element(
+    v: typing.Any,
+    exclude_readonly: bool,
+    meta: typing.Optional[typing.Dict[str, typing.Any]],
+) -> ET.Element:
+    wrapped_element = _create_xml_element(
+        meta.get("name") if meta else None, meta.get("prefix") if meta else None, meta.get("ns") if meta else None
+    )
+    if isinstance(v, (dict, list)):
+        wrapped_element.extend(_get_element(v, exclude_readonly, meta))
+    elif _is_model(v):
+        _get_element(v, exclude_readonly, meta, wrapped_element)
+    else:
+        wrapped_element.text = _get_primitive_type_value(v)
+    return wrapped_element
+
+
+def _get_primitive_type_value(v) -> str:
+    if v is True:
+        return "true"
+    if v is False:
+        return "false"
+    if isinstance(v, _Null):
+        return ""
+    return str(v)
+
+
+def _create_xml_element(tag, prefix=None, ns=None):
+    if prefix and ns:
+        ET.register_namespace(prefix, ns)
+    if ns:
+        return ET.Element("{" + ns + "}" + tag)
+    return ET.Element(tag)
+
+
+def _deserialize_xml(
+    deserializer: typing.Any,
+    value: str,
+) -> typing.Any:
+    element = ET.fromstring(value)  # nosec
+    return _deserialize(deserializer, element)
+
+
+def _convert_element(e: ET.Element):
+    # dict case
+    if len(e.attrib) > 0 or len({child.tag for child in e}) > 1:
+        dict_result: typing.Dict[str, typing.Any] = {}
+        for child in e:
+            if dict_result.get(child.tag) is not None:
+                if isinstance(dict_result[child.tag], list):
+                    dict_result[child.tag].append(_convert_element(child))
+                else:
+                    dict_result[child.tag] = [dict_result[child.tag], _convert_element(child)]
+            else:
+                dict_result[child.tag] = _convert_element(child)
+        dict_result.update(e.attrib)
+        return dict_result
+    # array case
+    if len(e) > 0:
+        array_result: typing.List[typing.Any] = []
+        for child in e:
+            array_result.append(_convert_element(child))
+        return array_result
+    # primitive case
+    return e.text
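+
+# Illustrative conversions performed above (hypothetical payloads):
+#     <a x="1"><b>2</b></a>    -> {"b": "2", "x": "1"}  # attributes/mixed tags: dict
+#     <a><b>1</b><b>2</b></a>  -> ["1", "2"]            # one repeated child tag: list
+#     <a>text</a>              -> "text"                # leaf element: primitive text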
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/__init__.py
new file mode 100644
index 00000000..ab870887
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/__init__.py
@@ -0,0 +1,29 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+from ._operations import ChatCompletionsClientOperationsMixin  # type: ignore
+from ._operations import EmbeddingsClientOperationsMixin  # type: ignore
+from ._operations import ImageEmbeddingsClientOperationsMixin  # type: ignore
+
+from ._patch import __all__ as _patch_all
+from ._patch import *
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "ChatCompletionsClientOperationsMixin",
+    "EmbeddingsClientOperationsMixin",
+    "ImageEmbeddingsClientOperationsMixin",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+_patch_sdk()
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_operations.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_operations.py
new file mode 100644
index 00000000..78e5ee35
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_operations.py
@@ -0,0 +1,912 @@
+# pylint: disable=too-many-locals
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+from io import IOBase
+import json
+import sys
+from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload
+
+from azure.core.exceptions import (
+    ClientAuthenticationError,
+    HttpResponseError,
+    ResourceExistsError,
+    ResourceNotFoundError,
+    ResourceNotModifiedError,
+    StreamClosedError,
+    StreamConsumedError,
+    map_error,
+)
+from azure.core.pipeline import PipelineResponse
+from azure.core.rest import HttpRequest, HttpResponse
+from azure.core.tracing.decorator import distributed_trace
+from azure.core.utils import case_insensitive_dict
+
+from .. import models as _models
+from .._model_base import SdkJSONEncoder, _deserialize
+from .._serialization import Serializer
+from .._vendor import ChatCompletionsClientMixinABC, EmbeddingsClientMixinABC, ImageEmbeddingsClientMixinABC
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping  # type: ignore
+JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+T = TypeVar("T")
+ClsType = Optional[Callable[[PipelineResponse[HttpRequest, HttpResponse], T, Dict[str, Any]], Any]]
+
+_SERIALIZER = Serializer()
+_SERIALIZER.client_side_validation = False
+
+
+def build_chat_completions_complete_request(
+    *, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, **kwargs: Any
+) -> HttpRequest:
+    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+    api_version: str = kwargs.pop("api_version", _params.pop("api-version", "2024-05-01-preview"))
+    accept = _headers.pop("Accept", "application/json")
+
+    # Construct URL
+    _url = "/chat/completions"
+
+    # Construct parameters
+    _params["api-version"] = _SERIALIZER.query("api_version", api_version, "str")
+
+    # Construct headers
+    if extra_params is not None:
+        _headers["extra-parameters"] = _SERIALIZER.header("extra_params", extra_params, "str")
+    if content_type is not None:
+        _headers["Content-Type"] = _SERIALIZER.header("content_type", content_type, "str")
+    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    return HttpRequest(method="POST", url=_url, params=_params, headers=_headers, **kwargs)
+
+
+def build_chat_completions_get_model_info_request(**kwargs: Any) -> HttpRequest:  # pylint: disable=name-too-long
+    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    api_version: str = kwargs.pop("api_version", _params.pop("api-version", "2024-05-01-preview"))
+    accept = _headers.pop("Accept", "application/json")
+
+    # Construct URL
+    _url = "/info"
+
+    # Construct parameters
+    _params["api-version"] = _SERIALIZER.query("api_version", api_version, "str")
+
+    # Construct headers
+    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    return HttpRequest(method="GET", url=_url, params=_params, headers=_headers, **kwargs)
+
+
+def build_embeddings_embed_request(
+    *, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, **kwargs: Any
+) -> HttpRequest:
+    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+    api_version: str = kwargs.pop("api_version", _params.pop("api-version", "2024-05-01-preview"))
+    accept = _headers.pop("Accept", "application/json")
+
+    # Construct URL
+    _url = "/embeddings"
+
+    # Construct parameters
+    _params["api-version"] = _SERIALIZER.query("api_version", api_version, "str")
+
+    # Construct headers
+    if extra_params is not None:
+        _headers["extra-parameters"] = _SERIALIZER.header("extra_params", extra_params, "str")
+    if content_type is not None:
+        _headers["Content-Type"] = _SERIALIZER.header("content_type", content_type, "str")
+    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    return HttpRequest(method="POST", url=_url, params=_params, headers=_headers, **kwargs)
+
+
+def build_embeddings_get_model_info_request(**kwargs: Any) -> HttpRequest:
+    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    api_version: str = kwargs.pop("api_version", _params.pop("api-version", "2024-05-01-preview"))
+    accept = _headers.pop("Accept", "application/json")
+
+    # Construct URL
+    _url = "/info"
+
+    # Construct parameters
+    _params["api-version"] = _SERIALIZER.query("api_version", api_version, "str")
+
+    # Construct headers
+    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    return HttpRequest(method="GET", url=_url, params=_params, headers=_headers, **kwargs)
+
+
+def build_image_embeddings_embed_request(
+    *, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, **kwargs: Any
+) -> HttpRequest:
+    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+    api_version: str = kwargs.pop("api_version", _params.pop("api-version", "2024-05-01-preview"))
+    accept = _headers.pop("Accept", "application/json")
+
+    # Construct URL
+    _url = "/images/embeddings"
+
+    # Construct parameters
+    _params["api-version"] = _SERIALIZER.query("api_version", api_version, "str")
+
+    # Construct headers
+    if extra_params is not None:
+        _headers["extra-parameters"] = _SERIALIZER.header("extra_params", extra_params, "str")
+    if content_type is not None:
+        _headers["Content-Type"] = _SERIALIZER.header("content_type", content_type, "str")
+    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    return HttpRequest(method="POST", url=_url, params=_params, headers=_headers, **kwargs)
+
+
+def build_image_embeddings_get_model_info_request(**kwargs: Any) -> HttpRequest:  # pylint: disable=name-too-long
+    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    api_version: str = kwargs.pop("api_version", _params.pop("api-version", "2024-05-01-preview"))
+    accept = _headers.pop("Accept", "application/json")
+
+    # Construct URL
+    _url = "/info"
+
+    # Construct parameters
+    _params["api-version"] = _SERIALIZER.query("api_version", api_version, "str")
+
+    # Construct headers
+    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    return HttpRequest(method="GET", url=_url, params=_params, headers=_headers, **kwargs)
+
+
+class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):
+
+    @overload
+    def _complete(
+        self,
+        *,
+        messages: List[_models._models.ChatRequestMessage],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        frequency_penalty: Optional[float] = None,
+        stream_parameter: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+    @overload
+    def _complete(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+    @overload
+    def _complete(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+
+    @distributed_trace
+    def _complete(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        messages: List[_models._models.ChatRequestMessage] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        frequency_penalty: Optional[float] = None,
+        stream_parameter: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.ChatCompletions:
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues
+        from or "completes" provided prompt data. The method makes a REST API call to
+        the ``/chat/completions`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative
+         frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2]. Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword stream_parameter: A value indicating whether chat completions should be streamed for
+         this request. Default value is None.
+        :paramtype stream_parameter: bool
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist
+         and increase the model's likelihood to output new topics.
+         Supported range is [-2, 2]. Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1]. Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature, called nucleus sampling.
+         This value causes the model to consider the results of tokens with the provided
+         probability mass. As an example, a value of 0.15 will cause only the tokens
+         comprising the top 15% of probability mass to be considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1]. Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: An object specifying the format that the model must output.
+
+         Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs
+         which ensures the model will match your supplied JSON schema.
+
+         Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the
+         model generates is valid JSON.
+
+         **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
+         yourself via a system or user message. Without this, the model may generate an unending stream
+         of whitespace until the generation reaches the token limit, resulting in a long-running and
+         seemingly "stuck" request. Also note that the message content may be partially cut off if
+         ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
+         conversation exceeded the max context length. Default value is None.
+        :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: A list of tools the model may request to call. Currently, only functions are
+         supported as a tool. The model may respond with a function call request and provide the
+         input arguments in JSON format for that function. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed. Default
+         value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: ChatCompletions. The ChatCompletions is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
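+
+        Example (an illustrative sketch only; assumes a configured
+        ``ChatCompletionsClient``, with dict-form messages shown for brevity)::
+
+            completions = client._complete(
+                messages=[{"role": "user", "content": "How many feet are in a mile?"}],
+            )
+            print(completions.choices[0].message.content)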
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if messages is _Unset:
+                raise TypeError("missing required argument: messages")
+            body = {
+                "frequency_penalty": frequency_penalty,
+                "max_tokens": max_tokens,
+                "messages": messages,
+                "model": model,
+                "presence_penalty": presence_penalty,
+                "response_format": response_format,
+                "seed": seed,
+                "stop": stop,
+                "stream": stream_parameter,
+                "temperature": temperature,
+                "tool_choice": tool_choice,
+                "tools": tools,
+                "top_p": top_p,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_chat_completions_complete_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ChatCompletions, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace
+    def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
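+
+        Example (an illustrative sketch only)::
+
+            info = client._get_model_info()
+            print(info.model_name, info.model_provider_name, info.model_type)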
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_chat_completions_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+
+class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
+
+    @overload
+    def _embed(
+        self,
+        *,
+        input: List[str],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    def _embed(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    def _embed(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+
+    @distributed_trace
+    def _embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[str] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the ``/embeddings`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Default value is
+         None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
+         values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
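+
+        Example (an illustrative sketch only; assumes a configured
+        ``EmbeddingsClient``)::
+
+            result = client._embed(input=["first phrase", "second phrase"])
+            for item in result.data:
+                print(len(item.embedding))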
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "dimensions": dimensions,
+                "encoding_format": encoding_format,
+                "input": input,
+                "input_type": input_type,
+                "model": model,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_embeddings_embed_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace
+    def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_embeddings_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+
+class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
+
+    @overload
+    def _embed(
+        self,
+        *,
+        input: List[_models.ImageEmbeddingInput],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    def _embed(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    def _embed(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+
+    @distributed_trace
+    def _embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[_models.ImageEmbeddingInput] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+         array.
+         The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Default value is
+         None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
+         values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is
+         None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
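+
+        Example (an illustrative sketch only; ``sample.png`` is a placeholder
+        file and ``ImageEmbeddingInput.load`` is the helper exposed by this
+        package's models)::
+
+            image = ImageEmbeddingInput.load(image_file="sample.png", image_format="png")
+            result = client._embed(input=[image])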
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "dimensions": dimensions,
+                "encoding_format": encoding_format,
+                "input": input,
+                "input_type": input_type,
+                "model": model,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_image_embeddings_embed_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
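+    # Editor's note: a minimal, hedged usage sketch for the `embed` operation above,
+    # assuming a Serverless API endpoint and key; `<endpoint>` and `<key>` are
+    # placeholders, and `ImageEmbeddingInput.load` is the helper shipped with this SDK:
+    #
+    #     from azure.ai.inference import ImageEmbeddingsClient
+    #     from azure.ai.inference.models import ImageEmbeddingInput
+    #     from azure.core.credentials import AzureKeyCredential
+    #
+    #     client = ImageEmbeddingsClient("https://<endpoint>", AzureKeyCredential("<key>"))
+    #     result = client.embed(
+    #         input=[ImageEmbeddingInput.load(image_file="sample.png", image_format="png")]
+    #     )
+    #     for item in result.data:
+    #         print(item.embedding)
+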
+    @distributed_trace
+    def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_image_embeddings_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_patch.py
new file mode 100644
index 00000000..f7dd3251
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_operations/_patch.py
@@ -0,0 +1,20 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+from typing import List
+
+__all__: List[str] = []  # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py
new file mode 100644
index 00000000..da95cf93
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_patch.py
@@ -0,0 +1,1387 @@
+# pylint: disable=too-many-lines
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+
+Why do we patch auto-generated code? Below is a summary of the changes made in all _patch files (not just this one):
+1. Add support for input argument `model_extras` (all clients)
+2. Add support for function load_client
+3. Add support for setting sticky chat completions/embeddings input arguments in the client constructor
+4. Add support for get_model_info, while caching the result (all clients)
+5. Add support for chat completion streaming (ChatCompletionsClient client only)
+6. Add support for friendly print of result objects (__str__ method) (all clients)
+7. Add support for load() method in ImageUrl class (see /models/_patch.py)
+8. Add support for sending two auth headers for api-key auth (all clients)
+9. Simplify how chat completions "response_format" is set. Define "response_format" as a flat Union of strings and
+   JsonSchemaFormat object, instead of using auto-generated base/derived classes named
+   ChatCompletionsResponseFormatXxxInternal.
+10. Allow UserMessage("my message") in addition to UserMessage(content="my message"). The same applies to
+    AssistantMessage, SystemMessage, DeveloperMessage and ToolMessage.
+
+"""
+import json
+import logging
+import sys
+
+from io import IOBase
+from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, Iterable
+
+from azure.core.pipeline import PipelineResponse
+from azure.core.credentials import AzureKeyCredential
+from azure.core.tracing.decorator import distributed_trace
+from azure.core.utils import case_insensitive_dict
+from azure.core.exceptions import (
+    ClientAuthenticationError,
+    HttpResponseError,
+    map_error,
+    ResourceExistsError,
+    ResourceNotFoundError,
+    ResourceNotModifiedError,
+)
+from . import models as _models
+from ._model_base import SdkJSONEncoder, _deserialize
+from ._serialization import Serializer
+from ._operations._operations import (
+    build_chat_completions_complete_request,
+    build_embeddings_embed_request,
+    build_image_embeddings_embed_request,
+)
+from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated
+from ._client import EmbeddingsClient as EmbeddingsClientGenerated
+from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping  # type: ignore  # pylint: disable=ungrouped-imports
+
+if TYPE_CHECKING:
+    # pylint: disable=unused-import,ungrouped-imports
+    from azure.core.credentials import TokenCredential
+
+JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+
+_SERIALIZER = Serializer()
+_SERIALIZER.client_side_validation = False
+
+_LOGGER = logging.getLogger(__name__)
+
+
+def _get_internal_response_format(
+    response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]]
+) -> Optional[_models._models.ChatCompletionsResponseFormat]:
+    """
+    Internal helper that converts the public response format type accepted by the `complete` method
+    into the internal response format type used by the generated code.
+
+    :param response_format: Response format. Required.
+    :type response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]]
+    :return: Internal response format.
+    :rtype: ~azure.ai.inference._models._models.ChatCompletionsResponseFormat
+    """
+    if response_format is not None:
+
+        # To make mypy tool happy, start by declaring the type as the base class
+        internal_response_format: _models._models.ChatCompletionsResponseFormat
+
+        if isinstance(response_format, str) and response_format == "text":
+            internal_response_format = (
+                _models._models.ChatCompletionsResponseFormatText()  # pylint: disable=protected-access
+            )
+        elif isinstance(response_format, str) and response_format == "json_object":
+            internal_response_format = (
+                _models._models.ChatCompletionsResponseFormatJsonObject()  # pylint: disable=protected-access
+            )
+        elif isinstance(response_format, _models.JsonSchemaFormat):
+            internal_response_format = (
+                _models._models.ChatCompletionsResponseFormatJsonSchema(  # pylint: disable=protected-access
+                    json_schema=response_format
+                )
+            )
+        else:
+            raise ValueError(f"Unsupported `response_format` {response_format}")
+
+        return internal_response_format
+
+    return None
+
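+# Editor's note: a small sketch of how the helper above maps public `response_format`
+# values onto the internal classes (illustrative only; `{...}` stands for a JSON schema):
+#
+#     _get_internal_response_format("text")         -> ChatCompletionsResponseFormatText()
+#     _get_internal_response_format("json_object")  -> ChatCompletionsResponseFormatJsonObject()
+#     _get_internal_response_format(JsonSchemaFormat(name="weather", schema={...}))
+#                                                   -> ChatCompletionsResponseFormatJsonSchema(...)
+#     _get_internal_response_format(None)           -> None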
+
+def load_client(
+    endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any
+) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]:
+    """
+    Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route
+    on the given endpoint, to determine the model type and therefore which client to instantiate.
+    Keyword arguments are passed through to the appropriate client's constructor, so you can set
+    options such as `api_version`, `logging_enable` or `user_agent` here.
+    This method will only work when using Serverless API or Managed Compute endpoint.
+    It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or a TokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :return: The appropriate synchronous client associated with the given endpoint
+    :rtype: ~azure.ai.inference.ChatCompletionsClient or ~azure.ai.inference.EmbeddingsClient
+     or ~azure.ai.inference.ImageEmbeddingsClient
+    :raises ~azure.core.exceptions.HttpResponseError:
+    """
+
+    with ChatCompletionsClient(
+        endpoint, credential, **kwargs
+    ) as client:  # Pick any of the clients; it does not matter which.
+        try:
+            model_info = client.get_model_info()  # type: ignore
+        except ResourceNotFoundError as error:
+            error.message = (
+                "`load_client` function does not work on this endpoint (`/info` route not supported). "
+                "Please construct one of the clients (e.g. `ChatCompletionsClient`) directly."
+            )
+            raise error
+
+    _LOGGER.info("model_info=%s", model_info)
+    if not model_info.model_type:
+        raise ValueError(
+            "The AI model information is missing a value for `model type`. Cannot create an appropriate client."
+        )
+
+    # TODO: Remove "completions", "chat-comletions" and "embedding" once Mistral Large and Cohere fixes their model type
+    if model_info.model_type in (
+        _models.ModelType.CHAT_COMPLETION,
+        "chat_completions",
+        "chat",
+        "completion",
+        "chat-completion",
+        "chat-completions",
+        "chat completion",
+        "chat completions",
+    ):
+        chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs)
+        chat_completion_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return chat_completion_client
+
+    if model_info.model_type in (
+        _models.ModelType.EMBEDDINGS,
+        "embedding",
+        "text_embedding",
+        "text-embeddings",
+        "text embedding",
+        "text embeddings",
+    ):
+        embedding_client = EmbeddingsClient(endpoint, credential, **kwargs)
+        embedding_client._model_info = model_info  # pylint: disable=protected-access,attribute-defined-outside-init
+        return embedding_client
+
+    if model_info.model_type in (
+        _models.ModelType.IMAGE_EMBEDDINGS,
+        "image_embedding",
+        "image-embeddings",
+        "image-embedding",
+        "image embedding",
+        "image embeddings",
+    ):
+        image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs)
+        image_embedding_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return image_embedding_client
+
+    raise ValueError(f"No client available to support AI model type `{model_info.model_type}`")
+
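+# Editor's note: a minimal, hedged usage sketch for `load_client` (placeholders in
+# angle brackets; requires an endpoint that supports the `/info` route):
+#
+#     from azure.ai.inference import load_client
+#     from azure.core.credentials import AzureKeyCredential
+#
+#     client = load_client("https://<endpoint>", AzureKeyCredential("<key>"))
+#     print(type(client).__name__)  # e.g. "ChatCompletionsClient" for a chat model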
+
+class ChatCompletionsClient(ChatCompletionsClientGenerated):  # pylint: disable=too-many-instance-attributes
+    """ChatCompletionsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or a TokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword frequency_penalty: A value that influences the probability of generated tokens
+        appearing based on their cumulative frequency in generated text.
+        Positive values will make tokens less likely to appear as their frequency increases and
+        decrease the likelihood of the model repeating the same statements verbatim.
+        Supported range is [-2, 2].
+        Default value is None.
+    :paramtype frequency_penalty: float
+    :keyword presence_penalty: A value that influences the probability of generated tokens
+        appearing based on their existing
+        presence in generated text.
+        Positive values will make tokens less likely to appear when they already exist and increase
+        the model's likelihood to output new topics.
+        Supported range is [-2, 2].
+        Default value is None.
+    :paramtype presence_penalty: float
+    :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+        generated completions.
+        Higher values will make output more random while lower values will make results more focused
+        and deterministic.
+        It is not recommended to modify temperature and top_p for the same completions request as the
+        interaction of these two settings is difficult to predict.
+        Supported range is [0, 1].
+        Default value is None.
+    :paramtype temperature: float
+    :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+        causes the
+        model to consider the results of tokens with the provided probability mass. As an example, a
+        value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        considered.
+        It is not recommended to modify temperature and top_p for the same completions request as the
+        interaction of these two settings is difficult to predict.
+        Supported range is [0, 1].
+        Default value is None.
+    :paramtype top_p: float
+    :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+    :paramtype max_tokens: int
+    :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+        unformatted text by default. This is equivalent to setting "text" as the response_format.
+        To output JSON format, without adhering to any schema, set to "json_object".
+        To output JSON format adhering to a provided schema, set this to an object of the class
+        ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+    :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+    :keyword stop: A collection of textual sequences that will end completions generation. Default
+        value is None.
+    :paramtype stop: list[str]
+    :keyword tools: The available tool definitions that the chat completions request can use,
+        including caller-defined functions. Default value is None.
+    :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+    :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+        use for the chat completions response. Is either a Union[str,
+        "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+        Default value is None.
+    :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+        ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+    :keyword seed: If specified, the system will make a best effort to sample deterministically
+        such that repeated requests with the
+        same seed and parameters should return the same result. Determinism is not guaranteed.
+        Default value is None.
+    :paramtype seed: int
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "TokenCredential"],
+        *,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default chat completions settings, to be applied in all future service calls
+        # unless overridden by arguments in the `complete` method.
+        self._frequency_penalty = frequency_penalty
+        self._presence_penalty = presence_penalty
+        self._temperature = temperature
+        self._top_p = top_p
+        self._max_tokens = max_tokens
+        self._internal_response_format = _get_internal_response_format(response_format)
+        self._stop = stop
+        self._tools = tools
+        self._tool_choice = tool_choice
+        self._seed = seed
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
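+    # Editor's note: an illustrative sketch of the "sticky" defaults described above.
+    # Settings passed to the constructor apply to every `complete` call unless
+    # overridden per call (placeholders in angle brackets):
+    #
+    #     client = ChatCompletionsClient(
+    #         "https://<endpoint>",
+    #         AzureKeyCredential("<key>"),  # also sends the "api-key" header, per the note above
+    #         temperature=0.5,
+    #         max_tokens=1000,
+    #     )
+    #     result = client.complete(messages=[UserMessage("Hello")])  # uses temperature=0.5
+    #     result = client.complete(messages=[UserMessage("Hello")], temperature=0.9)  # per-call override
+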
+    @overload
+    def complete(
+        self,
+        *,
+        messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]],
+        stream: Literal[False] = False,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.ChatCompletions: ...
+
+    @overload
+    def complete(
+        self,
+        *,
+        messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]],
+        stream: Literal[True],
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Iterable[_models.StreamingChatCompletionsUpdate]: ...
+
+    @overload
+    def complete(
+        self,
+        *,
+        messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]],
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route
+        on the given endpoint.
+        When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting StreamingChatCompletions
+        object to get content updates as they arrive. By default, the response is a ChatCompletions object
+        (non-streaming).
+
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] or list[dict[str, Any]]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions.
+         Otherwise the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+         To output JSON format adhering to a provided schema, set this to an object of the class
+         ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+        :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: The available tool definitions that the chat completions request can use,
+         including caller-defined functions. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed.
+         Default value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    def complete(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    def complete(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        # pylint: disable=too-many-locals
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    # pylint:disable=client-method-missing-tracing-decorator
+    def complete(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]] = _Unset,
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        # pylint: disable=too-many-locals
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.StreamingChatCompletions`
+        object to get content updates as they arrive.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] or list[dict[str, Any]]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions.
+         Otherwise the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+         To output JSON format adhering to a provided schema, set this to an object of the class
+         ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+        :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: The available tool definitions that the chat completions request can use,
+         including caller-defined functions. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed.
+         Default value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        internal_response_format = _get_internal_response_format(response_format)
+
+        if body is _Unset:
+            if messages is _Unset:
+                raise TypeError("missing required argument: messages")
+            body = {
+                "messages": messages,
+                "stream": stream,
+                "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty,
+                "max_tokens": max_tokens if max_tokens is not None else self._max_tokens,
+                "model": model if model is not None else self._model,
+                "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty,
+                "response_format": (
+                    internal_response_format if internal_response_format is not None else self._internal_response_format
+                ),
+                "seed": seed if seed is not None else self._seed,
+                "stop": stop if stop is not None else self._stop,
+                "temperature": temperature if temperature is not None else self._temperature,
+                "tool_choice": tool_choice if tool_choice is not None else self._tool_choice,
+                "tools": tools if tools is not None else self._tools,
+                "top_p": top_p if top_p is not None else self._top_p,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool):
+            stream = body["stream"]
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_chat_completions_complete_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = stream or False
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            return _models.StreamingChatCompletions(response)
+
+        return _deserialize(_models._patch.ChatCompletions, response.json())  # pylint: disable=protected-access
+
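+    # Editor's note: a hedged sketch of the streaming path described above. With
+    # `stream=True`, `complete` returns an iterable of update objects:
+    #
+    #     response = client.complete(
+    #         messages=[UserMessage("Write a haiku about the sea")],
+    #         stream=True,
+    #     )
+    #     for update in response:
+    #         if update.choices and update.choices[0].delta.content:
+    #             print(update.choices[0].delta.content, end="")
+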
+    @distributed_trace
+    def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = self._get_model_info(**kwargs)  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
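+    # Editor's note: `get_model_info` caches its result, so repeated calls hit the
+    # service at most once per client instance (illustrative):
+    #
+    #     info = client.get_model_info()  # REST call to `/info`
+    #     info = client.get_model_info()  # returns the cached ModelInfo, no REST call
+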
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()
+
+
+class EmbeddingsClient(EmbeddingsClientGenerated):
+    """EmbeddingsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or a TokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+        have. Default value is None.
+    :paramtype dimensions: int
+    :keyword encoding_format: Optional. The desired format for the returned embeddings.
+        Known values are:
+        "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+    :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :keyword input_type: Optional. The type of the input. Known values are:
+        "text", "query", and "document". Default value is None.
+    :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "TokenCredential"],
+        *,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default embeddings settings, to be applied in all future service calls
+        # unless overridden by arguments in the `embed` method.
+        self._dimensions = dimensions
+        self._encoding_format = encoding_format
+        self._input_type = input_type
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
+    @overload
+    def embed(
+        self,
+        *,
+        input: List[str],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    def embed(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    def embed(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
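+    # Editor's note: a minimal, hedged usage sketch for `embed` (placeholders in
+    # angle brackets):
+    #
+    #     from azure.ai.inference import EmbeddingsClient
+    #     from azure.core.credentials import AzureKeyCredential
+    #
+    #     client = EmbeddingsClient("https://<endpoint>", AzureKeyCredential("<key>"))
+    #     result = client.embed(input=["first phrase", "second phrase"])
+    #     for item in result.data:
+    #         print(f"index={item.index} length={len(item.embedding)}")
+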
+    @distributed_trace
+    def embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[str] = _Unset,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        # pylint: disable=line-too-long
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "input": input,
+                "dimensions": dimensions if dimensions is not None else self._dimensions,
+                "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+                "input_type": input_type if input_type is not None else self._input_type,
+                "model": model if model is not None else self._model,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_embeddings_embed_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(
+                _models._patch.EmbeddingsResult, response.json()  # pylint: disable=protected-access
+            )
+
+        return deserialized  # type: ignore
+
+    @distributed_trace
+    def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = self._get_model_info(**kwargs)  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return (super().__str__() + f"\n{self._model_info}") if self._model_info else super().__str__()
+
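+# Illustrative sketch, not part of the shipped module: one way a caller might use
+# EmbeddingsClient.embed. The endpoint URL and key are placeholders, and the
+# EmbeddingsResult field names (data, index, embedding) are assumed here.
+#
+#     from azure.core.credentials import AzureKeyCredential
+#     from azure.ai.inference import EmbeddingsClient
+#
+#     client = EmbeddingsClient(
+#         endpoint="https://<your-endpoint>.inference.ai.azure.com",
+#         credential=AzureKeyCredential("<your-key>"),
+#     )
+#     result = client.embed(input=["first phrase", "second phrase"])
+#     for item in result.data:
+#         print(item.index, len(item.embedding))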
+
+class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated):
+    """ImageEmbeddingsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a
+     AzureKeyCredential type or a TokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials.TokenCredential
+    :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+        have. Default value is None.
+    :paramtype dimensions: int
+    :keyword encoding_format: Optional. The desired format for the returned embeddings.
+        Known values are:
+        "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+    :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :keyword input_type: Optional. The type of the input. Known values are:
+        "text", "query", and "document". Default value is None.
+    :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "TokenCredential"],
+        *,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default embeddings settings, to be applied in all future service calls
+        # unless overridden by arguments in the `embed` method.
+        self._dimensions = dimensions
+        self._encoding_format = encoding_format
+        self._input_type = input_type
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
+    @overload
+    def embed(
+        self,
+        *,
+        input: List[_models.ImageEmbeddingInput],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+         array.
+         The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    def embed(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    def embed(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @distributed_trace
+    def embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[_models.ImageEmbeddingInput] = _Unset,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        # pylint: disable=line-too-long
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+         array.
+         The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "input": input,
+                "dimensions": dimensions if dimensions is not None else self._dimensions,
+                "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+                "input_type": input_type if input_type is not None else self._input_type,
+                "model": model if model is not None else self._model,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_image_embeddings_embed_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(
+                _models._patch.EmbeddingsResult, response.json()  # pylint: disable=protected-access
+            )
+
+        return deserialized  # type: ignore
+
+    @distributed_trace
+    def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = self._get_model_info(**kwargs)  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return (super().__str__() + f"\n{self._model_info}") if self._model_info else super().__str__()
+
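+# Illustrative sketch, not part of the shipped module: a minimal caller of
+# ImageEmbeddingsClient.embed. Endpoint, key and image file are placeholders, and
+# the ImageEmbeddingInput.load(...) helper is assumed to be provided by the models
+# package; construct ImageEmbeddingInput directly if it is not.
+#
+#     from azure.core.credentials import AzureKeyCredential
+#     from azure.ai.inference import ImageEmbeddingsClient
+#     from azure.ai.inference.models import ImageEmbeddingInput
+#
+#     client = ImageEmbeddingsClient(
+#         endpoint="https://<your-endpoint>.inference.ai.azure.com",
+#         credential=AzureKeyCredential("<your-key>"),
+#     )
+#     image = ImageEmbeddingInput.load(image_file="sample.png", image_format="png")
+#     result = client.embed(input=[image])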
+
+__all__: List[str] = [
+    "load_client",
+    "ChatCompletionsClient",
+    "EmbeddingsClient",
+    "ImageEmbeddingsClient",
+]  # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_serialization.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_serialization.py
new file mode 100644
index 00000000..a066e16a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_serialization.py
@@ -0,0 +1,2050 @@
+# pylint: disable=too-many-lines
+# --------------------------------------------------------------------------
+#
+# Copyright (c) Microsoft Corporation. All rights reserved.
+#
+# The MIT License (MIT)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the ""Software""), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# --------------------------------------------------------------------------
+
+# pyright: reportUnnecessaryTypeIgnoreComment=false
+
+from base64 import b64decode, b64encode
+import calendar
+import datetime
+import decimal
+import email
+from enum import Enum
+import json
+import logging
+import re
+import sys
+import codecs
+from typing import (
+    Dict,
+    Any,
+    cast,
+    Optional,
+    Union,
+    AnyStr,
+    IO,
+    Mapping,
+    Callable,
+    MutableMapping,
+    List,
+)
+
+try:
+    from urllib import quote  # type: ignore
+except ImportError:
+    from urllib.parse import quote
+import xml.etree.ElementTree as ET
+
+import isodate  # type: ignore
+from typing_extensions import Self
+
+from azure.core.exceptions import DeserializationError, SerializationError
+from azure.core.serialization import NULL as CoreNull
+
+_BOM = codecs.BOM_UTF8.decode(encoding="utf-8")
+
+JSON = MutableMapping[str, Any]
+
+
+class RawDeserializer:
+
+    # Accept "text" because we're open minded people...
+    JSON_REGEXP = re.compile(r"^(application|text)/([a-z+.]+\+)?json$")
+
+    # Name used in context
+    CONTEXT_NAME = "deserialized_data"
+
+    @classmethod
+    def deserialize_from_text(cls, data: Optional[Union[AnyStr, IO]], content_type: Optional[str] = None) -> Any:
+        """Decode data according to content-type.
+
+        A stream of data is accepted as well, but it will be loaded into memory at once for now.
+
+        If no content-type is given, the string version is returned (not bytes, not a stream)
+
+        :param data: Input, could be bytes or stream (will be decoded with UTF8) or text
+        :type data: str or bytes or IO
+        :param str content_type: The content type.
+        :return: The deserialized data.
+        :rtype: object
+        """
+        if hasattr(data, "read"):
+            # Assume a stream
+            data = cast(IO, data).read()
+
+        if isinstance(data, bytes):
+            data_as_str = data.decode(encoding="utf-8-sig")
+        else:
+            # Explain to mypy the correct type.
+            data_as_str = cast(str, data)
+
+            # Remove Byte Order Mark if present in string
+            data_as_str = data_as_str.lstrip(_BOM)
+
+        if content_type is None:
+            return data
+
+        if cls.JSON_REGEXP.match(content_type):
+            try:
+                return json.loads(data_as_str)
+            except ValueError as err:
+                raise DeserializationError("JSON is invalid: {}".format(err), err) from err
+        elif "xml" in (content_type or []):
+            try:
+
+                try:
+                    if isinstance(data, unicode):  # type: ignore
+                        # On Python 2.7, ElementTree will scream if "fromstring" is given a unicode string
+                        data_as_str = data_as_str.encode(encoding="utf-8")  # type: ignore
+                except NameError:
+                    pass
+
+                return ET.fromstring(data_as_str)  # nosec
+            except ET.ParseError as err:
+                # It might be because the server has an issue, and returned JSON with
+                # content-type XML....
+                # So let's try a JSON load, and if it's still broken
+                # let's flow the initial exception
+                def _json_attempt(data):
+                    try:
+                        return True, json.loads(data)
+                    except ValueError:
+                        return False, None  # Don't care about this one
+
+                success, json_result = _json_attempt(data)
+                if success:
+                    return json_result
+                # If I'm here, it's not JSON, it's not XML, let's scream
+                # and raise the last context in this block (the XML exception)
+                # The function hack is because Py2.7 messes up with exception
+                # context otherwise.
+                _LOGGER.critical("Wasn't XML nor JSON, failing")
+                raise DeserializationError("XML is invalid") from err
+        elif content_type.startswith("text/"):
+            return data_as_str
+        raise DeserializationError("Cannot deserialize content-type: {}".format(content_type))
+
+    @classmethod
+    def deserialize_from_http_generics(cls, body_bytes: Optional[Union[AnyStr, IO]], headers: Mapping) -> Any:
+        """Deserialize from HTTP response.
+
+        Use bytes and headers so as NOT to depend on requests/aiohttp or any other
+        specific implementation.
+        Headers will be tested for "content-type"
+
+        :param bytes body_bytes: The body of the response.
+        :param dict headers: The headers of the response.
+        :returns: The deserialized data.
+        :rtype: object
+        """
+        # Try to use content-type from headers if available
+        content_type = None
+        if "content-type" in headers:
+            content_type = headers["content-type"].split(";")[0].strip().lower()
+        # Ouch, this server did not declare what it sent...
+        # Let's guess it's JSON...
+        # Also, since Autorest was considering that an empty body was a valid JSON,
+        # need that test as well....
+        else:
+            content_type = "application/json"
+
+        if body_bytes:
+            return cls.deserialize_from_text(body_bytes, content_type)
+        return None
+
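+# Illustrative sketch: how RawDeserializer picks a decoder from the content type.
+# JSON-like content types go through json.loads, XML through ElementTree, and
+# "text/*" returns the string as-is.
+#
+#     RawDeserializer.deserialize_from_text('{"a": 1}', "application/json")  # -> {'a': 1}
+#     RawDeserializer.deserialize_from_text("plain body", "text/plain")      # -> 'plain body'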
+
+_LOGGER = logging.getLogger(__name__)
+
+try:
+    _long_type = long  # type: ignore
+except NameError:
+    _long_type = int
+
+TZ_UTC = datetime.timezone.utc
+
+_FLATTEN = re.compile(r"(?<!\\)\.")
+
+
+def attribute_transformer(key, attr_desc, value):  # pylint: disable=unused-argument
+    """A key transformer that returns the Python attribute.
+
+    :param str key: The attribute name
+    :param dict attr_desc: The attribute metadata
+    :param object value: The value
+    :returns: A key using attribute name
+    :rtype: str
+    """
+    return (key, value)
+
+
+def full_restapi_key_transformer(key, attr_desc, value):  # pylint: disable=unused-argument
+    """A key transformer that returns the full RestAPI key path.
+
+    :param str key: The attribute name
+    :param dict attr_desc: The attribute metadata
+    :param object value: The value
+    :returns: A list of keys using RestAPI syntax.
+    :rtype: list
+    """
+    keys = _FLATTEN.split(attr_desc["key"])
+    return ([_decode_attribute_map_key(k) for k in keys], value)
+
+
+def last_restapi_key_transformer(key, attr_desc, value):
+    """A key transformer that returns the last RestAPI key.
+
+    :param str key: The attribute name
+    :param dict attr_desc: The attribute metadata
+    :param object value: The value
+    :returns: The last RestAPI key.
+    :rtype: str
+    """
+    key, value = full_restapi_key_transformer(key, attr_desc, value)
+    return (key[-1], value)
+
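+# Illustrative sketch: given attr_desc = {"key": "properties.name", "type": "str"},
+# the three transformers map the same Python attribute to different wire keys:
+#
+#     attribute_transformer("name", attr_desc, "x")         # -> ("name", "x")
+#     full_restapi_key_transformer("name", attr_desc, "x")  # -> (["properties", "name"], "x")
+#     last_restapi_key_transformer("name", attr_desc, "x")  # -> ("name", "x")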
+
+def _create_xml_node(tag, prefix=None, ns=None):
+    """Create a XML node.
+
+    :param str tag: The tag name
+    :param str prefix: The prefix
+    :param str ns: The namespace
+    :return: The XML node
+    :rtype: xml.etree.ElementTree.Element
+    """
+    if prefix and ns:
+        ET.register_namespace(prefix, ns)
+    if ns:
+        return ET.Element("{" + ns + "}" + tag)
+    return ET.Element(tag)
+
+
+class Model:
+    """Mixin for all client request body/response body models to support
+    serialization and deserialization.
+    """
+
+    _subtype_map: Dict[str, Dict[str, Any]] = {}
+    _attribute_map: Dict[str, Dict[str, Any]] = {}
+    _validation: Dict[str, Dict[str, Any]] = {}
+
+    def __init__(self, **kwargs: Any) -> None:
+        self.additional_properties: Optional[Dict[str, Any]] = {}
+        for k in kwargs:  # pylint: disable=consider-using-dict-items
+            if k not in self._attribute_map:
+                _LOGGER.warning("%s is not a known attribute of class %s and will be ignored", k, self.__class__)
+            elif k in self._validation and self._validation[k].get("readonly", False):
+                _LOGGER.warning("Readonly attribute %s will be ignored in class %s", k, self.__class__)
+            else:
+                setattr(self, k, kwargs[k])
+
+    def __eq__(self, other: Any) -> bool:
+        """Compare objects by comparing all attributes.
+
+        :param object other: The object to compare
+        :returns: True if objects are equal
+        :rtype: bool
+        """
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return False
+
+    def __ne__(self, other: Any) -> bool:
+        """Compare objects by comparing all attributes.
+
+        :param object other: The object to compare
+        :returns: True if objects are not equal
+        :rtype: bool
+        """
+        return not self.__eq__(other)
+
+    def __str__(self) -> str:
+        return str(self.__dict__)
+
+    @classmethod
+    def enable_additional_properties_sending(cls) -> None:
+        cls._attribute_map["additional_properties"] = {"key": "", "type": "{object}"}
+
+    @classmethod
+    def is_xml_model(cls) -> bool:
+        try:
+            cls._xml_map  # type: ignore
+        except AttributeError:
+            return False
+        return True
+
+    @classmethod
+    def _create_xml_node(cls):
+        """Create XML node.
+
+        :returns: The XML node
+        :rtype: xml.etree.ElementTree.Element
+        """
+        try:
+            xml_map = cls._xml_map  # type: ignore
+        except AttributeError:
+            xml_map = {}
+
+        return _create_xml_node(xml_map.get("name", cls.__name__), xml_map.get("prefix", None), xml_map.get("ns", None))
+
+    def serialize(self, keep_readonly: bool = False, **kwargs: Any) -> JSON:
+        """Return the JSON that would be sent to server from this model.
+
+        This is an alias to `as_dict(full_restapi_key_transformer, keep_readonly=False)`.
+
+        If you want XML serialization, you can pass the kwargs is_xml=True.
+
+        :param bool keep_readonly: If you want to serialize the readonly attributes
+        :returns: A dict JSON compatible object
+        :rtype: dict
+        """
+        serializer = Serializer(self._infer_class_models())
+        return serializer._serialize(  # type: ignore # pylint: disable=protected-access
+            self, keep_readonly=keep_readonly, **kwargs
+        )
+
+    def as_dict(
+        self,
+        keep_readonly: bool = True,
+        key_transformer: Callable[[str, Dict[str, Any], Any], Any] = attribute_transformer,
+        **kwargs: Any
+    ) -> JSON:
+        """Return a dict that can be serialized using json.dump.
+
+        Advanced usage might optionally use a callback as parameter:
+
+        .. code::python
+
+            def my_key_transformer(key, attr_desc, value):
+                return key
+
+        Key is the attribute name used in Python. Attr_desc
+        is a dict of metadata. Currently contains 'type' with the
+        msrest type and 'key' with the RestAPI encoded key.
+        Value is the current value in this object.
+
+        The string returned will be used to serialize the key.
+        If the return type is a list, this is considered hierarchical
+        result dict.
+
+        See the three examples in this file:
+
+        - attribute_transformer
+        - full_restapi_key_transformer
+        - last_restapi_key_transformer
+
+        If you want XML serialization, you can pass the kwargs is_xml=True.
+
+        :param bool keep_readonly: If you want to serialize the readonly attributes
+        :param function key_transformer: A key transformer function.
+        :returns: A dict JSON compatible object
+        :rtype: dict
+        """
+        serializer = Serializer(self._infer_class_models())
+        return serializer._serialize(  # type: ignore # pylint: disable=protected-access
+            self, key_transformer=key_transformer, keep_readonly=keep_readonly, **kwargs
+        )
+
+    @classmethod
+    def _infer_class_models(cls):
+        try:
+            str_models = cls.__module__.rsplit(".", 1)[0]
+            models = sys.modules[str_models]
+            client_models = {k: v for k, v in models.__dict__.items() if isinstance(v, type)}
+            if cls.__name__ not in client_models:
+                raise ValueError("Not Autorest generated code")
+        except Exception:  # pylint: disable=broad-exception-caught
+            # Assume it's not Autorest generated (tests?). Add ourselves as dependencies.
+            client_models = {cls.__name__: cls}
+        return client_models
+
+    @classmethod
+    def deserialize(cls, data: Any, content_type: Optional[str] = None) -> Self:
+        """Parse a str using the RestAPI syntax and return a model.
+
+        :param str data: A str using RestAPI structure. JSON by default.
+        :param str content_type: JSON by default, set application/xml if XML.
+        :returns: An instance of this model
+        :raises DeserializationError: if something went wrong
+        :rtype: Self
+        """
+        deserializer = Deserializer(cls._infer_class_models())
+        return deserializer(cls.__name__, data, content_type=content_type)  # type: ignore
+
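+    # Illustrative sketch: a hypothetical subclass (not part of this module)
+    # round-trips through deserialize():
+    #
+    #     class Cat(Model):
+    #         _attribute_map = {"name": {"key": "name", "type": "str"}}
+    #
+    #     cat = Cat.deserialize('{"name": "Felix"}', content_type="application/json")
+    #     assert cat.name == "Felix"
+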
+    @classmethod
+    def from_dict(
+        cls,
+        data: Any,
+        key_extractors: Optional[Callable[[str, Dict[str, Any], Any], Any]] = None,
+        content_type: Optional[str] = None,
+    ) -> Self:
+        """Parse a dict using given key extractor return a model.
+
+        By default, the following key extractors are used:
+        attribute_key_case_insensitive_extractor, rest_key_case_insensitive_extractor
+        and last_rest_key_case_insensitive_extractor.
+
+        :param dict data: A dict using RestAPI structure
+        :param function key_extractors: A key extractor function.
+        :param str content_type: JSON by default, set application/xml if XML.
+        :returns: An instance of this model
+        :raises DeserializationError: if something went wrong
+        :rtype: Self
+        """
+        deserializer = Deserializer(cls._infer_class_models())
+        deserializer.key_extractors = (  # type: ignore
+            [  # type: ignore
+                attribute_key_case_insensitive_extractor,
+                rest_key_case_insensitive_extractor,
+                last_rest_key_case_insensitive_extractor,
+            ]
+            if key_extractors is None
+            else key_extractors
+        )
+        return deserializer(cls.__name__, data, content_type=content_type)  # type: ignore
+
+    @classmethod
+    def _flatten_subtype(cls, key, objects):
+        if "_subtype_map" not in cls.__dict__:
+            return {}
+        result = dict(cls._subtype_map[key])
+        for valuetype in cls._subtype_map[key].values():
+            result.update(objects[valuetype]._flatten_subtype(key, objects))  # pylint: disable=protected-access
+        return result
+
+    @classmethod
+    def _classify(cls, response, objects):
+        """Check the class _subtype_map for any child classes.
+        We want to ignore any inherited _subtype_maps.
+
+        :param dict response: The initial data
+        :param dict objects: The class objects
+        :returns: The class to be used
+        :rtype: class
+        """
+        for subtype_key in cls.__dict__.get("_subtype_map", {}).keys():
+            subtype_value = None
+
+            if not isinstance(response, ET.Element):
+                rest_api_response_key = cls._get_rest_key_parts(subtype_key)[-1]
+                subtype_value = response.get(rest_api_response_key, None) or response.get(subtype_key, None)
+            else:
+                subtype_value = xml_key_extractor(subtype_key, cls._attribute_map[subtype_key], response)
+            if subtype_value:
+                # Try to match base class. Can be class name only
+                # (bug to fix in Autorest to support x-ms-discriminator-name)
+                if cls.__name__ == subtype_value:
+                    return cls
+                flatten_mapping_type = cls._flatten_subtype(subtype_key, objects)
+                try:
+                    return objects[flatten_mapping_type[subtype_value]]  # type: ignore
+                except KeyError:
+                    _LOGGER.warning(
+                        "Subtype value %s has no mapping, use base class %s.",
+                        subtype_value,
+                        cls.__name__,
+                    )
+                    break
+            else:
+                _LOGGER.warning("Discriminator %s is absent or null, use base class %s.", subtype_key, cls.__name__)
+                break
+        return cls
+
+    @classmethod
+    def _get_rest_key_parts(cls, attr_key):
+        """Get the RestAPI key of this attr, split it and decode part
+        :param str attr_key: Attribute key must be in attribute_map.
+        :returns: A list of RestAPI part
+        :rtype: list
+        """
+        rest_split_key = _FLATTEN.split(cls._attribute_map[attr_key]["key"])
+        return [_decode_attribute_map_key(key_part) for key_part in rest_split_key]
+
+
+def _decode_attribute_map_key(key):
+    """This decode a key in an _attribute_map to the actual key we want to look at
+    inside the received data.
+
+    :param str key: A key string from the generated code
+    :returns: The decoded key
+    :rtype: str
+    """
+    return key.replace("\\.", ".")
+
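+# Illustrative sketch: an escaped dot in an _attribute_map key denotes a literal
+# dot in the wire key rather than nesting, so it survives _FLATTEN and is then decoded:
+#
+#     _decode_attribute_map_key("odata\\.type")  # -> 'odata.type'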
+
+class Serializer:  # pylint: disable=too-many-public-methods
+    """Request object model serializer."""
+
+    basic_types = {str: "str", int: "int", bool: "bool", float: "float"}
+
+    _xml_basic_types_serializers = {"bool": lambda x: str(x).lower()}
+    days = {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"}
+    months = {
+        1: "Jan",
+        2: "Feb",
+        3: "Mar",
+        4: "Apr",
+        5: "May",
+        6: "Jun",
+        7: "Jul",
+        8: "Aug",
+        9: "Sep",
+        10: "Oct",
+        11: "Nov",
+        12: "Dec",
+    }
+    validation = {
+        "min_length": lambda x, y: len(x) < y,
+        "max_length": lambda x, y: len(x) > y,
+        "minimum": lambda x, y: x < y,
+        "maximum": lambda x, y: x > y,
+        "minimum_ex": lambda x, y: x <= y,
+        "maximum_ex": lambda x, y: x >= y,
+        "min_items": lambda x, y: len(x) < y,
+        "max_items": lambda x, y: len(x) > y,
+        "pattern": lambda x, y: not re.match(y, x, re.UNICODE),
+        "unique": lambda x, y: len(x) != len(set(x)),
+        "multiple": lambda x, y: x % y != 0,
+    }
+
+    def __init__(self, classes: Optional[Mapping[str, type]] = None) -> None:
+        self.serialize_type = {
+            "iso-8601": Serializer.serialize_iso,
+            "rfc-1123": Serializer.serialize_rfc,
+            "unix-time": Serializer.serialize_unix,
+            "duration": Serializer.serialize_duration,
+            "date": Serializer.serialize_date,
+            "time": Serializer.serialize_time,
+            "decimal": Serializer.serialize_decimal,
+            "long": Serializer.serialize_long,
+            "bytearray": Serializer.serialize_bytearray,
+            "base64": Serializer.serialize_base64,
+            "object": self.serialize_object,
+            "[]": self.serialize_iter,
+            "{}": self.serialize_dict,
+        }
+        self.dependencies: Dict[str, type] = dict(classes) if classes else {}
+        self.key_transformer = full_restapi_key_transformer
+        self.client_side_validation = True
+
+    def _serialize(  # pylint: disable=too-many-nested-blocks, too-many-branches, too-many-statements, too-many-locals
+        self, target_obj, data_type=None, **kwargs
+    ):
+        """Serialize data into a string according to type.
+
+        :param object target_obj: The data to be serialized.
+        :param str data_type: The type to be serialized from.
+        :rtype: str, dict
+        :raises SerializationError: if serialization fails.
+        :returns: The serialized data.
+        """
+        key_transformer = kwargs.get("key_transformer", self.key_transformer)
+        keep_readonly = kwargs.get("keep_readonly", False)
+        if target_obj is None:
+            return None
+
+        attr_name = None
+        class_name = target_obj.__class__.__name__
+
+        if data_type:
+            return self.serialize_data(target_obj, data_type, **kwargs)
+
+        if not hasattr(target_obj, "_attribute_map"):
+            data_type = type(target_obj).__name__
+            if data_type in self.basic_types.values():
+                return self.serialize_data(target_obj, data_type, **kwargs)
+
+        # Force "is_xml" kwargs if we detect a XML model
+        try:
+            is_xml_model_serialization = kwargs["is_xml"]
+        except KeyError:
+            is_xml_model_serialization = kwargs.setdefault("is_xml", target_obj.is_xml_model())
+
+        serialized = {}
+        if is_xml_model_serialization:
+            serialized = target_obj._create_xml_node()  # pylint: disable=protected-access
+        try:
+            attributes = target_obj._attribute_map  # pylint: disable=protected-access
+            for attr, attr_desc in attributes.items():
+                attr_name = attr
+                if not keep_readonly and target_obj._validation.get(  # pylint: disable=protected-access
+                    attr_name, {}
+                ).get("readonly", False):
+                    continue
+
+                if attr_name == "additional_properties" and attr_desc["key"] == "":
+                    if target_obj.additional_properties is not None:
+                        serialized.update(target_obj.additional_properties)
+                    continue
+                try:
+
+                    orig_attr = getattr(target_obj, attr)
+                    if is_xml_model_serialization:
+                        pass  # Don't provide "transformer" for XML for now. Keep "orig_attr"
+                    else:  # JSON
+                        keys, orig_attr = key_transformer(attr, attr_desc.copy(), orig_attr)
+                        keys = keys if isinstance(keys, list) else [keys]
+
+                    kwargs["serialization_ctxt"] = attr_desc
+                    new_attr = self.serialize_data(orig_attr, attr_desc["type"], **kwargs)
+
+                    if is_xml_model_serialization:
+                        xml_desc = attr_desc.get("xml", {})
+                        xml_name = xml_desc.get("name", attr_desc["key"])
+                        xml_prefix = xml_desc.get("prefix", None)
+                        xml_ns = xml_desc.get("ns", None)
+                        if xml_desc.get("attr", False):
+                            if xml_ns:
+                                ET.register_namespace(xml_prefix, xml_ns)
+                                xml_name = "{{{}}}{}".format(xml_ns, xml_name)
+                            serialized.set(xml_name, new_attr)  # type: ignore
+                            continue
+                        if xml_desc.get("text", False):
+                            serialized.text = new_attr  # type: ignore
+                            continue
+                        if isinstance(new_attr, list):
+                            serialized.extend(new_attr)  # type: ignore
+                        elif isinstance(new_attr, ET.Element):
+                            # If the nested XML has no XML name,
+                            # we MUST replace the tag with the local tag, while keeping the namespaces.
+                            if "name" not in getattr(orig_attr, "_xml_map", {}):
+                                splitted_tag = new_attr.tag.split("}")
+                                if len(splitted_tag) == 2:  # Namespace
+                                    new_attr.tag = "}".join([splitted_tag[0], xml_name])
+                                else:
+                                    new_attr.tag = xml_name
+                            serialized.append(new_attr)  # type: ignore
+                        else:  # That's a basic type
+                            # Integrate namespace if necessary
+                            local_node = _create_xml_node(xml_name, xml_prefix, xml_ns)
+                            local_node.text = str(new_attr)
+                            serialized.append(local_node)  # type: ignore
+                    else:  # JSON
+                        for k in reversed(keys):  # type: ignore
+                            new_attr = {k: new_attr}
+
+                        _new_attr = new_attr
+                        _serialized = serialized
+                        for k in keys:  # type: ignore
+                            if k not in _serialized:
+                                _serialized.update(_new_attr)  # type: ignore
+                            _new_attr = _new_attr[k]  # type: ignore
+                            _serialized = _serialized[k]
+                except ValueError as err:
+                    if isinstance(err, SerializationError):
+                        raise
+
+        except (AttributeError, KeyError, TypeError) as err:
+            msg = "Attribute {} in object {} cannot be serialized.\n{}".format(attr_name, class_name, str(target_obj))
+            raise SerializationError(msg) from err
+        return serialized
+
+    def body(self, data, data_type, **kwargs):
+        """Serialize data intended for a request body.
+
+        :param object data: The data to be serialized.
+        :param str data_type: The type to be serialized from.
+        :rtype: dict
+        :raises SerializationError: if serialization fails.
+        :raises ValueError: if data is None
+        :returns: The serialized request body
+        """
+
+        # Just in case this is a dict
+        internal_data_type_str = data_type.strip("[]{}")
+        internal_data_type = self.dependencies.get(internal_data_type_str, None)
+        try:
+            is_xml_model_serialization = kwargs["is_xml"]
+        except KeyError:
+            if internal_data_type and issubclass(internal_data_type, Model):
+                is_xml_model_serialization = kwargs.setdefault("is_xml", internal_data_type.is_xml_model())
+            else:
+                is_xml_model_serialization = False
+        if internal_data_type and not isinstance(internal_data_type, Enum):
+            try:
+                deserializer = Deserializer(self.dependencies)
+                # Since this is on the serialization path, it's almost certain the format is not REST JSON.
+                # We're not able to deal with additional properties for now.
+                deserializer.additional_properties_detection = False
+                if is_xml_model_serialization:
+                    deserializer.key_extractors = [  # type: ignore
+                        attribute_key_case_insensitive_extractor,
+                    ]
+                else:
+                    deserializer.key_extractors = [
+                        rest_key_case_insensitive_extractor,
+                        attribute_key_case_insensitive_extractor,
+                        last_rest_key_case_insensitive_extractor,
+                    ]
+                data = deserializer._deserialize(data_type, data)  # pylint: disable=protected-access
+            except DeserializationError as err:
+                raise SerializationError("Unable to build a model: " + str(err)) from err
+
+        return self._serialize(data, data_type, **kwargs)
+
+    def url(self, name, data, data_type, **kwargs):
+        """Serialize data intended for a URL path.
+
+        :param str name: The name of the URL path parameter.
+        :param object data: The data to be serialized.
+        :param str data_type: The type to be serialized from.
+        :rtype: str
+        :returns: The serialized URL path
+        :raises TypeError: if serialization fails.
+        :raises ValueError: if data is None
+        """
+        try:
+            output = self.serialize_data(data, data_type, **kwargs)
+            if data_type == "bool":
+                output = json.dumps(output)
+
+            if kwargs.get("skip_quote") is True:
+                output = str(output)
+                output = output.replace("{", quote("{")).replace("}", quote("}"))
+            else:
+                output = quote(str(output), safe="")
+        except SerializationError as exc:
+            raise TypeError("{} must be type {}.".format(name, data_type)) from exc
+        return output
+
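+    # Illustrative sketch: url() percent-encodes by default, JSON-encodes bools,
+    # and with skip_quote=True leaves the value as-is apart from quoting braces:
+    #
+    #     s = Serializer()
+    #     s.url("name", "a b", "str")                  # -> 'a%20b'
+    #     s.url("flag", True, "bool")                  # -> 'true'
+    #     s.url("raw", "a b", "str", skip_quote=True)  # -> 'a b'
+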
+    def query(self, name, data, data_type, **kwargs):
+        """Serialize data intended for a URL query.
+
+        :param str name: The name of the query parameter.
+        :param object data: The data to be serialized.
+        :param str data_type: The type to be serialized from.
+        :rtype: str, list
+        :raises TypeError: if serialization fails.
+        :raises ValueError: if data is None
+        :returns: The serialized query parameter
+        """
+        try:
+            # Handle lists separately, since we don't want to encode the div separator
+            if data_type.startswith("["):
+                internal_data_type = data_type[1:-1]
+                do_quote = not kwargs.get("skip_quote", False)
+                return self.serialize_iter(data, internal_data_type, do_quote=do_quote, **kwargs)
+
+            # Not a list, regular serialization
+            output = self.serialize_data(data, data_type, **kwargs)
+            if data_type == "bool":
+                output = json.dumps(output)
+            if kwargs.get("skip_quote") is True:
+                output = str(output)
+            else:
+                output = quote(str(output), safe="")
+        except SerializationError as exc:
+            raise TypeError("{} must be type {}.".format(name, data_type)) from exc
+        return str(output)
+
+    def header(self, name, data, data_type, **kwargs):
+        """Serialize data intended for a request header.
+
+        :param str name: The name of the header.
+        :param object data: The data to be serialized.
+        :param str data_type: The type to be serialized from.
+        :rtype: str
+        :raises TypeError: if serialization fails.
+        :raises ValueError: if data is None
+        :returns: The serialized header
+        """
+        try:
+            if data_type in ["[str]"]:
+                data = ["" if d is None else d for d in data]
+
+            output = self.serialize_data(data, data_type, **kwargs)
+            if data_type == "bool":
+                output = json.dumps(output)
+        except SerializationError as exc:
+            raise TypeError("{} must be type {}.".format(name, data_type)) from exc
+        return str(output)
+
+    def serialize_data(self, data, data_type, **kwargs):
+        """Serialize generic data according to supplied data type.
+
+        :param object data: The data to be serialized.
+        :param str data_type: The type to be serialized from.
+        :raises AttributeError: if required data is None.
+        :raises ValueError: if data is None
+        :raises SerializationError: if serialization fails.
+        :returns: The serialized data.
+        :rtype: str, int, float, bool, dict, list
+        """
+        if data is None:
+            raise ValueError("No value for given attribute")
+
+        try:
+            if data is CoreNull:
+                return None
+            if data_type in self.basic_types.values():
+                return self.serialize_basic(data, data_type, **kwargs)
+
+            if data_type in self.serialize_type:
+                return self.serialize_type[data_type](data, **kwargs)
+
+            # If dependencies is empty, try with current data class
+            # It has to be a subclass of Enum anyway
+            enum_type = self.dependencies.get(data_type, data.__class__)
+            if issubclass(enum_type, Enum):
+                return Serializer.serialize_enum(data, enum_obj=enum_type)
+
+            iter_type = data_type[0] + data_type[-1]
+            if iter_type in self.serialize_type:
+                return self.serialize_type[iter_type](data, data_type[1:-1], **kwargs)
+
+        except (ValueError, TypeError) as err:
+            msg = "Unable to serialize value: {!r} as type: {!r}."
+            raise SerializationError(msg.format(data, data_type)) from err
+        return self._serialize(data, **kwargs)
+
+    @classmethod
+    def _get_custom_serializers(cls, data_type, **kwargs):  # pylint: disable=inconsistent-return-statements
+        custom_serializer = kwargs.get("basic_types_serializers", {}).get(data_type)
+        if custom_serializer:
+            return custom_serializer
+        if kwargs.get("is_xml", False):
+            return cls._xml_basic_types_serializers.get(data_type)
+
+    @classmethod
+    def serialize_basic(cls, data, data_type, **kwargs):
+        """Serialize basic builting data type.
+        Serializes objects to str, int, float or bool.
+
+        Possible kwargs:
+        - basic_types_serializers dict[str, callable] : If set, use the callable as serializer
+        - is_xml bool : If set, use xml_basic_types_serializers
+
+        :param obj data: Object to be serialized.
+        :param str data_type: Type of object in the iterable.
+        :rtype: str, int, float, bool
+        :return: serialized object
+        """
+        custom_serializer = cls._get_custom_serializers(data_type, **kwargs)
+        if custom_serializer:
+            return custom_serializer(data)
+        if data_type == "str":
+            return cls.serialize_unicode(data)
+        return eval(data_type)(data)  # nosec # pylint: disable=eval-used
+
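+    # Illustrative sketch: the default path coerces through the builtin named by
+    # data_type, while the XML path lower-cases booleans:
+    #
+    #     Serializer.serialize_basic(1, "float")                 # -> 1.0
+    #     Serializer.serialize_basic(True, "bool", is_xml=True)  # -> 'true'
+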
+    @classmethod
+    def serialize_unicode(cls, data):
+        """Special handling for serializing unicode strings in Py2.
+        Encode to UTF-8 if unicode, otherwise handle as a str.
+
+        :param str data: Object to be serialized.
+        :rtype: str
+        :return: serialized object
+        """
+        try:  # If I received an enum, return its value
+            return data.value
+        except AttributeError:
+            pass
+
+        try:
+            if isinstance(data, unicode):  # type: ignore
+                # Don't change it, JSON and XML ElementTree are totally able
+                # to serialize correctly u'' strings
+                return data
+        except NameError:
+            return str(data)
+        return str(data)
+
+    def serialize_iter(self, data, iter_type, div=None, **kwargs):
+        """Serialize iterable.
+
+        Supported kwargs:
+        - serialization_ctxt dict : The current entry of _attribute_map, or same format.
+          serialization_ctxt['type'] should be same as data_type.
+        - is_xml bool : If set, serialize as XML
+
+        :param list data: Object to be serialized.
+        :param str iter_type: Type of object in the iterable.
+        :param str div: If set, this str will be used to join the elements
+         in the iterable into a single string. Defaults to None.
+        :rtype: list, str
+        :return: serialized iterable
+        """
+        if isinstance(data, str):
+            raise SerializationError("Refuse str type as a valid iter type.")
+
+        serialization_ctxt = kwargs.get("serialization_ctxt", {})
+        is_xml = kwargs.get("is_xml", False)
+
+        serialized = []
+        for d in data:
+            try:
+                serialized.append(self.serialize_data(d, iter_type, **kwargs))
+            except ValueError as err:
+                if isinstance(err, SerializationError):
+                    raise
+                serialized.append(None)
+
+        if kwargs.get("do_quote", False):
+            serialized = ["" if s is None else quote(str(s), safe="") for s in serialized]
+
+        if div:
+            serialized = ["" if s is None else str(s) for s in serialized]
+            serialized = div.join(serialized)
+
+        if "xml" in serialization_ctxt or is_xml:
+            # XML serialization is more complicated
+            xml_desc = serialization_ctxt.get("xml", {})
+            xml_name = xml_desc.get("name")
+            if not xml_name:
+                xml_name = serialization_ctxt["key"]
+
+            # Create a wrap node if necessary (use the fact that Element and list have "append")
+            is_wrapped = xml_desc.get("wrapped", False)
+            node_name = xml_desc.get("itemsName", xml_name)
+            if is_wrapped:
+                final_result = _create_xml_node(xml_name, xml_desc.get("prefix", None), xml_desc.get("ns", None))
+            else:
+                final_result = []
+            # Append all list elements to the final result (node or list)
+            for el in serialized:
+                if isinstance(el, ET.Element):
+                    el_node = el
+                else:
+                    el_node = _create_xml_node(node_name, xml_desc.get("prefix", None), xml_desc.get("ns", None))
+                    if el is not None:  # Otherwise it writes "None" :-p
+                        el_node.text = str(el)
+                final_result.append(el_node)
+            return final_result
+        return serialized
+
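+    # Illustrative sketch (not part of the generated code) of the div and
+    # do_quote options, assuming a Serializer built with the default
+    # constructor:
+    #
+    #     s = Serializer()
+    #     s.serialize_iter(["a b", "c"], "str", div=",")   # -> "a b,c"
+    #     s.serialize_iter(["a b", "c"], "str", div=",", do_quote=True)
+    #     # -> "a%20b,c" (elements are percent-encoded before joining)
+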
+    def serialize_dict(self, attr, dict_type, **kwargs):
+        """Serialize a dictionary of objects.
+
+        :param dict attr: Object to be serialized.
+        :param str dict_type: Type of object in the dictionary.
+        :rtype: dict
+        :return: serialized dictionary
+        """
+        serialization_ctxt = kwargs.get("serialization_ctxt", {})
+        serialized = {}
+        for key, value in attr.items():
+            try:
+                serialized[self.serialize_unicode(key)] = self.serialize_data(value, dict_type, **kwargs)
+            except ValueError as err:
+                if isinstance(err, SerializationError):
+                    raise
+                serialized[self.serialize_unicode(key)] = None
+
+        if "xml" in serialization_ctxt:
+            # XML serialization is more complicated
+            xml_desc = serialization_ctxt["xml"]
+            xml_name = xml_desc["name"]
+
+            final_result = _create_xml_node(xml_name, xml_desc.get("prefix", None), xml_desc.get("ns", None))
+            for key, value in serialized.items():
+                ET.SubElement(final_result, key).text = value
+            return final_result
+
+        return serialized
+
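+    # Illustrative sketch (not part of the generated code): values that fail
+    # to serialize degrade to None instead of failing the whole dictionary:
+    #
+    #     Serializer().serialize_dict({"a": "1", "b": None}, "str")
+    #     # -> {"a": "1", "b": None}
+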
+    def serialize_object(self, attr, **kwargs):  # pylint: disable=too-many-return-statements
+        """Serialize a generic object.
+        This will be handled as a dictionary. If object passed in is not
+        a basic type (str, int, float, dict, list) it will simply be
+        cast to str.
+
+        :param dict attr: Object to be serialized.
+        :rtype: dict or str
+        :return: serialized object
+        """
+        if attr is None:
+            return None
+        if isinstance(attr, ET.Element):
+            return attr
+        obj_type = type(attr)
+        if obj_type in self.basic_types:
+            return self.serialize_basic(attr, self.basic_types[obj_type], **kwargs)
+        if obj_type is _long_type:
+            return self.serialize_long(attr)
+        if obj_type is str:
+            return self.serialize_unicode(attr)
+        if obj_type is datetime.datetime:
+            return self.serialize_iso(attr)
+        if obj_type is datetime.date:
+            return self.serialize_date(attr)
+        if obj_type is datetime.time:
+            return self.serialize_time(attr)
+        if obj_type is datetime.timedelta:
+            return self.serialize_duration(attr)
+        if obj_type is decimal.Decimal:
+            return self.serialize_decimal(attr)
+
+        # If it's a model or I know this dependency, serialize as a Model
+        if obj_type in self.dependencies.values() or isinstance(attr, Model):
+            return self._serialize(attr)
+
+        if obj_type == dict:
+            serialized = {}
+            for key, value in attr.items():
+                try:
+                    serialized[self.serialize_unicode(key)] = self.serialize_object(value, **kwargs)
+                except ValueError:
+                    serialized[self.serialize_unicode(key)] = None
+            return serialized
+
+        if obj_type == list:
+            serialized = []
+            for obj in attr:
+                try:
+                    serialized.append(self.serialize_object(obj, **kwargs))
+                except ValueError:
+                    pass
+            return serialized
+        return str(attr)
+
+    @staticmethod
+    def serialize_enum(attr, enum_obj=None):
+        try:
+            result = attr.value
+        except AttributeError:
+            result = attr
+        try:
+            enum_obj(result)  # type: ignore
+            return result
+        except ValueError as exc:
+            for enum_value in enum_obj:  # type: ignore
+                if enum_value.value.lower() == str(attr).lower():
+                    return enum_value.value
+            error = "{!r} is not valid value for enum {!r}"
+            raise SerializationError(error.format(attr, enum_obj)) from exc
+
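+    # Illustrative sketch (not part of the generated code) of the
+    # case-insensitive fallback, assuming a hypothetical enum
+    # Color(str, Enum) with member RED = "red":
+    #
+    #     Serializer.serialize_enum("RED", enum_obj=Color)      # -> "red"
+    #     Serializer.serialize_enum(Color.RED, enum_obj=Color)  # -> "red"
+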
+    @staticmethod
+    def serialize_bytearray(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize bytearray into base-64 string.
+
+        :param str attr: Object to be serialized.
+        :rtype: str
+        :return: serialized base64
+        """
+        return b64encode(attr).decode()
+
+    @staticmethod
+    def serialize_base64(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize str into base-64 string.
+
+        :param str attr: Object to be serialized.
+        :rtype: str
+        :return: serialized base64
+        """
+        encoded = b64encode(attr).decode("ascii")
+        return encoded.strip("=").replace("+", "-").replace("/", "_")
+
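+    # Illustrative sketch (not part of the generated code): this emits the
+    # URL-safe base64url alphabet (RFC 4648) with the "=" padding stripped:
+    #
+    #     Serializer.serialize_base64(b"\xfb\xff")   # -> "-_8"
+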
+    @staticmethod
+    def serialize_decimal(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize Decimal object to float.
+
+        :param decimal attr: Object to be serialized.
+        :rtype: float
+        :return: serialized decimal
+        """
+        return float(attr)
+
+    @staticmethod
+    def serialize_long(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize long (Py2) or int (Py3).
+
+        :param int attr: Object to be serialized.
+        :rtype: int/long
+        :return: serialized long
+        """
+        return _long_type(attr)
+
+    @staticmethod
+    def serialize_date(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize Date object into ISO-8601 formatted string.
+
+        :param Date attr: Object to be serialized.
+        :rtype: str
+        :return: serialized date
+        """
+        if isinstance(attr, str):
+            attr = isodate.parse_date(attr)
+        t = "{:04}-{:02}-{:02}".format(attr.year, attr.month, attr.day)
+        return t
+
+    @staticmethod
+    def serialize_time(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize Time object into ISO-8601 formatted string.
+
+        :param datetime.time attr: Object to be serialized.
+        :rtype: str
+        :return: serialized time
+        """
+        if isinstance(attr, str):
+            attr = isodate.parse_time(attr)
+        t = "{:02}:{:02}:{:02}".format(attr.hour, attr.minute, attr.second)
+        if attr.microsecond:
+            t += ".{:02}".format(attr.microsecond)
+        return t
+
+    @staticmethod
+    def serialize_duration(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize TimeDelta object into ISO-8601 formatted string.
+
+        :param TimeDelta attr: Object to be serialized.
+        :rtype: str
+        :return: serialized duration
+        """
+        if isinstance(attr, str):
+            attr = isodate.parse_duration(attr)
+        return isodate.duration_isoformat(attr)
+
+    @staticmethod
+    def serialize_rfc(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize Datetime object into RFC-1123 formatted string.
+
+        :param Datetime attr: Object to be serialized.
+        :rtype: str
+        :raises TypeError: if format invalid.
+        :return: serialized rfc
+        """
+        try:
+            if not attr.tzinfo:
+                _LOGGER.warning("Datetime with no tzinfo will be considered UTC.")
+            utc = attr.utctimetuple()
+        except AttributeError as exc:
+            raise TypeError("RFC1123 object must be valid Datetime object.") from exc
+
+        return "{}, {:02} {} {:04} {:02}:{:02}:{:02} GMT".format(
+            Serializer.days[utc.tm_wday],
+            utc.tm_mday,
+            Serializer.months[utc.tm_mon],
+            utc.tm_year,
+            utc.tm_hour,
+            utc.tm_min,
+            utc.tm_sec,
+        )
+
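+    # Illustrative sketch (not part of the generated code):
+    #
+    #     dt = datetime.datetime(2024, 5, 1, 12, 0, 0, tzinfo=datetime.timezone.utc)
+    #     Serializer.serialize_rfc(dt)   # -> "Wed, 01 May 2024 12:00:00 GMT"
+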
+    @staticmethod
+    def serialize_iso(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize Datetime object into ISO-8601 formatted string.
+
+        :param Datetime attr: Object to be serialized.
+        :rtype: str
+        :raises SerializationError: if format invalid.
+        :return: serialized iso
+        """
+        if isinstance(attr, str):
+            attr = isodate.parse_datetime(attr)
+        try:
+            if not attr.tzinfo:
+                _LOGGER.warning("Datetime with no tzinfo will be considered UTC.")
+            utc = attr.utctimetuple()
+            if utc.tm_year > 9999 or utc.tm_year < 1:
+                raise OverflowError("Hit max or min date")
+
+            microseconds = str(attr.microsecond).rjust(6, "0").rstrip("0").ljust(3, "0")
+            if microseconds:
+                microseconds = "." + microseconds
+            date = "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}".format(
+                utc.tm_year, utc.tm_mon, utc.tm_mday, utc.tm_hour, utc.tm_min, utc.tm_sec
+            )
+            return date + microseconds + "Z"
+        except (ValueError, OverflowError) as err:
+            msg = "Unable to serialize datetime object."
+            raise SerializationError(msg) from err
+        except AttributeError as err:
+            msg = "ISO-8601 object must be valid Datetime object."
+            raise TypeError(msg) from err
+
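+    # Illustrative sketch (not part of the generated code): fractional seconds
+    # are normalized to at least three digits and the result is always UTC
+    # with a "Z" suffix:
+    #
+    #     dt = datetime.datetime(2024, 5, 1, 12, 0, 0, 500000, tzinfo=datetime.timezone.utc)
+    #     Serializer.serialize_iso(dt)   # -> "2024-05-01T12:00:00.500Z"
+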
+    @staticmethod
+    def serialize_unix(attr, **kwargs):  # pylint: disable=unused-argument
+        """Serialize Datetime object into IntTime format.
+        This is represented as seconds.
+
+        :param Datetime attr: Object to be serialized.
+        :rtype: int
+        :raises SerializationError: if format invalid
+        :return: serialized unix
+        """
+        if isinstance(attr, int):
+            return attr
+        try:
+            if not attr.tzinfo:
+                _LOGGER.warning("Datetime with no tzinfo will be considered UTC.")
+            return int(calendar.timegm(attr.utctimetuple()))
+        except AttributeError as exc:
+            raise TypeError("Unix time object must be valid Datetime object.") from exc
+
+
+def rest_key_extractor(attr, attr_desc, data):  # pylint: disable=unused-argument
+    key = attr_desc["key"]
+    working_data = data
+
+    while "." in key:
+        # Need the cast, as for some reasons "split" is typed as list[str | Any]
+        dict_keys = cast(List[str], _FLATTEN.split(key))
+        if len(dict_keys) == 1:
+            key = _decode_attribute_map_key(dict_keys[0])
+            break
+        working_key = _decode_attribute_map_key(dict_keys[0])
+        working_data = working_data.get(working_key, data)
+        if working_data is None:
+            # If at any point while following the flattened JSON path we see None,
+            # it means that all properties under it are None as well
+            return None
+        key = ".".join(dict_keys[1:])
+
+    return working_data.get(key)
+
+
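+# Illustrative sketch (not part of the generated code): a flattened key such
+# as "properties.name" walks nested dictionaries one segment at a time:
+#
+#     rest_key_extractor(None, {"key": "properties.name"},
+#                        {"properties": {"name": "foo"}})   # -> "foo"
+
+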
+def rest_key_case_insensitive_extractor(  # pylint: disable=unused-argument, inconsistent-return-statements
+    attr, attr_desc, data
+):
+    key = attr_desc["key"]
+    working_data = data
+
+    while "." in key:
+        dict_keys = _FLATTEN.split(key)
+        if len(dict_keys) == 1:
+            key = _decode_attribute_map_key(dict_keys[0])
+            break
+        working_key = _decode_attribute_map_key(dict_keys[0])
+        working_data = attribute_key_case_insensitive_extractor(working_key, None, working_data)
+        if working_data is None:
+            # If at any point while following the flattened JSON path we see None,
+            # it means that all properties under it are None as well
+            return None
+        key = ".".join(dict_keys[1:])
+
+    if working_data:
+        return attribute_key_case_insensitive_extractor(key, None, working_data)
+
+
+def last_rest_key_extractor(attr, attr_desc, data):  # pylint: disable=unused-argument
+    """Extract the attribute in "data" based on the last part of the JSON path key.
+
+    :param str attr: The attribute to extract
+    :param dict attr_desc: The attribute description
+    :param dict data: The data to extract from
+    :rtype: object
+    :returns: The extracted attribute
+    """
+    key = attr_desc["key"]
+    dict_keys = _FLATTEN.split(key)
+    return attribute_key_extractor(dict_keys[-1], None, data)
+
+
+def last_rest_key_case_insensitive_extractor(attr, attr_desc, data):  # pylint: disable=unused-argument
+    """Extract the attribute in "data" based on the last part of the JSON path key.
+
+    This is the case insensitive version of "last_rest_key_extractor"
+    :param str attr: The attribute to extract
+    :param dict attr_desc: The attribute description
+    :param dict data: The data to extract from
+    :rtype: object
+    :returns: The extracted attribute
+    """
+    key = attr_desc["key"]
+    dict_keys = _FLATTEN.split(key)
+    return attribute_key_case_insensitive_extractor(dict_keys[-1], None, data)
+
+
+def attribute_key_extractor(attr, _, data):
+    return data.get(attr)
+
+
+def attribute_key_case_insensitive_extractor(attr, _, data):
+    found_key = None
+    lower_attr = attr.lower()
+    for key in data:
+        if lower_attr == key.lower():
+            found_key = key
+            break
+
+    return data.get(found_key)
+
+
+def _extract_name_from_internal_type(internal_type):
+    """Given an internal type XML description, extract correct XML name with namespace.
+
+    :param type internal_type: A model type
+    :rtype: str
+    :returns: The XML name, qualified with its namespace if one is declared
+    """
+    internal_type_xml_map = getattr(internal_type, "_xml_map", {})
+    xml_name = internal_type_xml_map.get("name", internal_type.__name__)
+    xml_ns = internal_type_xml_map.get("ns", None)
+    if xml_ns:
+        xml_name = "{{{}}}{}".format(xml_ns, xml_name)
+    return xml_name
+
+
+def xml_key_extractor(attr, attr_desc, data):  # pylint: disable=unused-argument,too-many-return-statements
+    if isinstance(data, dict):
+        return None
+
+    # Test if this model is XML ready first
+    if not isinstance(data, ET.Element):
+        return None
+
+    xml_desc = attr_desc.get("xml", {})
+    xml_name = xml_desc.get("name", attr_desc["key"])
+
+    # Look for children
+    is_iter_type = attr_desc["type"].startswith("[")
+    is_wrapped = xml_desc.get("wrapped", False)
+    internal_type = attr_desc.get("internalType", None)
+    internal_type_xml_map = getattr(internal_type, "_xml_map", {})
+
+    # Integrate namespace if necessary
+    xml_ns = xml_desc.get("ns", internal_type_xml_map.get("ns", None))
+    if xml_ns:
+        xml_name = "{{{}}}{}".format(xml_ns, xml_name)
+
+    # If it's an attribute, that's simple
+    if xml_desc.get("attr", False):
+        return data.get(xml_name)
+
+    # If it's x-ms-text, that's simple too
+    if xml_desc.get("text", False):
+        return data.text
+
+    # Scenario where I take the local name:
+    # - Wrapped node
+    # - Internal type is an enum (considered basic types)
+    # - Internal type has no XML/Name node
+    if is_wrapped or (internal_type and (issubclass(internal_type, Enum) or "name" not in internal_type_xml_map)):
+        children = data.findall(xml_name)
+    # If internal type has a local name and it's not a list, I use that name
+    elif not is_iter_type and internal_type and "name" in internal_type_xml_map:
+        xml_name = _extract_name_from_internal_type(internal_type)
+        children = data.findall(xml_name)
+    # That's an array
+    else:
+        if internal_type:  # Complex type, ignore itemsName and use the complex type name
+            items_name = _extract_name_from_internal_type(internal_type)
+        else:
+            items_name = xml_desc.get("itemsName", xml_name)
+        children = data.findall(items_name)
+
+    if len(children) == 0:
+        if is_iter_type:
+            if is_wrapped:
+                return None  # is_wrapped no node, we want None
+            return []  # not wrapped, assume empty list
+        return None  # Assume it's not there, maybe an optional node.
+
+    # If is_iter_type and not wrapped, return all found children
+    if is_iter_type:
+        if not is_wrapped:
+            return children
+        # Iter and wrapped, should have found one node only (the wrap one)
+        if len(children) != 1:
+            raise DeserializationError(
+                "Tried to deserialize an array not wrapped, and found several nodes '{}'. Maybe you should declare this array as wrapped?".format(  # pylint: disable=line-too-long
+                    xml_name
+                )
+            )
+        return list(children[0])  # Might be empty list and that's ok.
+
+    # Here it's not an iter type; we should have found one element only or none
+    if len(children) > 1:
+        raise DeserializationError("Found several XML '{}' where it was not expected".format(xml_name))
+    return children[0]
+
+
+class Deserializer:
+    """Response object model deserializer.
+
+    :param dict classes: Class type dictionary for deserializing complex types.
+    :ivar list key_extractors: Ordered list of extractors to be used by this deserializer.
+    """
+
+    basic_types = {str: "str", int: "int", bool: "bool", float: "float"}
+
+    valid_date = re.compile(r"\d{4}[-]\d{2}[-]\d{2}T\d{2}:\d{2}:\d{2}\.?\d*Z?[-+]?[\d{2}]?:?[\d{2}]?")
+
+    def __init__(self, classes: Optional[Mapping[str, type]] = None) -> None:
+        self.deserialize_type = {
+            "iso-8601": Deserializer.deserialize_iso,
+            "rfc-1123": Deserializer.deserialize_rfc,
+            "unix-time": Deserializer.deserialize_unix,
+            "duration": Deserializer.deserialize_duration,
+            "date": Deserializer.deserialize_date,
+            "time": Deserializer.deserialize_time,
+            "decimal": Deserializer.deserialize_decimal,
+            "long": Deserializer.deserialize_long,
+            "bytearray": Deserializer.deserialize_bytearray,
+            "base64": Deserializer.deserialize_base64,
+            "object": self.deserialize_object,
+            "[]": self.deserialize_iter,
+            "{}": self.deserialize_dict,
+        }
+        self.deserialize_expected_types = {
+            "duration": (isodate.Duration, datetime.timedelta),
+            "iso-8601": (datetime.datetime),
+        }
+        self.dependencies: Dict[str, type] = dict(classes) if classes else {}
+        self.key_extractors = [rest_key_extractor, xml_key_extractor]
+        # Additional properties detection only works if "rest_key_extractor" is
+        # used to extract the keys. Making it work with any key extractor is too
+        # complicated, with no real scenario for now, so a flag is provided to
+        # disable the detection. Set it to False if you expect the input NOT to
+        # come from a JSON REST syntax; otherwise, results are unexpected.
+        self.additional_properties_detection = True
+
+    def __call__(self, target_obj, response_data, content_type=None):
+        """Call the deserializer to process a REST response.
+
+        :param str target_obj: Target data type to deserialize to.
+        :param requests.Response response_data: REST response object.
+        :param str content_type: Swagger "produces" if available.
+        :raises DeserializationError: if deserialization fails.
+        :return: Deserialized object.
+        :rtype: object
+        """
+        data = self._unpack_content(response_data, content_type)
+        return self._deserialize(target_obj, data)
+
+    def _deserialize(self, target_obj, data):  # pylint: disable=inconsistent-return-statements
+        """Call the deserializer on a model.
+
+        Data needs to be already deserialized as JSON or XML ElementTree
+
+        :param str target_obj: Target data type to deserialize to.
+        :param object data: Object to deserialize.
+        :raises DeserializationError: if deserialization fails.
+        :return: Deserialized object.
+        :rtype: object
+        """
+        # This is already a model, go recursive just in case
+        if hasattr(data, "_attribute_map"):
+            constants = [name for name, config in getattr(data, "_validation", {}).items() if config.get("constant")]
+            try:
+                for attr, mapconfig in data._attribute_map.items():  # pylint: disable=protected-access
+                    if attr in constants:
+                        continue
+                    value = getattr(data, attr)
+                    if value is None:
+                        continue
+                    local_type = mapconfig["type"]
+                    internal_data_type = local_type.strip("[]{}")
+                    if internal_data_type not in self.dependencies or isinstance(internal_data_type, Enum):
+                        continue
+                    setattr(data, attr, self._deserialize(local_type, value))
+                return data
+            except AttributeError:
+                return
+
+        response, class_name = self._classify_target(target_obj, data)
+
+        if isinstance(response, str):
+            return self.deserialize_data(data, response)
+        if isinstance(response, type) and issubclass(response, Enum):
+            return self.deserialize_enum(data, response)
+
+        if data is None or data is CoreNull:
+            return data
+        try:
+            attributes = response._attribute_map  # type: ignore # pylint: disable=protected-access
+            d_attrs = {}
+            for attr, attr_desc in attributes.items():
+                # Check empty string. If it's not empty, someone has a real "additionalProperties"...
+                if attr == "additional_properties" and attr_desc["key"] == "":
+                    continue
+                raw_value = None
+                # Enhance attr_desc with some dynamic data
+                attr_desc = attr_desc.copy()  # Do a copy, do not change the real one
+                internal_data_type = attr_desc["type"].strip("[]{}")
+                if internal_data_type in self.dependencies:
+                    attr_desc["internalType"] = self.dependencies[internal_data_type]
+
+                for key_extractor in self.key_extractors:
+                    found_value = key_extractor(attr, attr_desc, data)
+                    if found_value is not None:
+                        if raw_value is not None and raw_value != found_value:
+                            msg = (
+                                "Ignoring extracted value '%s' from %s for key '%s'"
+                                " (duplicate extraction, follow extractors order)"
+                            )
+                            _LOGGER.warning(msg, found_value, key_extractor, attr)
+                            continue
+                        raw_value = found_value
+
+                value = self.deserialize_data(raw_value, attr_desc["type"])
+                d_attrs[attr] = value
+        except (AttributeError, TypeError, KeyError) as err:
+            msg = "Unable to deserialize to object: " + class_name  # type: ignore
+            raise DeserializationError(msg) from err
+        additional_properties = self._build_additional_properties(attributes, data)
+        return self._instantiate_model(response, d_attrs, additional_properties)
+
+    def _build_additional_properties(self, attribute_map, data):
+        if not self.additional_properties_detection:
+            return None
+        if "additional_properties" in attribute_map and attribute_map.get("additional_properties", {}).get("key") != "":
+            # Check empty string. If it's not empty, someone has a real "additionalProperties"
+            return None
+        if isinstance(data, ET.Element):
+            data = {el.tag: el.text for el in data}
+
+        known_keys = {
+            _decode_attribute_map_key(_FLATTEN.split(desc["key"])[0])
+            for desc in attribute_map.values()
+            if desc["key"] != ""
+        }
+        present_keys = set(data.keys())
+        missing_keys = present_keys - known_keys
+        return {key: data[key] for key in missing_keys}
+
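+    # Illustrative sketch (not part of the generated code): payload keys that
+    # are absent from the attribute map are collected as additional properties:
+    #
+    #     Deserializer()._build_additional_properties(
+    #         {"name": {"key": "name", "type": "str"}},
+    #         {"name": "a", "extra": 1},
+    #     )   # -> {"extra": 1}
+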
+    def _classify_target(self, target, data):
+        """Check to see whether the deserialization target object can
+        be classified into a subclass.
+        Once classification has been determined, initialize object.
+
+        :param str target: The target object type to deserialize to.
+        :param str/dict data: The response data to deserialize.
+        :return: The classified target object and its class name.
+        :rtype: tuple
+        """
+        if target is None:
+            return None, None
+
+        if isinstance(target, str):
+            try:
+                target = self.dependencies[target]
+            except KeyError:
+                return target, target
+
+        try:
+            target = target._classify(data, self.dependencies)  # type: ignore # pylint: disable=protected-access
+        except AttributeError:
+            pass  # Target is not a Model, no classify
+        return target, target.__class__.__name__  # type: ignore
+
+    def failsafe_deserialize(self, target_obj, data, content_type=None):
+        """Ignores any errors encountered in deserialization,
+        and falls back to not deserializing the object. Recommended
+        for use in error deserialization, as we want to return the
+        HttpResponseError to users, and not have them deal with
+        a deserialization error.
+
+        :param str target_obj: The target object type to deserialize to.
+        :param str/dict data: The response data to deserialize.
+        :param str content_type: Swagger "produces" if available.
+        :return: Deserialized object.
+        :rtype: object
+        """
+        try:
+            return self(target_obj, data, content_type=content_type)
+        except:  # pylint: disable=bare-except
+            _LOGGER.debug(
+                "Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True
+            )
+            return None
+
+    @staticmethod
+    def _unpack_content(raw_data, content_type=None):
+        """Extract the correct structure for deserialization.
+
+        If raw_data is a PipelineResponse, try to extract the result of RawDeserializer.
+        If we can't, raise. Your Pipeline should have a RawDeserializer.
+
+        If not a pipeline response and raw_data is bytes or string, use content-type
+        to decode it. If no content-type, try JSON.
+
+        If raw_data is something else, bypass all logic and return it directly.
+
+        :param obj raw_data: Data to be processed.
+        :param str content_type: How to parse if raw_data is a string/bytes.
+        :raises JSONDecodeError: If JSON is requested and parsing is impossible.
+        :raises UnicodeDecodeError: If bytes is not UTF8
+        :rtype: object
+        :return: Unpacked content.
+        """
+        # Assume this is enough to detect a Pipeline Response without importing it
+        context = getattr(raw_data, "context", {})
+        if context:
+            if RawDeserializer.CONTEXT_NAME in context:
+                return context[RawDeserializer.CONTEXT_NAME]
+            raise ValueError("This pipeline didn't have the RawDeserializer policy; can't deserialize")
+
+        # Assume this is enough to recognize universal_http.ClientResponse without importing it
+        if hasattr(raw_data, "body"):
+            return RawDeserializer.deserialize_from_http_generics(raw_data.text(), raw_data.headers)
+
+        # Assume this enough to recognize requests.Response without importing it.
+        if hasattr(raw_data, "_content_consumed"):
+            return RawDeserializer.deserialize_from_http_generics(raw_data.text, raw_data.headers)
+
+        if isinstance(raw_data, (str, bytes)) or hasattr(raw_data, "read"):
+            return RawDeserializer.deserialize_from_text(raw_data, content_type)  # type: ignore
+        return raw_data
+
+    def _instantiate_model(self, response, attrs, additional_properties=None):
+        """Instantiate a response model passing in deserialized args.
+
+        :param Response response: The response model class.
+        :param dict attrs: The deserialized response attributes.
+        :param dict additional_properties: Additional properties to be set.
+        :rtype: Response
+        :return: The instantiated response model.
+        """
+        if callable(response):
+            subtype = getattr(response, "_subtype_map", {})
+            try:
+                readonly = [
+                    k
+                    for k, v in response._validation.items()  # pylint: disable=protected-access  # type: ignore
+                    if v.get("readonly")
+                ]
+                const = [
+                    k
+                    for k, v in response._validation.items()  # pylint: disable=protected-access  # type: ignore
+                    if v.get("constant")
+                ]
+                kwargs = {k: v for k, v in attrs.items() if k not in subtype and k not in readonly + const}
+                response_obj = response(**kwargs)
+                for attr in readonly:
+                    setattr(response_obj, attr, attrs.get(attr))
+                if additional_properties:
+                    response_obj.additional_properties = additional_properties  # type: ignore
+                return response_obj
+            except TypeError as err:
+                msg = "Unable to deserialize {} into model {}. ".format(kwargs, response)  # type: ignore
+                raise DeserializationError(msg + str(err)) from err
+        else:
+            try:
+                for attr, value in attrs.items():
+                    setattr(response, attr, value)
+                return response
+            except Exception as exp:
+                msg = "Unable to populate response model. "
+                msg += "Type: {}, Error: {}".format(type(response), exp)
+                raise DeserializationError(msg) from exp
+
+    def deserialize_data(self, data, data_type):  # pylint: disable=too-many-return-statements
+        """Process data for deserialization according to data type.
+
+        :param str data: The response string to be deserialized.
+        :param str data_type: The type to deserialize to.
+        :raises DeserializationError: if deserialization fails.
+        :return: Deserialized object.
+        :rtype: object
+        """
+        if data is None:
+            return data
+
+        try:
+            if not data_type:
+                return data
+            if data_type in self.basic_types.values():
+                return self.deserialize_basic(data, data_type)
+            if data_type in self.deserialize_type:
+                if isinstance(data, self.deserialize_expected_types.get(data_type, tuple())):
+                    return data
+
+                is_a_text_parsing_type = lambda x: x not in [  # pylint: disable=unnecessary-lambda-assignment
+                    "object",
+                    "[]",
+                    r"{}",
+                ]
+                if isinstance(data, ET.Element) and is_a_text_parsing_type(data_type) and not data.text:
+                    return None
+                data_val = self.deserialize_type[data_type](data)
+                return data_val
+
+            iter_type = data_type[0] + data_type[-1]
+            if iter_type in self.deserialize_type:
+                return self.deserialize_type[iter_type](data, data_type[1:-1])
+
+            obj_type = self.dependencies[data_type]
+            if issubclass(obj_type, Enum):
+                if isinstance(data, ET.Element):
+                    data = data.text
+                return self.deserialize_enum(data, obj_type)
+
+        except (ValueError, TypeError, AttributeError) as err:
+            msg = "Unable to deserialize response data."
+            msg += " Data: {}, {}".format(data, data_type)
+            raise DeserializationError(msg) from err
+        return self._deserialize(obj_type, data)
+
+    def deserialize_iter(self, attr, iter_type):
+        """Deserialize an iterable.
+
+        :param list attr: Iterable to be deserialized.
+        :param str iter_type: The type of object in the iterable.
+        :return: Deserialized iterable.
+        :rtype: list
+        """
+        if attr is None:
+            return None
+        if isinstance(attr, ET.Element):  # If I receive an element here, get the children
+            attr = list(attr)
+        if not isinstance(attr, (list, set)):
+            raise DeserializationError("Cannot deserialize as [{}] an object of type {}".format(iter_type, type(attr)))
+        return [self.deserialize_data(a, iter_type) for a in attr]
+
+    def deserialize_dict(self, attr, dict_type):
+        """Deserialize a dictionary.
+
+        :param dict/list attr: Dictionary to be deserialized. Also accepts
+         a list of key, value pairs.
+        :param str dict_type: The object type of the items in the dictionary.
+        :return: Deserialized dictionary.
+        :rtype: dict
+        """
+        if isinstance(attr, list):
+            return {x["key"]: self.deserialize_data(x["value"], dict_type) for x in attr}
+
+        if isinstance(attr, ET.Element):
+            # Transform <Key>value</Key> into {"Key": "value"}
+            attr = {el.tag: el.text for el in attr}
+        return {k: self.deserialize_data(v, dict_type) for k, v in attr.items()}
+
+    def deserialize_object(self, attr, **kwargs):  # pylint: disable=too-many-return-statements
+        """Deserialize a generic object.
+        This will be handled as a dictionary.
+
+        :param dict attr: Dictionary to be deserialized.
+        :return: Deserialized object.
+        :rtype: dict
+        :raises TypeError: if non-builtin datatype encountered.
+        """
+        if attr is None:
+            return None
+        if isinstance(attr, ET.Element):
+            # Do no recurse on XML, just return the tree as-is
+            return attr
+        if isinstance(attr, str):
+            return self.deserialize_basic(attr, "str")
+        obj_type = type(attr)
+        if obj_type in self.basic_types:
+            return self.deserialize_basic(attr, self.basic_types[obj_type])
+        if obj_type is _long_type:
+            return self.deserialize_long(attr)
+
+        if obj_type == dict:
+            deserialized = {}
+            for key, value in attr.items():
+                try:
+                    deserialized[key] = self.deserialize_object(value, **kwargs)
+                except ValueError:
+                    deserialized[key] = None
+            return deserialized
+
+        if obj_type == list:
+            deserialized = []
+            for obj in attr:
+                try:
+                    deserialized.append(self.deserialize_object(obj, **kwargs))
+                except ValueError:
+                    pass
+            return deserialized
+
+        error = "Cannot deserialize generic object with type: "
+        raise TypeError(error + str(obj_type))
+
+    def deserialize_basic(self, attr, data_type):  # pylint: disable=too-many-return-statements
+        """Deserialize basic builtin data type from string.
+        Will attempt to convert to str, int, float and bool.
+        This function will also accept '1', '0', 'true' and 'false' as
+        valid bool values.
+
+        :param str attr: response string to be deserialized.
+        :param str data_type: deserialization data type.
+        :return: Deserialized basic type.
+        :rtype: str, int, float or bool
+        :raises TypeError: if string format is not valid.
+        """
+        # If we're here, data is supposed to be a basic type.
+        # If it's still an XML node, take the text
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+            if not attr:
+                if data_type == "str":
+                    # None or '', node <a/> is empty string.
+                    return ""
+                # None or '', node <a/> with a strong type is None.
+                # Don't try to model "empty bool" or "empty int"
+                return None
+
+        if data_type == "bool":
+            if attr in [True, False, 1, 0]:
+                return bool(attr)
+            if isinstance(attr, str):
+                if attr.lower() in ["true", "1"]:
+                    return True
+                if attr.lower() in ["false", "0"]:
+                    return False
+            raise TypeError("Invalid boolean value: {}".format(attr))
+
+        if data_type == "str":
+            return self.deserialize_unicode(attr)
+        return eval(data_type)(attr)  # nosec # pylint: disable=eval-used
+
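+    # Illustrative sketch (not part of the generated code) of the lenient
+    # coercions, assuming a Deserializer built with the default constructor:
+    #
+    #     d = Deserializer()
+    #     d.deserialize_basic("true", "bool")   # -> True
+    #     d.deserialize_basic("0", "bool")      # -> False
+    #     d.deserialize_basic("1.5", "float")   # -> 1.5
+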
+    @staticmethod
+    def deserialize_unicode(data):
+        """Preserve unicode objects in Python 2, otherwise return data
+        as a string.
+
+        :param str data: response string to be deserialized.
+        :return: Deserialized string.
+        :rtype: str or unicode
+        """
+        # We might be here because we have an enum modeled as string,
+        # and we try to deserialize a partial dict with enum inside
+        if isinstance(data, Enum):
+            return data
+
+        # Consider this is real string
+        try:
+            if isinstance(data, unicode):  # type: ignore
+                return data
+        except NameError:
+            return str(data)
+        return str(data)
+
+    @staticmethod
+    def deserialize_enum(data, enum_obj):
+        """Deserialize string into enum object.
+
+        If the string is not a valid enum value it will be returned as-is
+        and a warning will be logged.
+
+        :param str data: Response string to be deserialized. If this value is
+         None or invalid it will be returned as-is.
+        :param Enum enum_obj: Enum object to deserialize to.
+        :return: Deserialized enum object.
+        :rtype: Enum
+        """
+        if isinstance(data, enum_obj) or data is None:
+            return data
+        if isinstance(data, Enum):
+            data = data.value
+        if isinstance(data, int):
+            # Workaround. We might consider removing it in the future.
+            try:
+                return list(enum_obj.__members__.values())[data]
+            except IndexError as exc:
+                error = "{!r} is not a valid index for enum {!r}"
+                raise DeserializationError(error.format(data, enum_obj)) from exc
+        try:
+            return enum_obj(str(data))
+        except ValueError:
+            for enum_value in enum_obj:
+                if enum_value.value.lower() == str(data).lower():
+                    return enum_value
+            # We don't fail anymore for unknown value, we deserialize as a string
+            _LOGGER.warning("Deserializer is not able to find %s as valid enum in %s", data, enum_obj)
+            return Deserializer.deserialize_unicode(data)
+
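+    # Illustrative sketch (not part of the generated code), assuming a
+    # hypothetical enum Color(str, Enum) with member RED = "red":
+    #
+    #     Deserializer.deserialize_enum("RED", Color)       # -> Color.RED
+    #     Deserializer.deserialize_enum("magenta", Color)   # -> "magenta" (warns, kept as str)
+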
+    @staticmethod
+    def deserialize_bytearray(attr):
+        """Deserialize string into bytearray.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized bytearray
+        :rtype: bytearray
+        :raises TypeError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        return bytearray(b64decode(attr))  # type: ignore
+
+    @staticmethod
+    def deserialize_base64(attr):
+        """Deserialize base64 encoded string into string.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized base64 string
+        :rtype: bytearray
+        :raises TypeError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        padding = "=" * (3 - (len(attr) + 3) % 4)  # type: ignore
+        attr = attr + padding  # type: ignore
+        encoded = attr.replace("-", "+").replace("_", "/")
+        return b64decode(encoded)
+
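+    # Illustrative sketch (not part of the generated code): the padding
+    # expression "=" * (3 - (len(attr) + 3) % 4) restores the "=" characters
+    # that serialize_base64 stripped, making the two methods inverses:
+    #
+    #     Deserializer.deserialize_base64("-_8")   # -> b"\xfb\xff"
+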
+    @staticmethod
+    def deserialize_decimal(attr):
+        """Deserialize string into Decimal object.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized decimal
+        :raises DeserializationError: if string format invalid.
+        :rtype: decimal
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        try:
+            return decimal.Decimal(str(attr))  # type: ignore
+        except decimal.DecimalException as err:
+            msg = "Invalid decimal {}".format(attr)
+            raise DeserializationError(msg) from err
+
+    @staticmethod
+    def deserialize_long(attr):
+        """Deserialize string into long (Py2) or int (Py3).
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized int
+        :rtype: long or int
+        :raises ValueError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        return _long_type(attr)  # type: ignore
+
+    @staticmethod
+    def deserialize_duration(attr):
+        """Deserialize ISO-8601 formatted string into TimeDelta object.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized duration
+        :rtype: TimeDelta
+        :raises DeserializationError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        try:
+            duration = isodate.parse_duration(attr)
+        except (ValueError, OverflowError, AttributeError) as err:
+            msg = "Cannot deserialize duration object."
+            raise DeserializationError(msg) from err
+        return duration
+
+    @staticmethod
+    def deserialize_date(attr):
+        """Deserialize ISO-8601 formatted string into Date object.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized date
+        :rtype: Date
+        :raises DeserializationError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        if re.search(r"[^\W\d_]", attr, re.I + re.U):  # type: ignore
+            raise DeserializationError("Date must have only digits and -. Received: %s" % attr)
+        # This must NOT use defaultmonth/defaultday. Using None ensure this raises an exception.
+        return isodate.parse_date(attr, defaultmonth=0, defaultday=0)
+
+    @staticmethod
+    def deserialize_time(attr):
+        """Deserialize ISO-8601 formatted string into time object.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized time
+        :rtype: datetime.time
+        :raises DeserializationError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        if re.search(r"[^\W\d_]", attr, re.I + re.U):  # type: ignore
+            raise DeserializationError("Date must have only digits and -. Received: %s" % attr)
+        return isodate.parse_time(attr)
+
+    @staticmethod
+    def deserialize_rfc(attr):
+        """Deserialize RFC-1123 formatted string into Datetime object.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized RFC datetime
+        :rtype: Datetime
+        :raises DeserializationError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        try:
+            parsed_date = email.utils.parsedate_tz(attr)  # type: ignore
+            date_obj = datetime.datetime(
+                *parsed_date[:6], tzinfo=datetime.timezone(datetime.timedelta(minutes=(parsed_date[9] or 0) / 60))
+            )
+            if not date_obj.tzinfo:
+                date_obj = date_obj.astimezone(tz=TZ_UTC)
+        except ValueError as err:
+            msg = "Cannot deserialize to rfc datetime object."
+            raise DeserializationError(msg) from err
+        return date_obj
+
+    @staticmethod
+    def deserialize_iso(attr):
+        """Deserialize ISO-8601 formatted string into Datetime object.
+
+        :param str attr: response string to be deserialized.
+        :return: Deserialized ISO datetime
+        :rtype: Datetime
+        :raises DeserializationError: if string format invalid.
+        """
+        if isinstance(attr, ET.Element):
+            attr = attr.text
+        try:
+            attr = attr.upper()  # type: ignore
+            match = Deserializer.valid_date.match(attr)
+            if not match:
+                raise ValueError("Invalid datetime string: " + attr)
+
+            check_decimal = attr.split(".")
+            if len(check_decimal) > 1:
+                decimal_str = ""
+                for digit in check_decimal[1]:
+                    if digit.isdigit():
+                        decimal_str += digit
+                    else:
+                        break
+                if len(decimal_str) > 6:
+                    attr = attr.replace(decimal_str, decimal_str[0:6])
+
+            date_obj = isodate.parse_datetime(attr)
+            test_utc = date_obj.utctimetuple()
+            if test_utc.tm_year > 9999 or test_utc.tm_year < 1:
+                raise OverflowError("Hit max or min date")
+        except (ValueError, OverflowError, AttributeError) as err:
+            msg = "Cannot deserialize datetime object."
+            raise DeserializationError(msg) from err
+        return date_obj
+
+    @staticmethod
+    def deserialize_unix(attr):
+        """Serialize Datetime object into IntTime format.
+        This is represented as seconds.
+
+        :param int attr: Object to be serialized.
+        :return: Deserialized datetime
+        :rtype: Datetime
+        :raises DeserializationError: if format invalid
+        """
+        if isinstance(attr, ET.Element):
+            attr = int(attr.text)  # type: ignore
+        try:
+            attr = int(attr)
+            date_obj = datetime.datetime.fromtimestamp(attr, TZ_UTC)
+        except ValueError as err:
+            msg = "Cannot deserialize to unix datetime object."
+            raise DeserializationError(msg) from err
+        return date_obj
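+
+    # Illustrative sketch (not part of the generated code): timestamps are
+    # interpreted as seconds since the Unix epoch, in UTC:
+    #
+    #     Deserializer.deserialize_unix(0)
+    #     # -> datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)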
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_vendor.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_vendor.py
new file mode 100644
index 00000000..147e96be
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_vendor.py
@@ -0,0 +1,47 @@
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from abc import ABC
+from typing import TYPE_CHECKING
+
+from ._configuration import (
+    ChatCompletionsClientConfiguration,
+    EmbeddingsClientConfiguration,
+    ImageEmbeddingsClientConfiguration,
+)
+
+if TYPE_CHECKING:
+    from azure.core import PipelineClient
+
+    from ._serialization import Deserializer, Serializer
+
+
+class ChatCompletionsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "PipelineClient"
+    _config: ChatCompletionsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
+
+
+class EmbeddingsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "PipelineClient"
+    _config: EmbeddingsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
+
+
+class ImageEmbeddingsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "PipelineClient"
+    _config: ImageEmbeddingsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/_version.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/_version.py
new file mode 100644
index 00000000..b1c2836b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/_version.py
@@ -0,0 +1,9 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+VERSION = "1.0.0b9"
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py
new file mode 100644
index 00000000..668f989a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/__init__.py
@@ -0,0 +1,33 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+from ._client import ChatCompletionsClient  # type: ignore
+from ._client import EmbeddingsClient  # type: ignore
+from ._client import ImageEmbeddingsClient  # type: ignore
+
+try:
+    from ._patch import __all__ as _patch_all
+    from ._patch import *
+except ImportError:
+    _patch_all = []
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "ChatCompletionsClient",
+    "EmbeddingsClient",
+    "ImageEmbeddingsClient",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+
+_patch_sdk()
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py
new file mode 100644
index 00000000..88e6773b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_client.py
@@ -0,0 +1,280 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from copy import deepcopy
+from typing import Any, Awaitable, TYPE_CHECKING, Union
+from typing_extensions import Self
+
+from azure.core import AsyncPipelineClient
+from azure.core.credentials import AzureKeyCredential
+from azure.core.pipeline import policies
+from azure.core.rest import AsyncHttpResponse, HttpRequest
+
+from .._serialization import Deserializer, Serializer
+from ._configuration import (
+    ChatCompletionsClientConfiguration,
+    EmbeddingsClientConfiguration,
+    ImageEmbeddingsClientConfiguration,
+)
+from ._operations import (
+    ChatCompletionsClientOperationsMixin,
+    EmbeddingsClientOperationsMixin,
+    ImageEmbeddingsClientOperationsMixin,
+)
+
+if TYPE_CHECKING:
+    from azure.core.credentials_async import AsyncTokenCredential
+
+
+class ChatCompletionsClient(ChatCompletionsClientOperationsMixin):
+    """ChatCompletionsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}"
+        self._config = ChatCompletionsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(
+        self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
+    ) -> Awaitable[AsyncHttpResponse]:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        >>> response = await client.send_request(request)
+        >>> response
+        <AsyncHttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.AsyncHttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    async def close(self) -> None:
+        await self._client.close()
+
+    async def __aenter__(self) -> Self:
+        await self._client.__aenter__()
+        return self
+
+    async def __aexit__(self, *exc_details: Any) -> None:
+        await self._client.__aexit__(*exc_details)
+
+
+class EmbeddingsClient(EmbeddingsClientOperationsMixin):
+    """EmbeddingsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}"
+        self._config = EmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(
+        self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
+    ) -> Awaitable[AsyncHttpResponse]:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        >>> response = await client.send_request(request)
+        >>> response
+        <AsyncHttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.AsyncHttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    async def close(self) -> None:
+        await self._client.close()
+
+    async def __aenter__(self) -> Self:
+        await self._client.__aenter__()
+        return self
+
+    async def __aexit__(self, *exc_details: Any) -> None:
+        await self._client.__aexit__(*exc_details)
+
+
+class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin):
+    """ImageEmbeddingsClient.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        _endpoint = "{endpoint}"
+        self._config = ImageEmbeddingsClientConfiguration(endpoint=endpoint, credential=credential, **kwargs)
+        _policies = kwargs.pop("policies", None)
+        if _policies is None:
+            _policies = [
+                policies.RequestIdPolicy(**kwargs),
+                self._config.headers_policy,
+                self._config.user_agent_policy,
+                self._config.proxy_policy,
+                policies.ContentDecodePolicy(**kwargs),
+                self._config.redirect_policy,
+                self._config.retry_policy,
+                self._config.authentication_policy,
+                self._config.custom_hook_policy,
+                self._config.logging_policy,
+                policies.DistributedTracingPolicy(**kwargs),
+                policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
+                self._config.http_logging_policy,
+            ]
+        self._client: AsyncPipelineClient = AsyncPipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
+
+        self._serialize = Serializer()
+        self._deserialize = Deserializer()
+        self._serialize.client_side_validation = False
+
+    def send_request(
+        self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
+    ) -> Awaitable[AsyncHttpResponse]:
+        """Runs the network request through the client's chained policies.
+
+        >>> from azure.core.rest import HttpRequest
+        >>> request = HttpRequest("GET", "https://www.example.org/")
+        >>> response = await client.send_request(request)
+        >>> response
+        <AsyncHttpResponse: 200 OK>
+
+        For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
+
+        :param request: The network request you want to make. Required.
+        :type request: ~azure.core.rest.HttpRequest
+        :keyword bool stream: Whether the response payload will be streamed. Defaults to False.
+        :return: The response of your network call. Does not do error handling on your response.
+        :rtype: ~azure.core.rest.AsyncHttpResponse
+        """
+
+        request_copy = deepcopy(request)
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+
+        request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
+        return self._client.send_request(request_copy, stream=stream, **kwargs)  # type: ignore
+
+    async def close(self) -> None:
+        await self._client.close()
+
+    async def __aenter__(self) -> Self:
+        await self._client.__aenter__()
+        return self
+
+    async def __aexit__(self, *exc_details: Any) -> None:
+        await self._client.__aexit__(*exc_details)
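
All three async clients above share the same lifecycle: __aenter__/__aexit__ delegate to the underlying AsyncPipelineClient, and send_request pushes a raw HttpRequest through the configured policy chain. A sketch under the same placeholder-credential assumptions as above; the relative URL is resolved against the client's endpoint, and query parameters such as api-version are omitted for brevity:

    # Sketch of the async context-manager and send_request flow defined above.
    import asyncio
    import os

    from azure.core.credentials import AzureKeyCredential
    from azure.core.rest import HttpRequest
    from azure.ai.inference.aio import ChatCompletionsClient

    async def main() -> None:
        async with ChatCompletionsClient(
            endpoint=os.environ["AZURE_AI_ENDPOINT"],
            credential=AzureKeyCredential(os.environ["AZURE_AI_KEY"]),
        ) as client:
            request = HttpRequest("GET", "/info")  # resolved against the endpoint
            response = await client.send_request(request)
            print(response.status_code)

    asyncio.run(main())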
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py
new file mode 100644
index 00000000..f60e1125
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_configuration.py
@@ -0,0 +1,197 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from typing import Any, TYPE_CHECKING, Union
+
+from azure.core.credentials import AzureKeyCredential
+from azure.core.pipeline import policies
+
+from .._version import VERSION
+
+if TYPE_CHECKING:
+    from azure.core.credentials_async import AsyncTokenCredential
+
+
+class ChatCompletionsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for ChatCompletionsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            # Key credentials are sent as an "Authorization: Bearer <key>" header.
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
+
+
+class EmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for EmbeddingsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            # Key credentials are sent as an "Authorization: Bearer <key>" header.
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
+
+
+class ImageEmbeddingsClientConfiguration:  # pylint: disable=too-many-instance-attributes
+    """Configuration for ImageEmbeddingsClient.
+
+    Note that all parameters used to create this instance are saved as instance
+    attributes.
+
+    :param endpoint: Service host. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either a key
+     credential type or a token credential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self, endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+    ) -> None:
+        api_version: str = kwargs.pop("api_version", "2024-05-01-preview")
+
+        if endpoint is None:
+            raise ValueError("Parameter 'endpoint' must not be None.")
+        if credential is None:
+            raise ValueError("Parameter 'credential' must not be None.")
+
+        self.endpoint = endpoint
+        self.credential = credential
+        self.api_version = api_version
+        self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
+        kwargs.setdefault("sdk_moniker", "ai-inference/{}".format(VERSION))
+        self.polling_interval = kwargs.get("polling_interval", 30)
+        self._configure(**kwargs)
+
+    def _infer_policy(self, **kwargs):
+        if isinstance(self.credential, AzureKeyCredential):
+            # Key credentials are sent as an "Authorization: Bearer <key>" header.
+            return policies.AzureKeyCredentialPolicy(self.credential, "Authorization", prefix="Bearer", **kwargs)
+        if hasattr(self.credential, "get_token"):
+            return policies.AsyncBearerTokenCredentialPolicy(self.credential, *self.credential_scopes, **kwargs)
+        raise TypeError(f"Unsupported credential: {self.credential}")
+
+    def _configure(self, **kwargs: Any) -> None:
+        self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
+        self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
+        self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
+        self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
+        self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
+        self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
+        self.redirect_policy = kwargs.get("redirect_policy") or policies.AsyncRedirectPolicy(**kwargs)
+        self.retry_policy = kwargs.get("retry_policy") or policies.AsyncRetryPolicy(**kwargs)
+        self.authentication_policy = kwargs.get("authentication_policy")
+        if self.credential and not self.authentication_policy:
+            self.authentication_policy = self._infer_policy(**kwargs)
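
Because _configure() prefers kwargs.get("<name>_policy") over its defaults, every pipeline policy, as well as api_version and credential_scopes, can be overridden at client construction without subclassing. A sketch; the values shown are illustrative, not recommendations:

    # Sketch: constructor kwargs flow into the configuration class above.
    import os

    from azure.core.credentials import AzureKeyCredential
    from azure.core.pipeline.policies import AsyncRetryPolicy
    from azure.ai.inference.aio import ChatCompletionsClient

    client = ChatCompletionsClient(
        endpoint=os.environ["AZURE_AI_ENDPOINT"],
        credential=AzureKeyCredential(os.environ["AZURE_AI_KEY"]),
        api_version="2024-05-01-preview",              # popped in __init__
        retry_policy=AsyncRetryPolicy(retry_total=3),  # replaces the default retry policy
    )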
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py
new file mode 100644
index 00000000..ab870887
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/__init__.py
@@ -0,0 +1,29 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+from ._operations import ChatCompletionsClientOperationsMixin  # type: ignore
+from ._operations import EmbeddingsClientOperationsMixin  # type: ignore
+from ._operations import ImageEmbeddingsClientOperationsMixin  # type: ignore
+
+from ._patch import __all__ as _patch_all
+from ._patch import *
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "ChatCompletionsClientOperationsMixin",
+    "EmbeddingsClientOperationsMixin",
+    "ImageEmbeddingsClientOperationsMixin",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+_patch_sdk()
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
new file mode 100644
index 00000000..62ec772f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_operations.py
@@ -0,0 +1,781 @@
+# pylint: disable=too-many-locals
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+from io import IOBase
+import json
+import sys
+from typing import Any, Callable, Dict, IO, List, Optional, TypeVar, Union, overload
+
+from azure.core.exceptions import (
+    ClientAuthenticationError,
+    HttpResponseError,
+    ResourceExistsError,
+    ResourceNotFoundError,
+    ResourceNotModifiedError,
+    StreamClosedError,
+    StreamConsumedError,
+    map_error,
+)
+from azure.core.pipeline import PipelineResponse
+from azure.core.rest import AsyncHttpResponse, HttpRequest
+from azure.core.tracing.decorator_async import distributed_trace_async
+from azure.core.utils import case_insensitive_dict
+
+from ... import models as _models
+from ..._model_base import SdkJSONEncoder, _deserialize
+from ..._operations._operations import (
+    build_chat_completions_complete_request,
+    build_chat_completions_get_model_info_request,
+    build_embeddings_embed_request,
+    build_embeddings_get_model_info_request,
+    build_image_embeddings_embed_request,
+    build_image_embeddings_get_model_info_request,
+)
+from .._vendor import ChatCompletionsClientMixinABC, EmbeddingsClientMixinABC, ImageEmbeddingsClientMixinABC
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping  # type: ignore
+JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+T = TypeVar("T")
+ClsType = Optional[Callable[[PipelineResponse[HttpRequest, AsyncHttpResponse], T, Dict[str, Any]], Any]]
+
+
+class ChatCompletionsClientOperationsMixin(ChatCompletionsClientMixinABC):
+
+    @overload
+    async def _complete(
+        self,
+        *,
+        messages: List[_models._models.ChatRequestMessage],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        frequency_penalty: Optional[float] = None,
+        stream_parameter: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+    @overload
+    async def _complete(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+    @overload
+    async def _complete(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.ChatCompletions: ...
+
+    @distributed_trace_async
+    async def _complete(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        messages: List[_models._models.ChatRequestMessage] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        frequency_penalty: Optional[float] = None,
+        stream_parameter: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.ChatCompletions:
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or "completes"
+        provided prompt data. The method makes a REST API call to the ``/chat/completions`` route
+        on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative
+         frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2]. Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword stream_parameter: A value indicating whether chat completions should be streamed for
+         this request. Default value is None.
+        :paramtype stream_parameter: bool
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2]. Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1]. Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1]. Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: An object specifying the format that the model must output.
+
+         Setting to ``{ "type": "json_schema", "json_schema": {...} }`` enables Structured Outputs
+         which ensures the model will match your supplied JSON schema.
+
+         Setting to ``{ "type": "json_object" }`` enables JSON mode, which ensures the message the
+         model generates is valid JSON.
+
+         **Important:** when using JSON mode, you **must** also instruct the model to produce JSON
+         yourself via a system or user message. Without this, the model may generate an unending stream
+         of whitespace until the generation reaches the token limit, resulting in a long-running and
+         seemingly "stuck" request. Also note that the message content may be partially cut off if
+         ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the
+         conversation exceeded the max context length. Default value is None.
+        :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: A list of tools the model may request to call. Currently, only functions are
+         supported as a tool. The model
+         may respond with a function call request and provide the input arguments in JSON format for
+         that function. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a preset (str or
+         ChatCompletionsToolChoicePreset) or a ChatCompletionsNamedToolChoice naming one tool.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed. Default
+         value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: ChatCompletions. The ChatCompletions is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.ChatCompletions] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if messages is _Unset:
+                raise TypeError("missing required argument: messages")
+            body = {
+                "frequency_penalty": frequency_penalty,
+                "max_tokens": max_tokens,
+                "messages": messages,
+                "model": model,
+                "presence_penalty": presence_penalty,
+                "response_format": response_format,
+                "seed": seed,
+                "stop": stop,
+                "stream": stream_parameter,
+                "temperature": temperature,
+                "tool_choice": tool_choice,
+                "tools": tools,
+                "top_p": top_p,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_chat_completions_complete_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ChatCompletions, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace_async
+    async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_chat_completions_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+
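
The _complete method above is private; per the Diffstat, the public chat surface (complete, streaming, and so on) is layered on in aio/_patch.py. A hedged sketch of the call path that wrapper is assumed to expose, using the same keyword surface as _complete:

    # Sketch, assuming the public complete() wrapper from the patch layer.
    import asyncio
    import os

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.inference.aio import ChatCompletionsClient
    from azure.ai.inference.models import SystemMessage, UserMessage

    async def main() -> None:
        async with ChatCompletionsClient(
            endpoint=os.environ["AZURE_AI_ENDPOINT"],
            credential=AzureKeyCredential(os.environ["AZURE_AI_KEY"]),
        ) as client:
            response = await client.complete(
                messages=[
                    SystemMessage(content="You are a helpful assistant."),
                    UserMessage(content="How many feet are in a mile?"),
                ],
                temperature=0.7,  # same keyword as the private _complete above
            )
            print(response.choices[0].message.content)

    asyncio.run(main())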
+class EmbeddingsClientOperationsMixin(EmbeddingsClientMixinABC):
+
+    @overload
+    async def _embed(
+        self,
+        *,
+        input: List[str],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+
+    @distributed_trace_async
+    async def _embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[str] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the ``/embeddings`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings
+         should have.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Default value is
+         None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings. Known
+         values are: "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "dimensions": dimensions,
+                "encoding_format": encoding_format,
+                "input": input,
+                "input_type": input_type,
+                "model": model,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_embeddings_embed_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace_async
+    async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work for GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_embeddings_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+
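
Likewise, _embed is wrapped by a public embed() in the patch layer. A minimal sketch; each item in EmbeddingsResult.data carries an index and an embedding vector:

    # Sketch, assuming the public embed() wrapper from the patch layer.
    import asyncio
    import os

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.inference.aio import EmbeddingsClient

    async def main() -> None:
        async with EmbeddingsClient(
            endpoint=os.environ["AZURE_AI_ENDPOINT"],
            credential=AzureKeyCredential(os.environ["AZURE_AI_KEY"]),
        ) as client:
            result = await client.embed(input=["first phrase", "second phrase"])
            for item in result.data:
                print(item.index, len(item.embedding))

    asyncio.run(main())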
+class ImageEmbeddingsClientOperationsMixin(ImageEmbeddingsClientMixinABC):
+
+    @overload
+    async def _embed(
+        self,
+        *,
+        input: List[_models.ImageEmbeddingInput],
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: JSON,
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+    @overload
+    async def _embed(
+        self,
+        body: IO[bytes],
+        *,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult: ...
+
+    @distributed_trace_async
+    async def _embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[_models.ImageEmbeddingInput] = _Unset,
+        extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        **kwargs: Any
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an
+         array.
+         The input must not exceed the max input tokens for the model. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API,
+         are passed in the JSON request payload.
+         This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and
+         "pass-through". Default value is None.
+        :paramtype extra_params: str or ~azure.ai.inference.models.ExtraParameters
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings
+         should have.
+         Passing null causes the model to use its default value.
+         Returns a 422 error if the model doesn't support the value or parameter. Default value is
+         None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input.
+         Returns a 422 error if the model doesn't support the value or parameter. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+        cls: ClsType[_models.EmbeddingsResult] = kwargs.pop("cls", None)
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "dimensions": dimensions,
+                "encoding_format": encoding_format,
+                "input": input,
+                "input_type": input_type,
+                "model": model,
+            }
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_image_embeddings_embed_request(
+            extra_params=extra_params,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.EmbeddingsResult, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
+
+    @distributed_trace_async
+    async def _get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work with a GitHub Models endpoint or an Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[_models.ModelInfo] = kwargs.pop("cls", None)
+
+        _request = build_image_embeddings_get_model_info_request(
+            api_version=self._config.api_version,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                try:
+                    await response.read()  # Load the body in memory and close the socket
+                except (StreamConsumedError, StreamClosedError):
+                    pass
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(_models.ModelInfo, response.json())
+
+        if cls:
+            return cls(pipeline_response, deserialized, {})  # type: ignore
+
+        return deserialized  # type: ignore
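+
+# Editorial usage sketch (not part of the generated code): the operations above are
+# surfaced through the patched public ImageEmbeddingsClient. Assuming such an async
+# `client` and a local file "sample.png" (placeholder name):
+#
+#     from azure.ai.inference.models import ImageEmbeddingInput
+#
+#     image = ImageEmbeddingInput.load(image_file="sample.png", image_format="png")
+#     result = await client.embed(input=[image])
+#     print(len(result.data[0].embedding))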
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py
new file mode 100644
index 00000000..f7dd3251
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_operations/_patch.py
@@ -0,0 +1,20 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+from typing import List
+
+__all__: List[str] = []  # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py
new file mode 100644
index 00000000..2f987380
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_patch.py
@@ -0,0 +1,1331 @@
+# pylint: disable=too-many-lines
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+import json
+import logging
+import sys
+
+from io import IOBase
+from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, AsyncIterable
+
+from azure.core.pipeline import PipelineResponse
+from azure.core.credentials import AzureKeyCredential
+from azure.core.tracing.decorator_async import distributed_trace_async
+from azure.core.utils import case_insensitive_dict
+from azure.core.exceptions import (
+    ClientAuthenticationError,
+    HttpResponseError,
+    map_error,
+    ResourceExistsError,
+    ResourceNotFoundError,
+    ResourceNotModifiedError,
+)
+from .. import models as _models
+from .._model_base import SdkJSONEncoder, _deserialize
+from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated
+from ._client import EmbeddingsClient as EmbeddingsClientGenerated
+from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated
+from .._operations._operations import (
+    build_chat_completions_complete_request,
+    build_embeddings_embed_request,
+    build_image_embeddings_embed_request,
+)
+from .._patch import _get_internal_response_format
+
+if TYPE_CHECKING:
+    # pylint: disable=unused-import,ungrouped-imports
+    from azure.core.credentials_async import AsyncTokenCredential
+
+if sys.version_info >= (3, 9):
+    from collections.abc import MutableMapping
+else:
+    from typing import MutableMapping  # type: ignore  # pylint: disable=ungrouped-imports
+
+JSON = MutableMapping[str, Any]  # pylint: disable=unsubscriptable-object
+_Unset: Any = object()
+_LOGGER = logging.getLogger(__name__)
+
+
+async def load_client(
+    endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any
+) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]:
+    """
+    Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route
+    on the given endpoint to determine the model type, and therefore which client to instantiate.
+    This method will only work when using a Serverless API or Managed Compute endpoint.
+    It will not work with a GitHub Models endpoint or an Azure OpenAI endpoint.
+    Keyword arguments are passed through to the client constructor (you can set keywords such as
+    `api_version`, `user_agent`, and `logging_enable` on the client constructor).
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :return: The appropriate asynchronous client associated with the given endpoint
+    :rtype: ~azure.ai.inference.aio.ChatCompletionsClient or ~azure.ai.inference.aio.EmbeddingsClient
+     or ~azure.ai.inference.aio.ImageEmbeddingsClient
+    :raises ~azure.core.exceptions.HttpResponseError:
+    """
+
+    async with ChatCompletionsClient(
+        endpoint, credential, **kwargs
+    ) as client:  # Pick any of the clients; it does not matter which.
+        try:
+            model_info = await client.get_model_info()  # type: ignore
+        except ResourceNotFoundError as error:
+            error.message = (
+                "`load_client` function does not work on this endpoint (`/info` route not supported). "
+                "Please construct one of the clients (e.g. `ChatCompletionsClient`) directly."
+            )
+            raise error
+
+    _LOGGER.info("model_info=%s", model_info)
+    if not model_info.model_type:
+        raise ValueError(
+            "The AI model information is missing a value for `model type`. Cannot create an appropriate client."
+        )
+
+    # TODO: Remove "completions", "chat-completions" and "embedding" once Mistral Large and Cohere fix their model type
+    if model_info.model_type in (
+        _models.ModelType.CHAT_COMPLETION,
+        "chat_completions",
+        "chat",
+        "completion",
+        "chat-completion",
+        "chat-completions",
+        "chat completion",
+        "chat completions",
+    ):
+        chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs)
+        chat_completion_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return chat_completion_client
+
+    if model_info.model_type in (
+        _models.ModelType.EMBEDDINGS,
+        "embedding",
+        "text_embedding",
+        "text-embeddings",
+        "text embedding",
+        "text embeddings",
+    ):
+        embedding_client = EmbeddingsClient(endpoint, credential, **kwargs)
+        embedding_client._model_info = model_info  # pylint: disable=protected-access,attribute-defined-outside-init
+        return embedding_client
+
+    if model_info.model_type in (
+        _models.ModelType.IMAGE_EMBEDDINGS,
+        "image_embedding",
+        "image-embeddings",
+        "image-embedding",
+        "image embedding",
+        "image embeddings",
+    ):
+        image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs)
+        image_embedding_client._model_info = (  # pylint: disable=protected-access,attribute-defined-outside-init
+            model_info
+        )
+        return image_embedding_client
+
+    raise ValueError(f"No client available to support AI model type `{model_info.model_type}`")
+
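+# A hedged usage sketch for `load_client` (endpoint URL and key are placeholders):
+#
+#     from azure.core.credentials import AzureKeyCredential
+#
+#     client = await load_client(
+#         "https://<resource>.<region>.models.ai.azure.com",
+#         AzureKeyCredential("<key>"),
+#     )
+#     print(type(client).__name__)  # e.g. ChatCompletionsClient, per the /info route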
+
+class ChatCompletionsClient(ChatCompletionsClientGenerated):  # pylint: disable=too-many-instance-attributes
+    """ChatCompletionsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword frequency_penalty: A value that influences the probability of generated tokens
+        appearing based on their cumulative frequency in generated text.
+        Positive values will make tokens less likely to appear as their frequency increases and
+        decrease the likelihood of the model repeating the same statements verbatim.
+        Supported range is [-2, 2].
+        Default value is None.
+    :paramtype frequency_penalty: float
+    :keyword presence_penalty: A value that influences the probability of generated tokens
+        appearing based on their existing
+        presence in generated text.
+        Positive values will make tokens less likely to appear when they already exist and increase
+        the model's likelihood to output new topics.
+        Supported range is [-2, 2].
+        Default value is None.
+    :paramtype presence_penalty: float
+    :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+        generated completions.
+        Higher values will make output more random while lower values will make results more focused
+        and deterministic.
+        It is not recommended to modify temperature and top_p for the same completions request as the
+        interaction of these two settings is difficult to predict.
+        Supported range is [0, 1].
+        Default value is None.
+    :paramtype temperature: float
+    :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+        causes the
+        model to consider the results of tokens with the provided probability mass. As an example, a
+        value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        considered.
+        It is not recommended to modify temperature and top_p for the same completions request as the
+        interaction of these two settings is difficult to predict.
+        Supported range is [0, 1].
+        Default value is None.
+    :paramtype top_p: float
+    :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+    :paramtype max_tokens: int
+    :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+        unformatted text by default. This is equivalent to setting "text" as the response_format.
+        To output JSON format, without adhering to any schema, set to "json_object".
+        To output JSON format adhering to a provided schema, set this to an object of the class
+        ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+    :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+    :keyword stop: A collection of textual sequences that will end completions generation. Default
+        value is None.
+    :paramtype stop: list[str]
+    :keyword tools: The available tool definitions that the chat completions request can use,
+        including caller-defined functions. Default value is None.
+    :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+    :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+        use for the chat completions response. Is either a Union[str,
+        "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+        Default value is None.
+    :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+        ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+    :keyword seed: If specified, the system will make a best effort to sample deterministically
+        such that repeated requests with the
+        same seed and parameters should return the same result. Determinism is not guaranteed.
+        Default value is None.
+    :paramtype seed: int
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "AsyncTokenCredential"],
+        *,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default chat completions settings, to be applied in all future service calls
+        # unless overridden by arguments in the `complete` method.
+        self._frequency_penalty = frequency_penalty
+        self._presence_penalty = presence_penalty
+        self._temperature = temperature
+        self._top_p = top_p
+        self._max_tokens = max_tokens
+        self._internal_response_format = _get_internal_response_format(response_format)
+        self._stop = stop
+        self._tools = tools
+        self._tool_choice = tool_choice
+        self._seed = seed
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
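+    # Defaults set on this constructor apply to every `complete` call unless overridden
+    # per call. An illustrative construction (placeholder endpoint and key):
+    #
+    #     client = ChatCompletionsClient(
+    #         "https://<resource>.<region>.models.ai.azure.com",
+    #         AzureKeyCredential("<key>"),
+    #         temperature=0.5,
+    #         max_tokens=1000,
+    #     )
+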
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Literal[False] = False,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.ChatCompletions: ...
+
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Literal[True],
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> AsyncIterable[_models.StreamingChatCompletionsUpdate]: ...
+
+    @overload
+    async def complete(
+        self,
+        *,
+        messages: List[_models.ChatRequestMessage],
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route
+        on the given endpoint.
+        When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting AsyncStreamingChatCompletions
+        object to get content updates as they arrive. By default, the response is a ChatCompletions object
+        (non-streaming).
+
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be an AsyncStreamingChatCompletions.
+         Otherwise, the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+         To output JSON format adhering to a provided schema, set this to an object of the class
+         ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+        :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: The available tool definitions that the chat completions request can use,
+         including caller-defined functions. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed.
+         Default value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def complete(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def complete(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    # pylint:disable=client-method-missing-tracing-decorator-async
+    async def complete(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        messages: List[_models.ChatRequestMessage] = _Unset,
+        stream: Optional[bool] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None,
+        stop: Optional[List[str]] = None,
+        tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None,
+        tool_choice: Optional[
+            Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice]
+        ] = None,
+        seed: Optional[int] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]:
+        # pylint: disable=line-too-long
+        # pylint: disable=too-many-locals
+        """Gets chat completions for the provided chat messages.
+        Completions support a wide variety of tasks and generate text that continues from or
+        "completes" provided prompt data. When using this method with `stream=True`, the response is streamed
+        back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.AsyncStreamingChatCompletions`
+        object to get content updates as they arrive.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword messages: The collection of context messages associated with this chat completions
+         request.
+         Typical usage begins with a chat message for the System role that provides instructions for
+         the behavior of the assistant, followed by alternating messages between the User and
+         Assistant roles. Required.
+        :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage]
+        :keyword stream: A value indicating whether chat completions should be streamed for this request.
+         Default value is False. If streaming is enabled, the response will be an AsyncStreamingChatCompletions.
+         Otherwise, the response will be a ChatCompletions.
+        :paramtype stream: bool
+        :keyword frequency_penalty: A value that influences the probability of generated tokens
+         appearing based on their cumulative frequency in generated text.
+         Positive values will make tokens less likely to appear as their frequency increases and
+         decrease the likelihood of the model repeating the same statements verbatim.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype frequency_penalty: float
+        :keyword presence_penalty: A value that influences the probability of generated tokens
+         appearing based on their existing
+         presence in generated text.
+         Positive values will make tokens less likely to appear when they already exist and increase
+         the model's likelihood to output new topics.
+         Supported range is [-2, 2].
+         Default value is None.
+        :paramtype presence_penalty: float
+        :keyword temperature: The sampling temperature to use that controls the apparent creativity of
+         generated completions.
+         Higher values will make output more random while lower values will make results more focused
+         and deterministic.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype temperature: float
+        :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value
+         causes the
+         model to consider the results of tokens with the provided probability mass. As an example, a
+         value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+         considered.
+         It is not recommended to modify temperature and top_p for the same completions request as the
+         interaction of these two settings is difficult to predict.
+         Supported range is [0, 1].
+         Default value is None.
+        :paramtype top_p: float
+        :keyword max_tokens: The maximum number of tokens to generate. Default value is None.
+        :paramtype max_tokens: int
+        :keyword response_format: The format that the AI model must output. AI chat completions models typically output
+         unformatted text by default. This is equivalent to setting "text" as the response_format.
+         To output JSON format, without adhering to any schema, set to "json_object".
+         To output JSON format adhering to a provided schema, set this to an object of the class
+         ~azure.ai.inference.models.JsonSchemaFormat. Default value is None.
+        :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat]
+        :keyword stop: A collection of textual sequences that will end completions generation. Default
+         value is None.
+        :paramtype stop: list[str]
+        :keyword tools: The available tool definitions that the chat completions request can use,
+         including caller-defined functions. Default value is None.
+        :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition]
+        :keyword tool_choice: If specified, the model will configure which of the provided tools it can
+         use for the chat completions response. Is either a Union[str,
+         "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type.
+         Default value is None.
+        :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or
+         ~azure.ai.inference.models.ChatCompletionsNamedToolChoice
+        :keyword seed: If specified, the system will make a best effort to sample deterministically
+         such that repeated requests with the
+         same seed and parameters should return the same result. Determinism is not guaranteed.
+         Default value is None.
+        :paramtype seed: int
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming.
+        :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        internal_response_format = _get_internal_response_format(response_format)
+
+        if body is _Unset:
+            if messages is _Unset:
+                raise TypeError("missing required argument: messages")
+            body = {
+                "messages": messages,
+                "stream": stream,
+                "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty,
+                "max_tokens": max_tokens if max_tokens is not None else self._max_tokens,
+                "model": model if model is not None else self._model,
+                "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty,
+                "response_format": (
+                    internal_response_format if internal_response_format is not None else self._internal_response_format
+                ),
+                "seed": seed if seed is not None else self._seed,
+                "stop": stop if stop is not None else self._stop,
+                "temperature": temperature if temperature is not None else self._temperature,
+                "tool_choice": tool_choice if tool_choice is not None else self._tool_choice,
+                "tools": tools if tools is not None else self._tools,
+                "top_p": top_p if top_p is not None else self._top_p,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
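+            # Illustrative: model_extras={"safe_mode": True} places "safe_mode": true at
+            # the root of the JSON request body and sets the "extra-parameters:
+            # pass-through" request header, so the service forwards unknown fields to
+            # the model rather than rejecting the request.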
+            body = {k: v for k, v in body.items() if v is not None}
+        elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool):
+            stream = body["stream"]
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_chat_completions_complete_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = stream or False
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            return _models.AsyncStreamingChatCompletions(response)
+
+        return _deserialize(_models._patch.ChatCompletions, response.json())  # pylint: disable=protected-access
+
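+    # A usage sketch for `complete` above (illustrative; message content is a
+    # placeholder). Without `stream=True` the call returns a ChatCompletions; with it,
+    # an async iterable of StreamingChatCompletionsUpdate:
+    #
+    #     from azure.ai.inference.models import SystemMessage, UserMessage
+    #
+    #     response = await client.complete(
+    #         messages=[SystemMessage(content="You are helpful."), UserMessage(content="Hi")]
+    #     )
+    #     print(response.choices[0].message.content)
+    #
+    #     updates = await client.complete(messages=[UserMessage(content="Hi")], stream=True)
+    #     async for update in updates:
+    #         if update.choices:
+    #             print(update.choices[0].delta.content or "", end="")
+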
+    @distributed_trace_async
+    async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work with a GitHub Models endpoint or an Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = await self._get_model_info(
+                    **kwargs
+                )  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
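+    # Note on `get_model_info` above: the first successful call caches the ModelInfo on
+    # the client, so repeated calls do not re-issue the REST request.
+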
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__()
+
+
+class EmbeddingsClient(EmbeddingsClientGenerated):
+    """EmbeddingsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+        have. Default value is None.
+    :paramtype dimensions: int
+    :keyword encoding_format: Optional. The desired format for the returned embeddings.
+        Known values are:
+        "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+    :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :keyword input_type: Optional. The type of the input. Known values are:
+        "text", "query", and "document". Default value is None.
+    :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "AsyncTokenCredential"],
+        *,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default embeddings settings, to be applied in all future service calls
+        # unless overridden by arguments in the `embed` method.
+        self._dimensions = dimensions
+        self._encoding_format = encoding_format
+        self._input_type = input_type
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
+    @overload
+    async def embed(
+        self,
+        *,
+        input: List[str],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @distributed_trace_async
+    async def embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[str] = _Unset,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        # pylint: disable=line-too-long
+        """Return the embedding vectors for given text prompts.
+        The method makes a REST API call to the `/embeddings` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input text to embed, encoded as a string or array of tokens.
+         To embed multiple inputs in a single request, pass an array
+         of strings or array of token arrays. Required.
+        :paramtype input: list[str]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "input": input,
+                "dimensions": dimensions if dimensions is not None else self._dimensions,
+                "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+                "input_type": input_type if input_type is not None else self._input_type,
+                "model": model if model is not None else self._model,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_embeddings_embed_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(
+                _models._patch.EmbeddingsResult, response.json()  # pylint: disable=protected-access
+            )
+
+        return deserialized  # type: ignore
+
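+    # A usage sketch for `embed` above (illustrative input strings):
+    #
+    #     result = await client.embed(input=["first phrase", "second phrase"])
+    #     for item in result.data:
+    #         print(f"index={item.index}, vector length={len(item.embedding)}")
+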
+    @distributed_trace_async
+    async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method will only work when using a Serverless API or Managed Compute endpoint.
+        It will not work with a GitHub Models endpoint or an Azure OpenAI endpoint.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = await self._get_model_info(
+                    **kwargs
+                )  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return (super().__str__() + f"\n{self._model_info}") if self._model_info else super().__str__()
+
+
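+async def _embed_usage_sketch() -> None:
+    """A minimal, illustrative sketch, not part of the shipped client: client-level
+    defaults (here ``dimensions``) apply to every ``embed`` call unless a per-call
+    argument overrides them, as implemented above. The endpoint and key below are
+    placeholders, not values from this package."""
+    from azure.core.credentials import AzureKeyCredential
+
+    client = EmbeddingsClient(
+        endpoint="https://<your-endpoint>.inference.ai.azure.com",  # placeholder
+        credential=AzureKeyCredential("<your-key>"),  # placeholder
+        dimensions=256,  # stored default, used whenever a call omits `dimensions`
+    )
+    result = await client.embed(input=["first phrase", "second phrase"])
+    for item in result.data:
+        print(item.index, len(item.embedding))
+    await client.close()
+
+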
+class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated):
+    """ImageEmbeddingsClient.
+
+    :param endpoint: Service endpoint URL for AI model inference. Required.
+    :type endpoint: str
+    :param credential: Credential used to authenticate requests to the service. Is either an
+     AzureKeyCredential type or an AsyncTokenCredential type. Required.
+    :type credential: ~azure.core.credentials.AzureKeyCredential or
+     ~azure.core.credentials_async.AsyncTokenCredential
+    :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+        have. Default value is None.
+    :paramtype dimensions: int
+    :keyword encoding_format: Optional. The desired format for the returned embeddings.
+        Known values are:
+        "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+    :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+    :keyword input_type: Optional. The type of the input. Known values are:
+        "text", "query", and "document". Default value is None.
+    :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+    :keyword model: ID of the specific AI model to use, if more than one model is available on the
+        endpoint. Default value is None.
+    :paramtype model: str
+    :keyword model_extras: Additional, model-specific parameters that are not in the
+        standard request payload. They will be added as-is to the root of the JSON in the request body.
+        How the service handles these extra parameters depends on the value of the
+        ``extra-parameters`` request header. Default value is None.
+    :paramtype model_extras: dict[str, Any]
+    :keyword api_version: The API version to use for this operation. Default value is
+     "2024-05-01-preview". Note that overriding this default value may result in unsupported
+     behavior.
+    :paramtype api_version: str
+    """
+
+    def __init__(
+        self,
+        endpoint: str,
+        credential: Union[AzureKeyCredential, "AsyncTokenCredential"],
+        *,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> None:
+
+        self._model_info: Optional[_models.ModelInfo] = None
+
+        # Store default embeddings settings, to be applied in all future service calls
+        # unless overridden by arguments in the `embed` method.
+        self._dimensions = dimensions
+        self._encoding_format = encoding_format
+        self._input_type = input_type
+        self._model = model
+        self._model_extras = model_extras
+
+        # For Key auth, we need to send these two auth HTTP request headers simultaneously:
+        # 1. "Authorization: Bearer <key>"
+        # 2. "api-key: <key>"
+        # This is because Serverless API, Managed Compute and GitHub endpoints support the first header,
+        # and Azure OpenAI and the new Unified Inference endpoints support the second header.
+        # The first header will be taken care of by auto-generated code.
+        # The second one is added here.
+        if isinstance(credential, AzureKeyCredential):
+            headers = kwargs.pop("headers", {})
+            if "api-key" not in headers:
+                headers["api-key"] = credential.key
+            kwargs["headers"] = headers
+
+        super().__init__(endpoint, credential, **kwargs)
+
+    @overload
+    async def embed(
+        self,
+        *,
+        input: List[_models.ImageEmbeddingInput],
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :keyword input: Input images to embed, passed as a list (one element per image).
+         The input must not exceed the model's maximum input tokens. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: JSON,
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: An object of type MutableMapping[str, Any], such as a dictionary, that
+         specifies the full request payload. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @overload
+    async def embed(
+        self,
+        body: IO[bytes],
+        *,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Specifies the full request payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+
+    @distributed_trace_async
+    async def embed(
+        self,
+        body: Union[JSON, IO[bytes]] = _Unset,
+        *,
+        input: List[_models.ImageEmbeddingInput] = _Unset,
+        dimensions: Optional[int] = None,
+        encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None,
+        input_type: Optional[Union[str, _models.EmbeddingInputType]] = None,
+        model: Optional[str] = None,
+        model_extras: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> _models.EmbeddingsResult:
+        # pylint: disable=line-too-long
+        """Return the embedding vectors for given images.
+        The method makes a REST API call to the ``/images/embeddings`` route on the given endpoint.
+
+        :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or an IO[bytes] type
+         that specifies the full request payload. Required.
+        :type body: JSON or IO[bytes]
+        :keyword input: Input images to embed, passed as a list (one element per image).
+         The input must not exceed the model's maximum input tokens. Required.
+        :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput]
+        :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should
+         have. Default value is None.
+        :paramtype dimensions: int
+        :keyword encoding_format: Optional. The desired format for the returned embeddings.
+         Known values are:
+         "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None.
+        :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat
+        :keyword input_type: Optional. The type of the input. Known values are:
+         "text", "query", and "document". Default value is None.
+        :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType
+        :keyword model: ID of the specific AI model to use, if more than one model is available on the
+         endpoint. Default value is None.
+        :paramtype model: str
+        :keyword model_extras: Additional, model-specific parameters that are not in the
+         standard request payload. They will be added as-is to the root of the JSON in the request body.
+         How the service handles these extra parameters depends on the value of the
+         ``extra-parameters`` request header. Default value is None.
+        :paramtype model_extras: dict[str, Any]
+        :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.EmbeddingsResult
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            401: ClientAuthenticationError,
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+        _extra_parameters: Union[_models._enums.ExtraParameters, None] = None
+
+        content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None))
+
+        if body is _Unset:
+            if input is _Unset:
+                raise TypeError("missing required argument: input")
+            body = {
+                "input": input,
+                "dimensions": dimensions if dimensions is not None else self._dimensions,
+                "encoding_format": encoding_format if encoding_format is not None else self._encoding_format,
+                "input_type": input_type if input_type is not None else self._input_type,
+                "model": model if model is not None else self._model,
+            }
+            if model_extras is not None and bool(model_extras):
+                body.update(model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            elif self._model_extras is not None and bool(self._model_extras):
+                body.update(self._model_extras)
+                _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH  # pylint: disable=protected-access
+            body = {k: v for k, v in body.items() if v is not None}
+        content_type = content_type or "application/json"
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True)  # type: ignore
+
+        _request = build_image_embeddings_embed_request(
+            extra_params=_extra_parameters,
+            content_type=content_type,
+            api_version=self._config.api_version,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        path_format_arguments = {
+            "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
+        }
+        _request.url = self._client.format_url(_request.url, **path_format_arguments)
+
+        _stream = kwargs.pop("stream", False)
+        pipeline_response: PipelineResponse = await self._client._pipeline.run(  # type: ignore # pylint: disable=protected-access
+            _request, stream=_stream, **kwargs
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)
+            raise HttpResponseError(response=response)
+
+        if _stream:
+            deserialized = response.iter_bytes()
+        else:
+            deserialized = _deserialize(
+                _models._patch.EmbeddingsResult, response.json()  # pylint: disable=protected-access
+            )
+
+        return deserialized  # type: ignore
+
+    @distributed_trace_async
+    async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo:
+        # pylint: disable=line-too-long
+        """Returns information about the AI model.
+        The method makes a REST API call to the ``/info`` route on the given endpoint.
+        This method only works with a Serverless API or Managed Compute endpoint.
+        It does not work with GitHub Models or Azure OpenAI endpoints.
+
+        :return: ModelInfo. The ModelInfo is compatible with MutableMapping
+        :rtype: ~azure.ai.inference.models.ModelInfo
+        :raises ~azure.core.exceptions.HttpResponseError:
+        """
+        if not self._model_info:
+            try:
+                self._model_info = await self._get_model_info(
+                    **kwargs
+                )  # pylint: disable=attribute-defined-outside-init
+            except ResourceNotFoundError as error:
+                error.message = "Model information is not available on this endpoint (`/info` route not supported)."
+                raise error
+
+        return self._model_info
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return (super().__str__() + f"\n{self._model_info}") if self._model_info else super().__str__()
+
+
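+async def _image_embed_usage_sketch() -> None:
+    """A minimal, illustrative sketch, not part of the shipped client: embed one
+    image with the async client. The endpoint and key are placeholders, and the
+    assumption that ImageEmbeddingInput carries a base64 ``data:`` URL in its
+    ``image`` field is ours, not confirmed in this diff."""
+    import base64
+    from azure.core.credentials import AzureKeyCredential
+
+    client = ImageEmbeddingsClient(
+        endpoint="https://<your-endpoint>.inference.ai.azure.com",  # placeholder
+        credential=AzureKeyCredential("<your-key>"),  # placeholder
+    )
+    with open("example.png", "rb") as f:  # hypothetical local file
+        data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode()
+    result = await client.embed(input=[_models.ImageEmbeddingInput(image=data_url)])
+    print(len(result.data[0].embedding))
+    await client.close()
+
+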
+__all__: List[str] = [
+    "load_client",
+    "ChatCompletionsClient",
+    "EmbeddingsClient",
+    "ImageEmbeddingsClient",
+]  # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py
new file mode 100644
index 00000000..b430582c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/aio/_vendor.py
@@ -0,0 +1,47 @@
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from abc import ABC
+from typing import TYPE_CHECKING
+
+from ._configuration import (
+    ChatCompletionsClientConfiguration,
+    EmbeddingsClientConfiguration,
+    ImageEmbeddingsClientConfiguration,
+)
+
+if TYPE_CHECKING:
+    from azure.core import AsyncPipelineClient
+
+    from .._serialization import Deserializer, Serializer
+
+
+class ChatCompletionsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "AsyncPipelineClient"
+    _config: ChatCompletionsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
+
+
+class EmbeddingsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "AsyncPipelineClient"
+    _config: EmbeddingsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
+
+
+class ImageEmbeddingsClientMixinABC(ABC):
+    """DO NOT use this class. It is for internal typing use only."""
+
+    _client: "AsyncPipelineClient"
+    _config: ImageEmbeddingsClientConfiguration
+    _serialize: "Serializer"
+    _deserialize: "Deserializer"
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/models/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/__init__.py
new file mode 100644
index 00000000..66e62570
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/__init__.py
@@ -0,0 +1,96 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=wrong-import-position
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ._patch import *  # pylint: disable=unused-wildcard-import
+
+
+from ._models import (  # type: ignore
+    AudioContentItem,
+    ChatChoice,
+    ChatCompletions,
+    ChatCompletionsNamedToolChoice,
+    ChatCompletionsNamedToolChoiceFunction,
+    ChatCompletionsToolCall,
+    ChatCompletionsToolDefinition,
+    ChatResponseMessage,
+    CompletionsUsage,
+    ContentItem,
+    EmbeddingItem,
+    EmbeddingsResult,
+    EmbeddingsUsage,
+    FunctionCall,
+    FunctionDefinition,
+    ImageContentItem,
+    ImageEmbeddingInput,
+    ImageUrl,
+    InputAudio,
+    JsonSchemaFormat,
+    ModelInfo,
+    StreamingChatChoiceUpdate,
+    StreamingChatCompletionsUpdate,
+    StreamingChatResponseMessageUpdate,
+    StreamingChatResponseToolCallUpdate,
+    TextContentItem,
+)
+
+from ._enums import (  # type: ignore
+    AudioContentFormat,
+    ChatCompletionsToolChoicePreset,
+    ChatRole,
+    CompletionsFinishReason,
+    EmbeddingEncodingFormat,
+    EmbeddingInputType,
+    ImageDetailLevel,
+    ModelType,
+)
+from ._patch import __all__ as _patch_all
+from ._patch import *
+from ._patch import patch_sdk as _patch_sdk
+
+__all__ = [
+    "AudioContentItem",
+    "ChatChoice",
+    "ChatCompletions",
+    "ChatCompletionsNamedToolChoice",
+    "ChatCompletionsNamedToolChoiceFunction",
+    "ChatCompletionsToolCall",
+    "ChatCompletionsToolDefinition",
+    "ChatResponseMessage",
+    "CompletionsUsage",
+    "ContentItem",
+    "EmbeddingItem",
+    "EmbeddingsResult",
+    "EmbeddingsUsage",
+    "FunctionCall",
+    "FunctionDefinition",
+    "ImageContentItem",
+    "ImageEmbeddingInput",
+    "ImageUrl",
+    "InputAudio",
+    "JsonSchemaFormat",
+    "ModelInfo",
+    "StreamingChatChoiceUpdate",
+    "StreamingChatCompletionsUpdate",
+    "StreamingChatResponseMessageUpdate",
+    "StreamingChatResponseToolCallUpdate",
+    "TextContentItem",
+    "AudioContentFormat",
+    "ChatCompletionsToolChoicePreset",
+    "ChatRole",
+    "CompletionsFinishReason",
+    "EmbeddingEncodingFormat",
+    "EmbeddingInputType",
+    "ImageDetailLevel",
+    "ModelType",
+]
+__all__.extend([p for p in _patch_all if p not in __all__])  # pyright: ignore
+_patch_sdk()
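+
+# Usage note (illustrative, not generated): the names re-exported above form the
+# public models surface, so callers import from this package level, e.g.:
+#
+#     from azure.ai.inference.models import ChatRequestUserMessage, ChatRole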
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_enums.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_enums.py
new file mode 100644
index 00000000..6214f668
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_enums.py
@@ -0,0 +1,146 @@
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+
+from enum import Enum
+from azure.core import CaseInsensitiveEnumMeta
+
+
+class AudioContentFormat(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """A representation of the possible audio formats for audio."""
+
+    WAV = "wav"
+    """Specifies audio in WAV format."""
+    MP3 = "mp3"
+    """Specifies audio in MP3 format."""
+
+
+class ChatCompletionsToolChoicePreset(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """Represents a generic policy for how a chat completions tool may be selected."""
+
+    AUTO = "auto"
+    """Specifies that the model may either use any of the tools provided in this chat completions
+    request or
+    instead return a standard chat completions response as if no tools were provided."""
+    NONE = "none"
+    """Specifies that the model should not respond with a tool call and should instead provide a
+    standard chat
+    completions response. Response content may still be influenced by the provided tool
+    definitions."""
+    REQUIRED = "required"
+    """Specifies that the model should respond with a call to one or more tools."""
+
+
+class ChatRole(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """A description of the intended purpose of a message within a chat completions interaction."""
+
+    SYSTEM = "system"
+    """The role that instructs or sets the behavior of the assistant."""
+    USER = "user"
+    """The role that provides input for chat completions."""
+    ASSISTANT = "assistant"
+    """The role that provides responses to system-instructed, user-prompted input."""
+    TOOL = "tool"
+    """The role that represents extension tool activity within a chat completions operation."""
+    DEVELOPER = "developer"
+    """The role that instructs or sets the behavior of the assistant. Some AI models support this role
+    instead of the 'system' role."""
+
+
+class CompletionsFinishReason(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """Representation of the manner in which a completions response concluded."""
+
+    STOPPED = "stop"
+    """Completions ended normally and reached its end of token generation."""
+    TOKEN_LIMIT_REACHED = "length"
+    """Completions exhausted available token limits before generation could complete."""
+    CONTENT_FILTERED = "content_filter"
+    """Completions generated a response that was identified as potentially sensitive per content
+    moderation policies."""
+    TOOL_CALLS = "tool_calls"
+    """Completion ended with the model calling a provided tool for output."""
+
+
+class EmbeddingEncodingFormat(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """The format of the embeddings result.
+    Returns a 422 error if the model doesn't support the value or parameter.
+    """
+
+    BASE64 = "base64"
+    """Base64"""
+    BINARY = "binary"
+    """Binary"""
+    FLOAT = "float"
+    """Floating point"""
+    INT8 = "int8"
+    """Signed 8-bit integer"""
+    UBINARY = "ubinary"
+    """ubinary"""
+    UINT8 = "uint8"
+    """Unsigned 8-bit integer"""
+
+
+class EmbeddingInputType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """Represents the input types used for embedding search."""
+
+    TEXT = "text"
+    """Indicates the input is a general text input."""
+    QUERY = "query"
+    """Indicates the input represents a search query to find the most relevant documents in your
+    vector database."""
+    DOCUMENT = "document"
+    """Indicates the input represents a document that is stored in a vector database."""
+
+
+class ExtraParameters(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON
+    request payload.
+    """
+
+    ERROR = "error"
+    """The service will error if it detected extra parameters in the request payload. This is the
+    service default."""
+    DROP = "drop"
+    """The service will ignore (drop) extra parameters in the request payload. It will only pass the
+    known parameters to the back-end AI model."""
+    PASS_THROUGH = "pass-through"
+    """The service will pass extra parameters to the back-end AI model."""
+
+
+class ImageDetailLevel(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """A representation of the possible image detail levels for image-based chat completions message
+    content.
+    """
+
+    AUTO = "auto"
+    """Specifies that the model should determine which detail level to apply using heuristics like
+    image size."""
+    LOW = "low"
+    """Specifies that image evaluation should be constrained to the 'low-res' model that may be faster
+    and consume fewer
+    tokens but may also be less accurate for highly detailed images."""
+    HIGH = "high"
+    """Specifies that image evaluation should enable the 'high-res' model that may be more accurate
+    for highly detailed
+    images but may also be slower and consume more tokens."""
+
+
+class ModelType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
+    """The type of AI model."""
+
+    EMBEDDINGS = "embeddings"
+    """A model capable of generating embeddings from a text"""
+    IMAGE_GENERATION = "image_generation"
+    """A model capable of generating images from an image and text description"""
+    TEXT_GENERATION = "text_generation"
+    """A text generation model"""
+    IMAGE_EMBEDDINGS = "image_embeddings"
+    """A model capable of generating embeddings from an image"""
+    AUDIO_GENERATION = "audio_generation"
+    """A text-to-audio generative model"""
+    CHAT_COMPLETION = "chat_completion"
+    """A model capable of taking chat-formatted messages and generate responses"""
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_models.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_models.py
new file mode 100644
index 00000000..53934528
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_models.py
@@ -0,0 +1,1458 @@
+# pylint: disable=too-many-lines
+# coding=utf-8
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# Code generated by Microsoft (R) Python Code Generator.
+# Changes may cause incorrect behavior and will be lost if the code is regenerated.
+# --------------------------------------------------------------------------
+# pylint: disable=useless-super-delegation
+
+import datetime
+from typing import Any, Dict, List, Literal, Mapping, Optional, TYPE_CHECKING, Union, overload
+
+from .. import _model_base
+from .._model_base import rest_discriminator, rest_field
+from ._enums import ChatRole
+
+if TYPE_CHECKING:
+    from .. import models as _models
+
+
+class ContentItem(_model_base.Model):
+    """An abstract representation of a structured content item within a chat message.
+
+    You probably want to use the sub-classes and not this class directly. Known sub-classes are:
+    ImageContentItem, AudioContentItem, TextContentItem
+
+    :ivar type: The discriminated object type. Required. Default value is None.
+    :vartype type: str
+    """
+
+    __mapping__: Dict[str, _model_base.Model] = {}
+    type: str = rest_discriminator(name="type")
+    """The discriminated object type. Required. Default value is None."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        type: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class AudioContentItem(ContentItem, discriminator="input_audio"):
+    """A structured chat content item containing an audio content.
+
+    :ivar type: The discriminated object type: always 'input_audio' for this type. Required.
+     Default value is "input_audio".
+    :vartype type: str
+    :ivar input_audio: The details of the input audio. Required.
+    :vartype input_audio: ~azure.ai.inference.models.InputAudio
+    """
+
+    type: Literal["input_audio"] = rest_discriminator(name="type")  # type: ignore
+    """The discriminated object type: always 'input_audio' for this type. Required. Default value is
+     \"input_audio\"."""
+    input_audio: "_models.InputAudio" = rest_field()
+    """The details of the input audio. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        input_audio: "_models.InputAudio",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="input_audio", **kwargs)
+
+
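+def _audio_content_sketch() -> "AudioContentItem":
+    """An illustrative sketch, not part of the generated models: attach audio to
+    a chat message as an AudioContentItem. Assumes InputAudio (defined later in
+    this module) accepts ``data`` (base64 string) and ``format`` keywords."""
+    import base64
+
+    with open("clip.wav", "rb") as f:  # hypothetical local file
+        encoded = base64.b64encode(f.read()).decode()
+    return AudioContentItem(input_audio=InputAudio(data=encoded, format="wav"))
+
+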
+class ChatChoice(_model_base.Model):
+    """The representation of a single prompt completion as part of an overall chat completions
+    request.
+    Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+    Token limits and other settings may limit the number of choices generated.
+
+
+    :ivar index: The ordered index associated with this chat completions choice. Required.
+    :vartype index: int
+    :ivar finish_reason: The reason that this chat completions choice completed its generation.
+     Required. Known values are: "stop", "length", "content_filter", and "tool_calls".
+    :vartype finish_reason: str or ~azure.ai.inference.models.CompletionsFinishReason
+    :ivar message: The chat message for a given chat completions prompt. Required.
+    :vartype message: ~azure.ai.inference.models.ChatResponseMessage
+    """
+
+    index: int = rest_field()
+    """The ordered index associated with this chat completions choice. Required."""
+    finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field()
+    """The reason that this chat completions choice completed its generated. Required. Known values
+     are: \"stop\", \"length\", \"content_filter\", and \"tool_calls\"."""
+    message: "_models.ChatResponseMessage" = rest_field()
+    """The chat message for a given chat completions prompt. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        index: int,
+        finish_reason: Union[str, "_models.CompletionsFinishReason"],
+        message: "_models.ChatResponseMessage",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class ChatCompletions(_model_base.Model):
+    """Representation of the response data from a chat completions request.
+    Completions support a wide variety of tasks and generate text that continues from or
+    "completes"
+    provided prompt data.
+
+
+    :ivar id: A unique identifier associated with this chat completions response. Required.
+    :vartype id: str
+    :ivar created: The first timestamp associated with generation activity for this completions
+     response,
+     represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.
+    :vartype created: ~datetime.datetime
+    :ivar model: The model used for the chat completion. Required.
+    :vartype model: str
+    :ivar choices: The collection of completions choices associated with this completions response.
+     Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+     Token limits and other settings may limit the number of choices generated. Required.
+    :vartype choices: list[~azure.ai.inference.models.ChatChoice]
+    :ivar usage: Usage information for tokens processed and generated as part of this completions
+     operation. Required.
+    :vartype usage: ~azure.ai.inference.models.CompletionsUsage
+    """
+
+    id: str = rest_field()
+    """A unique identifier associated with this chat completions response. Required."""
+    created: datetime.datetime = rest_field(format="unix-timestamp")
+    """The first timestamp associated with generation activity for this completions response,
+     represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required."""
+    model: str = rest_field()
+    """The model used for the chat completion. Required."""
+    choices: List["_models.ChatChoice"] = rest_field()
+    """The collection of completions choices associated with this completions response.
+     Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+     Token limits and other settings may limit the number of choices generated. Required."""
+    usage: "_models.CompletionsUsage" = rest_field()
+    """Usage information for tokens processed and generated as part of this completions operation.
+     Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        id: str,  # pylint: disable=redefined-builtin
+        created: datetime.datetime,
+        model: str,
+        choices: List["_models.ChatChoice"],
+        usage: "_models.CompletionsUsage",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class ChatCompletionsNamedToolChoice(_model_base.Model):
+    """A tool selection of a specific, named function tool that will limit chat completions to using
+    the named function.
+
+    :ivar type: The type of the tool. Currently, only ``function`` is supported. Required. Default
+     value is "function".
+    :vartype type: str
+    :ivar function: The function that should be called. Required.
+    :vartype function: ~azure.ai.inference.models.ChatCompletionsNamedToolChoiceFunction
+    """
+
+    type: Literal["function"] = rest_field()
+    """The type of the tool. Currently, only ``function`` is supported. Required. Default value is
+     \"function\"."""
+    function: "_models.ChatCompletionsNamedToolChoiceFunction" = rest_field()
+    """The function that should be called. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        function: "_models.ChatCompletionsNamedToolChoiceFunction",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type: Literal["function"] = "function"
+
+
+class ChatCompletionsNamedToolChoiceFunction(_model_base.Model):
+    """A tool selection of a specific, named function tool that will limit chat completions to using
+    the named function.
+
+    :ivar name: The name of the function that should be called. Required.
+    :vartype name: str
+    """
+
+    name: str = rest_field()
+    """The name of the function that should be called. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        name: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class ChatCompletionsResponseFormat(_model_base.Model):
+    """Represents the format that the model must output. Use this to enable JSON mode instead of the
+    default text mode.
+    Note that to enable JSON mode, some AI models may also require you to instruct the model to
+    produce JSON
+    via a system or user message.
+
+    You probably want to use the sub-classes and not this class directly. Known sub-classes are:
+    ChatCompletionsResponseFormatJsonObject, ChatCompletionsResponseFormatJsonSchema,
+    ChatCompletionsResponseFormatText
+
+    :ivar type: The response format type to use for chat completions. Required. Default value is
+     None.
+    :vartype type: str
+    """
+
+    __mapping__: Dict[str, _model_base.Model] = {}
+    type: str = rest_discriminator(name="type")
+    """The response format type to use for chat completions. Required. Default value is None."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        type: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class ChatCompletionsResponseFormatJsonObject(ChatCompletionsResponseFormat, discriminator="json_object"):
+    """A response format for Chat Completions that restricts responses to emitting valid JSON objects.
+    Note that to enable JSON mode, some AI models may also require you to instruct the model to
+    produce JSON
+    via a system or user message.
+
+    :ivar type: Response format type: always 'json_object' for this object. Required. Default value
+     is "json_object".
+    :vartype type: str
+    """
+
+    type: Literal["json_object"] = rest_discriminator(name="type")  # type: ignore
+    """Response format type: always 'json_object' for this object. Required. Default value is
+     \"json_object\"."""
+
+    @overload
+    def __init__(
+        self,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="json_object", **kwargs)
+
+
+class ChatCompletionsResponseFormatJsonSchema(ChatCompletionsResponseFormat, discriminator="json_schema"):
+    """A response format for Chat Completions that restricts responses to emitting valid JSON objects,
+    with a
+    JSON schema specified by the caller.
+
+    :ivar type: The type of response format being defined: ``json_schema``. Required. Default value
+     is "json_schema".
+    :vartype type: str
+    :ivar json_schema: The definition of the required JSON schema in the response, and associated
+     metadata. Required.
+    :vartype json_schema: ~azure.ai.inference.models.JsonSchemaFormat
+    """
+
+    type: Literal["json_schema"] = rest_discriminator(name="type")  # type: ignore
+    """The type of response format being defined: ``json_schema``. Required. Default value is
+     \"json_schema\"."""
+    json_schema: "_models.JsonSchemaFormat" = rest_field()
+    """The definition of the required JSON schema in the response, and associated metadata. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        json_schema: "_models.JsonSchemaFormat",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="json_schema", **kwargs)
+
+
+class ChatCompletionsResponseFormatText(ChatCompletionsResponseFormat, discriminator="text"):
+    """A response format for Chat Completions that emits text responses. This is the default response
+    format.
+
+    :ivar type: Response format type: always 'text' for this object. Required. Default value is
+     "text".
+    :vartype type: str
+    """
+
+    type: Literal["text"] = rest_discriminator(name="type")  # type: ignore
+    """Response format type: always 'text' for this object. Required. Default value is \"text\"."""
+
+    @overload
+    def __init__(
+        self,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="text", **kwargs)
+
+
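+def _json_schema_response_format_sketch() -> "ChatCompletionsResponseFormatJsonSchema":
+    """An illustrative sketch, not part of the generated models: constrain chat
+    responses to a caller-supplied JSON schema. Assumes JsonSchemaFormat (defined
+    later in this module) accepts ``name`` and ``schema`` keywords."""
+    weather_schema = {
+        "type": "object",
+        "properties": {"city": {"type": "string"}, "temperature": {"type": "number"}},
+        "required": ["city", "temperature"],
+    }
+    return ChatCompletionsResponseFormatJsonSchema(
+        json_schema=JsonSchemaFormat(name="weather", schema=weather_schema)
+    )
+
+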
+class ChatCompletionsToolCall(_model_base.Model):
+    """A function tool call requested by the AI model.
+
+    :ivar id: The ID of the tool call. Required.
+    :vartype id: str
+    :ivar type: The type of tool call. Currently, only ``function`` is supported. Required. Default
+     value is "function".
+    :vartype type: str
+    :ivar function: The details of the function call requested by the AI model. Required.
+    :vartype function: ~azure.ai.inference.models.FunctionCall
+    """
+
+    id: str = rest_field()
+    """The ID of the tool call. Required."""
+    type: Literal["function"] = rest_field()
+    """The type of tool call. Currently, only ``function`` is supported. Required. Default value is
+     \"function\"."""
+    function: "_models.FunctionCall" = rest_field()
+    """The details of the function call requested by the AI model. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        id: str,  # pylint: disable=redefined-builtin
+        function: "_models.FunctionCall",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type: Literal["function"] = "function"
+
+
+class ChatCompletionsToolDefinition(_model_base.Model):
+    """The definition of a chat completions tool that can call a function.
+
+    :ivar type: The type of the tool. Currently, only ``function`` is supported. Required. Default
+     value is "function".
+    :vartype type: str
+    :ivar function: The function definition details for the function tool. Required.
+    :vartype function: ~azure.ai.inference.models.FunctionDefinition
+    """
+
+    type: Literal["function"] = rest_field()
+    """The type of the tool. Currently, only ``function`` is supported. Required. Default value is
+     \"function\"."""
+    function: "_models.FunctionDefinition" = rest_field()
+    """The function definition details for the function tool. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        function: "_models.FunctionDefinition",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        self.type: Literal["function"] = "function"
+
+
+class ChatRequestMessage(_model_base.Model):
+    """An abstract representation of a chat message as provided in a request.
+
+    You probably want to use the sub-classes and not this class directly. Known sub-classes are:
+    ChatRequestAssistantMessage, ChatRequestDeveloperMessage, ChatRequestSystemMessage,
+    ChatRequestToolMessage, ChatRequestUserMessage
+
+    :ivar role: The chat role associated with this message. Required. Known values are: "system",
+     "user", "assistant", "tool", and "developer".
+    :vartype role: str or ~azure.ai.inference.models.ChatRole
+    """
+
+    __mapping__: Dict[str, _model_base.Model] = {}
+    role: str = rest_discriminator(name="role")
+    """The chat role associated with this message. Required. Known values are: \"system\", \"user\",
+     \"assistant\", \"tool\", and \"developer\"."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        role: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class ChatRequestAssistantMessage(ChatRequestMessage, discriminator="assistant"):
+    """A request chat message representing response or action from the assistant.
+
+    :ivar role: The chat role associated with this message, which is always 'assistant' for
+     assistant messages. Required. The role that provides responses to system-instructed,
+     user-prompted input.
+    :vartype role: str or ~azure.ai.inference.models.ASSISTANT
+    :ivar content: The content of the message.
+    :vartype content: str
+    :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to
+     subsequent input messages for the chat
+     completions request to resolve as configured.
+    :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall]
+    """
+
+    role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'assistant' for assistant messages.
+     Required. The role that provides responses to system-instructed, user-prompted input."""
+    content: Optional[str] = rest_field()
+    """The content of the message."""
+    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field()
+    """The tool calls that must be resolved and have their outputs appended to subsequent input
+     messages for the chat
+     completions request to resolve as configured."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        content: Optional[str] = None,
+        tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, role=ChatRole.ASSISTANT, **kwargs)
+
+
+class ChatRequestDeveloperMessage(ChatRequestMessage, discriminator="developer"):
+    """A request chat message containing system instructions that influence how the model will
+    generate a chat completions
+    response. Some AI models support a developer message instead of a system message.
+
+    :ivar role: The chat role associated with this message, which is always 'developer' for
+     developer messages. Required. The role that instructs or sets the behavior of the assistant.
+     Some AI models support this role instead of the 'system' role.
+    :vartype role: str or ~azure.ai.inference.models.DEVELOPER
+    :ivar content: The contents of the developer message. Required.
+    :vartype content: str
+    """
+
+    role: Literal[ChatRole.DEVELOPER] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'developer' for developer messages.
+     Required. The role that instructs or sets the behavior of the assistant. Some AI models support
+     this role instead of the 'system' role."""
+    content: str = rest_field()
+    """The contents of the developer message. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        content: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, role=ChatRole.DEVELOPER, **kwargs)
+
+
+class ChatRequestSystemMessage(ChatRequestMessage, discriminator="system"):
+    """A request chat message containing system instructions that influence how the model will
+    generate a chat completions
+    response.
+
+    :ivar role: The chat role associated with this message, which is always 'system' for system
+     messages. Required. The role that instructs or sets the behavior of the assistant.
+    :vartype role: str or ~azure.ai.inference.models.SYSTEM
+    :ivar content: The contents of the system message. Required.
+    :vartype content: str
+    """
+
+    role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'system' for system messages.
+     Required. The role that instructs or sets the behavior of the assistant."""
+    content: str = rest_field()
+    """The contents of the system message. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        content: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, role=ChatRole.SYSTEM, **kwargs)
+
+
+class ChatRequestToolMessage(ChatRequestMessage, discriminator="tool"):
+    """A request chat message representing requested output from a configured tool.
+
+    :ivar role: The chat role associated with this message, which is always 'tool' for tool
+     messages. Required. The role that represents extension tool activity within a chat completions
+     operation.
+    :vartype role: str or ~azure.ai.inference.models.TOOL
+    :ivar content: The content of the message.
+    :vartype content: str
+    :ivar tool_call_id: The ID of the tool call resolved by the provided content. Required.
+    :vartype tool_call_id: str
+    """
+
+    role: Literal[ChatRole.TOOL] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'tool' for tool messages. Required.
+     The role that represents extension tool activity within a chat completions operation."""
+    content: Optional[str] = rest_field()
+    """The content of the message."""
+    tool_call_id: str = rest_field()
+    """The ID of the tool call resolved by the provided content. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        tool_call_id: str,
+        content: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, role=ChatRole.TOOL, **kwargs)
+
+
+class ChatRequestUserMessage(ChatRequestMessage, discriminator="user"):
+    """A request chat message representing user input to the assistant.
+
+    :ivar role: The chat role associated with this message, which is always 'user' for user
+     messages. Required. The role that provides input for chat completions.
+    :vartype role: str or ~azure.ai.inference.models.USER
+    :ivar content: The contents of the user message, with available input types varying by selected
+     model. Required. Is either a str type or a [ContentItem] type.
+    :vartype content: str or list[~azure.ai.inference.models.ContentItem]
+    """
+
+    role: Literal[ChatRole.USER] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'user' for user messages. Required.
+     The role that provides input for chat completions."""
+    content: Union["str", List["_models.ContentItem"]] = rest_field()
+    """The contents of the user message, with available input types varying by selected model.
+     Required. Is either a str type or a [ContentItem] type."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        content: Union[str, List["_models.ContentItem"]],
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, role=ChatRole.USER, **kwargs)
+
+
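+def _tool_call_round_trip_sketch() -> "List[ChatRequestMessage]":
+    """A minimal sketch, not part of the generated models, of the message
+    sequence for one tool-call round trip: the assistant requests a tool call
+    and the caller resolves it with a tool message. Assumes FunctionCall
+    (defined later in this module) takes ``name`` and ``arguments`` keywords."""
+    return [
+        ChatRequestSystemMessage(content="You may call tools."),
+        ChatRequestUserMessage(content="What's the weather in Paris?"),
+        ChatRequestAssistantMessage(
+            tool_calls=[
+                ChatCompletionsToolCall(
+                    id="call_1",
+                    function=FunctionCall(name="get_weather", arguments='{"city": "Paris"}'),
+                )
+            ]
+        ),
+        ChatRequestToolMessage(tool_call_id="call_1", content='{"temperature_c": 18}'),
+    ]
+
+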
+class ChatResponseMessage(_model_base.Model):
+    """A representation of a chat message as received in a response.
+
+
+    :ivar role: The chat role associated with the message. Required. Known values are: "system",
+     "user", "assistant", "tool", and "developer".
+    :vartype role: str or ~azure.ai.inference.models.ChatRole
+    :ivar content: The content of the message. Required.
+    :vartype content: str
+    :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to
+     subsequent input messages for the chat
+     completions request to resolve as configured.
+    :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall]
+    """
+
+    role: Union[str, "_models.ChatRole"] = rest_field()
+    """The chat role associated with the message. Required. Known values are: \"system\", \"user\",
+     \"assistant\", \"tool\", and \"developer\"."""
+    content: str = rest_field()
+    """The content of the message. Required."""
+    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field()
+    """The tool calls that must be resolved and have their outputs appended to subsequent input
+     messages for the chat
+     completions request to resolve as configured."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        role: Union[str, "_models.ChatRole"],
+        content: str,
+        tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class CompletionsUsage(_model_base.Model):
+    """Representation of the token counts processed for a completions request.
+    Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
+    other consumers.
+
+
+    :ivar completion_tokens: The number of tokens generated across all completions emissions.
+     Required.
+    :vartype completion_tokens: int
+    :ivar prompt_tokens: The number of tokens in the provided prompts for the completions request.
+     Required.
+    :vartype prompt_tokens: int
+    :ivar total_tokens: The total number of tokens processed for the completions request and
+     response. Required.
+    :vartype total_tokens: int
+    """
+
+    completion_tokens: int = rest_field()
+    """The number of tokens generated across all completions emissions. Required."""
+    prompt_tokens: int = rest_field()
+    """The number of tokens in the provided prompts for the completions request. Required."""
+    total_tokens: int = rest_field()
+    """The total number of tokens processed for the completions request and response. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        completion_tokens: int,
+        prompt_tokens: int,
+        total_tokens: int,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class EmbeddingItem(_model_base.Model):
+    """Representation of a single embeddings relatedness comparison.
+
+
+    :ivar embedding: List of embedding values for the input prompt. These represent a measurement
+     of the vector-based relatedness of the provided input, or a base64-encoded string of the
+     embedding vector. Required. Is either a str or a list of float.
+    :vartype embedding: str or list[float]
+    :ivar index: Index of the prompt to which the EmbeddingItem corresponds. Required.
+    :vartype index: int
+    """
+
+    embedding: Union["str", List[float]] = rest_field()
+    """List of embedding values for the input prompt. These represent a measurement of the
+     vector-based relatedness of the provided input. Or a base64 encoded string of the embedding
+     vector. Required. Is either a str type or a [float] type."""
+    index: int = rest_field()
+    """Index of the prompt to which the EmbeddingItem corresponds. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        embedding: Union[str, List[float]],
+        index: int,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
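+
+# A minimal helper sketch (illustrative, not part of the generated code): `embedding` may
+# arrive either as a list of floats or as a base64-encoded string, so a caller can normalize
+# it to a list of floats. The base64 branch assumes the common packing of little-endian
+# 32-bit floats.
+def _example_decode_embedding(item: "EmbeddingItem") -> List[float]:
+    import base64
+    import struct
+
+    if isinstance(item.embedding, str):
+        raw = base64.b64decode(item.embedding)
+        # Each 4 bytes of the decoded payload is one little-endian float32 value.
+        return list(struct.unpack(f"<{len(raw) // 4}f", raw))
+    return list(item.embedding)
+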
+
+class EmbeddingsResult(_model_base.Model):
+    """Representation of the response data from an embeddings request.
+    Embeddings measure the relatedness of text strings and are commonly used for search,
+    clustering,
+    recommendations, and other similar scenarios.
+
+
+    :ivar id: Unique identifier for the embeddings result. Required.
+    :vartype id: str
+    :ivar data: Embedding values for the prompts submitted in the request. Required.
+    :vartype data: list[~azure.ai.inference.models.EmbeddingItem]
+    :ivar usage: Usage counts for tokens input using the embeddings API. Required.
+    :vartype usage: ~azure.ai.inference.models.EmbeddingsUsage
+    :ivar model: The model ID used to generate this result. Required.
+    :vartype model: str
+    """
+
+    id: str = rest_field()
+    """Unique identifier for the embeddings result. Required."""
+    data: List["_models.EmbeddingItem"] = rest_field()
+    """Embedding values for the prompts submitted in the request. Required."""
+    usage: "_models.EmbeddingsUsage" = rest_field()
+    """Usage counts for tokens input using the embeddings API. Required."""
+    model: str = rest_field()
+    """The model ID used to generate this result. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        id: str,  # pylint: disable=redefined-builtin
+        data: List["_models.EmbeddingItem"],
+        usage: "_models.EmbeddingsUsage",
+        model: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class EmbeddingsUsage(_model_base.Model):
+    """Measurement of the amount of tokens used in this request and response.
+
+
+    :ivar prompt_tokens: Number of tokens in the request. Required.
+    :vartype prompt_tokens: int
+    :ivar total_tokens: Total number of tokens transacted in this request/response. Should equal
+     the
+     number of tokens in the request. Required.
+    :vartype total_tokens: int
+    """
+
+    prompt_tokens: int = rest_field()
+    """Number of tokens in the request. Required."""
+    total_tokens: int = rest_field()
+    """Total number of tokens transacted in this request/response. Should equal the
+     number of tokens in the request. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        prompt_tokens: int,
+        total_tokens: int,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class FunctionCall(_model_base.Model):
+    """The name and arguments of a function that should be called, as generated by the model.
+
+
+    :ivar name: The name of the function to call. Required.
+    :vartype name: str
+    :ivar arguments: The arguments to call the function with, as generated by the model in JSON
+     format.
+     Note that the model does not always generate valid JSON, and may hallucinate parameters
+     not defined by your function schema. Validate the arguments in your code before calling
+     your function. Required.
+    :vartype arguments: str
+    """
+
+    name: str = rest_field()
+    """The name of the function to call. Required."""
+    arguments: str = rest_field()
+    """The arguments to call the function with, as generated by the model in JSON format.
+     Note that the model does not always generate valid JSON, and may hallucinate parameters
+     not defined by your function schema. Validate the arguments in your code before calling
+     your function. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        name: str,
+        arguments: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class FunctionDefinition(_model_base.Model):
+    """The definition of a caller-specified function that chat completions may invoke in response to
+    matching user input.
+
+    :ivar name: The name of the function to be called. Required.
+    :vartype name: str
+    :ivar description: A description of what the function does. The model will use this description
+     when selecting the function and
+     interpreting its parameters.
+    :vartype description: str
+    :ivar parameters: The parameters the function accepts, described as a JSON Schema object.
+    :vartype parameters: any
+    """
+
+    name: str = rest_field()
+    """The name of the function to be called. Required."""
+    description: Optional[str] = rest_field()
+    """A description of what the function does. The model will use this description when selecting the
+     function and
+     interpreting its parameters."""
+    parameters: Optional[Any] = rest_field()
+    """The parameters the function accepts, described as a JSON Schema object."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        name: str,
+        description: Optional[str] = None,
+        parameters: Optional[Any] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
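+
+# A minimal sketch (illustrative, not part of the generated code): defining a function tool
+# and defensively parsing the arguments the model generates for it, since, as noted above,
+# the model may emit invalid JSON or hallucinate parameters. The weather function and its
+# schema are hypothetical placeholders.
+def _example_validate_function_call(call: "FunctionCall") -> Dict[str, Any]:
+    import json
+
+    definition = FunctionDefinition(
+        name="get_weather",
+        description="Gets the current weather for a city.",
+        parameters={
+            "type": "object",
+            "properties": {"city": {"type": "string"}},
+            "required": ["city"],
+        },
+    )
+    try:
+        arguments = json.loads(call.arguments)
+    except json.JSONDecodeError:
+        raise ValueError(f"Model produced invalid JSON for {definition.name!r}")
+    if "city" not in arguments:
+        raise ValueError(f"Model call to {definition.name!r} is missing required argument 'city'")
+    return arguments
+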
+
+class ImageContentItem(ContentItem, discriminator="image_url"):
+    """A structured chat content item containing an image reference.
+
+    :ivar type: The discriminated object type: always 'image_url' for this type. Required. Default
+     value is "image_url".
+    :vartype type: str
+    :ivar image_url: An internet location, which must be accessible to the model, from which the
+     image may be retrieved. Required.
+    :vartype image_url: ~azure.ai.inference.models.ImageUrl
+    """
+
+    type: Literal["image_url"] = rest_discriminator(name="type")  # type: ignore
+    """The discriminated object type: always 'image_url' for this type. Required. Default value is
+     \"image_url\"."""
+    image_url: "_models.ImageUrl" = rest_field()
+    """An internet location, which must be accessible to the model,from which the image may be
+     retrieved. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        image_url: "_models.ImageUrl",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="image_url", **kwargs)
+
+
+class ImageEmbeddingInput(_model_base.Model):
+    """Represents an image with optional text.
+
+    :ivar image: The input image encoded in base64 string as a data URL. Example:
+     ``data:image/{format};base64,{data}``. Required.
+    :vartype image: str
+    :ivar text: Optional. The text input to feed into the model (like DINO, CLIP).
+     Returns a 422 error if the model doesn't support the value or parameter.
+    :vartype text: str
+    """
+
+    image: str = rest_field()
+    """The input image encoded in base64 string as a data URL. Example:
+     ``data:image/{format};base64,{data}``. Required."""
+    text: Optional[str] = rest_field()
+    """Optional. The text input to feed into the model (like DINO, CLIP).
+     Returns a 422 error if the model doesn't support the value or parameter."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        image: str,
+        text: Optional[str] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class ImageUrl(_model_base.Model):
+    """An internet location from which the model may retrieve an image.
+
+    :ivar url: The URL of the image. Required.
+    :vartype url: str
+    :ivar detail: The evaluation quality setting to use, which controls relative prioritization of
+     speed, token consumption, and
+     accuracy. Known values are: "auto", "low", and "high".
+    :vartype detail: str or ~azure.ai.inference.models.ImageDetailLevel
+    """
+
+    url: str = rest_field()
+    """The URL of the image. Required."""
+    detail: Optional[Union[str, "_models.ImageDetailLevel"]] = rest_field()
+    """The evaluation quality setting to use, which controls relative prioritization of speed, token
+     consumption, and
+     accuracy. Known values are: \"auto\", \"low\", and \"high\"."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        url: str,
+        detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class InputAudio(_model_base.Model):
+    """The details of an audio chat message content part.
+
+    :ivar data: Base64 encoded audio data. Required.
+    :vartype data: str
+    :ivar format: The audio format of the audio content. Required. Known values are: "wav" and
+     "mp3".
+    :vartype format: str or ~azure.ai.inference.models.AudioContentFormat
+    """
+
+    data: str = rest_field()
+    """Base64 encoded audio data. Required."""
+    format: Union[str, "_models.AudioContentFormat"] = rest_field()
+    """The audio format of the audio content. Required. Known values are: \"wav\" and \"mp3\"."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        data: str,
+        format: Union[str, "_models.AudioContentFormat"],
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class JsonSchemaFormat(_model_base.Model):
+    """Defines the response format for chat completions as JSON with a given schema.
+    The AI model will need to adhere to this schema when generating completions.
+
+    :ivar name: A name that labels this JSON schema. Must contain only the characters a-z, A-Z,
+     0-9, underscores, and dashes, with a maximum length of 64. Required.
+    :vartype name: str
+    :ivar schema: The definition of the JSON schema. See
+     https://json-schema.org/overview/what-is-jsonschema.
+     Note that AI models usually only support a subset of the keywords defined by JSON schema.
+     Consult your AI model documentation to determine what is supported. Required.
+    :vartype schema: dict[str, any]
+    :ivar description: A description of the response format, used by the AI model to determine how
+     to generate responses in this format.
+    :vartype description: str
+    :ivar strict: If set to true, the service will error out if the provided JSON schema contains
+     keywords not supported by the AI model. An example of such a keyword is ``maxLength`` for
+     JSON type ``string``.
+     If false, and the provided JSON schema contains keywords not supported by the AI model,
+     the AI model will not error out. Instead, it will ignore the unsupported keywords.
+    :vartype strict: bool
+    """
+
+    name: str = rest_field()
+    """A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+     with a maximum length of 64. Required."""
+    schema: Dict[str, Any] = rest_field()
+    """The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.
+     Note that AI models usually only support a subset of the keywords defined by JSON schema.
+     Consult your AI model documentation to determine what is supported. Required."""
+    description: Optional[str] = rest_field()
+    """A description of the response format, used by the AI model to determine how to generate
+     responses in this format."""
+    strict: Optional[bool] = rest_field()
+    """If set to true, the service will error out if the provided JSON schema contains keywords
+     not supported by the AI model. An example of such keyword may be ``maxLength`` for JSON type
+     ``string``.
+     If false, and the provided JSON schema contains keywords not supported by the AI model,
+     the AI model will not error out. Instead it will ignore the unsupported keywords."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        name: str,
+        schema: Dict[str, Any],
+        description: Optional[str] = None,
+        strict: Optional[bool] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
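+
+# A minimal sketch (illustrative, not part of the generated code): a JsonSchemaFormat that
+# constrains the model's response to a fixed JSON shape. The schema is a hypothetical example;
+# consult your model's documentation for the JSON Schema keywords it supports.
+def _example_json_schema_format() -> "JsonSchemaFormat":
+    return JsonSchemaFormat(
+        name="city_population",
+        schema={
+            "type": "object",
+            "properties": {
+                "city": {"type": "string"},
+                "population": {"type": "integer"},
+            },
+            "required": ["city", "population"],
+            "additionalProperties": False,
+        },
+        description="A city and its most recent population estimate.",
+        strict=True,
+    )
+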
+
+class ModelInfo(_model_base.Model):
+    """Represents some basic information about the AI model.
+
+
+    :ivar model_name: The name of the AI model. For example: ``Phi21``. Required.
+    :vartype model_name: str
+    :ivar model_type: The type of the AI model. A unique identifier for the profile. Required.
+     Known values are: "embeddings", "image_generation", "text_generation", "image_embeddings",
+     "audio_generation", and "chat_completion".
+    :vartype model_type: str or ~azure.ai.inference.models.ModelType
+    :ivar model_provider_name: The model provider name. For example: ``Microsoft Research``.
+     Required.
+    :vartype model_provider_name: str
+    """
+
+    model_name: str = rest_field()
+    """The name of the AI model. For example: ``Phi21``. Required."""
+    model_type: Union[str, "_models.ModelType"] = rest_field()
+    """The type of the AI model. A Unique identifier for the profile. Required. Known values are:
+     \"embeddings\", \"image_generation\", \"text_generation\", \"image_embeddings\",
+     \"audio_generation\", and \"chat_completion\"."""
+    model_provider_name: str = rest_field()
+    """The model provider name. For example: ``Microsoft Research``. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        model_name: str,
+        model_type: Union[str, "_models.ModelType"],
+        model_provider_name: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class StreamingChatChoiceUpdate(_model_base.Model):
+    """Represents an update to a single prompt completion when the service is streaming updates
+    using Server Sent Events (SSE).
+    Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+    Token limits and other settings may limit the number of choices generated.
+
+
+    :ivar index: The ordered index associated with this chat completions choice. Required.
+    :vartype index: int
+    :ivar finish_reason: The reason that this chat completions choice completed its generation.
+     Required. Known values are: "stop", "length", "content_filter", and "tool_calls".
+    :vartype finish_reason: str or ~azure.ai.inference.models.CompletionsFinishReason
+    :ivar delta: An update to the chat message for a given chat completions prompt. Required.
+    :vartype delta: ~azure.ai.inference.models.StreamingChatResponseMessageUpdate
+    """
+
+    index: int = rest_field()
+    """The ordered index associated with this chat completions choice. Required."""
+    finish_reason: Union[str, "_models.CompletionsFinishReason"] = rest_field()
+    """The reason that this chat completions choice completed its generated. Required. Known values
+     are: \"stop\", \"length\", \"content_filter\", and \"tool_calls\"."""
+    delta: "_models.StreamingChatResponseMessageUpdate" = rest_field()
+    """An update to the chat message for a given chat completions prompt. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        index: int,
+        finish_reason: Union[str, "_models.CompletionsFinishReason"],
+        delta: "_models.StreamingChatResponseMessageUpdate",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class StreamingChatCompletionsUpdate(_model_base.Model):
+    """Represents a response update to a chat completions request, when the service is streaming
+    updates
+    using Server Sent Events (SSE).
+    Completions support a wide variety of tasks and generate text that continues from or
+    "completes"
+    provided prompt data.
+
+
+    :ivar id: A unique identifier associated with this chat completions response. Required.
+    :vartype id: str
+    :ivar created: The first timestamp associated with generation activity for this completions
+     response,
+     represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.
+    :vartype created: ~datetime.datetime
+    :ivar model: The model used for the chat completion. Required.
+    :vartype model: str
+    :ivar choices: An update to the collection of completion choices associated with this
+     completions response.
+     Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+     Token limits and other settings may limit the number of choices generated. Required.
+    :vartype choices: list[~azure.ai.inference.models.StreamingChatChoiceUpdate]
+    :ivar usage: Usage information for tokens processed and generated as part of this completions
+     operation.
+    :vartype usage: ~azure.ai.inference.models.CompletionsUsage
+    """
+
+    id: str = rest_field()
+    """A unique identifier associated with this chat completions response. Required."""
+    created: datetime.datetime = rest_field(format="unix-timestamp")
+    """The first timestamp associated with generation activity for this completions response,
+     represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required."""
+    model: str = rest_field()
+    """The model used for the chat completion. Required."""
+    choices: List["_models.StreamingChatChoiceUpdate"] = rest_field()
+    """An update to the collection of completion choices associated with this completions response.
+     Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+     Token limits and other settings may limit the number of choices generated. Required."""
+    usage: Optional["_models.CompletionsUsage"] = rest_field()
+    """Usage information for tokens processed and generated as part of this completions operation."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        id: str,  # pylint: disable=redefined-builtin
+        created: datetime.datetime,
+        model: str,
+        choices: List["_models.StreamingChatChoiceUpdate"],
+        usage: Optional["_models.CompletionsUsage"] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class StreamingChatResponseMessageUpdate(_model_base.Model):
+    """A representation of a chat message update as received in a streaming response.
+
+    :ivar role: The chat role associated with the message. If present, should always be
+     'assistant'. Known values are: "system", "user", "assistant", "tool", and "developer".
+    :vartype role: str or ~azure.ai.inference.models.ChatRole
+    :ivar content: The content of the message.
+    :vartype content: str
+    :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to
+     subsequent input messages for the chat
+     completions request to resolve as configured.
+    :vartype tool_calls: list[~azure.ai.inference.models.StreamingChatResponseToolCallUpdate]
+    """
+
+    role: Optional[Union[str, "_models.ChatRole"]] = rest_field()
+    """The chat role associated with the message. If present, should always be 'assistant'. Known
+     values are: \"system\", \"user\", \"assistant\", \"tool\", and \"developer\"."""
+    content: Optional[str] = rest_field()
+    """The content of the message."""
+    tool_calls: Optional[List["_models.StreamingChatResponseToolCallUpdate"]] = rest_field()
+    """The tool calls that must be resolved and have their outputs appended to subsequent input
+     messages for the chat
+     completions request to resolve as configured."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        role: Optional[Union[str, "_models.ChatRole"]] = None,
+        content: Optional[str] = None,
+        tool_calls: Optional[List["_models.StreamingChatResponseToolCallUpdate"]] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class StreamingChatResponseToolCallUpdate(_model_base.Model):
+    """An update to the function tool call information requested by the AI model.
+
+
+    :ivar id: The ID of the tool call. Required.
+    :vartype id: str
+    :ivar function: Updates to the function call requested by the AI model. Required.
+    :vartype function: ~azure.ai.inference.models.FunctionCall
+    """
+
+    id: str = rest_field()
+    """The ID of the tool call. Required."""
+    function: "_models.FunctionCall" = rest_field()
+    """Updates to the function call requested by the AI model. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        id: str,  # pylint: disable=redefined-builtin
+        function: "_models.FunctionCall",
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+
+class TextContentItem(ContentItem, discriminator="text"):
+    """A structured chat content item containing plain text.
+
+    :ivar type: The discriminated object type: always 'text' for this type. Required. Default value
+     is "text".
+    :vartype type: str
+    :ivar text: The content of the message. Required.
+    :vartype text: str
+    """
+
+    type: Literal["text"] = rest_discriminator(name="type")  # type: ignore
+    """The discriminated object type: always 'text' for this type. Required. Default value is
+     \"text\"."""
+    text: str = rest_field()
+    """The content of the message. Required."""
+
+    @overload
+    def __init__(
+        self,
+        *,
+        text: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, type="text", **kwargs)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_patch.py
new file mode 100644
index 00000000..1bc06799
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/models/_patch.py
@@ -0,0 +1,576 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+import base64
+import json
+import logging
+import queue
+import re
+import sys
+
+from typing import Mapping, Literal, Any, List, AsyncIterator, Iterator, Optional, Union, overload
+from azure.core.rest import HttpResponse, AsyncHttpResponse
+from ._enums import ChatRole
+from .._model_base import rest_discriminator, rest_field
+from ._models import ChatRequestMessage
+from ._models import ImageUrl as ImageUrlGenerated
+from ._models import ChatCompletions as ChatCompletionsGenerated
+from ._models import EmbeddingsResult as EmbeddingsResultGenerated
+from ._models import ImageEmbeddingInput as EmbeddingInputGenerated
+from ._models import InputAudio as InputAudioGenerated
+from .. import models as _models
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+logger = logging.getLogger(__name__)
+
+
+class UserMessage(ChatRequestMessage, discriminator="user"):
+    """A request chat message representing user input to the assistant.
+
+    :ivar role: The chat role associated with this message, which is always 'user' for user
+     messages. Required. The role that provides input for chat completions.
+    :vartype role: str or ~azure.ai.inference.models.USER
+    :ivar content: The contents of the user message, with available input types varying by selected
+     model. Required. Is either a str or a list of ContentItem.
+    :vartype content: str or list[~azure.ai.inference.models.ContentItem]
+    """
+
+    role: Literal[ChatRole.USER] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'user' for user messages. Required.
+     The role that provides input for chat completions."""
+    content: Union["str", List["_models.ContentItem"]] = rest_field()
+    """The contents of the user message, with available input types varying by selected model.
+     Required. Is either a str or a list of ContentItem."""
+
+    @overload
+    def __init__(
+        self,
+        content: Union[str, List["_models.ContentItem"]],
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        if len(args) == 1 and isinstance(args[0], (List, str)):
+            if kwargs.get("content") is not None:
+                raise ValueError("content cannot be provided as positional and keyword arguments")
+            kwargs["content"] = args[0]
+            args = tuple()
+        super().__init__(*args, role=ChatRole.USER, **kwargs)
+
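+
+# A minimal usage sketch (illustrative, not part of the customized code): the patched class
+# accepts `content` positionally, so the two forms below are equivalent.
+def _example_user_message_shorthand() -> None:
+    short = UserMessage("How many feet are in a mile?")
+    explicit = UserMessage(content="How many feet are in a mile?")
+    assert short.content == explicit.content
+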
+
+class SystemMessage(ChatRequestMessage, discriminator="system"):
+    """A request chat message containing system instructions that influence how the model will
+    generate a chat completions response.
+
+    :ivar role: The chat role associated with this message, which is always 'system' for system
+     messages. Required.
+    :vartype role: str or ~azure.ai.inference.models.SYSTEM
+    :ivar content: The contents of the system message. Required.
+    :vartype content: str
+    """
+
+    role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'system' for system messages.
+     Required."""
+    content: str = rest_field()
+    """The contents of the system message. Required."""
+
+    @overload
+    def __init__(
+        self,
+        content: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        if len(args) == 1 and isinstance(args[0], str):
+            if kwargs.get("content") is not None:
+                raise ValueError("content cannot be provided as positional and keyword arguments")
+            kwargs["content"] = args[0]
+            args = tuple()
+        super().__init__(*args, role=ChatRole.SYSTEM, **kwargs)
+
+
+class DeveloperMessage(ChatRequestMessage, discriminator="developer"):
+    """A request chat message containing developer instructions that influence how the model will
+    generate a chat completions response. Some AI models support developer messages instead
+    of system messages.
+
+    :ivar role: The chat role associated with this message, which is always 'developer' for developer
+     messages. Required.
+    :vartype role: str or ~azure.ai.inference.models.DEVELOPER
+    :ivar content: The contents of the developer message. Required.
+    :vartype content: str
+    """
+
+    role: Literal[ChatRole.DEVELOPER] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'developer' for developer messages.
+     Required."""
+    content: str = rest_field()
+    """The contents of the developer message. Required."""
+
+    @overload
+    def __init__(
+        self,
+        content: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        if len(args) == 1 and isinstance(args[0], str):
+            if kwargs.get("content") is not None:
+                raise ValueError("content cannot be provided as positional and keyword arguments")
+            kwargs["content"] = args[0]
+            args = tuple()
+        super().__init__(*args, role=ChatRole.DEVELOPER, **kwargs)
+
+
+class AssistantMessage(ChatRequestMessage, discriminator="assistant"):
+    """A request chat message representing response or action from the assistant.
+
+    :ivar role: The chat role associated with this message, which is always 'assistant' for
+     assistant messages. Required. The role that provides responses to system-instructed,
+     user-prompted input.
+    :vartype role: str or ~azure.ai.inference.models.ASSISTANT
+    :ivar content: The content of the message.
+    :vartype content: str
+    :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to
+     subsequent input messages for the chat
+     completions request to resolve as configured.
+    :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall]
+    """
+
+    role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'assistant' for assistant messages.
+     Required. The role that provides responses to system-instructed, user-prompted input."""
+    content: Optional[str] = rest_field()
+    """The content of the message."""
+    tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field()
+    """The tool calls that must be resolved and have their outputs appended to subsequent input
+     messages for the chat
+     completions request to resolve as configured."""
+
+    @overload
+    def __init__(
+        self,
+        content: Optional[str] = None,
+        *,
+        tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = None,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        if len(args) == 1 and isinstance(args[0], str):
+            if kwargs.get("content") is not None:
+                raise ValueError("content cannot be provided as positional and keyword arguments")
+            kwargs["content"] = args[0]
+            args = tuple()
+        super().__init__(*args, role=ChatRole.ASSISTANT, **kwargs)
+
+
+class ToolMessage(ChatRequestMessage, discriminator="tool"):
+    """A request chat message representing requested output from a configured tool.
+
+    :ivar role: The chat role associated with this message, which is always 'tool' for tool
+     messages. Required. The role that represents extension tool activity within a chat completions
+     operation.
+    :vartype role: str or ~azure.ai.inference.models.TOOL
+    :ivar content: The content of the message.
+    :vartype content: str
+    :ivar tool_call_id: The ID of the tool call resolved by the provided content. Required.
+    :vartype tool_call_id: str
+    """
+
+    role: Literal[ChatRole.TOOL] = rest_discriminator(name="role")  # type: ignore
+    """The chat role associated with this message, which is always 'tool' for tool messages. Required.
+     The role that represents extension tool activity within a chat completions operation."""
+    content: Optional[str] = rest_field()
+    """The content of the message."""
+    tool_call_id: str = rest_field()
+    """The ID of the tool call resolved by the provided content. Required."""
+
+    @overload
+    def __init__(
+        self,
+        content: Optional[str] = None,
+        *,
+        tool_call_id: str,
+    ) -> None: ...
+
+    @overload
+    def __init__(self, mapping: Mapping[str, Any]) -> None:
+        """
+        :param mapping: raw JSON to initialize the model.
+        :type mapping: Mapping[str, Any]
+        """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        if len(args) == 1 and isinstance(args[0], str):
+            if kwargs.get("content") is not None:
+                raise ValueError("content cannot be provided as positional and keyword arguments")
+            kwargs["content"] = args[0]
+            args = tuple()
+        super().__init__(*args, role=ChatRole.TOOL, **kwargs)
+
+
+class ChatCompletions(ChatCompletionsGenerated):
+    """Representation of the response data from a chat completions request.
+    Completions support a wide variety of tasks and generate text that continues from or
+    "completes"
+    provided prompt data.
+
+
+    :ivar id: A unique identifier associated with this chat completions response. Required.
+    :vartype id: str
+    :ivar created: The first timestamp associated with generation activity for this completions
+     response,
+     represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.
+    :vartype created: ~datetime.datetime
+    :ivar model: The model used for the chat completion. Required.
+    :vartype model: str
+    :ivar usage: Usage information for tokens processed and generated as part of this completions
+     operation. Required.
+    :vartype usage: ~azure.ai.inference.models.CompletionsUsage
+    :ivar choices: The collection of completions choices associated with this completions response.
+     Generally, ``n`` choices are generated per provided prompt with a default value of 1.
+     Token limits and other settings may limit the number of choices generated. Required.
+    :vartype choices: list[~azure.ai.inference.models.ChatChoice]
+    """
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return json.dumps(self.as_dict(), indent=2)
+
+
+class EmbeddingsResult(EmbeddingsResultGenerated):
+    """Representation of the response data from an embeddings request.
+    Embeddings measure the relatedness of text strings and are commonly used for search,
+    clustering,
+    recommendations, and other similar scenarios.
+
+
+    :ivar data: Embedding values for the prompts submitted in the request. Required.
+    :vartype data: list[~azure.ai.inference.models.EmbeddingItem]
+    :ivar usage: Usage counts for tokens input using the embeddings API. Required.
+    :vartype usage: ~azure.ai.inference.models.EmbeddingsUsage
+    :ivar model: The model ID used to generate this result. Required.
+    :vartype model: str
+    """
+
+    def __str__(self) -> str:
+        # pylint: disable=client-method-name-no-double-underscore
+        return json.dumps(self.as_dict(), indent=2)
+
+
+class ImageUrl(ImageUrlGenerated):
+
+    @classmethod
+    def load(
+        cls, *, image_file: str, image_format: str, detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None
+    ) -> Self:
+        """
+        Create an ImageUrl object from a local image file. The method reads the image
+        file and encodes it as a base64 string, which together with the image format
+        is then used to format the JSON `url` value passed in the request payload.
+
+        :keyword image_file: The name of the local image file to load. Required.
+        :paramtype image_file: str
+        :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required.
+        :paramtype image_format: str
+        :keyword detail: The evaluation quality setting to use, which controls relative prioritization of
+         speed, token consumption, and accuracy. Known values are: "auto", "low", and "high".
+        :paramtype detail: str or ~azure.ai.inference.models.ImageDetailLevel
+        :return: An ImageUrl object with the image data encoded as a base64 string.
+        :rtype: ~azure.ai.inference.models.ImageUrl
+        :raises FileNotFoundError: when the image file could not be opened.
+        """
+        with open(image_file, "rb") as f:
+            image_data = base64.b64encode(f.read()).decode("utf-8")
+        url = f"data:image/{image_format};base64,{image_data}"
+        return cls(url=url, detail=detail)
+
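+
+# A minimal usage sketch (illustrative, not part of the customized code): loading a local
+# image file into a multimodal user message. The file name "sample.png" is a hypothetical
+# placeholder.
+def _example_image_url_load() -> UserMessage:
+    image_url = ImageUrl.load(image_file="sample.png", image_format="png", detail="high")
+    return UserMessage(
+        content=[
+            _models.TextContentItem(text="Describe this image."),
+            _models.ImageContentItem(image_url=image_url),
+        ]
+    )
+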
+
+class ImageEmbeddingInput(EmbeddingInputGenerated):
+
+    @classmethod
+    def load(cls, *, image_file: str, image_format: str, text: Optional[str] = None) -> Self:
+        """
+        Create an ImageEmbeddingInput object from a local image file. The method reads the image
+        file and encodes it as a base64 string, which together with the image format
+        is then used to format the JSON `image` value passed in the request payload.
+
+        :keyword image_file: The name of the local image file to load. Required.
+        :paramtype image_file: str
+        :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required.
+        :paramtype image_format: str
+        :keyword text: Optional. The text input to feed into the model (like DINO, CLIP).
+         Returns a 422 error if the model doesn't support the value or parameter.
+        :paramtype text: str
+        :return: An ImageEmbeddingInput object with the image data encoded as a base64 string.
+        :rtype: ~azure.ai.inference.models.ImageEmbeddingInput
+        :raises FileNotFoundError: when the image file could not be opened.
+        """
+        with open(image_file, "rb") as f:
+            image_data = base64.b64encode(f.read()).decode("utf-8")
+        image_uri = f"data:image/{image_format};base64,{image_data}"
+        return cls(image=image_uri, text=text)
+
+
+class BaseStreamingChatCompletions:
+    """A base class for the sync and async streaming chat completions responses, holding any common code
+    to deserializes the Server Sent Events (SSE) response stream into chat completions updates, each one
+    represented by a StreamingChatCompletionsUpdate object.
+    """
+
+    # Enable detailed logs of SSE parsing. For development only, should be `False` by default.
+    _ENABLE_CLASS_LOGS = False
+
+    # The prefix of each line in the SSE stream that contains a JSON string
+    # to deserialize into a StreamingChatCompletionsUpdate object
+    _SSE_DATA_EVENT_PREFIX = b"data: "
+
+    # The line indicating the end of the SSE stream
+    _SSE_DATA_EVENT_DONE = b"data: [DONE]"
+
+    def __init__(self):
+        self._queue: "queue.Queue[_models.StreamingChatCompletionsUpdate]" = queue.Queue()
+        self._incomplete_line = b""
+        self._done = False  # Will be set to True when reading 'data: [DONE]' line
+
+    # See https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream
+    def _deserialize_and_add_to_queue(self, element: bytes) -> bool:
+
+        if self._ENABLE_CLASS_LOGS:
+            logger.debug("[Original element] %s", repr(element))
+
+        # Clear the queue of StreamingChatCompletionsUpdate before processing the next block
+        self._queue.queue.clear()
+
+        # Split the single input bytes object at new line characters, and get a list of bytes objects, each
+        # representing a single "line". The bytes object at the end of the list may be a partial "line" that
+        # does not contain a new line character at the end.
+        # Note 1: DO NOT try to use something like this here:
+        #   line_list: List[str] = re.split(r"(?<=\n)", element.decode("utf-8"))
+        #   to do full UTF8 decoding of the whole input bytes object, as the last line in the list may be partial, and
+        #   as such may contain a partial UTF8 Chinese character (for example). `decode("utf-8")` will raise an
+        #   exception for such a case. See GitHub issue https://github.com/Azure/azure-sdk-for-python/issues/39565
+        # Note 2: Consider future re-write and simplifications of this code by using:
+        #   `codecs.getincrementaldecoder("utf-8")`
+        line_list: List[bytes] = re.split(re.compile(b"(?<=\n)"), element)
+        for index, line in enumerate(line_list):
+
+            if self._ENABLE_CLASS_LOGS:
+                logger.debug("[Original line] %s", repr(line))
+
+            if index == 0:
+                line = self._incomplete_line + line
+                self._incomplete_line = b""
+
+            if index == len(line_list) - 1 and not line.endswith(b"\n"):
+                self._incomplete_line = line
+                return False
+
+            if self._ENABLE_CLASS_LOGS:
+                logger.debug("[Modified line] %s", repr(line))
+
+            if line == b"\n":  # Empty line, indicating flush output to client
+                continue
+
+            if not line.startswith(self._SSE_DATA_EVENT_PREFIX):
+                raise ValueError(f"SSE event not supported (line `{repr(line)}`)")
+
+            if line.startswith(self._SSE_DATA_EVENT_DONE):
+                if self._ENABLE_CLASS_LOGS:
+                    logger.debug("[Done]")
+                return True
+
+            # If you reached here, the line should contain `data: {...}\n`
+            # where the curly braces contain a valid JSON object.
+            # It is now safe to do UTF8 decoding of the line.
+            line_str = line.decode("utf-8")
+
+            # Deserialize it into a StreamingChatCompletionsUpdate object
+            # and add it to the queue.
+            # pylint: disable=W0212 # Access to a protected member _deserialize of a client class
+            update = _models.StreamingChatCompletionsUpdate._deserialize(
+                json.loads(line_str[len(self._SSE_DATA_EVENT_PREFIX) : -1]), []
+            )
+
+            # We skip any update that has a None or empty choices list, and does not have token usage info.
+            # (this is what OpenAI Python SDK does)
+            if update.choices or update.usage:
+                self._queue.put(update)
+
+            if self._ENABLE_CLASS_LOGS:
+                logger.debug("[Added to queue]")
+
+        return False
+
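+
+# For reference, a typical SSE block handled by _deserialize_and_add_to_queue looks like the
+# following (a hypothetical, abbreviated payload). Each event is a `data: {...}` line followed
+# by an empty line, and the stream is terminated by `data: [DONE]`:
+#
+#   data: {"id":"abc","created":1700000000,"model":"my-model","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
+#
+#   data: [DONE]
+#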
+
+class StreamingChatCompletions(BaseStreamingChatCompletions):
+    """Represents an interator over StreamingChatCompletionsUpdate objects. It can be used for either synchronous or
+    asynchronous iterations. The class deserializes the Server Sent Events (SSE) response stream
+    into chat completions updates, each one represented by a StreamingChatCompletionsUpdate object.
+    """
+
+    def __init__(self, response: HttpResponse):
+        super().__init__()
+        self._response = response
+        self._bytes_iterator: Iterator[bytes] = response.iter_bytes()
+
+    def __iter__(self) -> Any:
+        return self
+
+    def __next__(self) -> "_models.StreamingChatCompletionsUpdate":
+        while self._queue.empty() and not self._done:
+            self._done = self._read_next_block()
+        if self._queue.empty():
+            raise StopIteration
+        return self._queue.get()
+
+    def _read_next_block(self) -> bool:
+        if self._ENABLE_CLASS_LOGS:
+            logger.debug("[Reading next block]")
+        try:
+            element = self._bytes_iterator.__next__()
+        except StopIteration:
+            self.close()
+            return True
+        return self._deserialize_and_add_to_queue(element)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:  # type: ignore
+        self.close()
+
+    def close(self) -> None:
+        self._response.close()
+
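+
+# A minimal usage sketch (illustrative, not part of the customized code): consuming a
+# streaming response synchronously. `client` stands for an already-constructed
+# ChatCompletionsClient, and `stream=True` is assumed to make `complete` return a
+# StreamingChatCompletions instance.
+def _example_stream_completions(client: Any) -> None:
+    with client.complete(messages=[UserMessage("Tell me a joke.")], stream=True) as response:
+        for update in response:
+            if update.choices and update.choices[0].delta.content:
+                print(update.choices[0].delta.content, end="")
+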
+
+class AsyncStreamingChatCompletions(BaseStreamingChatCompletions):
+    """Represents an async interator over StreamingChatCompletionsUpdate objects.
+    It can be used for either synchronous or asynchronous iterations. The class
+    deserializes the Server Sent Events (SSE) response stream into chat
+    completions updates, each one represented by a StreamingChatCompletionsUpdate object.
+    """
+
+    def __init__(self, response: AsyncHttpResponse):
+        super().__init__()
+        self._response = response
+        self._bytes_iterator: AsyncIterator[bytes] = response.iter_bytes()
+
+    def __aiter__(self) -> Any:
+        return self
+
+    async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate":
+        while self._queue.empty() and not self._done:
+            self._done = await self._read_next_block_async()
+        if self._queue.empty():
+            raise StopAsyncIteration
+        return self._queue.get()
+
+    async def _read_next_block_async(self) -> bool:
+        if self._ENABLE_CLASS_LOGS:
+            logger.debug("[Reading next block]")
+        try:
+            element = await self._bytes_iterator.__anext__()
+        except StopAsyncIteration:
+            await self.aclose()
+            return True
+        return self._deserialize_and_add_to_queue(element)
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:  # type: ignore
+        await self.aclose()
+
+    async def aclose(self) -> None:
+        await self._response.close()
+
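+
+# A minimal usage sketch (illustrative, not part of the customized code), mirroring the
+# synchronous example above. `client` stands for an already-constructed asynchronous
+# ChatCompletionsClient from the aio namespace.
+async def _example_stream_completions_async(client: Any) -> None:
+    response = await client.complete(messages=[UserMessage("Tell me a joke.")], stream=True)
+    async with response:
+        async for update in response:
+            if update.choices and update.choices[0].delta.content:
+                print(update.choices[0].delta.content, end="")
+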
+
+class InputAudio(InputAudioGenerated):
+
+    @classmethod
+    def load(
+        cls,
+        *,
+        audio_file: str,
+        audio_format: str,
+    ) -> Self:
+        """
+        Create an InputAudio object from a local audio file. The method reads the audio
+        file and encodes it as a base64 string, which together with the audio format
+        is then used to create the InputAudio object passed to the request payload.
+
+        :keyword audio_file: The name of the local audio file to load. Required.
+        :paramtype audio_file: str
+        :keyword audio_format: The MIME type format of the audio. For example: "wav", "mp3". Required.
+        :paramtype audio_format: str
+        :return: An InputAudio object with the audio data encoded as a base64 string.
+        :rtype: ~azure.ai.inference.models.InputAudio
+        :raises FileNotFoundError: when the audio file could not be opened.
+        """
+        with open(audio_file, "rb") as f:
+            audio_data = base64.b64encode(f.read()).decode("utf-8")
+        return cls(data=audio_data, format=audio_format)
+
+
+__all__: List[str] = [
+    "AssistantMessage",
+    "AsyncStreamingChatCompletions",
+    "ChatCompletions",
+    "ChatRequestMessage",
+    "EmbeddingsResult",
+    "ImageEmbeddingInput",
+    "ImageUrl",
+    "InputAudio",
+    "StreamingChatCompletions",
+    "SystemMessage",
+    "ToolMessage",
+    "UserMessage",
+    "DeveloperMessage",
+]  # Add all objects you want publicly available to users at this package level
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/__init__.py
new file mode 100644
index 00000000..2e11b31c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/__init__.py
@@ -0,0 +1,8 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# pylint: disable=unused-import
+from ._patch import patch_sdk as _patch_sdk, PromptTemplate
+
+_patch_sdk()
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_core.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_core.py
new file mode 100644
index 00000000..ec670299
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_core.py
@@ -0,0 +1,312 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="assignment,attr-defined,index,arg-type"
+# pylint: disable=line-too-long,R,consider-iterating-dictionary,raise-missing-from,dangerous-default-value
+from __future__ import annotations
+import os
+from dataclasses import dataclass, field, asdict
+from pathlib import Path
+from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Union
+from ._tracer import Tracer, to_dict
+from ._utils import load_json
+
+
+@dataclass
+class ToolCall:
+    id: str
+    name: str
+    arguments: str
+
+
+@dataclass
+class PropertySettings:
+    """PropertySettings class to define the properties of the model
+
+    Attributes
+    ----------
+    type : str
+        The type of the property
+    default : Any
+        The default value of the property
+    description : str
+        The description of the property
+    """
+
+    type: Literal["string", "number", "array", "object", "boolean"]
+    default: Union[str, int, float, List, Dict, bool, None] = field(default=None)
+    description: str = field(default="")
+
+
+@dataclass
+class ModelSettings:
+    """ModelSettings class to define the model of the prompty
+
+    Attributes
+    ----------
+    api : str
+        The API of the model
+    configuration : Dict
+        The configuration of the model
+    parameters : Dict
+        The parameters of the model
+    response : Dict
+        The response of the model
+    """
+
+    api: str = field(default="")
+    configuration: Dict = field(default_factory=dict)
+    parameters: Dict = field(default_factory=dict)
+    response: Dict = field(default_factory=dict)
+
+
+@dataclass
+class TemplateSettings:
+    """TemplateSettings class to define the template of the prompty
+
+    Attributes
+    ----------
+    type : str
+        The type of the template
+    parser : str
+        The parser of the template
+    """
+
+    type: str = field(default="mustache")
+    parser: str = field(default="")
+
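+
+# A minimal sketch (illustrative, not part of this module) of how the settings dataclasses
+# compose into a Prompty (defined below). The configuration and parameter keys are
+# hypothetical placeholders.
+def _example_prompty_settings() -> "Prompty":
+    model = ModelSettings(
+        api="chat",
+        configuration={"endpoint": "https://example.invalid"},
+        parameters={"temperature": 0.7},
+    )
+    template = TemplateSettings()  # defaults to the "mustache" template type
+    return Prompty(name="example", model=model, template=template)
+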
+
+@dataclass
+class Prompty:
+    """Prompty class to define the prompty
+
+    Attributes
+    ----------
+    name : str
+        The name of the prompty
+    description : str
+        The description of the prompty
+    authors : List[str]
+        The authors of the prompty
+    tags : List[str]
+        The tags of the prompty
+    version : str
+        The version of the prompty
+    base : str
+        The base of the prompty
+    basePrompty : Prompty
+        The base prompty
+    model : ModelSettings
+        The model of the prompty
+    sample : Dict
+        The sample of the prompty
+    inputs : Dict[str, PropertySettings]
+        The inputs of the prompty
+    outputs : Dict[str, PropertySettings]
+        The outputs of the prompty
+    template : TemplateSettings
+        The template of the prompty
+    file : FilePath
+        The file of the prompty
+    content : Union[str, List[str], Dict]
+        The content of the prompty
+    """
+
+    # metadata
+    name: str = field(default="")
+    description: str = field(default="")
+    authors: List[str] = field(default_factory=list)
+    tags: List[str] = field(default_factory=list)
+    version: str = field(default="")
+    base: str = field(default="")
+    basePrompty: Union[Prompty, None] = field(default=None)
+    # model
+    model: ModelSettings = field(default_factory=ModelSettings)
+
+    # sample
+    sample: Dict = field(default_factory=dict)
+
+    # input / output
+    inputs: Dict[str, PropertySettings] = field(default_factory=dict)
+    outputs: Dict[str, PropertySettings] = field(default_factory=dict)
+
+    # template
+    template: TemplateSettings = field(default_factory=TemplateSettings)
+
+    file: Union[Path, str] = field(default="")
+    content: Union[str, List[str], Dict] = field(default="")
+
+    def to_safe_dict(self) -> Dict[str, Any]:
+        d = {}
+        if self.model:
+            d["model"] = asdict(self.model)
+            _mask_secrets(d, ["model", "configuration"])
+        if self.template:
+            d["template"] = asdict(self.template)
+        if self.inputs:
+            d["inputs"] = {k: asdict(v) for k, v in self.inputs.items()}
+        if self.outputs:
+            d["outputs"] = {k: asdict(v) for k, v in self.outputs.items()}
+        if self.file:
+            d["file"] = str(self.file.as_posix()) if isinstance(self.file, Path) else self.file
+        return d
+
+    @staticmethod
+    def hoist_base_prompty(top: Prompty, base: Prompty) -> Prompty:
+        top.name = base.name if top.name == "" else top.name
+        top.description = base.description if top.description == "" else top.description
+        top.authors = list(set(base.authors + top.authors))
+        top.tags = list(set(base.tags + top.tags))
+        top.version = base.version if top.version == "" else top.version
+
+        top.model.api = base.model.api if top.model.api == "" else top.model.api
+        top.model.configuration = param_hoisting(top.model.configuration, base.model.configuration)
+        top.model.parameters = param_hoisting(top.model.parameters, base.model.parameters)
+        top.model.response = param_hoisting(top.model.response, base.model.response)
+
+        top.sample = param_hoisting(top.sample, base.sample)
+
+        top.basePrompty = base
+
+        return top
+
+    @staticmethod
+    def _process_file(file: str, parent: Path) -> Any:
+        file_path = Path(parent / Path(file)).resolve().absolute()
+        if file_path.exists():
+            items = load_json(file_path)
+            if isinstance(items, list):
+                return [Prompty.normalize(value, parent) for value in items]
+            elif isinstance(items, Dict):
+                return {key: Prompty.normalize(value, parent) for key, value in items.items()}
+            else:
+                return items
+        else:
+            raise FileNotFoundError(f"File {file} not found")
+
+    @staticmethod
+    def _process_env(variable: str, env_error=True, default: Union[str, None] = None) -> Any:
+        if variable in os.environ:
+            return os.environ[variable]
+        else:
+            if default is not None:
+                return default
+            if env_error:
+                raise ValueError(f"Variable {variable} not found in environment")
+
+            return ""
+
+    @staticmethod
+    def normalize(attribute: Any, parent: Path, env_error=True) -> Any:
+        if isinstance(attribute, str):
+            attribute = attribute.strip()
+            if attribute.startswith("${") and attribute.endswith("}"):
+                # check if env or file
+                variable = attribute[2:-1].split(":")
+                if variable[0] == "env" and len(variable) > 1:
+                    return Prompty._process_env(
+                        variable[1],
+                        env_error,
+                        variable[2] if len(variable) > 2 else None,
+                    )
+                elif variable[0] == "file" and len(variable) > 1:
+                    return Prompty._process_file(variable[1], parent)
+                else:
+                    raise ValueError(f"Invalid attribute format ({attribute})")
+            else:
+                return attribute
+        elif isinstance(attribute, list):
+            return [Prompty.normalize(value, parent) for value in attribute]
+        elif isinstance(attribute, Dict):
+            return {key: Prompty.normalize(value, parent) for key, value in attribute.items()}
+        else:
+            return attribute
+
+
+def param_hoisting(top: Dict[str, Any], bottom: Dict[str, Any], top_key: Union[str, None] = None) -> Dict[str, Any]:
+    if top_key:
+        new_dict = {**top[top_key]} if top_key in top else {}
+    else:
+        new_dict = {**top}
+    for key, value in bottom.items():
+        if key not in new_dict:
+            new_dict[key] = value
+    return new_dict
+
+
+class PromptyStream(Iterator):
+    """PromptyStream class to iterate over LLM stream.
+    Necessary for Prompty to handle streaming data when tracing."""
+
+    def __init__(self, name: str, iterator: Iterator):
+        self.name = name
+        self.iterator = iterator
+        self.items: List[Any] = []
+        self.__name__ = "PromptyStream"
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            # enumerate but add to list
+            o = self.iterator.__next__()
+            self.items.append(o)
+            return o
+
+        except StopIteration:
+            # StopIteration is raised
+            # contents are exhausted
+            if len(self.items) > 0:
+                with Tracer.start("PromptyStream") as trace:
+                    trace("signature", f"{self.name}.PromptyStream")
+                    trace("inputs", "None")
+                    trace("result", [to_dict(s) for s in self.items])
+
+            raise StopIteration
+
+
+class AsyncPromptyStream(AsyncIterator):
+    """AsyncPromptyStream class to iterate over LLM stream.
+    Necessary for Prompty to handle streaming data when tracing."""
+
+    def __init__(self, name: str, iterator: AsyncIterator):
+        self.name = name
+        self.iterator = iterator
+        self.items: List[Any] = []
+        self.__name__ = "AsyncPromptyStream"
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        try:
+            # enumerate but add to list
+            o = await self.iterator.__anext__()
+            self.items.append(o)
+            return o
+
+        except StopAsyncIteration:
+            # StopAsyncIteration is raised
+            # contents are exhausted
+            if len(self.items) > 0:
+                with Tracer.start("AsyncPromptyStream") as trace:
+                    trace("signature", f"{self.name}.AsyncPromptyStream")
+                    trace("inputs", "None")
+                    trace("result", [to_dict(s) for s in self.items])
+
+            raise StopAsyncIteration
+
+
+def _mask_secrets(d: Dict[str, Any], path: list[str], patterns: list[str] = ["key", "secret"]) -> bool:
+    sub_d = d
+    for key in path:
+        if key not in sub_d:
+            return False
+        sub_d = sub_d[key]
+
+    for k, v in sub_d.items():
+        if any(pattern in k.lower() for pattern in patterns):
+            sub_d[k] = "*" * len(v)
+    return True
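
As a quick illustration of the `${env:...}` substitution and `param_hoisting` precedence implemented above, a minimal sketch (note it imports from the vendored private module `_core`, which is an implementation detail rather than a public API; `DEMO_ENDPOINT` and `MISSING` are made-up names for the demo):

    import os
    from pathlib import Path
    from azure.ai.inference.prompts._core import Prompty, param_hoisting

    os.environ["DEMO_ENDPOINT"] = "https://example.invalid"  # hypothetical variable for the demo

    # ${env:VAR} resolves from the environment; an optional third segment is a default.
    print(Prompty.normalize("${env:DEMO_ENDPOINT}", Path(".")))     # https://example.invalid
    print(Prompty.normalize("${env:MISSING:fallback}", Path(".")))  # fallback (assuming MISSING is unset)

    # param_hoisting: keys already present in `top` win; `bottom` only fills in gaps.
    print(param_hoisting({"a": 1}, {"a": 0, "b": 2}))  # {'a': 1, 'b': 2}
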
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_invoker.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_invoker.py
new file mode 100644
index 00000000..d682662e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_invoker.py
@@ -0,0 +1,295 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="return-value,operator"
+# pylint: disable=line-too-long,R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,unnecessary-pass
+import abc
+from typing import Any, Callable, Dict, Literal, Type
+from ._tracer import trace
+from ._core import Prompty
+
+
+class Invoker(abc.ABC):
+    """Abstract class for Invoker
+
+    Attributes
+    ----------
+    prompty : Prompty
+        The prompty object
+    name : str
+        The name of the invoker
+
+    """
+
+    def __init__(self, prompty: Prompty) -> None:
+        self.prompty = prompty
+        self.name = self.__class__.__name__
+
+    @abc.abstractmethod
+    def invoke(self, data: Any) -> Any:
+        """Abstract method to invoke the invoker
+
+        Parameters
+        ----------
+        data : Any
+            The data to pass to the invoker
+
+        Returns
+        -------
+        Any
+            The result of the invocation
+        """
+        pass
+
+    @abc.abstractmethod
+    async def invoke_async(self, data: Any) -> Any:
+        """Abstract method to invoke the invoker asynchronously
+
+        Parameters
+        ----------
+        data : Any
+            The data to pass to the invoker
+
+        Returns
+        -------
+        Any
+            The result of the invocation
+        """
+        pass
+
+    @trace
+    def run(self, data: Any) -> Any:
+        """Method to run the invoker
+
+        Parameters
+        ----------
+        data : Any
+            The data to pass to the invoker
+
+        Returns
+        -------
+        Any
+            The result of the invocation
+        """
+        return self.invoke(data)
+
+    @trace
+    async def run_async(self, data: Any) -> Any:
+        """Method to run the invoker asynchronously
+
+        Parameters
+        ----------
+        data : Any
+            The data to pass to the invoker
+
+        Returns
+        -------
+        Any
+            The result of the invocation
+        """
+        return await self.invoke_async(data)
+
+
+class InvokerFactory:
+    """Factory class for Invoker"""
+
+    _renderers: Dict[str, Type[Invoker]] = {}
+    _parsers: Dict[str, Type[Invoker]] = {}
+    _executors: Dict[str, Type[Invoker]] = {}
+    _processors: Dict[str, Type[Invoker]] = {}
+
+    @classmethod
+    def add_renderer(cls, name: str, invoker: Type[Invoker]) -> None:
+        cls._renderers[name] = invoker
+
+    @classmethod
+    def add_parser(cls, name: str, invoker: Type[Invoker]) -> None:
+        cls._parsers[name] = invoker
+
+    @classmethod
+    def add_executor(cls, name: str, invoker: Type[Invoker]) -> None:
+        cls._executors[name] = invoker
+
+    @classmethod
+    def add_processor(cls, name: str, invoker: Type[Invoker]) -> None:
+        cls._processors[name] = invoker
+
+    @classmethod
+    def register_renderer(cls, name: str) -> Callable:
+        def inner_wrapper(wrapped_class: Type[Invoker]) -> Type[Invoker]:
+            cls._renderers[name] = wrapped_class
+            return wrapped_class
+
+        return inner_wrapper
+
+    @classmethod
+    def register_parser(cls, name: str) -> Callable:
+        def inner_wrapper(wrapped_class: Type[Invoker]) -> Type[Invoker]:
+            cls._parsers[name] = wrapped_class
+            return wrapped_class
+
+        return inner_wrapper
+
+    @classmethod
+    def register_executor(cls, name: str) -> Callable:
+        def inner_wrapper(wrapped_class: Type[Invoker]) -> Type[Invoker]:
+            cls._executors[name] = wrapped_class
+            return wrapped_class
+
+        return inner_wrapper
+
+    @classmethod
+    def register_processor(cls, name: str) -> Callable:
+        def inner_wrapper(wrapped_class: Type[Invoker]) -> Type[Invoker]:
+            cls._processors[name] = wrapped_class
+            return wrapped_class
+
+        return inner_wrapper
+
+    @classmethod
+    def _get_name(
+        cls,
+        type: Literal["renderer", "parser", "executor", "processor"],
+        prompty: Prompty,
+    ) -> str:
+        if type == "renderer":
+            return prompty.template.type
+        elif type == "parser":
+            return f"{prompty.template.parser}.{prompty.model.api}"
+        elif type == "executor":
+            return prompty.model.configuration["type"]
+        elif type == "processor":
+            return prompty.model.configuration["type"]
+        else:
+            raise ValueError(f"Type {type} not found")
+
+    @classmethod
+    def _get_invoker(
+        cls,
+        type: Literal["renderer", "parser", "executor", "processor"],
+        prompty: Prompty,
+    ) -> Invoker:
+        if type == "renderer":
+            name = prompty.template.type
+            if name not in cls._renderers:
+                raise ValueError(f"Renderer {name} not found")
+
+            return cls._renderers[name](prompty)  # type: ignore
+
+        elif type == "parser":
+            name = f"{prompty.template.parser}.{prompty.model.api}"
+            if name not in cls._parsers:
+                raise ValueError(f"Parser {name} not found")
+
+            return cls._parsers[name](prompty)  # type: ignore
+
+        elif type == "executor":
+            name = prompty.model.configuration["type"]
+            if name not in cls._executors:
+                raise ValueError(f"Executor {name} not found")
+
+            return cls._executors[name](prompty)  # type: ignore
+
+        elif type == "processor":
+            name = prompty.model.configuration["type"]
+            if name not in cls._processors:
+                raise ValueError(f"Processor {name} not found")
+
+            return cls._processors[name](prompty)  # type: ignore
+
+        else:
+            raise ValueError(f"Type {type} not found")
+
+    @classmethod
+    def run(
+        cls,
+        type: Literal["renderer", "parser", "executor", "processor"],
+        prompty: Prompty,
+        data: Any,
+        default: Any = None,
+    ):
+        name = cls._get_name(type, prompty)
+        if name.startswith("NOOP") and default is not None:
+            return default
+        elif name.startswith("NOOP"):
+            return data
+
+        invoker = cls._get_invoker(type, prompty)
+        value = invoker.run(data)
+        return value
+
+    @classmethod
+    async def run_async(
+        cls,
+        type: Literal["renderer", "parser", "executor", "processor"],
+        prompty: Prompty,
+        data: Any,
+        default: Any = None,
+    ):
+        name = cls._get_name(type, prompty)
+        if name.startswith("NOOP") and default is not None:
+            return default
+        elif name.startswith("NOOP"):
+            return data
+        invoker = cls._get_invoker(type, prompty)
+        value = await invoker.run_async(data)
+        return value
+
+    @classmethod
+    def run_renderer(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return cls.run("renderer", prompty, data, default)
+
+    @classmethod
+    async def run_renderer_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return await cls.run_async("renderer", prompty, data, default)
+
+    @classmethod
+    def run_parser(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return cls.run("parser", prompty, data, default)
+
+    @classmethod
+    async def run_parser_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return await cls.run_async("parser", prompty, data, default)
+
+    @classmethod
+    def run_executor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return cls.run("executor", prompty, data, default)
+
+    @classmethod
+    async def run_executor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return await cls.run_async("executor", prompty, data, default)
+
+    @classmethod
+    def run_processor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return cls.run("processor", prompty, data, default)
+
+    @classmethod
+    async def run_processor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any:
+        return await cls.run_async("processor", prompty, data, default)
+
+
+class InvokerException(Exception):
+    """Exception class for Invoker"""
+
+    def __init__(self, message: str, type: str) -> None:
+        super().__init__(message)
+        self.type = type
+
+    def __str__(self) -> str:
+        return f"{super().__str__()}. Make sure to pip install any necessary package extras (i.e. could be something like `pip install prompty[{self.type}]`) for {self.type} as well as import the appropriate invokers (i.e. could be something like `import prompty.{self.type}`)."
+
+
+@InvokerFactory.register_renderer("NOOP")
+@InvokerFactory.register_parser("NOOP")
+@InvokerFactory.register_executor("NOOP")
+@InvokerFactory.register_processor("NOOP")
+@InvokerFactory.register_parser("prompty.embedding")
+@InvokerFactory.register_parser("prompty.image")
+@InvokerFactory.register_parser("prompty.completion")
+class NoOp(Invoker):
+    def invoke(self, data: Any) -> Any:
+        return data
+
+    async def invoke_async(self, data: Any) -> Any:
+        return self.invoke(data)
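
To show how the registration decorators and the NOOP fallback fit together, a minimal sketch of a custom renderer (the `upper` template type and `UpperRenderer` class are made up for the example; it assumes no tracer is registered, so the `@trace` wrapper on `run` is effectively a pass-through):

    from typing import Any
    from azure.ai.inference.prompts._core import Prompty, TemplateSettings
    from azure.ai.inference.prompts._invoker import Invoker, InvokerFactory

    @InvokerFactory.register_renderer("upper")  # hypothetical template type
    class UpperRenderer(Invoker):
        # Toy renderer: ignores the inputs and upper-cases the prompty content.
        def invoke(self, data: Any) -> Any:
            return str(self.prompty.content).upper()

        async def invoke_async(self, data: Any) -> Any:
            return self.invoke(data)

    p = Prompty(template=TemplateSettings(type="upper"), content="hello")
    print(InvokerFactory.run_renderer(p, data={}))  # HELLO
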
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py
new file mode 100644
index 00000000..f7a0c21d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_mustache.py
@@ -0,0 +1,671 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# pylint: disable=line-too-long,R,consider-using-dict-items,docstring-missing-return,docstring-missing-rtype,docstring-missing-param,global-statement,unused-argument,global-variable-not-assigned,protected-access,logging-fstring-interpolation,deprecated-method
+from __future__ import annotations
+import logging
+from collections.abc import Iterator, Sequence
+from types import MappingProxyType
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Union,
+    cast,
+)
+from typing_extensions import TypeAlias
+
+logger = logging.getLogger(__name__)
+
+
+Scopes: TypeAlias = List[Union[Literal[False, 0], Mapping[str, Any]]]
+
+
+# Globals
+_CURRENT_LINE = 1
+_LAST_TAG_LINE = None
+
+
+class ChevronError(SyntaxError):
+    """Custom exception for Chevron errors."""
+
+
+#
+# Helper functions
+#
+
+
+def grab_literal(template: str, l_del: str) -> tuple[str, str]:
+    """Parse a literal from the template.
+
+    Args:
+        template: The template to parse.
+        l_del: The left delimiter.
+
+    Returns:
+        Tuple[str, str]: The literal and the template.
+    """
+
+    global _CURRENT_LINE
+
+    try:
+        # Look for the next tag and move the template to it
+        literal, template = template.split(l_del, 1)
+        _CURRENT_LINE += literal.count("\n")
+        return (literal, template)
+
+    # There are no more tags in the template?
+    except ValueError:
+        # Then the rest of the template is a literal
+        return (template, "")
+
+
+def l_sa_check(template: str, literal: str, is_standalone: bool) -> bool:
+    """Do a preliminary check to see if a tag could be a standalone.
+
+    Args:
+        template: The template. (Not used.)
+        literal: The literal.
+        is_standalone: Whether the tag is standalone.
+
+    Returns:
+        bool: Whether the tag could be a standalone.
+    """
+
+    # If there is a newline, or the previous tag was a standalone
+    if literal.find("\n") != -1 or is_standalone:
+        padding = literal.split("\n")[-1]
+
+        # If all the characters since the last newline are spaces
+        # Then the next tag could be a standalone
+        # Otherwise it can't be
+        return padding.isspace() or padding == ""
+    else:
+        return False
+
+
+def r_sa_check(template: str, tag_type: str, is_standalone: bool) -> bool:
+    """Do a final check to see if a tag could be a standalone.
+
+    Args:
+        template: The template.
+        tag_type: The type of the tag.
+        is_standalone: Whether the tag is standalone.
+
+    Returns:
+        bool: Whether the tag could be a standalone.
+    """
+
+    # Check right side if we might be a standalone
+    if is_standalone and tag_type not in ["variable", "no escape"]:
+        on_newline = template.split("\n", 1)
+
+        # If everything to the right of us on this line is whitespace, we're a standalone
+        return on_newline[0].isspace() or not on_newline[0]
+
+    # Otherwise we can't be a standalone
+    else:
+        return False
+
+
+def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]:
+    """Parse a tag from a template.
+
+    Args:
+        template: The template.
+        l_del: The left delimiter.
+        r_del: The right delimiter.
+
+    Returns:
+        Tuple[Tuple[str, str], str]: The tag and the template.
+
+    Raises:
+        ChevronError: If the tag is unclosed.
+        ChevronError: If the set delimiter tag is unclosed.
+    """
+    global _CURRENT_LINE
+    global _LAST_TAG_LINE
+
+    tag_types = {
+        "!": "comment",
+        "#": "section",
+        "^": "inverted section",
+        "/": "end",
+        ">": "partial",
+        "=": "set delimiter?",
+        "{": "no escape?",
+        "&": "no escape",
+    }
+
+    # Get the tag
+    try:
+        tag, template = template.split(r_del, 1)
+    except ValueError as e:
+        msg = "unclosed tag " f"at line {_CURRENT_LINE}"
+        raise ChevronError(msg) from e
+
+    # Find the type meaning of the first character
+    tag_type = tag_types.get(tag[0], "variable")
+
+    # If the type is not a variable
+    if tag_type != "variable":
+        # Then that first character is not needed
+        tag = tag[1:]
+
+    # If we might be a set delimiter tag
+    if tag_type == "set delimiter?":
+        # Double check to make sure we are
+        if tag.endswith("="):
+            tag_type = "set delimiter"
+            # Remove the equal sign
+            tag = tag[:-1]
+
+        # Otherwise we should complain
+        else:
+            msg = "unclosed set delimiter tag\n" f"at line {_CURRENT_LINE}"
+            raise ChevronError(msg)
+
+    elif (
+        # If we might be a no html escape tag
+        tag_type == "no escape?"
+        # And we have a third curly brace
+        # (And are using curly braces as delimiters)
+        and l_del == "{{"
+        and r_del == "}}"
+        and template.startswith("}")
+    ):
+        # Then we are a no html escape tag
+        template = template[1:]
+        tag_type = "no escape"
+
+    # Strip the whitespace off the key and return
+    return ((tag_type, tag.strip()), template)
+
+
+#
+# The main tokenizing function
+#
+
+
+def tokenize(template: str, def_ldel: str = "{{", def_rdel: str = "}}") -> Iterator[tuple[str, str]]:
+    """Tokenize a mustache template.
+
+    Tokenizes a mustache template in a generator fashion.
+    Accepts a string containing the template.
+
+
+    Arguments:
+
+    template -- a string containing a mustache template
+
+    def_ldel -- The default left delimiter
+                ("{{" by default, as in spec compliant mustache)
+
+    def_rdel -- The default right delimiter
+                ("}}" by default, as in spec compliant mustache)
+
+
+    Returns:
+
+    A generator of mustache tags in the form of a tuple
+
+    -- (tag_type, tag_key)
+
+    Where tag_type is one of:
+     * literal
+     * variable
+     * section
+     * inverted section
+     * end
+     * partial
+     * no escape
+
+    And tag_key is either the key or in the case of a literal tag,
+    the literal itself.
+    """
+
+    global _CURRENT_LINE, _LAST_TAG_LINE
+    _CURRENT_LINE = 1
+    _LAST_TAG_LINE = None
+
+    is_standalone = True
+    open_sections = []
+    l_del = def_ldel
+    r_del = def_rdel
+
+    while template:
+        literal, template = grab_literal(template, l_del)
+
+        # If the template is completed
+        if not template:
+            # Then yield the literal and leave
+            yield ("literal", literal)
+            break
+
+        # Do the first check to see if we could be a standalone
+        is_standalone = l_sa_check(template, literal, is_standalone)
+
+        # Parse the tag
+        tag, template = parse_tag(template, l_del, r_del)
+        tag_type, tag_key = tag
+
+        # Special tag logic
+
+        # If we are a set delimiter tag
+        if tag_type == "set delimiter":
+            # Then get and set the delimiters
+            dels = tag_key.strip().split(" ")
+            l_del, r_del = dels[0], dels[-1]
+
+        # If we are a section tag
+        elif tag_type in ["section", "inverted section"]:
+            # Then open a new section
+            open_sections.append(tag_key)
+            _LAST_TAG_LINE = _CURRENT_LINE
+
+        # If we are an end tag
+        elif tag_type == "end":
+            # Then check to see if the last opened section
+            # is the same as us
+            try:
+                last_section = open_sections.pop()
+            except IndexError as e:
+                msg = (
+                    f'Trying to close tag "{tag_key}"\n'
+                    "Looks like it was not opened.\n"
+                    f"line {_CURRENT_LINE + 1}"
+                )
+                raise ChevronError(msg) from e
+            if tag_key != last_section:
+                # Otherwise we need to complain
+                msg = (
+                    f'Trying to close tag "{tag_key}"\n'
+                    f'last open tag is "{last_section}"\n'
+                    f"line {_CURRENT_LINE + 1}"
+                )
+                raise ChevronError(msg)
+
+        # Do the second check to see if we're a standalone
+        is_standalone = r_sa_check(template, tag_type, is_standalone)
+
+        # Which if we are
+        if is_standalone:
+            # Remove the stuff before the newline
+            template = template.split("\n", 1)[-1]
+
+            # Partials need to keep the spaces on their left
+            if tag_type != "partial":
+                # But other tags don't
+                literal = literal.rstrip(" ")
+
+        # Start yielding
+        # Ignore literals that are empty
+        if literal != "":
+            yield ("literal", literal)
+
+        # Ignore comments and set delimiters
+        if tag_type not in ["comment", "set delimiter?"]:
+            yield (tag_type, tag_key)
+
+    # If there are any open sections when we're done
+    if open_sections:
+        # Then we need to complain
+        msg = (
+            "Unexpected EOF\n"
+            f'the tag "{open_sections[-1]}" was never closed\n'
+            f"was opened at line {_LAST_TAG_LINE}"
+        )
+        raise ChevronError(msg)
+
+
+#
+# Helper functions
+#
+
+
+def _html_escape(string: str) -> str:
+    """HTML escape all of these " & < >"""
+
+    html_codes = {
+        '"': "&quot;",
+        "<": "&lt;",
+        ">": "&gt;",
+    }
+
+    # & must be handled first
+    string = string.replace("&", "&amp;")
+    for char in html_codes:
+        string = string.replace(char, html_codes[char])
+    return string
+
+
+def _get_key(
+    key: str,
+    scopes: Scopes,
+    warn: bool,
+    keep: bool,
+    def_ldel: str,
+    def_rdel: str,
+) -> Any:
+    """Get a key from the current scope"""
+
+    # If the key is a dot
+    if key == ".":
+        # Then just return the current scope
+        return scopes[0]
+
+    # Loop through the scopes
+    for scope in scopes:
+        try:
+            # Return an empty string if falsy, with two exceptions
+            # 0 should return 0, and False should return False
+            if scope in (0, False):
+                return scope
+
+            # For every dot separated key
+            for child in key.split("."):
+                # Return an empty string if falsy, with two exceptions
+                # 0 should return 0, and False should return False
+                if scope in (0, False):
+                    return scope
+                # Move into the scope
+                try:
+                    # Try subscripting (Normal dictionaries)
+                    scope = cast(Dict[str, Any], scope)[child]
+                except (TypeError, AttributeError):
+                    try:
+                        scope = getattr(scope, child)
+                    except (TypeError, AttributeError):
+                        # Try as a list
+                        scope = scope[int(child)]  # type: ignore
+
+            try:
+                # This allows for custom falsy data types
+                # https://github.com/noahmorrison/chevron/issues/35
+                if scope._CHEVRON_return_scope_when_falsy:  # type: ignore
+                    return scope
+            except AttributeError:
+                if scope in (0, False):
+                    return scope
+                return scope or ""
+        except (AttributeError, KeyError, IndexError, ValueError):
+            # We couldn't find the key in the current scope
+            # We'll try again on the next pass
+            pass
+
+    # We couldn't find the key in any of the scopes
+
+    if warn:
+        logger.warning(f"Could not find key '{key}'")
+
+    if keep:
+        return f"{def_ldel} {key} {def_rdel}"
+
+    return ""
+
+
+def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str:
+    """Load a partial"""
+    try:
+        # Maybe the partial is in the dictionary
+        return partials_dict[name]
+    except KeyError:
+        return ""
+
+
+#
+# The main rendering function
+#
+g_token_cache: Dict[str, List[tuple[str, str]]] = {}
+
+EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({})
+
+
+def render(
+    template: Union[str, List[tuple[str, str]]] = "",
+    data: Mapping[str, Any] = EMPTY_DICT,
+    partials_dict: Mapping[str, str] = EMPTY_DICT,
+    padding: str = "",
+    def_ldel: str = "{{",
+    def_rdel: str = "}}",
+    scopes: Optional[Scopes] = None,
+    warn: bool = False,
+    keep: bool = False,
+) -> str:
+    """Render a mustache template.
+
+    Renders a mustache template with a data scope and inline partial capability.
+
+    Arguments:
+
+    template      -- A string containing the template, or a pre-tokenized
+                     list of (tag, key) tuples.
+
+    data          -- A python dictionary with your data scope.
+
+    partials_dict -- A python dictionary which will be searched for partials.
+                     {'include': 'foo'} is the same as a file called
+                     include.mustache
+                     (defaults to {}).
+
+    padding       -- This is for padding partials, and shouldn't be used
+                     (but can be if you really want to).
+
+    def_ldel      -- The default left delimiter
+                     ("{{" by default, as in spec compliant mustache).
+
+    def_rdel      -- The default right delimiter
+                     ("}}" by default, as in spec compliant mustache).
+
+    scopes        -- The list of scopes that get_key will look through.
+
+    warn          -- Log a warning when a template substitution isn't found in the data
+
+    keep          -- Keep unreplaced tags when a substitution isn't found in the data.
+
+
+    Returns:
+
+    A string containing the rendered template.
+    """
+
+    # If the template is a sequence but not derived from a string
+    if isinstance(template, Sequence) and not isinstance(template, str):
+        # Then we don't need to tokenize it
+        # But it does need to be a generator
+        tokens: Iterator[tuple[str, str]] = (token for token in template)
+    else:
+        if template in g_token_cache:
+            tokens = (token for token in g_token_cache[template])
+        else:
+            # Otherwise make a generator
+            tokens = tokenize(template, def_ldel, def_rdel)
+
+    output = ""
+
+    if scopes is None:
+        scopes = [data]
+
+    # Run through the tokens
+    for tag, key in tokens:
+        # Set the current scope
+        current_scope = scopes[0]
+
+        # If we're an end tag
+        if tag == "end":
+            # Pop out of the latest scope
+            del scopes[0]
+
+        # If the current scope is falsy and not the only scope
+        elif not current_scope and len(scopes) != 1:
+            if tag in ["section", "inverted section"]:
+                # Set the most recent scope to a falsy value
+                scopes.insert(0, False)
+
+        # If we're a literal tag
+        elif tag == "literal":
+            # Add padding to the key and add it to the output
+            output += key.replace("\n", "\n" + padding)
+
+        # If we're a variable tag
+        elif tag == "variable":
+            # Add the html escaped key to the output
+            thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
+            if thing is True and key == ".":
+                # if we've coerced into a boolean by accident
+                # (inverted tags do this)
+                # then get the un-coerced object (next in the stack)
+                thing = scopes[1]
+            if not isinstance(thing, str):
+                thing = str(thing)
+            output += _html_escape(thing)
+
+        # If we're a no html escape tag
+        elif tag == "no escape":
+            # Just lookup the key and add it
+            thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
+            if not isinstance(thing, str):
+                thing = str(thing)
+            output += thing
+
+        # If we're a section tag
+        elif tag == "section":
+            # Get the sections scope
+            scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
+
+            # If the scope is a callable (as described in
+            # https://mustache.github.io/mustache.5.html)
+            if callable(scope):
+                # Generate template text from tags
+                text = ""
+                tags: List[tuple[str, str]] = []
+                for token in tokens:
+                    if token == ("end", key):
+                        break
+
+                    tags.append(token)
+                    tag_type, tag_key = token
+                    if tag_type == "literal":
+                        text += tag_key
+                    elif tag_type == "no escape":
+                        text += f"{def_ldel}& {tag_key} {def_rdel}"
+                    else:
+                        text += "{}{} {}{}".format(
+                            def_ldel,
+                            {
+                                "comment": "!",
+                                "section": "#",
+                                "inverted section": "^",
+                                "end": "/",
+                                "partial": ">",
+                                "set delimiter": "=",
+                                "no escape": "&",
+                                "variable": "",
+                            }[tag_type],
+                            tag_key,
+                            def_rdel,
+                        )
+
+                g_token_cache[text] = tags
+
+                rend = scope(
+                    text,
+                    lambda template, data=None: render(
+                        template,
+                        data={},
+                        partials_dict=partials_dict,
+                        padding=padding,
+                        def_ldel=def_ldel,
+                        def_rdel=def_rdel,
+                        scopes=data and [data] + scopes or scopes,
+                        warn=warn,
+                        keep=keep,
+                    ),
+                )
+
+                output += rend  # type: ignore[reportOperatorIssue]
+
+            # If the scope is a sequence, an iterator or generator but not
+            # derived from a string
+            elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str):
+                # Then we need to do some looping
+
+                # Gather up all the tags inside the section
+                # (And don't be tricked by nested end tags with the same key)
+                # TODO: This feels like it still has edge cases, no?
+                tags = []
+                tags_with_same_key = 0
+                for token in tokens:
+                    if token == ("section", key):
+                        tags_with_same_key += 1
+                    if token == ("end", key):
+                        tags_with_same_key -= 1
+                        if tags_with_same_key < 0:
+                            break
+                    tags.append(token)
+
+                # For every item in the scope
+                for thing in scope:
+                    # Append it as the most recent scope and render
+                    new_scope = [thing] + scopes
+                    rend = render(
+                        template=tags,
+                        scopes=new_scope,
+                        padding=padding,
+                        partials_dict=partials_dict,
+                        def_ldel=def_ldel,
+                        def_rdel=def_rdel,
+                        warn=warn,
+                        keep=keep,
+                    )
+
+                    output += rend
+
+            else:
+                # Otherwise we're just a scope section
+                scopes.insert(0, scope)  # type: ignore[reportArgumentType]
+
+        # If we're an inverted section
+        elif tag == "inverted section":
+            # Add the flipped scope to the scopes
+            scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel)
+            scopes.insert(0, cast(Literal[False], not scope))
+
+        # If we're a partial
+        elif tag == "partial":
+            # Load the partial
+            partial = _get_partial(key, partials_dict)
+
+            # Find what to pad the partial with
+            left = output.rpartition("\n")[2]
+            part_padding = padding
+            if left.isspace():
+                part_padding += left
+
+            # Render the partial
+            part_out = render(
+                template=partial,
+                partials_dict=partials_dict,
+                def_ldel=def_ldel,
+                def_rdel=def_rdel,
+                padding=part_padding,
+                scopes=scopes,
+                warn=warn,
+                keep=keep,
+            )
+
+            # If the partial was indented
+            if left.isspace():
+                # then remove the spaces from the end
+                part_out = part_out.rstrip(" \t")
+
+            # Add the partials output to the output
+            output += part_out
+
+    return output
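
For reference, the renderer above largely follows the mustache spec for variables, sections, and inverted sections; a small sketch of its behavior (again importing from the vendored private module, which is an implementation detail):

    from azure.ai.inference.prompts._mustache import render

    template = (
        "Hello {{name}}!\n"
        "{{#items}}- {{.}}\n{{/items}}"
        "{{^items}}(no items){{/items}}"
    )

    # Sections iterate over lists; {{.}} is the current item;
    # inverted sections render only when the key is falsy or missing.
    print(render(template, {"name": "Ada", "items": ["one", "two"]}))
    # Hello Ada!
    # - one
    # - two
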
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_parsers.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_parsers.py
new file mode 100644
index 00000000..de3c570e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_parsers.py
@@ -0,0 +1,156 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="union-attr,return-value"
+# pylint: disable=line-too-long,R,consider-using-enumerate,docstring-missing-param,docstring-missing-return,docstring-missing-rtype
+import re
+import base64
+from pathlib import Path
+from typing import Any, Union
+from ._core import Prompty
+from ._invoker import Invoker, InvokerFactory
+
+
+ROLES = ["assistant", "function", "system", "user"]
+
+
+@InvokerFactory.register_parser("prompty.chat")
+class PromptyChatParser(Invoker):
+    """Prompty Chat Parser"""
+
+    def __init__(self, prompty: Prompty) -> None:
+        super().__init__(prompty)
+        self.path = Path(self.prompty.file).parent
+
+    def invoke(self, data: str) -> Any:
+        return invoke_parser(self.path, data)
+
+    async def invoke_async(self, data: str) -> Any:
+        """Invoke the Prompty Chat Parser (Async)
+
+        Parameters
+        ----------
+        data : str
+            The data to parse
+
+        Returns
+        -------
+        str
+            The parsed data
+        """
+        return self.invoke(data)
+
+
+def _inline_image(path: Union[Path, None], image_item: str) -> str:
+    """Inline Image
+
+    Parameters
+    ----------
+    path : Union[Path, None]
+        The base path used to resolve a local image file
+    image_item : str
+        The image item to inline
+
+    Returns
+    -------
+    str
+        The inlined image as a data URL (or the original url/data item, passed through)
+    """
+    # pass through if it's a url or base64 encoded or the path is None
+    if image_item.startswith("http") or image_item.startswith("data") or path is None:
+        return image_item
+    # otherwise, it's a local file - need to base64 encode it
+    else:
+        image_path = path / image_item
+        with open(image_path, "rb") as f:
+            base64_image = base64.b64encode(f.read()).decode("utf-8")
+
+        if image_path.suffix == ".png":
+            return f"data:image/png;base64,{base64_image}"
+        elif image_path.suffix == ".jpg":
+            return f"data:image/jpeg;base64,{base64_image}"
+        elif image_path.suffix == ".jpeg":
+            return f"data:image/jpeg;base64,{base64_image}"
+        else:
+            raise ValueError(
+                f"Invalid image format {image_path.suffix} - currently only .png and .jpg / .jpeg are supported."
+            )
+
+
+def _parse_content(path: Union[Path, None], content: str):
+    """Parse inline markdown images out of message content.
+
+    Parameters
+    ----------
+    path : Union[Path, None]
+        The base path used to resolve local image files
+    content : str
+        The content to parse
+
+    Returns
+    -------
+    Any
+        The parsed content (a string, or a list of content items)
+    """
+    # regular expression to parse markdown images
+    image = r"(?P<alt>!\[[^\]]*\])\((?P<filename>.*?)(?=\"|\))\)"
+    matches = re.findall(image, content, flags=re.MULTILINE)
+    if len(matches) > 0:
+        content_items = []
+        content_chunks = re.split(image, content, flags=re.MULTILINE)
+        current_chunk = 0
+        for i in range(len(content_chunks)):
+            # image entry
+            if current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][0]:
+                content_items.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": _inline_image(path, matches[current_chunk][1].split(" ")[0].strip())},
+                    }
+                )
+            # second part of image entry
+            elif current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][1]:
+                current_chunk += 1
+            # text entry
+            else:
+                if len(content_chunks[i].strip()) > 0:
+                    content_items.append({"type": "text", "text": content_chunks[i].strip()})
+        return content_items
+    else:
+        return content
+
+
+def invoke_parser(path: Union[Path, None], data: str) -> Any:
+    """Invoke the Prompty Chat Parser
+
+    Parameters
+    ----------
+    data : str
+        The data to parse
+
+    Returns
+    -------
+    str
+        The parsed data
+    """
+    messages = []
+    separator = r"(?i)^\s*#?\s*(" + "|".join(ROLES) + r")\s*:\s*\n"
+
+    # get valid chunks - remove empty items
+    chunks = [item for item in re.split(separator, data, flags=re.MULTILINE) if len(item.strip()) > 0]
+
+    # if no starter role, then inject system role
+    if chunks[0].strip().lower() not in ROLES:
+        chunks.insert(0, "system")
+
+    # if the last chunk is a role header with no content, drop it
+    if chunks[-1].strip().lower() in ROLES:
+        chunks.pop()
+
+    if len(chunks) % 2 != 0:
+        raise ValueError("Invalid prompt format")
+
+    # create messages
+    for i in range(0, len(chunks), 2):
+        role = chunks[i].strip().lower()
+        content = chunks[i + 1].strip()
+        messages.append({"role": role, "content": _parse_content(path, content)})
+
+    return messages
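
Putting the role separator and content parsing together, a small sketch of what `invoke_parser` produces for a plain chat prompt (no inline images, so content stays a string; imports from the vendored private module):

    from azure.ai.inference.prompts._parsers import invoke_parser

    text = (
        "system:\n"
        "You are a helpful assistant.\n"
        "user:\n"
        "What is the capital of France?\n"
    )

    print(invoke_parser(None, text))
    # [{'role': 'system', 'content': 'You are a helpful assistant.'},
    #  {'role': 'user', 'content': 'What is the capital of France?'}]
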
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_patch.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_patch.py
new file mode 100644
index 00000000..14ad4f62
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_patch.py
@@ -0,0 +1,124 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# pylint: disable=line-too-long,R
+"""Customize generated code here.
+
+Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
+"""
+
+import traceback
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from typing_extensions import Self
+from ._core import Prompty
+from ._mustache import render
+from ._parsers import invoke_parser
+from ._prompty_utils import load, prepare
+from ._utils import remove_leading_empty_space
+
+
+class PromptTemplate:
+    """The helper class which takes variant of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array."""
+
+    @classmethod
+    def from_prompty(cls, file_path: str) -> Self:
+        """Initialize a PromptTemplate object from a prompty file.
+
+        :param file_path: The path to the prompty file.
+        :type file_path: str
+        :return: The PromptTemplate object.
+        :rtype: PromptTemplate
+        """
+        if not file_path:
+            raise ValueError("Please provide file_path")
+
+        # Get the absolute path of the file via `traceback.extract_stack()`; it's "-2" because:
+        #  the last frame in the stack is the current function, and
+        #  the second-to-last frame is the caller, whose directory is the root of the relative file_path.
+        stack = traceback.extract_stack()
+        caller = Path(stack[-2].filename)
+        abs_file_path = Path(caller.parent / Path(file_path)).resolve().absolute()
+
+        prompty = load(str(abs_file_path))
+        return cls(prompty=prompty)
+
+    @classmethod
+    def from_string(cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None) -> Self:
+        """Initialize a PromptTemplate object from a message template.
+
+        :param prompt_template: The prompt template string.
+        :type prompt_template: str
+        :param api: The API type, e.g. "chat" or "completion".
+        :type api: str
+        :param model_name: The model name, e.g. "gpt-4o-mini".
+        :type model_name: str
+        :return: The PromptTemplate object.
+        :rtype: PromptTemplate
+        """
+        return cls(
+            api=api,
+            prompt_template=prompt_template,
+            model_name=model_name,
+            prompty=None,
+        )
+
+    def __init__(
+        self,
+        *,
+        api: str = "chat",
+        prompty: Optional[Prompty] = None,
+        prompt_template: Optional[str] = None,
+        model_name: Optional[str] = None,
+    ) -> None:
+        self.prompty = prompty
+        if self.prompty is not None:
+            self.model_name = (
+                self.prompty.model.configuration["azure_deployment"]
+                if "azure_deployment" in self.prompty.model.configuration
+                else None
+            )
+            self.parameters = self.prompty.model.parameters
+            self._config = {}
+        elif prompt_template is not None:
+            self.model_name = model_name
+            self.parameters = {}
+            # _config is a dict to hold the internal configuration
+            self._config = {
+                "api": api if api is not None else "chat",
+                "prompt_template": prompt_template,
+            }
+        else:
+            raise ValueError("Please pass valid arguments for PromptTemplate")
+
+    def create_messages(self, data: Optional[Dict[str, Any]] = None, **kwargs) -> List[Dict[str, Any]]:
+        """Render the prompt template with the given data.
+
+        :param data: The data to render the prompt template with.
+        :type data: Optional[Dict[str, Any]]
+        :return: The rendered prompt template.
+        :rtype: List[Dict[str, Any]]
+        """
+        if data is None:
+            data = kwargs
+
+        if self.prompty is not None:
+            parsed = prepare(self.prompty, data)
+            return parsed
+        elif "prompt_template" in self._config:
+            prompt_template = remove_leading_empty_space(self._config["prompt_template"])
+            system_prompt_str = render(prompt_template, data)
+            parsed = invoke_parser(None, system_prompt_str)
+            return parsed
+        else:
+            raise ValueError("Please provide valid prompt template")
+
+
+def patch_sdk():
+    """Do not remove from this file.
+
+    `patch_sdk` is a last resort escape hatch that allows you to do customizations
+    you can't accomplish using the techniques described in
+    https://aka.ms/azsdk/python/dpcodegen/python/customize
+    """
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_prompty_utils.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_prompty_utils.py
new file mode 100644
index 00000000..5ea38bda
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_prompty_utils.py
@@ -0,0 +1,415 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="assignment"
+# pylint: disable=R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,dangerous-default-value,redefined-outer-name,unused-wildcard-import,wildcard-import,raise-missing-from
+import traceback
+from pathlib import Path
+from typing import Any, Dict, List, Union
+from ._tracer import trace
+from ._invoker import InvokerFactory
+from ._core import (
+    ModelSettings,
+    Prompty,
+    PropertySettings,
+    TemplateSettings,
+    param_hoisting,
+)
+from ._utils import (
+    load_global_config,
+    load_prompty,
+)
+
+from ._renderers import *
+from ._parsers import *
+
+
+@trace(description="Create a headless prompty object for programmatic use.")
+def headless(
+    api: str,
+    content: Union[str, List[str], dict],
+    configuration: Dict[str, Any] = {},
+    parameters: Dict[str, Any] = {},
+    connection: str = "default",
+) -> Prompty:
+    """Create a headless prompty object for programmatic use.
+
+    Parameters
+    ----------
+    api : str
+        The API to use for the model
+    content : Union[str, List[str], dict]
+        The content to process
+    configuration : Dict[str, Any], optional
+        The configuration to use, by default {}
+    parameters : Dict[str, Any], optional
+        The parameters to use, by default {}
+    connection : str, optional
+        The connection to use, by default "default"
+
+    Returns
+    -------
+    Prompty
+        The headless prompty object
+
+    Example
+    -------
+    >>> import prompty
+    >>> p = prompty.headless(
+            api="embedding",
+            configuration={"type": "azure", "azure_deployment": "text-embedding-ada-002"},
+            content="hello world",
+        )
+    >>> emb = prompty.execute(p)
+
+    """
+
+    # get caller's path (to get relative path for prompty.json)
+    caller = Path(traceback.extract_stack()[-2].filename)
+    templateSettings = TemplateSettings(type="NOOP", parser="NOOP")
+    modelSettings = ModelSettings(
+        api=api,
+        configuration=Prompty.normalize(
+            param_hoisting(configuration, load_global_config(caller.parent, connection)),
+            caller.parent,
+        ),
+        parameters=parameters,
+    )
+
+    return Prompty(model=modelSettings, template=templateSettings, content=content)
+
+
+def _load_raw_prompty(attributes: dict, content: str, p: Path, global_config: dict) -> Prompty:
+    if "model" not in attributes:
+        attributes["model"] = {}
+
+    if "configuration" not in attributes["model"]:
+        attributes["model"]["configuration"] = global_config
+    else:
+        attributes["model"]["configuration"] = param_hoisting(
+            attributes["model"]["configuration"],
+            global_config,
+        )
+
+    # pull model settings out of attributes
+    try:
+        model = ModelSettings(**attributes.pop("model"))
+    except Exception as e:
+        raise ValueError(f"Error in model settings: {e}")
+
+    # pull template settings
+    try:
+        if "template" in attributes:
+            t = attributes.pop("template")
+            if isinstance(t, dict):
+                template = TemplateSettings(**t)
+            # has to be a string denoting the type
+            else:
+                template = TemplateSettings(type=t, parser="prompty")
+        else:
+            template = TemplateSettings(type="mustache", parser="prompty")
+    except Exception as e:
+        raise ValueError(f"Error in template loader: {e}")
+
+    # formalize inputs and outputs
+    if "inputs" in attributes:
+        try:
+            inputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("inputs").items()}
+        except Exception as e:
+            raise ValueError(f"Error in inputs: {e}")
+    else:
+        inputs = {}
+    if "outputs" in attributes:
+        try:
+            outputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("outputs").items()}
+        except Exception as e:
+            raise ValueError(f"Error in outputs: {e}")
+    else:
+        outputs = {}
+
+    prompty = Prompty(
+        **attributes,
+        model=model,
+        inputs=inputs,
+        outputs=outputs,
+        template=template,
+        content=content,
+        file=p,
+    )
+
+    return prompty
+
+
+@trace(description="Load a prompty file.")
+def load(prompty_file: Union[str, Path], configuration: str = "default") -> Prompty:
+    """Load a prompty file.
+
+    Parameters
+    ----------
+    prompty_file : Union[str, Path]
+        The path to the prompty file
+    configuration : str, optional
+        The configuration to use, by default "default"
+
+    Returns
+    -------
+    Prompty
+        The loaded prompty object
+
+    Example
+    -------
+    >>> import prompty
+    >>> p = prompty.load("prompts/basic.prompty")
+    >>> print(p)
+    """
+
+    p = Path(prompty_file)
+    if not p.is_absolute():
+        # get caller's path (take into account trace frame)
+        caller = Path(traceback.extract_stack()[-3].filename)
+        p = Path(caller.parent / p).resolve().absolute()
+
+    # load dictionary from prompty file
+    matter = load_prompty(p)
+
+    attributes = matter["attributes"]
+    content = matter["body"]
+
+    # normalize attribute dictionary: resolve keys and files
+    attributes = Prompty.normalize(attributes, p.parent)
+
+    # load global configuration
+    global_config = Prompty.normalize(load_global_config(p.parent, configuration), p.parent)
+
+    prompty = _load_raw_prompty(attributes, content, p, global_config)
+
+    # recursive loading of base prompty
+    if "base" in attributes:
+        # load the base prompty from the same directory as the current prompty
+        base = load(p.parent / attributes["base"])
+        prompty = Prompty.hoist_base_prompty(prompty, base)
+
+    return prompty
+
+
+@trace(description="Prepare the inputs for the prompt.")
+def prepare(
+    prompt: Prompty,
+    inputs: Dict[str, Any] = {},
+):
+    """Prepare the inputs for the prompt.
+
+    Parameters
+    ----------
+    prompt : Prompty
+        The prompty object
+    inputs : Dict[str, Any], optional
+        The inputs to the prompt, by default {}
+
+    Returns
+    -------
+    dict
+        The prepared and hydrated template shaped to the LLM model
+
+    Example
+    -------
+    >>> import prompty
+    >>> p = prompty.load("prompts/basic.prompty")
+    >>> inputs = {"name": "John Doe"}
+    >>> content = prompty.prepare(p, inputs)
+    """
+    inputs = param_hoisting(inputs, prompt.sample)
+
+    render = InvokerFactory.run_renderer(prompt, inputs, prompt.content)
+    result = InvokerFactory.run_parser(prompt, render)
+
+    return result
+
+
+@trace(description="Prepare the inputs for the prompt.")
+async def prepare_async(
+    prompt: Prompty,
+    inputs: Dict[str, Any] = {},
+):
+    """Prepare the inputs for the prompt.
+
+    Parameters
+    ----------
+    prompt : Prompty
+        The prompty object
+    inputs : Dict[str, Any], optional
+        The inputs to the prompt, by default {}
+
+    Returns
+    -------
+    dict
+        The prepared and hydrated template, shaped for the LLM model
+
+    Example
+    -------
+    >>> import prompty
+    >>> p = prompty.load("prompts/basic.prompty")
+    >>> inputs = {"name": "John Doe"}
+    >>> content = await prompty.prepare_async(p, inputs)
+    """
+    inputs = param_hoisting(inputs, prompt.sample)
+
+    render = await InvokerFactory.run_renderer_async(prompt, inputs, prompt.content)
+    result = await InvokerFactory.run_parser_async(prompt, render)
+
+    return result
+
+
+@trace(description="Run the prepared Prompty content against the model.")
+def run(
+    prompt: Prompty,
+    content: Union[dict, list, str],
+    configuration: Dict[str, Any] = {},
+    parameters: Dict[str, Any] = {},
+    raw: bool = False,
+):
+    """Run the prepared Prompty content.
+
+    Parameters
+    ----------
+    prompt : Prompty
+        The prompty object
+    content : Union[dict, list, str]
+        The content to process
+    configuration : Dict[str, Any], optional
+        The configuration to use, by default {}
+    parameters : Dict[str, Any], optional
+        The parameters to use, by default {}
+    raw : bool, optional
+        Whether to return the raw model response without post-processing, by default False
+
+    Returns
+    -------
+    Any
+        The result of the prompt
+
+    Example
+    -------
+    >>> import prompty
+    >>> p = prompty.load("prompts/basic.prompty")
+    >>> inputs = {"name": "John Doe"}
+    >>> content = prompty.prepare(p, inputs)
+    >>> result = prompty.run(p, content)
+    """
+
+    if configuration != {}:
+        prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration)
+
+    if parameters != {}:
+        prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters)
+
+    result = InvokerFactory.run_executor(prompt, content)
+    if not raw:
+        result = InvokerFactory.run_processor(prompt, result)
+
+    return result
+
+
+@trace(description="Run the prepared Prompty content against the model.")
+async def run_async(
+    prompt: Prompty,
+    content: Union[dict, list, str],
+    configuration: Dict[str, Any] = {},
+    parameters: Dict[str, Any] = {},
+    raw: bool = False,
+):
+    """Run the prepared Prompty content.
+
+    Parameters
+    ----------
+    prompt : Prompty
+        The prompty object
+    content : Union[dict, list, str]
+        The content to process
+    configuration : Dict[str, Any], optional
+        The configuration to use, by default {}
+    parameters : Dict[str, Any], optional
+        The parameters to use, by default {}
+    raw : bool, optional
+        Whether to return the raw model response without post-processing, by default False
+
+    Returns
+    -------
+    Any
+        The result of the prompt
+
+    Example
+    -------
+    >>> import prompty
+    >>> p = prompty.load("prompts/basic.prompty")
+    >>> inputs = {"name": "John Doe"}
+    >>> content = await prompty.prepare_async(p, inputs)
+    >>> result = await prompty.run_async(p, content)
+    """
+
+    if configuration != {}:
+        prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration)
+
+    if parameters != {}:
+        prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters)
+
+    result = await InvokerFactory.run_executor_async(prompt, content)
+    if not raw:
+        result = await InvokerFactory.run_processor_async(prompt, result)
+
+    return result
+
+
+@trace(description="Execute a prompty")
+def execute(
+    prompt: Union[str, Prompty],
+    configuration: Dict[str, Any] = {},
+    parameters: Dict[str, Any] = {},
+    inputs: Dict[str, Any] = {},
+    raw: bool = False,
+    config_name: str = "default",
+):
+    """Execute a prompty.
+
+    Parameters
+    ----------
+    prompt : Union[str, Prompty]
+        The prompty object or path to the prompty file
+    configuration : Dict[str, Any], optional
+        The configuration to use, by default {}
+    parameters : Dict[str, Any], optional
+        The parameters to use, by default {}
+    inputs : Dict[str, Any], optional
+        The inputs to the prompt, by default {}
+    raw : bool, optional
+        Whether to return the raw model response without post-processing, by default False
+    config_name : str, optional
+        The name of the global configuration to use, by default "default"
+
+    Returns
+    -------
+    Any
+        The result of the prompt
+
+    Example
+    -------
+    >>> import prompty
+    >>> inputs = {"name": "John Doe"}
+    >>> result = prompty.execute("prompts/basic.prompty", inputs=inputs)
+    """
+    if isinstance(prompt, str):
+        path = Path(prompt)
+        if not path.is_absolute():
+            # get caller's path (take into account trace frame)
+            caller = Path(traceback.extract_stack()[-3].filename)
+            path = Path(caller.parent / path).resolve().absolute()
+        prompt = load(path, config_name)
+
+    # prepare content
+    content = prepare(prompt, inputs)
+
+    # run LLM model
+    result = run(prompt, content, configuration, parameters, raw)
+
+    return result
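+
+
+# Example (illustrative): overriding model parameters for a single execution;
+# "temperature" is a hypothetical parameter name here, and any key accepted by
+# the configured model can be passed the same way.
+#
+#   result = execute(
+#       "prompts/basic.prompty",
+#       parameters={"temperature": 0.2},
+#       inputs={"name": "John Doe"},
+#   )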
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_renderers.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_renderers.py
new file mode 100644
index 00000000..0d682a7f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_renderers.py
@@ -0,0 +1,30 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="union-attr,assignment,arg-type"
+from pathlib import Path
+from ._core import Prompty
+from ._invoker import Invoker, InvokerFactory
+from ._mustache import render
+
+
+@InvokerFactory.register_renderer("mustache")
+class MustacheRenderer(Invoker):
+    """Render a mustache template."""
+
+    def __init__(self, prompty: Prompty) -> None:
+        super().__init__(prompty)
+        self.templates = {}
+        cur_prompt = self.prompty
+        while cur_prompt:
+            self.templates[Path(cur_prompt.file).name] = cur_prompt.content
+            cur_prompt = cur_prompt.basePrompty
+        self.name = Path(self.prompty.file).name
+
+    def invoke(self, data: str) -> str:
+        generated = render(self.prompty.content, data)  # type: ignore
+        return generated
+
+    async def invoke_async(self, data: str) -> str:
+        return self.invoke(data)
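+
+
+# Example (illustrative): the underlying mustache render used by the invoke
+# methods above substitutes template variables from the data dictionary:
+#   render("Hello {{name}}!", {"name": "John"})  -> "Hello John!"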
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_tracer.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_tracer.py
new file mode 100644
index 00000000..24f800b4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_tracer.py
@@ -0,0 +1,316 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="union-attr,arg-type,misc,return-value,assignment,func-returns-value"
+# pylint: disable=R,redefined-outer-name,bare-except,unspecified-encoding
+import os
+import json
+import inspect
+import traceback
+import importlib.metadata
+import contextlib
+from pathlib import Path
+from numbers import Number
+from datetime import datetime
+from functools import wraps, partial
+from typing import Any, Callable, Dict, Iterator, List, Union
+
+
+# clean up key value pairs for sensitive values
+def sanitize(key: str, value: Any) -> Any:
+    if isinstance(value, str) and any([s in key.lower() for s in ["key", "token", "secret", "password", "credential"]]):
+        return len(str(value)) * "*"
+
+    if isinstance(value, dict):
+        return {k: sanitize(k, v) for k, v in value.items()}
+
+    return value
+
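+# Example (illustrative): values whose key looks sensitive are masked with
+# asterisks of the same length; everything else passes through unchanged.
+#   sanitize("api_key", "abcd")   -> "****"
+#   sanitize("temperature", 0.7)  -> 0.7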
+
+class Tracer:
+    _tracers: Dict[str, Callable[[str], Iterator[Callable[[str, Any], None]]]] = {}
+
+    @classmethod
+    def add(cls, name: str, tracer: Callable[[str], Iterator[Callable[[str, Any], None]]]) -> None:
+        cls._tracers[name] = tracer
+
+    @classmethod
+    def clear(cls) -> None:
+        cls._tracers = {}
+
+    @classmethod
+    @contextlib.contextmanager
+    def start(cls, name: str) -> Iterator[Callable[[str, Any], None]]:
+        with contextlib.ExitStack() as stack:
+            traces: List[Any] = [stack.enter_context(tracer(name)) for tracer in cls._tracers.values()]  # type: ignore
+            yield lambda key, value: [  # type: ignore
+                # normalize and sanitize any trace values
+                trace(key, sanitize(key, to_dict(value)))
+                for trace in traces
+            ]
+
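+# Example (illustrative): registering the console_tracer defined at the bottom
+# of this module and emitting values inside a trace scope; the span name
+# "my-span" is arbitrary, and values are sanitized before reaching tracers.
+#
+#   Tracer.add("console", console_tracer)
+#   with Tracer.start("my-span") as trace:
+#       trace("inputs", {"api_key": "secret"})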
+
+def to_dict(obj: Any) -> Union[Dict[str, Any], List[Dict[str, Any]], str, Number, bool]:
+    # simple json types
+    if isinstance(obj, (str, Number, bool)):
+        return obj
+
+    # datetime
+    if isinstance(obj, datetime):
+        return obj.isoformat()
+
+    # safe Prompty obj serialization
+    if type(obj).__name__ == "Prompty":
+        return obj.to_safe_dict()
+
+    # safe PromptyStream obj serialization
+    if type(obj).__name__ == "PromptyStream":
+        return "PromptyStream"
+
+    if type(obj).__name__ == "AsyncPromptyStream":
+        return "AsyncPromptyStream"
+
+    # recursive list and dict
+    if isinstance(obj, List):
+        return [to_dict(item) for item in obj]  # type: ignore
+
+    if isinstance(obj, Dict):
+        return {k: v if isinstance(v, str) else to_dict(v) for k, v in obj.items()}
+
+    if isinstance(obj, Path):
+        return str(obj)
+
+    # cast to string otherwise...
+    return str(obj)
+
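+# Example (illustrative; Path rendering shown for a POSIX system):
+#   to_dict({"when": datetime(2024, 1, 1), "path": Path("a/b")})
+#   -> {"when": "2024-01-01T00:00:00", "path": "a/b"}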
+
+def _name(func: Callable, args):
+    if hasattr(func, "__qualname__"):
+        signature = f"{func.__module__}.{func.__qualname__}"
+    else:
+        signature = f"{func.__module__}.{func.__name__}"
+
+    # the core invoker (prompty.invoker.Invoker.run*) gets special treatment
+    core_invoker = signature.startswith("prompty.invoker.Invoker.run")
+    if core_invoker:
+        name = type(args[0]).__name__
+        if signature.endswith("async"):
+            signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke_async"
+        else:
+            signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke"
+    else:
+        name = func.__name__
+
+    return name, signature
+
+
+def _inputs(func: Callable, args, kwargs) -> dict:
+    ba = inspect.signature(func).bind(*args, **kwargs)
+    ba.apply_defaults()
+
+    inputs = {k: to_dict(v) for k, v in ba.arguments.items() if k != "self"}
+
+    return inputs
+
+
+def _results(result: Any) -> Union[Dict, List[Dict], str, Number, bool]:
+    return to_dict(result) if result is not None else "None"
+
+
+def _trace_sync(func: Union[Callable, None] = None, **okwargs: Any) -> Callable:
+
+    @wraps(func)  # type: ignore
+    def wrapper(*args, **kwargs):
+        name, signature = _name(func, args)  # type: ignore
+        with Tracer.start(name) as trace:
+            trace("signature", signature)
+
+            # support arbitrary keyword
+            # arguments for trace decorator
+            for k, v in okwargs.items():
+                trace(k, to_dict(v))
+
+            inputs = _inputs(func, args, kwargs)  # type: ignore
+            trace("inputs", inputs)
+
+            try:
+                result = func(*args, **kwargs)  # type: ignore
+                trace("result", _results(result))
+            except Exception as e:
+                trace(
+                    "result",
+                    {
+                        "exception": {
+                            "type": type(e),
+                            "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None),
+                            "message": str(e),
+                            "args": to_dict(e.args),
+                        }
+                    },
+                )
+                raise e
+
+            return result
+
+    return wrapper
+
+
+def _trace_async(func: Union[Callable, None] = None, **okwargs: Any) -> Callable:
+
+    @wraps(func)  # type: ignore
+    async def wrapper(*args, **kwargs):
+        name, signature = _name(func, args)  # type: ignore
+        with Tracer.start(name) as trace:
+            trace("signature", signature)
+
+            # support arbitrary keyword
+            # arguments for trace decorator
+            for k, v in okwargs.items():
+                trace(k, to_dict(v))
+
+            inputs = _inputs(func, args, kwargs)  # type: ignore
+            trace("inputs", inputs)
+            try:
+                result = await func(*args, **kwargs)  # type: ignore
+                trace("result", _results(result))
+            except Exception as e:
+                trace(
+                    "result",
+                    {
+                        "exception": {
+                            "type": type(e),
+                            "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None),
+                            "message": str(e),
+                            "args": to_dict(e.args),
+                        }
+                    },
+                )
+                raise e
+
+            return result
+
+    return wrapper
+
+
+def trace(func: Union[Callable, None] = None, **kwargs: Any) -> Callable:
+    if func is None:
+        return partial(trace, **kwargs)
+    wrapped_method = _trace_async if inspect.iscoroutinefunction(func) else _trace_sync
+    return wrapped_method(func, **kwargs)
+
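+# Example (illustrative): trace works both bare and with keyword arguments;
+# extra keyword arguments become key/value pairs on the recorded frame:
+#
+#   @trace
+#   def plain(x): ...
+#
+#   @trace(description="Load a prompty file.")
+#   def described(y): ...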
+
+class PromptyTracer:
+    def __init__(self, output_dir: Union[str, None] = None) -> None:
+        if output_dir:
+            self.output = Path(output_dir).resolve().absolute()
+        else:
+            self.output = Path(Path(os.getcwd()) / ".runs").resolve().absolute()
+
+        if not self.output.exists():
+            self.output.mkdir(parents=True, exist_ok=True)
+
+        self.stack: List[Dict[str, Any]] = []
+
+    @contextlib.contextmanager
+    def tracer(self, name: str) -> Iterator[Callable[[str, Any], None]]:
+        try:
+            self.stack.append({"name": name})
+            frame = self.stack[-1]
+            frame["__time"] = {
+                "start": datetime.now(),
+            }
+
+            def add(key: str, value: Any) -> None:
+                if key not in frame:
+                    frame[key] = value
+                # multiple values for the same key collect into a list
+                else:
+                    if isinstance(frame[key], list):
+                        frame[key].append(value)
+                    else:
+                        frame[key] = [frame[key], value]
+
+            yield add
+        finally:
+            frame = self.stack.pop()
+            start: datetime = frame["__time"]["start"]
+            end: datetime = datetime.now()
+
+            # add duration to frame
+            frame["__time"] = {
+                "start": start.strftime("%Y-%m-%dT%H:%M:%S.%f"),
+                "end": end.strftime("%Y-%m-%dT%H:%M:%S.%f"),
+                "duration": int((end - start).total_seconds() * 1000),
+            }
+
+            # hoist usage to parent frame
+            if "result" in frame and isinstance(frame["result"], dict):
+                if "usage" in frame["result"]:
+                    frame["__usage"] = self.hoist_item(
+                        frame["result"]["usage"],
+                        frame["__usage"] if "__usage" in frame else {},
+                    )
+
+            # streamed results may have usage as well
+            if "result" in frame and isinstance(frame["result"], list):
+                for result in frame["result"]:
+                    if isinstance(result, dict) and "usage" in result and isinstance(result["usage"], dict):
+                        frame["__usage"] = self.hoist_item(
+                            result["usage"],
+                            frame["__usage"] if "__usage" in frame else {},
+                        )
+
+            # add any usage frames from below
+            if "__frames" in frame:
+                for child in frame["__frames"]:
+                    if "__usage" in child:
+                        frame["__usage"] = self.hoist_item(
+                            child["__usage"],
+                            frame["__usage"] if "__usage" in frame else {},
+                        )
+
+            # if stack is empty, dump the frame
+            if len(self.stack) == 0:
+                self.write_trace(frame)
+            # otherwise, append the frame to the parent
+            else:
+                if "__frames" not in self.stack[-1]:
+                    self.stack[-1]["__frames"] = []
+                self.stack[-1]["__frames"].append(frame)
+
+    def hoist_item(self, src: Dict[str, Any], cur: Dict[str, Any]) -> Dict[str, Any]:
+        for key, value in src.items():
+            if value is None or isinstance(value, list) or isinstance(value, dict):
+                continue
+            try:
+                if key not in cur:
+                    cur[key] = value
+                else:
+                    cur[key] += value
+            except:
+                continue
+
+        return cur
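+
+    # Example (illustrative): numeric usage counters are summed key by key:
+    #   hoist_item({"prompt_tokens": 3}, {"prompt_tokens": 2, "total": 5})
+    #   -> {"prompt_tokens": 5, "total": 5}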
+
+    def write_trace(self, frame: Dict[str, Any]) -> None:
+        trace_file = self.output / f"{frame['name']}.{datetime.now().strftime('%Y%m%d.%H%M%S')}.tracy"
+
+        v = importlib.metadata.version("prompty")  # type: ignore
+        enriched_frame = {
+            "runtime": "python",
+            "version": v,
+            "trace": frame,
+        }
+
+        with open(trace_file, "w") as f:
+            json.dump(enriched_frame, f, indent=4)
+
+
+@contextlib.contextmanager
+def console_tracer(name: str) -> Iterator[Callable[[str, Any], None]]:
+    try:
+        print(f"Starting {name}")
+        yield lambda key, value: print(f"{key}:\n{json.dumps(to_dict(value), indent=4)}")
+    finally:
+        print(f"Ending {name}")
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_utils.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_utils.py
new file mode 100644
index 00000000..22f28418
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/prompts/_utils.py
@@ -0,0 +1,100 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+# mypy: disable-error-code="import-untyped,return-value"
+# pylint: disable=line-too-long,R,wrong-import-order,global-variable-not-assigned
+import json
+import os
+import re
+import sys
+from typing import Any, Dict
+from pathlib import Path
+
+
+_yaml_regex = re.compile(
+    r"^\s*" + r"(?:---|\+\+\+)" + r"(.*?)" + r"(?:---|\+\+\+)" + r"\s*(.+)$",
+    re.S | re.M,
+)
+
+
+def load_text(file_path, encoding="utf-8"):
+    with open(file_path, "r", encoding=encoding) as file:
+        return file.read()
+
+
+def load_json(file_path, encoding="utf-8"):
+    return json.loads(load_text(file_path, encoding=encoding))
+
+
+def load_global_config(prompty_path: Path = Path.cwd(), configuration: str = "default") -> Dict[str, Any]:
+    prompty_config_path = prompty_path.joinpath("prompty.json")
+    if os.path.exists(prompty_config_path):
+        c = load_json(prompty_config_path)
+        if configuration in c:
+            return c[configuration]
+        else:
+            raise ValueError(f'Item "{configuration}" not found in "{prompty_config_path}"')
+    else:
+        return {}
+
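+# Example (illustrative) prompty.json consumed above; top-level keys are the
+# configuration names selected via the `configuration` argument, and the nested
+# keys shown are hypothetical model settings:
+#   {
+#       "default": {"type": "azure", "azure_deployment": "gpt-4o"}
+#   }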
+
+def load_prompty(file_path, encoding="utf-8") -> Dict[str, Any]:
+    contents = load_text(file_path, encoding=encoding)
+    return parse(contents)
+
+
+def parse(contents):
+    try:
+        import yaml  # type: ignore
+    except ImportError as exc:
+        raise ImportError("Please install pyyaml to use this function. Run `pip install pyyaml`.") from exc
+
+    global _yaml_regex
+
+    fmatter = ""
+    body = ""
+    result = _yaml_regex.search(contents)
+
+    if result:
+        fmatter = result.group(1)
+        body = result.group(2)
+    return {
+        "attributes": yaml.load(fmatter, Loader=yaml.SafeLoader),
+        "body": body,
+        "frontmatter": fmatter,
+    }
+
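+# Example (illustrative): parse() splits the YAML front matter from the body:
+#   parse("---\nname: basic\n---\nHello {{name}}")
+#   -> {"attributes": {"name": "basic"}, "body": "Hello {{name}}",
+#       "frontmatter": "\nname: basic\n"}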
+
+def remove_leading_empty_space(multiline_str: str) -> str:
+    """
+    Processes a multiline string by:
+    1. Removing leading empty lines
+    2. Finding the minimum number of leading spaces (tabs count as 2 spaces)
+    3. Removing that common indentation from every line
+
+    :param multiline_str: The input multiline string.
+    :type multiline_str: str
+    :return: The processed multiline string.
+    :rtype: str
+    """
+    lines = multiline_str.splitlines()
+    start_index = 0
+    while start_index < len(lines) and lines[start_index].strip() == "":
+        start_index += 1
+
+    # Find the minimum number of leading spaces
+    min_spaces = sys.maxsize
+    for line in lines[start_index:]:
+        if len(line.strip()) == 0:
+            continue
+        spaces = len(line) - len(line.lstrip())
+        spaces += line.lstrip().count("\t") * 2  # Count tabs as 2 spaces
+        min_spaces = min(min_spaces, spaces)
+
+    # Remove leading spaces and indent to the minimum level
+    processed_lines = []
+    for line in lines[start_index:]:
+        processed_lines.append(line[min_spaces:])
+
+    return "\n".join(processed_lines)
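+
+
+# Example (illustrative): leading blank lines are dropped and the common
+# indentation is removed from what remains:
+#   remove_leading_empty_space("\n    a\n      b")  -> "a\n  b"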
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/py.typed b/.venv/lib/python3.12/site-packages/azure/ai/inference/py.typed
new file mode 100644
index 00000000..e5aff4f8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/py.typed
@@ -0,0 +1 @@
+# Marker file for PEP 561.
\ No newline at end of file
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/inference/tracing.py b/.venv/lib/python3.12/site-packages/azure/ai/inference/tracing.py
new file mode 100644
index 00000000..f7937a99
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/inference/tracing.py
@@ -0,0 +1,850 @@
+# ------------------------------------
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+# ------------------------------------
+import copy
+from enum import Enum
+import functools
+import json
+import importlib
+import logging
+import os
+from time import time_ns
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
+from urllib.parse import urlparse
+
+# pylint: disable = no-name-in-module
+from azure.core import CaseInsensitiveEnumMeta  # type: ignore
+from azure.core.settings import settings
+from . import models as _models
+
+try:
+    # pylint: disable = no-name-in-module
+    from azure.core.tracing import AbstractSpan, SpanKind  # type: ignore
+    from opentelemetry.trace import StatusCode, Span
+
+    _tracing_library_available = True
+except ModuleNotFoundError:
+
+    _tracing_library_available = False
+
+
+__all__ = [
+    "AIInferenceInstrumentor",
+]
+
+
+_inference_traces_enabled: bool = False
+_trace_inference_content: bool = False
+_INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference"
+
+
+class TraceType(str, Enum, metaclass=CaseInsensitiveEnumMeta):  # pylint: disable=C4747
+    """An enumeration class to represent different types of traces."""
+
+    INFERENCE = "Inference"
+
+
+class AIInferenceInstrumentor:
+    """
+    A class for managing the trace instrumentation of AI Inference.
+
+    This class allows enabling or disabling tracing for AI Inference
+    and provides functionality to check whether instrumentation is active.
+
+    """
+
+    def __init__(self):
+        if not _tracing_library_available:
+            raise ModuleNotFoundError(
+                "Azure Core Tracing Opentelemetry is not installed. "
+                "Please install it using 'pip install azure-core-tracing-opentelemetry'"
+            )
+        # In the future we could support different versions from the same library
+        # and have a parameter that specifies the version to use.
+        self._impl = _AIInferenceInstrumentorPreview()
+
+    def instrument(self, enable_content_recording: Optional[bool] = None) -> None:
+        """
+        Enable trace instrumentation for AI Inference.
+
+        :param enable_content_recording: Whether content recording is enabled as part
+            of the traces or not. Content in this context refers to chat message content
+            and function call tool related function names, function parameter names and
+            values. True will enable content recording, False will disable it. If no value
+            is provided, then the value read from the environment variable
+            AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED is used. If the environment variable
+            is not found, then the value will default to False. Note that successive calls
+            to instrument always apply the content recording value provided with the most
+            recent call (re-reading the environment variable, and defaulting to False, when
+            no value is provided), even if instrument was called previously without an
+            intervening call to uninstrument.
+
+        :type enable_content_recording: bool, optional
+        """
+        self._impl.instrument(enable_content_recording=enable_content_recording)
+
+    def uninstrument(self) -> None:
+        """
+        Disable trace instrumentation for AI Inference.
+
+        :raises RuntimeError: If instrumentation is not currently enabled.
+
+        This method removes any active instrumentation, stopping the tracing
+        of AI Inference.
+        """
+        self._impl.uninstrument()
+
+    def is_instrumented(self) -> bool:
+        """
+        Check if trace instrumentation for AI Inference is currently enabled.
+
+        :return: True if instrumentation is active, False otherwise.
+        :rtype: bool
+        """
+        return self._impl.is_instrumented()
+
+    def is_content_recording_enabled(self) -> bool:
+        """
+        This function gets the content recording value.
+
+        :return: A bool value indicating whether content recording is enabled.
+        :rtype: bool
+        """
+        return self._impl.is_content_recording_enabled()
+
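+# Example (illustrative): a typical instrumentation lifecycle; requires the
+# optional azure-core-tracing-opentelemetry package to be installed:
+#
+#   instrumentor = AIInferenceInstrumentor()
+#   instrumentor.instrument(enable_content_recording=True)
+#   # ... calls to ChatCompletionsClient.complete are now traced ...
+#   instrumentor.uninstrument()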
+
+class _AIInferenceInstrumentorPreview:
+    """
+    A class for managing the trace instrumentation of AI Inference.
+
+    This class allows enabling or disabling tracing for AI Inference
+    and provides functionality to check whether instrumentation is active.
+    """
+
+    def _str_to_bool(self, s):
+        if s is None:
+            return False
+        return str(s).lower() == "true"
+
+    def instrument(self, enable_content_recording: Optional[bool] = None):
+        """
+        Enable trace instrumentation for AI Inference.
+
+        :param enable_content_recording: Whether content recording is enabled as part
+        of the traces or not. Content in this context refers to chat message content
+        and function call tool related function names, function parameter names and
+        values. True will enable content recording, False will disable it. If no value
+        is provided, then the value read from environment variable
+        AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED is used. If the environment variable
+        is not found, then the value will default to False.
+
+        :type enable_content_recording: bool, optional
+        """
+        if enable_content_recording is None:
+            var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED")
+            enable_content_recording = self._str_to_bool(var_value)
+        if not self.is_instrumented():
+            self._instrument_inference(enable_content_recording)
+        else:
+            self._set_content_recording_enabled(enable_content_recording=enable_content_recording)
+
+    def uninstrument(self):
+        """
+        Disable trace instrumentation for AI Inference.
+
+        This method removes any active instrumentation, stopping the tracing
+        of AI Inference.
+        """
+        if self.is_instrumented():
+            self._uninstrument_inference()
+
+    def is_instrumented(self):
+        """
+        Check if trace instrumentation for AI Inference is currently enabled.
+
+        :return: True if instrumentation is active, False otherwise.
+        :rtype: bool
+        """
+        return self._is_instrumented()
+
+    def set_content_recording_enabled(self, enable_content_recording: bool = False) -> None:
+        """This function sets the content recording value.
+
+        :param enable_content_recording: Indicates whether tracing of message content should be enabled.
+                                    This also controls whether function call tool function names,
+                                    parameter names and parameter values are traced.
+        :type enable_content_recording: bool
+        """
+        self._set_content_recording_enabled(enable_content_recording=enable_content_recording)
+
+    def is_content_recording_enabled(self) -> bool:
+        """This function gets the content recording value.
+
+        :return: A bool value indicating whether content tracing is enabled.
+        :rtype: bool
+        """
+        return self._is_content_recording_enabled()
+
+    def _set_attributes(self, span: "AbstractSpan", *attrs: Tuple[str, Any]) -> None:
+        for attr in attrs:
+            key, value = attr
+            if value is not None:
+                span.add_attribute(key, value)
+
+    def _add_request_chat_message_events(self, span: "AbstractSpan", **kwargs: Any) -> int:
+        timestamp = 0
+        for message in kwargs.get("messages", []):
+            try:
+                message = message.as_dict()
+            except AttributeError:
+                pass
+
+            if message.get("role"):
+                timestamp = self._record_event(
+                    span,
+                    f"gen_ai.{message.get('role')}.message",
+                    {
+                        "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
+                        "gen_ai.event.content": json.dumps(message),
+                    },
+                    timestamp,
+                )
+
+        return timestamp
+
+    def _parse_url(self, url):
+        parsed = urlparse(url)
+        server_address = parsed.hostname
+        port = parsed.port
+        return server_address, port
+
+    def _add_request_chat_attributes(self, span: "AbstractSpan", *args: Any, **kwargs: Any) -> None:
+        client = args[0]
+        endpoint = client._config.endpoint  # pylint: disable=protected-access
+        server_address, port = self._parse_url(endpoint)
+        model = "chat"
+        if kwargs.get("model") is not None:
+            model_value = kwargs.get("model")
+            if model_value is not None:
+                model = model_value
+
+        self._set_attributes(
+            span,
+            ("gen_ai.operation.name", "chat"),
+            ("gen_ai.system", _INFERENCE_GEN_AI_SYSTEM_NAME),
+            ("gen_ai.request.model", model),
+            ("gen_ai.request.max_tokens", kwargs.get("max_tokens")),
+            ("gen_ai.request.temperature", kwargs.get("temperature")),
+            ("gen_ai.request.top_p", kwargs.get("top_p")),
+            ("server.address", server_address),
+        )
+        if port is not None and port != 443:
+            span.add_attribute("server.port", port)
+
+    def _remove_function_call_names_and_arguments(self, tool_calls: list) -> list:
+        tool_calls_copy = copy.deepcopy(tool_calls)
+        for tool_call in tool_calls_copy:
+            if "function" in tool_call:
+                if "name" in tool_call["function"]:
+                    del tool_call["function"]["name"]
+                if "arguments" in tool_call["function"]:
+                    del tool_call["function"]["arguments"]
+                if not tool_call["function"]:
+                    del tool_call["function"]
+        return tool_calls_copy
+
+    def _get_finish_reasons(self, result) -> Optional[List[str]]:
+        if hasattr(result, "choices") and result.choices:
+            finish_reasons: List[str] = []
+            for choice in result.choices:
+                finish_reason = getattr(choice, "finish_reason", None)
+
+                if finish_reason is None:
+                    # If finish_reason is None, default to "none"
+                    finish_reasons.append("none")
+                elif hasattr(finish_reason, "value"):
+                    # If finish_reason has a 'value' attribute (i.e., it's an enum), use it
+                    finish_reasons.append(finish_reason.value)
+                elif isinstance(finish_reason, str):
+                    # If finish_reason is a string, use it directly
+                    finish_reasons.append(finish_reason)
+                else:
+                    # Default to "none"
+                    finish_reasons.append("none")
+
+            return finish_reasons
+        return None
+
+    def _get_finish_reason_for_choice(self, choice):
+        finish_reason = getattr(choice, "finish_reason", None)
+        if finish_reason is not None:
+            return finish_reason.value
+
+        return "none"
+
+    def _add_response_chat_message_events(
+        self, span: "AbstractSpan", result: _models.ChatCompletions, last_event_timestamp_ns: int
+    ) -> None:
+        for choice in result.choices:
+            attributes = {}
+            if _trace_inference_content:
+                full_response: Dict[str, Any] = {
+                    "message": {"content": choice.message.content},
+                    "finish_reason": self._get_finish_reason_for_choice(choice),
+                    "index": choice.index,
+                }
+                if choice.message.tool_calls:
+                    full_response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls]
+                attributes = {
+                    "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
+                    "gen_ai.event.content": json.dumps(full_response),
+                }
+            else:
+                response: Dict[str, Any] = {
+                    "finish_reason": self._get_finish_reason_for_choice(choice),
+                    "index": choice.index,
+                }
+                if choice.message.tool_calls:
+                    response["message"] = {}
+                    tool_calls_function_names_and_arguments_removed = self._remove_function_call_names_and_arguments(
+                        choice.message.tool_calls
+                    )
+                    response["message"]["tool_calls"] = [
+                        tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed
+                    ]
+
+                attributes = {
+                    "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
+                    "gen_ai.event.content": json.dumps(response),
+                }
+            last_event_timestamp_ns = self._record_event(span, "gen_ai.choice", attributes, last_event_timestamp_ns)
+
+    def _add_response_chat_attributes(
+        self,
+        span: "AbstractSpan",
+        result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate],
+    ) -> None:
+        self._set_attributes(
+            span,
+            ("gen_ai.response.id", result.id),
+            ("gen_ai.response.model", result.model),
+            (
+                "gen_ai.usage.input_tokens",
+                (result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None),
+            ),
+            (
+                "gen_ai.usage.output_tokens",
+                (result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None),
+            ),
+        )
+        finish_reasons = self._get_finish_reasons(result)
+        if finish_reasons is not None:
+            span.add_attribute("gen_ai.response.finish_reasons", finish_reasons)  # type: ignore
+
+    def _add_request_details(self, span: "AbstractSpan", args: Any, kwargs: Any) -> int:
+        self._add_request_chat_attributes(span, *args, **kwargs)
+        if _trace_inference_content:
+            return self._add_request_chat_message_events(span, **kwargs)
+        return 0
+
+    def _add_response_details(self, span: "AbstractSpan", result: object, last_event_timestamp_ns: int) -> None:
+        if isinstance(result, _models.ChatCompletions):
+            self._add_response_chat_attributes(span, result)
+            self._add_response_chat_message_events(span, result, last_event_timestamp_ns)
+        # TODO add more models here
+
+    def _accumulate_response(self, item, accumulate: Dict[str, Any]) -> None:
+        if item.finish_reason:
+            accumulate["finish_reason"] = item.finish_reason
+        if item.index:
+            accumulate["index"] = item.index
+        if item.delta.content:
+            accumulate.setdefault("message", {})
+            accumulate["message"].setdefault("content", "")
+            accumulate["message"]["content"] += item.delta.content
+        if item.delta.tool_calls:
+            accumulate.setdefault("message", {})
+            accumulate["message"].setdefault("tool_calls", [])
+            if item.delta.tool_calls is not None:
+                for tool_call in item.delta.tool_calls:
+                    if tool_call.id:
+                        accumulate["message"]["tool_calls"].append(
+                            {
+                                "id": tool_call.id,
+                                "type": "",
+                                "function": {"name": "", "arguments": ""},
+                            }
+                        )
+                    if tool_call.function:
+                        accumulate["message"]["tool_calls"][-1]["type"] = "function"
+                    if tool_call.function and tool_call.function.name:
+                        accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name
+                    if tool_call.function and tool_call.function.arguments:
+                        accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments
+
+    def _accumulate_async_streaming_response(self, item, accumulate: Dict[str, Any]) -> None:
+        if not "choices" in item:
+            return
+        if "finish_reason" in item["choices"][0] and item["choices"][0]["finish_reason"]:
+            accumulate["finish_reason"] = item["choices"][0]["finish_reason"]
+        if "index" in item["choices"][0] and item["choices"][0]["index"]:
+            accumulate["index"] = item["choices"][0]["index"]
+        if not "delta" in item["choices"][0]:
+            return
+        if "content" in item["choices"][0]["delta"] and item["choices"][0]["delta"]["content"]:
+            accumulate.setdefault("message", {})
+            accumulate["message"].setdefault("content", "")
+            accumulate["message"]["content"] += item["choices"][0]["delta"]["content"]
+        if "tool_calls" in item["choices"][0]["delta"] and item["choices"][0]["delta"]["tool_calls"]:
+            accumulate.setdefault("message", {})
+            accumulate["message"].setdefault("tool_calls", [])
+            if item["choices"][0]["delta"]["tool_calls"] is not None:
+                for tool_call in item["choices"][0]["delta"]["tool_calls"]:
+                    if tool_call.id:
+                        accumulate["message"]["tool_calls"].append(
+                            {
+                                "id": tool_call.id,
+                                "type": "",
+                                "function": {"name": "", "arguments": ""},
+                            }
+                        )
+                    if tool_call.function:
+                        accumulate["message"]["tool_calls"][-1]["type"] = "function"
+                    if tool_call.function and tool_call.function.name:
+                        accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name
+                    if tool_call.function and tool_call.function.arguments:
+                        accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments
+
+    def _wrapped_stream(
+        self, stream_obj: _models.StreamingChatCompletions, span: "AbstractSpan", previous_event_timestamp: int
+    ) -> _models.StreamingChatCompletions:
+        class StreamWrapper(_models.StreamingChatCompletions):
+            def __init__(self, stream_obj, instrumentor):
+                super().__init__(stream_obj._response)
+                self._instrumentor = instrumentor
+
+            def __iter__(  # pyright: ignore [reportIncompatibleMethodOverride]
+                self,
+            ) -> Iterator[_models.StreamingChatCompletionsUpdate]:
+                accumulate: Dict[str, Any] = {}
+                try:
+                    chunk = None
+                    for chunk in stream_obj:
+                        for item in chunk.choices:
+                            self._instrumentor._accumulate_response(item, accumulate)
+                        yield chunk
+
+                    if chunk is not None:
+                        self._instrumentor._add_response_chat_attributes(span, chunk)
+
+                except Exception as exc:
+                    # Set the span status to error
+                    if isinstance(span.span_instance, Span):  # pyright: ignore [reportPossiblyUnboundVariable]
+                        span.span_instance.set_status(
+                            StatusCode.ERROR,  # pyright: ignore [reportPossiblyUnboundVariable]
+                            description=str(exc),
+                        )
+                    module = exc.__module__ if hasattr(exc, "__module__") and exc.__module__ != "builtins" else ""
+                    error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
+                    self._instrumentor._set_attributes(span, ("error.type", error_type))
+                    raise
+
+                finally:
+                    if stream_obj._done is False:
+                        if accumulate.get("finish_reason") is None:
+                            accumulate["finish_reason"] = "error"
+                    else:
+                        # Only one choice expected with streaming
+                        accumulate["index"] = 0
+                        # Delete message if content tracing is not enabled
+                        if not _trace_inference_content:
+                            if "message" in accumulate:
+                                if "content" in accumulate["message"]:
+                                    del accumulate["message"]["content"]
+                                if not accumulate["message"]:
+                                    del accumulate["message"]
+                            if "message" in accumulate:
+                                if "tool_calls" in accumulate["message"]:
+                                    tool_calls_function_names_and_arguments_removed = (
+                                        self._instrumentor._remove_function_call_names_and_arguments(
+                                            accumulate["message"]["tool_calls"]
+                                        )
+                                    )
+                                    accumulate["message"]["tool_calls"] = list(
+                                        tool_calls_function_names_and_arguments_removed
+                                    )
+                    attributes = {
+                        "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
+                        "gen_ai.event.content": json.dumps(accumulate),
+                    }
+                    self._instrumentor._record_event(span, "gen_ai.choice", attributes, previous_event_timestamp)
+                    span.finish()
+
+        return StreamWrapper(stream_obj, self)
+
+    def _async_wrapped_stream(
+        self, stream_obj: _models.AsyncStreamingChatCompletions, span: "AbstractSpan", last_event_timestamp_ns: int
+    ) -> _models.AsyncStreamingChatCompletions:
+        class AsyncStreamWrapper(_models.AsyncStreamingChatCompletions):
+            def __init__(self, stream_obj, instrumentor, span, last_event_timestamp_ns):
+                super().__init__(stream_obj._response)
+                self._instrumentor = instrumentor
+                self._accumulate: Dict[str, Any] = {}
+                self._stream_obj = stream_obj
+                self.span = span
+                self._last_result = None
+                self._last_event_timestamp_ns = last_event_timestamp_ns
+
+            async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate":
+                try:
+                    result = await super().__anext__()
+                    self._instrumentor._accumulate_async_streaming_response(  # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess]
+                        result, self._accumulate
+                    )
+                    self._last_result = result
+                except StopAsyncIteration as exc:
+                    self._trace_stream_content()
+                    raise exc
+                return result
+
+            def _trace_stream_content(self) -> None:
+                if self._last_result:
+                    self._instrumentor._add_response_chat_attributes(  # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess]
+                        span, self._last_result
+                    )
+                # Only one choice expected with streaming
+                self._accumulate["index"] = 0
+                # Delete message if content tracing is not enabled
+                if not _trace_inference_content:
+                    if "message" in self._accumulate:
+                        if "content" in self._accumulate["message"]:
+                            del self._accumulate["message"]["content"]
+                        if not self._accumulate["message"]:
+                            del self._accumulate["message"]
+                    if "message" in self._accumulate:
+                        if "tool_calls" in self._accumulate["message"]:
+                            tools_no_recording = self._instrumentor._remove_function_call_names_and_arguments(  # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess]
+                                self._accumulate["message"]["tool_calls"]
+                            )
+                            self._accumulate["message"]["tool_calls"] = list(tools_no_recording)
+                attributes = {
+                    "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME,
+                    "gen_ai.event.content": json.dumps(self._accumulate),
+                }
+                self._last_event_timestamp_ns = self._instrumentor._record_event(  # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess]
+                    span, "gen_ai.choice", attributes, self._last_event_timestamp_ns
+                )
+                span.finish()
+
+        async_stream_wrapper = AsyncStreamWrapper(stream_obj, self, span, last_event_timestamp_ns)
+        return async_stream_wrapper
+
+    def _record_event(
+        self, span: "AbstractSpan", name: str, attributes: Dict[str, Any], last_event_timestamp_ns: int
+    ) -> int:
+        timestamp = time_ns()
+
+        # we're recording multiple events, some of them are emitted within (hundreds of) nanoseconds of each other.
+        # time.time_ns resolution is not high enough on windows to guarantee unique timestamps for each message.
+        # Also Azure Monitor truncates resolution to microseconds and some other backends truncate to milliseconds.
+        #
+        # But we need to give users a way to restore event order, so we're incrementing the timestamp
+        # by 1 microsecond for each message.
+        #
+        # This is a workaround, we'll find a generic and better solution - see
+        # https://github.com/open-telemetry/semantic-conventions/issues/1701
+        if last_event_timestamp_ns > 0 and timestamp <= (last_event_timestamp_ns + 1000):
+            timestamp = last_event_timestamp_ns + 1000
+
+        span.span_instance.add_event(name=name, attributes=attributes, timestamp=timestamp)
+
+        return timestamp
+
+    def _trace_sync_function(
+        self,
+        function: Callable,
+        *,
+        _args_to_ignore: Optional[List[str]] = None,
+        _trace_type=TraceType.INFERENCE,
+        _name: Optional[str] = None,
+    ) -> Callable:
+        """
+        Decorator that adds tracing to a synchronous function.
+
+        :param function: The function to be traced.
+        :type function: Callable
+        :param args_to_ignore: A list of argument names to be ignored in the trace.
+                            Defaults to None.
+        :type args_to_ignore: List[str], optional
+        :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE.
+        :type trace_type: TraceType, optional
+        :param name: The name of the trace; defaults to the function name if not provided.
+        :type name: str, optional
+        :return: The traced function.
+        :rtype: Callable
+        """
+
+        @functools.wraps(function)
+        def inner(*args, **kwargs):
+
+            span_impl_type = settings.tracing_implementation()
+            if span_impl_type is None:
+                return function(*args, **kwargs)
+
+            class_function_name = function.__qualname__
+
+            if class_function_name.startswith("ChatCompletionsClient.complete"):
+                if kwargs.get("model") is None:
+                    span_name = "chat"
+                else:
+                    model = kwargs.get("model")
+                    span_name = f"chat {model}"
+
+                span = span_impl_type(
+                    name=span_name,
+                    kind=SpanKind.CLIENT,  # pyright: ignore [reportPossiblyUnboundVariable]
+                )
+
+                try:
+                    # tracing events are not supported in azure-core-tracing-opentelemetry,
+                    # so we need to access the span instance directly
+                    with span_impl_type.change_context(span.span_instance):
+                        last_event_timestamp_ns = self._add_request_details(span, args, kwargs)
+                        result = function(*args, **kwargs)
+                        if kwargs.get("stream") is True:
+                            return self._wrapped_stream(result, span, last_event_timestamp_ns)
+                        self._add_response_details(span, result, last_event_timestamp_ns)
+                except Exception as exc:
+                    # Set the span status to error
+                    if isinstance(span.span_instance, Span):  # pyright: ignore [reportPossiblyUnboundVariable]
+                        span.span_instance.set_status(
+                            StatusCode.ERROR,  # pyright: ignore [reportPossiblyUnboundVariable]
+                            description=str(exc),
+                        )
+                    module = getattr(exc, "__module__", "")
+                    module = module if module != "builtins" else ""
+                    error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
+                    self._set_attributes(span, ("error.type", error_type))
+                    span.finish()
+                    raise
+
+                span.finish()
+                return result
+
+            # Handle the default case (if the function name does not match)
+            return None  # Ensure all paths return
+
+        return inner
+
+    def _trace_async_function(
+        self,
+        function: Callable,
+        *,
+        _args_to_ignore: Optional[List[str]] = None,
+        _trace_type=TraceType.INFERENCE,
+        _name: Optional[str] = None,
+    ) -> Callable:
+        """
+        Decorator that adds tracing to an asynchronous function.
+
+        :param function: The function to be traced.
+        :type function: Callable
+        :param args_to_ignore: A list of argument names to be ignored in the trace.
+                            Defaults to None.
+        :type args_to_ignore: List[str], optional
+        :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE.
+        :type trace_type: TraceType, optional
+        :param name: The name of the trace; defaults to the function name if not provided.
+        :type name: str, optional
+        :return: The traced function.
+        :rtype: Callable
+        """
+
+        @functools.wraps(function)
+        async def inner(*args, **kwargs):
+            span_impl_type = settings.tracing_implementation()
+            if span_impl_type is None:
+                return await function(*args, **kwargs)
+
+            class_function_name = function.__qualname__
+
+            if class_function_name.startswith("ChatCompletionsClient.complete"):
+                if kwargs.get("model") is None:
+                    span_name = "chat"
+                else:
+                    model = kwargs.get("model")
+                    span_name = f"chat {model}"
+
+                span = span_impl_type(
+                    name=span_name,
+                    kind=SpanKind.CLIENT,  # pyright: ignore [reportPossiblyUnboundVariable]
+                )
+                try:
+                    # tracing events are not supported in azure-core-tracing-opentelemetry,
+                    # so we need to access the span instance directly
+                    with span_impl_type.change_context(span.span_instance):
+                        last_event_timestamp_ns = self._add_request_details(span, args, kwargs)
+                        result = await function(*args, **kwargs)
+                        if kwargs.get("stream") is True:
+                            return self._async_wrapped_stream(result, span, last_event_timestamp_ns)
+                        self._add_response_details(span, result, last_event_timestamp_ns)
+
+                except Exception as exc:
+                    # Set the span status to error
+                    if isinstance(span.span_instance, Span):  # pyright: ignore [reportPossiblyUnboundVariable]
+                        span.span_instance.set_status(
+                            StatusCode.ERROR,  # pyright: ignore [reportPossiblyUnboundVariable]
+                            description=str(exc),
+                        )
+                    module = getattr(exc, "__module__", "")
+                    module = module if module != "builtins" else ""
+                    error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__
+                    self._set_attributes(span, ("error.type", error_type))
+                    span.finish()
+                    raise
+
+                span.finish()
+                return result
+
+            # Handle the default case (if the function name does not match)
+            return None  # Ensure all paths return
+
+        return inner
+
+    def _inject_async(self, f, _trace_type, _name):
+        wrapper_fun = self._trace_async_function(f)
+        wrapper_fun._original = f  # pylint: disable=protected-access # pyright: ignore [reportFunctionMemberAccess]
+        return wrapper_fun
+
+    def _inject_sync(self, f, _trace_type, _name):
+        wrapper_fun = self._trace_sync_function(f)
+        wrapper_fun._original = f  # pylint: disable=protected-access # pyright: ignore [reportFunctionMemberAccess]
+        return wrapper_fun
+
+    def _inference_apis(self):
+        sync_apis = (
+            (
+                "azure.ai.inference",
+                "ChatCompletionsClient",
+                "complete",
+                TraceType.INFERENCE,
+                "inference_chat_completions_complete",
+            ),
+        )
+        async_apis = (
+            (
+                "azure.ai.inference.aio",
+                "ChatCompletionsClient",
+                "complete",
+                TraceType.INFERENCE,
+                "inference_chat_completions_complete",
+            ),
+        )
+        return sync_apis, async_apis
+
+    def _inference_api_list(self):
+        sync_apis, async_apis = self._inference_apis()
+        yield sync_apis, self._inject_sync
+        yield async_apis, self._inject_async
+
+    def _generate_api_and_injector(self, apis):
+        for api, injector in apis:
+            for module_name, class_name, method_name, trace_type, name in api:
+                try:
+                    module = importlib.import_module(module_name)
+                    api_class = getattr(module, class_name)
+                    if hasattr(api_class, method_name):
+                        yield api_class, method_name, trace_type, injector, name
+                except AttributeError as e:
+                    # Log the attribute exception with the missing class information
+                    logging.warning(
+                        "AttributeError: The module '%s' does not have the class '%s'. %s",
+                        module_name,
+                        class_name,
+                        str(e),
+                    )
+                except Exception as e:  # pylint: disable=broad-except
+                    # Log other exceptions as a warning, as we're not sure what they might be
+                    logging.warning("An unexpected error occurred: '%s'", str(e))
+
+    def _available_inference_apis_and_injectors(self):
+        """
+        Generates a sequence of tuples containing Inference API classes, method names, and
+        corresponding injector functions.
+
+        :return: A generator yielding tuples.
+        :rtype: tuple
+        """
+        yield from self._generate_api_and_injector(self._inference_api_list())
+
+    def _instrument_inference(self, enable_content_tracing: bool = False):
+        """This function modifies the methods of the Inference API classes to
+        inject logic before calling the original methods.
+        The original methods are stored as _original attributes of the methods.
+
+        :param enable_content_tracing: Indicates whether tracing of message content should be enabled.
+                                    This also controls whether function call tool function names,
+                                    parameter names and parameter values are traced.
+        :type enable_content_tracing: bool
+        """
+        # pylint: disable=W0603
+        global _inference_traces_enabled
+        global _trace_inference_content
+        if _inference_traces_enabled:
+            raise RuntimeError("Traces already started for azure.ai.inference")
+        _inference_traces_enabled = True
+        _trace_inference_content = enable_content_tracing
+        for (
+            api,
+            method,
+            trace_type,
+            injector,
+            name,
+        ) in self._available_inference_apis_and_injectors():
+            # Check if the method of the api class has already been modified
+            if not hasattr(getattr(api, method), "_original"):
+                setattr(api, method, injector(getattr(api, method), trace_type, name))
+
+    def _uninstrument_inference(self):
+        """This function restores the original methods of the Inference API classes
+        by assigning them back from the _original attributes of the modified methods.
+        """
+        # pylint: disable=W0603
+        global _inference_traces_enabled
+        global _trace_inference_content
+        _trace_inference_content = False
+        for api, method, _, _, _ in self._available_inference_apis_and_injectors():
+            if hasattr(getattr(api, method), "_original"):
+                setattr(api, method, getattr(getattr(api, method), "_original"))
+        _inference_traces_enabled = False
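+
+    # Round-trip sketch (illustrative): instrumenting swaps the method for its
+    # traced wrapper, and uninstrumenting restores the original function object:
+    #
+    #     original = ChatCompletionsClient.complete
+    #     instrumentor._instrument_inference()
+    #     assert ChatCompletionsClient.complete._original is original
+    #     instrumentor._uninstrument_inference()
+    #     assert ChatCompletionsClient.complete is original
+    #
+    # (`instrumentor` stands for an instance of the enclosing instrumentor class.)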
+
+    def _is_instrumented(self):
+        """This function returns True if Inference libary has already been instrumented
+        for tracing and False if it has not been instrumented.
+
+        :return: A value indicating whether the Inference library is currently instrumented or not.
+        :rtype: bool
+        """
+        return _inference_traces_enabled
+
+    def _set_content_recording_enabled(self, enable_content_recording: bool = False) -> None:
+        """This function sets the content recording value.
+
+        :param enable_content_recording: Indicates whether tracing of message content should be enabled.
+                                    This also controls whether function-call tool function names,
+                                    parameter names, and parameter values are traced.
+        :type enable_content_recording: bool
+        """
+        global _trace_inference_content  # pylint: disable=W0603
+        _trace_inference_content = enable_content_recording
+
+    def _is_content_recording_enabled(self) -> bool:
+        """This function gets the content recording value.
+
+        :return: A bool value indicating whether content tracing is enabled.
+        :rtype: bool
+        """
+        return _trace_inference_content
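+
+    # A hedged usage sketch of the public surface that wraps these private
+    # helpers. It assumes the enclosing AIInferenceInstrumentor class exposes
+    # instrument()/uninstrument()/is_content_recording_enabled(), as the
+    # azure-ai-inference tracing documentation describes:
+    #
+    #     from azure.ai.inference.tracing import AIInferenceInstrumentor
+    #
+    #     instrumentor = AIInferenceInstrumentor()
+    #     instrumentor.instrument(enable_content_recording=True)
+    #     ...  # ChatCompletionsClient.complete() calls now emit spans
+    #     if instrumentor.is_content_recording_enabled():
+    #         instrumentor.uninstrument()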