Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/types')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/adapter.py  10
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/caching.py  78
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/completion.py  193
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/embedding.py  21
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/files.py  283
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py  5
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/guardrails.py  168
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py  21
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py  15
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py  9
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py  6
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py  29
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py  54
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py  28
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py  7
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py  61
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py  62
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py  294
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py  186
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py  366
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py  17
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py  503
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py  46
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py  24
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py  10
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py  21
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py  12
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py  29
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/openai.py  1040
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py  19
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py  486
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py  33
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py  20
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/rerank.py  78
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/router.py  707
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/services.py  39
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/utils.py  2081
37 files changed, 7061 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/adapter.py b/.venv/lib/python3.12/site-packages/litellm/types/adapter.py
new file mode 100644
index 00000000..2995cfbc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/adapter.py
@@ -0,0 +1,10 @@
+from typing import List
+
+from typing_extensions import Dict, Required, TypedDict, override
+
+from litellm.integrations.custom_logger import CustomLogger
+
+
+class AdapterItem(TypedDict):
+    id: str
+    adapter: CustomLogger
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/caching.py b/.venv/lib/python3.12/site-packages/litellm/types/caching.py
new file mode 100644
index 00000000..c15d4d15
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/caching.py
@@ -0,0 +1,78 @@
+from enum import Enum
+from typing import Any, Dict, Literal, Optional, TypedDict, Union
+
+from pydantic import BaseModel
+
+
+class LiteLLMCacheType(str, Enum):
+    LOCAL = "local"
+    REDIS = "redis"
+    REDIS_SEMANTIC = "redis-semantic"
+    S3 = "s3"
+    DISK = "disk"
+    QDRANT_SEMANTIC = "qdrant-semantic"
+
+
+CachingSupportedCallTypes = Literal[
+    "completion",
+    "acompletion",
+    "embedding",
+    "aembedding",
+    "atranscription",
+    "transcription",
+    "atext_completion",
+    "text_completion",
+    "arerank",
+    "rerank",
+]
+
+
+class RedisPipelineIncrementOperation(TypedDict):
+    """
+    TypedDict for 1 Redis Pipeline Increment Operation
+    """
+
+    key: str
+    increment_value: float
+    ttl: Optional[int]
+
+
+DynamicCacheControl = TypedDict(
+    "DynamicCacheControl",
+    {
+        # Will cache the response for the user-defined amount of time (in seconds).
+        "ttl": Optional[int],
+        # Namespace to use for caching
+        "namespace": Optional[str],
+        # Max Age to use for caching
+        "s-maxage": Optional[int],
+        "s-max-age": Optional[int],
+        # Will not return a cached response, but instead call the actual endpoint.
+        "no-cache": Optional[bool],
+        # Will not store the response in the cache.
+        "no-store": Optional[bool],
+    },
+)
+
+
+class CachePingResponse(BaseModel):
+    status: str
+    cache_type: str
+    ping_response: Optional[bool] = None
+    set_cache_response: Optional[str] = None
+    litellm_cache_params: Optional[str] = None
+
+    # intentionally a dict, since we run masker.mask_dict() on HealthCheckCacheParams
+    health_check_cache_params: Optional[dict] = None
+
+
+class HealthCheckCacheParams(BaseModel):
+    """
+    Cache Params returned on /cache/ping call
+    """
+
+    host: Optional[str] = None
+    port: Optional[Union[str, int]] = None
+    redis_kwargs: Optional[Dict[str, Any]] = None
+    namespace: Optional[str] = None
+    redis_version: Optional[Union[str, int, float]] = None
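
For reference, a minimal usage sketch of the caching types above (illustrative only, not part of the diff; the key names and TTL values are placeholders):

from litellm.types.caching import (
    DynamicCacheControl,
    LiteLLMCacheType,
    RedisPipelineIncrementOperation,
)

# One pipelined Redis increment: add 0.5 to a hypothetical spend counter with a 1h TTL.
increment_op: RedisPipelineIncrementOperation = {
    "key": "team:spend:daily",
    "increment_value": 0.5,
    "ttl": 3600,
}

# Per-request cache controls; the hyphenated keys are why this TypedDict uses the
# functional (dict-literal) syntax rather than class syntax.
cache_control: DynamicCacheControl = {
    "ttl": 300,
    "namespace": "my-app",
    "s-maxage": 300,
    "s-max-age": 300,
    "no-cache": False,
    "no-store": False,
}

assert LiteLLMCacheType.REDIS.value == "redis"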
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/completion.py b/.venv/lib/python3.12/site-packages/litellm/types/completion.py
new file mode 100644
index 00000000..b06bb733
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/completion.py
@@ -0,0 +1,193 @@
+from typing import Iterable, List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict
+from typing_extensions import Literal, Required, TypedDict
+
+
+class ChatCompletionSystemMessageParam(TypedDict, total=False):
+    content: Required[str]
+    """The contents of the system message."""
+
+    role: Required[Literal["system"]]
+    """The role of the messages author, in this case `system`."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
+
+
+class ChatCompletionContentPartTextParam(TypedDict, total=False):
+    text: Required[str]
+    """The text content."""
+
+    type: Required[Literal["text"]]
+    """The type of the content part."""
+
+
+class ImageURL(TypedDict, total=False):
+    url: Required[str]
+    """Either a URL of the image or the base64 encoded image data."""
+
+    detail: Literal["auto", "low", "high"]
+    """Specifies the detail level of the image.
+
+    Learn more in the
+    [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding).
+    """
+
+
+class ChatCompletionContentPartImageParam(TypedDict, total=False):
+    image_url: Required[ImageURL]
+
+    type: Required[Literal["image_url"]]
+    """The type of the content part."""
+
+
+ChatCompletionContentPartParam = Union[
+    ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam
+]
+
+
+class ChatCompletionUserMessageParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
+    """The contents of the user message."""
+
+    role: Required[Literal["user"]]
+    """The role of the messages author, in this case `user`."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
+
+
+class FunctionCall(TypedDict, total=False):
+    arguments: Required[str]
+    """
+    The arguments to call the function with, as generated by the model in JSON
+    format. Note that the model does not always generate valid JSON, and may
+    hallucinate parameters not defined by your function schema. Validate the
+    arguments in your code before calling your function.
+    """
+
+    name: Required[str]
+    """The name of the function to call."""
+
+
+class Function(TypedDict, total=False):
+    arguments: Required[str]
+    """
+    The arguments to call the function with, as generated by the model in JSON
+    format. Note that the model does not always generate valid JSON, and may
+    hallucinate parameters not defined by your function schema. Validate the
+    arguments in your code before calling your function.
+    """
+
+    name: Required[str]
+    """The name of the function to call."""
+
+
+class ChatCompletionToolMessageParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
+    """The contents of the tool message."""
+
+    role: Required[Literal["tool"]]
+    """The role of the messages author, in this case `tool`."""
+
+    tool_call_id: Required[str]
+    """Tool call that this message is responding to."""
+
+
+class ChatCompletionFunctionMessageParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
+    """The contents of the function message."""
+
+    name: Required[str]
+    """The name of the function to call."""
+
+    role: Required[Literal["function"]]
+    """The role of the messages author, in this case `function`."""
+
+
+class ChatCompletionMessageToolCallParam(TypedDict, total=False):
+    id: Required[str]
+    """The ID of the tool call."""
+
+    function: Required[Function]
+    """The function that the model called."""
+
+    type: Required[Literal["function"]]
+    """The type of the tool. Currently, only `function` is supported."""
+
+
+class ChatCompletionAssistantMessageParam(TypedDict, total=False):
+    role: Required[Literal["assistant"]]
+    """The role of the messages author, in this case `assistant`."""
+
+    content: Optional[str]
+    """The contents of the assistant message.
+
+    Required unless `tool_calls` or `function_call` is specified.
+    """
+
+    function_call: FunctionCall
+    """Deprecated and replaced by `tool_calls`.
+
+    The name and arguments of a function that should be called, as generated by the
+    model.
+    """
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
+
+    tool_calls: Iterable[ChatCompletionMessageToolCallParam]
+    """The tool calls generated by the model, such as function calls."""
+
+
+ChatCompletionMessageParam = Union[
+    ChatCompletionSystemMessageParam,
+    ChatCompletionUserMessageParam,
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionFunctionMessageParam,
+    ChatCompletionToolMessageParam,
+]
+
+
+class CompletionRequest(BaseModel):
+    model: str
+    messages: List[str] = []
+    timeout: Optional[Union[float, int]] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    n: Optional[int] = None
+    stream: Optional[bool] = None
+    stop: Optional[dict] = None
+    max_tokens: Optional[int] = None
+    presence_penalty: Optional[float] = None
+    frequency_penalty: Optional[float] = None
+    logit_bias: Optional[dict] = None
+    user: Optional[str] = None
+    response_format: Optional[dict] = None
+    seed: Optional[int] = None
+    tools: Optional[List[str]] = None
+    tool_choice: Optional[str] = None
+    logprobs: Optional[bool] = None
+    top_logprobs: Optional[int] = None
+    deployment_id: Optional[str] = None
+    functions: Optional[List[str]] = None
+    function_call: Optional[str] = None
+    base_url: Optional[str] = None
+    api_version: Optional[str] = None
+    api_key: Optional[str] = None
+    model_list: Optional[List[str]] = None
+
+    model_config = ConfigDict(protected_namespaces=(), extra="allow")
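
A short sketch of composing the message TypedDicts defined above (illustrative, not part of the diff; the content strings and image URL are placeholders):

from typing import List

from litellm.types.completion import (
    ChatCompletionMessageParam,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)

# TypedDict constructors return plain dicts; the role/content keys are Required.
messages: List[ChatCompletionMessageParam] = [
    ChatCompletionSystemMessageParam(role="system", content="You are a terse assistant."),
    ChatCompletionUserMessageParam(
        role="user",
        content=[
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/cat.png", "detail": "low"},
            },
        ],
    ),
]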
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/embedding.py b/.venv/lib/python3.12/site-packages/litellm/types/embedding.py
new file mode 100644
index 00000000..f8fdebc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/embedding.py
@@ -0,0 +1,21 @@
+from typing import List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict
+
+
+class EmbeddingRequest(BaseModel):
+    model: str
+    input: List[str] = []
+    timeout: int = 600
+    api_base: Optional[str] = None
+    api_version: Optional[str] = None
+    api_key: Optional[str] = None
+    api_type: Optional[str] = None
+    caching: bool = False
+    user: Optional[str] = None
+    custom_llm_provider: Optional[Union[str, dict]] = None
+    litellm_call_id: Optional[str] = None
+    litellm_logging_obj: Optional[dict] = None
+    logger_fn: Optional[str] = None
+
+    model_config = ConfigDict(extra="allow")
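
A small sketch of the request model above, assuming pydantic v2 (ConfigDict is used in the file); the model name and extra field are placeholders, and extra="allow" is what lets the provider-specific field through:

from litellm.types.embedding import EmbeddingRequest

req = EmbeddingRequest(
    model="text-embedding-3-small",
    input=["hello world"],
    user="user-123",
    encoding_format="float",  # extra field, accepted because extra="allow"
)
print(req.model_dump(exclude_none=True))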
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/files.py b/.venv/lib/python3.12/site-packages/litellm/types/files.py
new file mode 100644
index 00000000..600ad806
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/files.py
@@ -0,0 +1,283 @@
+from enum import Enum
+from types import MappingProxyType
+from typing import List, Set, Mapping
+
+"""
+Base Enums/Consts
+"""
+
+
+class FileType(Enum):
+    AAC = "AAC"
+    CSV = "CSV"
+    DOC = "DOC"
+    DOCX = "DOCX"
+    FLAC = "FLAC"
+    FLV = "FLV"
+    GIF = "GIF"
+    GOOGLE_DOC = "GOOGLE_DOC"
+    GOOGLE_DRAWINGS = "GOOGLE_DRAWINGS"
+    GOOGLE_SHEETS = "GOOGLE_SHEETS"
+    GOOGLE_SLIDES = "GOOGLE_SLIDES"
+    HEIC = "HEIC"
+    HEIF = "HEIF"
+    HTML = "HTML"
+    JPEG = "JPEG"
+    JSON = "JSON"
+    M4A = "M4A"
+    M4V = "M4V"
+    MOV = "MOV"
+    MP3 = "MP3"
+    MP4 = "MP4"
+    MPEG = "MPEG"
+    MPEGPS = "MPEGPS"
+    MPG = "MPG"
+    MPA = "MPA"
+    MPGA = "MPGA"
+    OGG = "OGG"
+    OPUS = "OPUS"
+    PDF = "PDF"
+    PCM = "PCM"
+    PNG = "PNG"
+    PPT = "PPT"
+    PPTX = "PPTX"
+    RTF = "RTF"
+    THREE_GPP = "3GPP"
+    TXT = "TXT"
+    WAV = "WAV"
+    WEBM = "WEBM"
+    WEBP = "WEBP"
+    WMV = "WMV"
+    XLS = "XLS"
+    XLSX = "XLSX"
+
+
+FILE_EXTENSIONS: Mapping[FileType, List[str]] = MappingProxyType(
+    {
+        FileType.AAC: ["aac"],
+        FileType.CSV: ["csv"],
+        FileType.DOC: ["doc"],
+        FileType.DOCX: ["docx"],
+        FileType.FLAC: ["flac"],
+        FileType.FLV: ["flv"],
+        FileType.GIF: ["gif"],
+        FileType.GOOGLE_DOC: ["gdoc"],
+        FileType.GOOGLE_DRAWINGS: ["gdraw"],
+        FileType.GOOGLE_SHEETS: ["gsheet"],
+        FileType.GOOGLE_SLIDES: ["gslides"],
+        FileType.HEIC: ["heic"],
+        FileType.HEIF: ["heif"],
+        FileType.HTML: ["html", "htm"],
+        FileType.JPEG: ["jpeg", "jpg"],
+        FileType.JSON: ["json"],
+        FileType.M4A: ["m4a"],
+        FileType.M4V: ["m4v"],
+        FileType.MOV: ["mov"],
+        FileType.MP3: ["mp3"],
+        FileType.MP4: ["mp4"],
+        FileType.MPEG: ["mpeg"],
+        FileType.MPEGPS: ["mpegps"],
+        FileType.MPG: ["mpg"],
+        FileType.MPA: ["mpa"],
+        FileType.MPGA: ["mpga"],
+        FileType.OGG: ["ogg"],
+        FileType.OPUS: ["opus"],
+        FileType.PDF: ["pdf"],
+        FileType.PCM: ["pcm"],
+        FileType.PNG: ["png"],
+        FileType.PPT: ["ppt"],
+        FileType.PPTX: ["pptx"],
+        FileType.RTF: ["rtf"],
+        FileType.THREE_GPP: ["3gpp"],
+        FileType.TXT: ["txt"],
+        FileType.WAV: ["wav"],
+        FileType.WEBM: ["webm"],
+        FileType.WEBP: ["webp"],
+        FileType.WMV: ["wmv"],
+        FileType.XLS: ["xls"],
+        FileType.XLSX: ["xlsx"],
+    }
+)
+
+FILE_MIME_TYPES: Mapping[FileType, str] = MappingProxyType(
+    {
+        FileType.AAC: "audio/aac",
+        FileType.CSV: "text/csv",
+        FileType.DOC: "application/msword",
+        FileType.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        FileType.FLAC: "audio/flac",
+        FileType.FLV: "video/x-flv",
+        FileType.GIF: "image/gif",
+        FileType.GOOGLE_DOC: "application/vnd.google-apps.document",
+        FileType.GOOGLE_DRAWINGS: "application/vnd.google-apps.drawing",
+        FileType.GOOGLE_SHEETS: "application/vnd.google-apps.spreadsheet",
+        FileType.GOOGLE_SLIDES: "application/vnd.google-apps.presentation",
+        FileType.HEIC: "image/heic",
+        FileType.HEIF: "image/heif",
+        FileType.HTML: "text/html",
+        FileType.JPEG: "image/jpeg",
+        FileType.JSON: "application/json",
+        FileType.M4A: "audio/x-m4a",
+        FileType.M4V: "video/x-m4v",
+        FileType.MOV: "video/quicktime",
+        FileType.MP3: "audio/mpeg",
+        FileType.MP4: "video/mp4",
+        FileType.MPEG: "video/mpeg",
+        FileType.MPEGPS: "video/mpegps",
+        FileType.MPG: "video/mpg",
+        FileType.MPA: "audio/m4a",
+        FileType.MPGA: "audio/mpga",
+        FileType.OGG: "audio/ogg",
+        FileType.OPUS: "audio/opus",
+        FileType.PDF: "application/pdf",
+        FileType.PCM: "audio/pcm",
+        FileType.PNG: "image/png",
+        FileType.PPT: "application/vnd.ms-powerpoint",
+        FileType.PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        FileType.RTF: "application/rtf",
+        FileType.THREE_GPP: "video/3gpp",
+        FileType.TXT: "text/plain",
+        FileType.WAV: "audio/wav",
+        FileType.WEBM: "video/webm",
+        FileType.WEBP: "image/webp",
+        FileType.WMV: "video/wmv",
+        FileType.XLS: "application/vnd.ms-excel",
+        FileType.XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    }
+)
+
+"""
+Util Functions
+"""
+
+
+def get_file_extension_from_mime_type(mime_type: str) -> str:
+    for file_type, mime in FILE_MIME_TYPES.items():
+        if mime.lower() == mime_type.lower():
+            return FILE_EXTENSIONS[file_type][0]
+    raise ValueError(f"Unknown extension for mime type: {mime_type}")
+
+
+def get_file_type_from_extension(extension: str) -> FileType:
+    for file_type, extensions in FILE_EXTENSIONS.items():
+        if extension.lower() in extensions:
+            return file_type
+
+    raise ValueError(f"Unknown file type for extension: {extension}")
+
+
+def get_file_extension_for_file_type(file_type: FileType) -> str:
+    return FILE_EXTENSIONS[file_type][0]
+
+
+def get_file_mime_type_for_file_type(file_type: FileType) -> str:
+    return FILE_MIME_TYPES[file_type]
+
+
+def get_file_mime_type_from_extension(extension: str) -> str:
+    file_type = get_file_type_from_extension(extension)
+    return get_file_mime_type_for_file_type(file_type)
+
+
+"""
+FileType Type Groupings (Videos, Images, etc)
+"""
+
+# Images
+IMAGE_FILE_TYPES = {
+    FileType.PNG,
+    FileType.JPEG,
+    FileType.GIF,
+    FileType.WEBP,
+    FileType.HEIC,
+    FileType.HEIF,
+}
+
+
+def is_image_file_type(file_type):
+    return file_type in IMAGE_FILE_TYPES
+
+
+# Videos
+VIDEO_FILE_TYPES = {
+    FileType.MOV,
+    FileType.MP4,
+    FileType.MPEG,
+    FileType.M4V,
+    FileType.FLV,
+    FileType.MPEGPS,
+    FileType.MPG,
+    FileType.WEBM,
+    FileType.WMV,
+    FileType.THREE_GPP,
+}
+
+
+def is_video_file_type(file_type):
+    return file_type in VIDEO_FILE_TYPES
+
+
+# Audio
+AUDIO_FILE_TYPES = {
+    FileType.AAC,
+    FileType.FLAC,
+    FileType.MP3,
+    FileType.MPA,
+    FileType.MPGA,
+    FileType.OPUS,
+    FileType.PCM,
+    FileType.WAV,
+}
+
+
+def is_audio_file_type(file_type):
+    return file_type in AUDIO_FILE_TYPES
+
+
+# Text
+TEXT_FILE_TYPES = {FileType.CSV, FileType.HTML, FileType.RTF, FileType.TXT}
+
+
+def is_text_file_type(file_type):
+    return file_type in TEXT_FILE_TYPES
+
+
+"""
+Other FileType Groupings
+"""
+# Accepted file types for GEMINI 1.5 through Vertex AI
+# https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-images-nodejs
+GEMINI_1_5_ACCEPTED_FILE_TYPES: Set[FileType] = {
+    # Image
+    FileType.PNG,
+    FileType.JPEG,
+    FileType.WEBP,
+    # Audio
+    FileType.AAC,
+    FileType.FLAC,
+    FileType.MP3,
+    FileType.MPA,
+    FileType.MPEG,
+    FileType.MPGA,
+    FileType.OPUS,
+    FileType.PCM,
+    FileType.WAV,
+    FileType.WEBM,
+    # Video
+    FileType.FLV,
+    FileType.MOV,
+    FileType.MPEG,
+    FileType.MPEGPS,
+    FileType.MPG,
+    FileType.MP4,
+    FileType.WEBM,
+    FileType.WMV,
+    FileType.THREE_GPP,
+    # PDF
+    FileType.PDF,
+    FileType.TXT,
+}
+
+
+def is_gemini_1_5_accepted_file_type(file_type: FileType) -> bool:
+    return file_type in GEMINI_1_5_ACCEPTED_FILE_TYPES
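
A quick sketch exercising the lookup helpers defined above (illustrative only, not part of the diff):

from litellm.types.files import (
    FileType,
    get_file_extension_from_mime_type,
    get_file_mime_type_from_extension,
    get_file_type_from_extension,
    is_gemini_1_5_accepted_file_type,
    is_image_file_type,
)

# Extension -> FileType -> MIME type round trips through the mappings above.
assert get_file_type_from_extension("jpg") is FileType.JPEG
assert get_file_mime_type_from_extension("jpg") == "image/jpeg"
assert get_file_extension_from_mime_type("application/pdf") == "pdf"
assert is_image_file_type(FileType.WEBP)
assert is_gemini_1_5_accepted_file_type(FileType.PDF)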
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py b/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py
new file mode 100644
index 00000000..af99d88c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py
@@ -0,0 +1,5 @@
+from openai.types.fine_tuning.fine_tuning_job import Hyperparameters
+
+
+class OpenAIFineTuningHyperparameters(Hyperparameters):
+    model_config = {"extra": "allow"}
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py b/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py
new file mode 100644
index 00000000..b7018fe2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py
@@ -0,0 +1,168 @@
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+
+from pydantic import BaseModel, ConfigDict, Field, SecretStr
+from typing_extensions import Required, TypedDict
+
+"""
+Pydantic object defining how to set guardrails on litellm proxy
+
+guardrails:
+  - guardrail_name: "bedrock-pre-guard"
+    litellm_params:
+      guardrail: bedrock  # supported values: "aporia", "bedrock", "lakera"
+      mode: "during_call"
+      guardrailIdentifier: ff6ujrregl1q
+      guardrailVersion: "DRAFT"
+      default_on: true
+"""
+
+
+class SupportedGuardrailIntegrations(Enum):
+    APORIA = "aporia"
+    BEDROCK = "bedrock"
+    GURDRAILS_AI = "guardrails_ai"
+    LAKERA = "lakera"
+    PRESIDIO = "presidio"
+    HIDE_SECRETS = "hide-secrets"
+    AIM = "aim"
+
+
+class Role(Enum):
+    SYSTEM = "system"
+    ASSISTANT = "assistant"
+    USER = "user"
+
+
+default_roles = [Role.SYSTEM, Role.ASSISTANT, Role.USER]
+
+
+class GuardrailItemSpec(TypedDict, total=False):
+    callbacks: Required[List[str]]
+    default_on: bool
+    logging_only: Optional[bool]
+    enabled_roles: Optional[List[Role]]
+    callback_args: Dict[str, Dict]
+
+
+class GuardrailItem(BaseModel):
+    callbacks: List[str]
+    default_on: bool
+    logging_only: Optional[bool]
+    guardrail_name: str
+    callback_args: Dict[str, Dict]
+    enabled_roles: Optional[List[Role]]
+
+    model_config = ConfigDict(use_enum_values=True)
+
+    def __init__(
+        self,
+        callbacks: List[str],
+        guardrail_name: str,
+        default_on: bool = False,
+        logging_only: Optional[bool] = None,
+        enabled_roles: Optional[List[Role]] = default_roles,
+        callback_args: Dict[str, Dict] = {},
+    ):
+        super().__init__(
+            callbacks=callbacks,
+            default_on=default_on,
+            logging_only=logging_only,
+            guardrail_name=guardrail_name,
+            enabled_roles=enabled_roles,
+            callback_args=callback_args,
+        )
+
+
+# Define the TypedDicts
+class LakeraCategoryThresholds(TypedDict, total=False):
+    prompt_injection: float
+    jailbreak: float
+
+
+class LitellmParams(TypedDict):
+    guardrail: str
+    mode: str
+    api_key: Optional[str]
+    api_base: Optional[str]
+
+    # Lakera specific params
+    category_thresholds: Optional[LakeraCategoryThresholds]
+
+    # Bedrock specific params
+    guardrailIdentifier: Optional[str]
+    guardrailVersion: Optional[str]
+
+    # Presidio params
+    output_parse_pii: Optional[bool]
+    presidio_ad_hoc_recognizers: Optional[str]
+    mock_redacted_text: Optional[dict]
+
+    # hide secrets params
+    detect_secrets_config: Optional[dict]
+
+    # guardrails ai params
+    guard_name: Optional[str]
+    default_on: Optional[bool]
+
+
+class Guardrail(TypedDict, total=False):
+    guardrail_name: str
+    litellm_params: LitellmParams
+    guardrail_info: Optional[Dict]
+
+
+class guardrailConfig(TypedDict):
+    guardrails: List[Guardrail]
+
+
+class GuardrailEventHooks(str, Enum):
+    pre_call = "pre_call"
+    post_call = "post_call"
+    during_call = "during_call"
+    logging_only = "logging_only"
+
+
+class BedrockTextContent(TypedDict, total=False):
+    text: str
+
+
+class BedrockContentItem(TypedDict, total=False):
+    text: BedrockTextContent
+
+
+class BedrockRequest(TypedDict, total=False):
+    source: Literal["INPUT", "OUTPUT"]
+    content: List[BedrockContentItem]
+
+
+class DynamicGuardrailParams(TypedDict):
+    extra_body: Dict[str, Any]
+
+
+class GuardrailLiteLLMParamsResponse(BaseModel):
+    """The returned LiteLLM Params object for /guardrails/list"""
+
+    guardrail: str
+    mode: Union[str, List[str]]
+    default_on: bool = Field(default=False)
+
+    def __init__(self, **kwargs):
+        default_on = kwargs.get("default_on")
+        if default_on is None:
+            default_on = False
+
+        super().__init__(**kwargs)
+
+
+class GuardrailInfoResponse(BaseModel):
+    guardrail_name: str
+    litellm_params: GuardrailLiteLLMParamsResponse
+    guardrail_info: Optional[Dict]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+class ListGuardrailsResponse(BaseModel):
+    guardrails: List[GuardrailInfoResponse]
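
A minimal sketch of the guardrail objects above, mirroring the YAML example in the module docstring (illustrative, not part of the diff; names are placeholders):

from litellm.types.guardrails import GuardrailEventHooks, GuardrailItem, Role

# GuardrailItem's __init__ fills logging_only / enabled_roles / callback_args with defaults.
item = GuardrailItem(
    callbacks=["bedrock_guardrail"],
    guardrail_name="bedrock-pre-guard",
    default_on=True,
)

assert Role.USER.value == "user"
assert GuardrailEventHooks.during_call == "during_call"  # str Enum compares to its value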
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py
new file mode 100644
index 00000000..6c0de762
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py
@@ -0,0 +1,21 @@
+import os
+from datetime import datetime as dt
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Set, TypedDict
+
+
+class ArgillaItem(TypedDict):
+    fields: Dict[str, Any]
+
+
+class ArgillaPayload(TypedDict):
+    items: List[ArgillaItem]
+
+
+class ArgillaCredentialsObject(TypedDict):
+    ARGILLA_API_KEY: str
+    ARGILLA_DATASET_NAME: str
+    ARGILLA_BASE_URL: str
+
+
+SUPPORTED_PAYLOAD_FIELDS = ["messages", "response"]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py
new file mode 100644
index 00000000..e1ec1755
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py
@@ -0,0 +1,15 @@
+from typing import TYPE_CHECKING, Any, Literal, Optional
+
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    Protocol = Literal["otlp_grpc", "otlp_http"]
+else:
+    Protocol = Any
+
+
+class ArizeConfig(BaseModel):
+    space_key: Optional[str] = None
+    api_key: Optional[str] = None
+    protocol: Protocol
+    endpoint: str
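
An illustrative config using the model above (not part of the diff; keys and endpoint are placeholders). Note that outside type checking, Protocol resolves to Any, so the protocol value is not validated at runtime:

from litellm.types.integrations.arize import ArizeConfig

config = ArizeConfig(
    space_key="example-space",
    api_key="example-key",
    protocol="otlp_grpc",
    endpoint="https://example-otlp-endpoint/v1",  # placeholder endpoint
)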
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py
new file mode 100644
index 00000000..4566022d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py
@@ -0,0 +1,9 @@
+from typing import TYPE_CHECKING, Literal, Optional
+
+from pydantic import BaseModel
+from .arize import Protocol
+
+class ArizePhoenixConfig(BaseModel):
+    otlp_auth_headers: Optional[str] = None
+    protocol: Protocol
+    endpoint: str 
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py
new file mode 100644
index 00000000..b69529d1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py
@@ -0,0 +1,6 @@
+from typing import Literal, Optional, TypedDict
+
+
+class IntegrationHealthCheckStatus(TypedDict):
+    status: Literal["healthy", "unhealthy"]
+    error_message: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py
new file mode 100644
index 00000000..79d4eded
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py
@@ -0,0 +1,29 @@
+from enum import Enum
+from typing import Optional, TypedDict
+
+
+class DataDogStatus(str, Enum):
+    INFO = "info"
+    WARN = "warning"
+    ERROR = "error"
+
+
+class DatadogPayload(TypedDict, total=False):
+    ddsource: str
+    ddtags: str
+    hostname: str
+    message: str
+    service: str
+    status: str
+
+
+class DD_ERRORS(Enum):
+    DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
+
+
+class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
+    exception: str
+    error_class: str
+    status_code: Optional[int]
+    traceback: str
+    user_api_key_dict: dict
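
An illustrative payload using the Datadog types above (not part of the diff; all values are placeholders):

from litellm.types.integrations.datadog import DataDogStatus, DatadogPayload

payload = DatadogPayload(
    ddsource="litellm",
    ddtags="env:dev,service:litellm",
    hostname="proxy-1",
    message='{"event": "llm_api_success"}',
    service="litellm-server",
    status=DataDogStatus.INFO.value,
)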
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py
new file mode 100644
index 00000000..9298b157
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py
@@ -0,0 +1,54 @@
+"""
+Payloads for Datadog LLM Observability Service (LLMObs)
+
+API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=example#api-standards
+"""
+
+from typing import Any, Dict, List, Literal, Optional, TypedDict
+
+
+class InputMeta(TypedDict):
+    messages: List[Any]
+
+
+class OutputMeta(TypedDict):
+    messages: List[Any]
+
+
+class Meta(TypedDict):
+    # The span kind: "agent", "workflow", "llm", "tool", "task", "embedding", or "retrieval".
+    kind: Literal["llm", "tool", "task", "embedding", "retrieval"]
+    input: InputMeta  # The span’s input information.
+    output: OutputMeta  # The span’s output information.
+    metadata: Dict[str, Any]
+
+
+class LLMMetrics(TypedDict, total=False):
+    input_tokens: float
+    output_tokens: float
+    total_tokens: float
+    time_to_first_token: float
+    time_per_output_token: float
+
+
+class LLMObsPayload(TypedDict):
+    parent_id: str
+    trace_id: str
+    span_id: str
+    name: str
+    meta: Meta
+    start_ns: int
+    duration: int
+    metrics: LLMMetrics
+    tags: List
+
+
+class DDSpanAttributes(TypedDict):
+    ml_app: str
+    tags: List[str]
+    spans: List[LLMObsPayload]
+
+
+class DDIntakePayload(TypedDict):
+    type: str
+    attributes: DDSpanAttributes
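
A sketch of one LLM Observability span built from the types above (not part of the diff; IDs, timestamps, and token counts are made up):

from litellm.types.integrations.datadog_llm_obs import (
    DDIntakePayload,
    DDSpanAttributes,
    InputMeta,
    LLMMetrics,
    LLMObsPayload,
    Meta,
    OutputMeta,
)

span = LLMObsPayload(
    parent_id="undefined",
    trace_id="trace-123",
    span_id="span-456",
    name="litellm_request",
    meta=Meta(
        kind="llm",
        input=InputMeta(messages=[{"role": "user", "content": "hi"}]),
        output=OutputMeta(messages=[{"role": "assistant", "content": "hello"}]),
        metadata={"model": "gpt-4o"},
    ),
    start_ns=1_700_000_000_000_000_000,
    duration=1_500_000_000,
    metrics=LLMMetrics(input_tokens=12, output_tokens=5, total_tokens=17),
    tags=["env:dev"],
)

# Spans are shipped wrapped in a single intake payload.
intake = DDIntakePayload(
    type="span",
    attributes=DDSpanAttributes(ml_app="litellm-proxy", tags=["env:dev"], spans=[span]),
)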
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py
new file mode 100644
index 00000000..a4fd8a6a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py
@@ -0,0 +1,28 @@
+from typing import TYPE_CHECKING, Any, Dict, Optional, TypedDict
+
+from litellm.types.utils import StandardLoggingPayload
+
+if TYPE_CHECKING:
+    from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+else:
+    VertexBase = Any
+
+
+class GCSLoggingConfig(TypedDict):
+    """
+    Internal LiteLLM Config for GCS Bucket logging
+    """
+
+    bucket_name: str
+    vertex_instance: VertexBase
+    path_service_account: Optional[str]
+
+
+class GCSLogQueueItem(TypedDict):
+    """
+    Internal Type, used for queueing logs to be sent to GCS Bucket
+    """
+
+    payload: StandardLoggingPayload
+    kwargs: Dict[str, Any]
+    response_obj: Optional[Any]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py
new file mode 100644
index 00000000..ecf42d8c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py
@@ -0,0 +1,7 @@
+from typing import Optional, TypedDict
+
+
+class LangfuseLoggingConfig(TypedDict):
+    langfuse_secret: Optional[str]
+    langfuse_public_key: Optional[str]
+    langfuse_host: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py
new file mode 100644
index 00000000..48c8e2e0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py
@@ -0,0 +1,61 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Dict, List, NamedTuple, Optional, TypedDict
+
+from pydantic import BaseModel
+
+
+class LangsmithInputs(BaseModel):
+    model: Optional[str] = None
+    messages: Optional[List[Any]] = None
+    stream: Optional[bool] = None
+    call_type: Optional[str] = None
+    litellm_call_id: Optional[str] = None
+    completion_start_time: Optional[datetime] = None
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    custom_llm_provider: Optional[str] = None
+    input: Optional[List[Any]] = None
+    log_event_type: Optional[str] = None
+    original_response: Optional[Any] = None
+    response_cost: Optional[float] = None
+
+    # LiteLLM Virtual Key specific fields
+    user_api_key: Optional[str] = None
+    user_api_key_user_id: Optional[str] = None
+    user_api_key_team_alias: Optional[str] = None
+
+
+class LangsmithCredentialsObject(TypedDict):
+    LANGSMITH_API_KEY: str
+    LANGSMITH_PROJECT: str
+    LANGSMITH_BASE_URL: str
+
+
+class LangsmithQueueObject(TypedDict):
+    """
+    Langsmith Queue Object - this is what gets stored in the internal system queue before flushing to Langsmith
+
+    We need to store:
+        - data[Dict] - data that should get logged on langsmith
+        - credentials[LangsmithCredentialsObject] - credentials to use for logging to langsmith
+    """
+
+    data: Dict
+    credentials: LangsmithCredentialsObject
+
+
+class CredentialsKey(NamedTuple):
+    """Immutable key for grouping credentials"""
+
+    api_key: str
+    project: str
+    base_url: str
+
+
+@dataclass
+class BatchGroup:
+    """Groups credentials with their associated queue objects"""
+
+    credentials: LangsmithCredentialsObject
+    queue_objects: List[LangsmithQueueObject]
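
A sketch of how the Langsmith credential/queue types above fit together (not part of the diff; the key, project, and URL are placeholders):

from litellm.types.integrations.langsmith import (
    BatchGroup,
    CredentialsKey,
    LangsmithCredentialsObject,
    LangsmithQueueObject,
)

creds = LangsmithCredentialsObject(
    LANGSMITH_API_KEY="ls-example-key",
    LANGSMITH_PROJECT="litellm-dev",
    LANGSMITH_BASE_URL="https://api.smith.langchain.com",
)

# Hashable key used to group queued runs that share the same credentials.
key = CredentialsKey(
    api_key=creds["LANGSMITH_API_KEY"],
    project=creds["LANGSMITH_PROJECT"],
    base_url=creds["LANGSMITH_BASE_URL"],
)

queued = LangsmithQueueObject(data={"name": "llm_call", "inputs": {}}, credentials=creds)
batch = BatchGroup(credentials=creds, queue_objects=[queued])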
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py
new file mode 100644
index 00000000..22fd1665
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py
@@ -0,0 +1,62 @@
+from datetime import datetime
+from typing import List, Literal, Optional, TypedDict, Union
+
+from litellm.types.utils import StandardLoggingUserAPIKeyMetadata
+
+
+class LinkDict(TypedDict, total=False):
+    href: str
+    text: Optional[str]
+
+
+class ImageDict(TypedDict, total=False):
+    src: str
+    href: Optional[str]
+    alt: Optional[str]
+
+
+class PagerDutyPayload(TypedDict, total=False):
+    summary: str
+    timestamp: Optional[str]  # ISO 8601 date-time format
+    severity: Literal["critical", "warning", "error", "info"]
+    source: str
+    component: Optional[str]
+    group: Optional[str]
+    class_: Optional[str]  # Using class_ since 'class' is a reserved keyword
+    custom_details: Optional[dict]
+
+
+class PagerDutyRequestBody(TypedDict, total=False):
+    payload: PagerDutyPayload
+    routing_key: str
+    event_action: Literal["trigger", "acknowledge", "resolve"]
+    dedup_key: Optional[str]
+    client: Optional[str]
+    client_url: Optional[str]
+    links: Optional[List[LinkDict]]
+    images: Optional[List[ImageDict]]
+
+
+class AlertingConfig(TypedDict, total=False):
+    """
+    Config for alerting thresholds
+    """
+
+    # Requests failing threshold
+    failure_threshold: int  # Number of requests failing in a window
+    failure_threshold_window_seconds: int  # Window in seconds
+
+    # Requests hanging threshold
+    hanging_threshold_seconds: float  # Number of seconds of waiting for a response before a request is considered hanging
+    hanging_threshold_fails: int  # Number of requests hanging in a window
+    hanging_threshold_window_seconds: int  # Window in seconds
+
+
+class PagerDutyInternalEvent(StandardLoggingUserAPIKeyMetadata, total=False):
+    """Simple structure to hold timestamp and error info."""
+
+    failure_event_type: Literal["failed_response", "hanging_response"]
+    timestamp: datetime
+    error_class: Optional[str]
+    error_code: Optional[str]
+    error_llm_provider: Optional[str]
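
An illustrative event body assembled from the PagerDuty types above (not part of the diff; the routing key and details are placeholders):

from litellm.types.integrations.pagerduty import PagerDutyPayload, PagerDutyRequestBody

event = PagerDutyRequestBody(
    routing_key="example-routing-key",
    event_action="trigger",
    payload=PagerDutyPayload(
        summary="LLM requests failing above threshold",
        severity="error",
        source="litellm-proxy",
        component="litellm",
        custom_details={"failure_threshold": 5},
    ),
)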
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py
new file mode 100644
index 00000000..8fdcce4c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py
@@ -0,0 +1,294 @@
+from enum import Enum
+from typing import Dict, List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+import litellm
+
+REQUESTED_MODEL = "requested_model"
+EXCEPTION_STATUS = "exception_status"
+EXCEPTION_CLASS = "exception_class"
+STATUS_CODE = "status_code"
+EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
+LATENCY_BUCKETS = (
+    0.005,
+    0.00625,
+    0.0125,
+    0.025,
+    0.05,
+    0.1,
+    0.5,
+    1.0,
+    1.5,
+    2.0,
+    2.5,
+    3.0,
+    3.5,
+    4.0,
+    4.5,
+    5.0,
+    5.5,
+    6.0,
+    6.5,
+    7.0,
+    7.5,
+    8.0,
+    8.5,
+    9.0,
+    9.5,
+    10.0,
+    15.0,
+    20.0,
+    25.0,
+    30.0,
+    60.0,
+    120.0,
+    180.0,
+    240.0,
+    300.0,
+    float("inf"),
+)
+
+
+class UserAPIKeyLabelNames(Enum):
+    END_USER = "end_user"
+    USER = "user"
+    USER_EMAIL = "user_email"
+    API_KEY_HASH = "hashed_api_key"
+    API_KEY_ALIAS = "api_key_alias"
+    TEAM = "team"
+    TEAM_ALIAS = "team_alias"
+    REQUESTED_MODEL = REQUESTED_MODEL
+    v1_LITELLM_MODEL_NAME = "model"
+    v2_LITELLM_MODEL_NAME = "litellm_model_name"
+    TAG = "tag"
+    MODEL_ID = "model_id"
+    API_BASE = "api_base"
+    API_PROVIDER = "api_provider"
+    EXCEPTION_STATUS = EXCEPTION_STATUS
+    EXCEPTION_CLASS = EXCEPTION_CLASS
+    STATUS_CODE = "status_code"
+    FALLBACK_MODEL = "fallback_model"
+
+
+DEFINED_PROMETHEUS_METRICS = Literal[
+    "litellm_llm_api_latency_metric",
+    "litellm_request_total_latency_metric",
+    "litellm_proxy_total_requests_metric",
+    "litellm_proxy_failed_requests_metric",
+    "litellm_deployment_latency_per_output_token",
+    "litellm_requests_metric",
+    "litellm_input_tokens_metric",
+    "litellm_output_tokens_metric",
+    "litellm_deployment_successful_fallbacks",
+    "litellm_deployment_failed_fallbacks",
+    "litellm_remaining_team_budget_metric",
+    "litellm_team_max_budget_metric",
+    "litellm_team_budget_remaining_hours_metric",
+    "litellm_remaining_api_key_budget_metric",
+    "litellm_api_key_max_budget_metric",
+    "litellm_api_key_budget_remaining_hours_metric",
+]
+
+
+class PrometheusMetricLabels:
+    litellm_llm_api_latency_metric = [
+        UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.USER.value,
+    ]
+
+    litellm_request_total_latency_metric = [
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+    ]
+
+    litellm_proxy_total_requests_metric = [
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.STATUS_CODE.value,
+        UserAPIKeyLabelNames.USER_EMAIL.value,
+    ]
+
+    litellm_proxy_failed_requests_metric = [
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.EXCEPTION_STATUS.value,
+        UserAPIKeyLabelNames.EXCEPTION_CLASS.value,
+    ]
+
+    litellm_deployment_latency_per_output_token = [
+        UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value,
+        UserAPIKeyLabelNames.MODEL_ID.value,
+        UserAPIKeyLabelNames.API_BASE.value,
+        UserAPIKeyLabelNames.API_PROVIDER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+    ]
+
+    litellm_requests_metric = [
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.USER_EMAIL.value,
+    ]
+
+    litellm_input_tokens_metric = [
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+    ]
+
+    litellm_output_tokens_metric = [
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+    ]
+
+    litellm_deployment_successful_fallbacks = [
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+        UserAPIKeyLabelNames.FALLBACK_MODEL.value,
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.EXCEPTION_STATUS.value,
+        UserAPIKeyLabelNames.EXCEPTION_CLASS.value,
+    ]
+
+    litellm_deployment_failed_fallbacks = litellm_deployment_successful_fallbacks
+
+    litellm_remaining_team_budget_metric = [
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+    ]
+
+    litellm_team_max_budget_metric = [
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+    ]
+
+    litellm_team_budget_remaining_hours_metric = [
+        UserAPIKeyLabelNames.TEAM.value,
+        UserAPIKeyLabelNames.TEAM_ALIAS.value,
+    ]
+
+    litellm_remaining_api_key_budget_metric = [
+        UserAPIKeyLabelNames.API_KEY_HASH.value,
+        UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+    ]
+
+    litellm_api_key_max_budget_metric = litellm_remaining_api_key_budget_metric
+
+    litellm_api_key_budget_remaining_hours_metric = (
+        litellm_remaining_api_key_budget_metric
+    )
+
+    @staticmethod
+    def get_labels(label_name: DEFINED_PROMETHEUS_METRICS) -> List[str]:
+        default_labels = getattr(PrometheusMetricLabels, label_name)
+        return default_labels + [
+            metric.replace(".", "_")
+            for metric in litellm.custom_prometheus_metadata_labels
+        ]
+
+
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class UserAPIKeyLabelValues(BaseModel):
+    end_user: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.END_USER.value)
+    ] = None
+    user: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.USER.value)
+    ] = None
+    user_email: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.USER_EMAIL.value)
+    ] = None
+    hashed_api_key: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_KEY_HASH.value)
+    ] = None
+    api_key_alias: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_KEY_ALIAS.value)
+    ] = None
+    team: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.TEAM.value)
+    ] = None
+    team_alias: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.TEAM_ALIAS.value)
+    ] = None
+    requested_model: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.REQUESTED_MODEL.value)
+    ] = None
+    model: Annotated[
+        Optional[str],
+        Field(..., alias=UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value),
+    ] = None
+    litellm_model_name: Annotated[
+        Optional[str],
+        Field(..., alias=UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value),
+    ] = None
+    tags: List[str] = []
+    custom_metadata_labels: Dict[str, str] = {}
+    model_id: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.MODEL_ID.value)
+    ] = None
+    api_base: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_BASE.value)
+    ] = None
+    api_provider: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_PROVIDER.value)
+    ] = None
+    exception_status: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.EXCEPTION_STATUS.value)
+    ] = None
+    exception_class: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.EXCEPTION_CLASS.value)
+    ] = None
+    status_code: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.STATUS_CODE.value)
+    ] = None
+    fallback_model: Annotated[
+        Optional[str], Field(..., alias=UserAPIKeyLabelNames.FALLBACK_MODEL.value)
+    ] = None
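
A small sketch of the label lookup above (not part of the diff). get_labels returns the metric's default label names plus any custom metadata labels configured on the litellm module:

from litellm.types.integrations.prometheus import (
    PrometheusMetricLabels,
    UserAPIKeyLabelNames,
)

labels = PrometheusMetricLabels.get_labels("litellm_requests_metric")
assert UserAPIKeyLabelNames.TEAM.value in labels
assert UserAPIKeyLabelNames.USER_EMAIL.value in labels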
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py
new file mode 100644
index 00000000..9019b098
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py
@@ -0,0 +1,186 @@
+import os
+from datetime import datetime as dt
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Set, TypedDict
+
+from pydantic import BaseModel, Field
+
+from litellm.types.utils import LiteLLMPydanticObjectBase
+
+
+class BaseOutageModel(TypedDict):
+    alerts: List[int]
+    minor_alert_sent: bool
+    major_alert_sent: bool
+    last_updated_at: float
+
+
+class OutageModel(BaseOutageModel):
+    model_id: str
+
+
+class ProviderRegionOutageModel(BaseOutageModel):
+    provider_region_id: str
+    deployment_ids: Set[str]
+
+
+# we use this for the email header, please send a test email if you change this. verify it looks good on email
+LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+LITELLM_SUPPORT_CONTACT = "support@berri.ai"
+
+
+class SlackAlertingArgsEnum(Enum):
+    daily_report_frequency = 12 * 60 * 60
+    report_check_interval = 5 * 60
+    budget_alert_ttl = 24 * 60 * 60
+    outage_alert_ttl = 1 * 60
+    region_outage_alert_ttl = 1 * 60
+    minor_outage_alert_threshold = 1 * 5
+    major_outage_alert_threshold = 1 * 10
+    max_outage_alert_list_size = 1 * 10
+
+
+class SlackAlertingArgs(LiteLLMPydanticObjectBase):
+    daily_report_frequency: int = Field(
+        default=int(
+            os.getenv(
+                "SLACK_DAILY_REPORT_FREQUENCY",
+                int(SlackAlertingArgsEnum.daily_report_frequency.value),
+            )
+        ),
+        description="Frequency of receiving deployment latency/failure reports. Default is 12hours. Value is in seconds.",
+    )
+    report_check_interval: int = Field(
+        default=SlackAlertingArgsEnum.report_check_interval.value,
+        description="Frequency of checking cache if report should be sent. Background process. Default is once per hour. Value is in seconds.",
+    )  # 5 minutes
+    budget_alert_ttl: int = Field(
+        default=SlackAlertingArgsEnum.budget_alert_ttl.value,
+        description="Cache ttl for budgets alerts. Prevents spamming same alert, each time budget is crossed. Value is in seconds.",
+    )  # 24 hours
+    outage_alert_ttl: int = Field(
+        default=SlackAlertingArgsEnum.outage_alert_ttl.value,
+        description="Cache ttl for model outage alerts. Sets time-window for errors. Default is 1 minute. Value is in seconds.",
+    )  # 1 minute ttl
+    region_outage_alert_ttl: int = Field(
+        default=SlackAlertingArgsEnum.region_outage_alert_ttl.value,
+        description="Cache ttl for provider-region based outage alerts. Alert sent if 2+ models in same region report errors. Sets time-window for errors. Default is 1 minute. Value is in seconds.",
+    )  # 1 minute ttl
+    minor_outage_alert_threshold: int = Field(
+        default=SlackAlertingArgsEnum.minor_outage_alert_threshold.value,
+        description="The number of errors that count as a model/region minor outage. ('400' error code is not counted).",
+    )
+    major_outage_alert_threshold: int = Field(
+        default=SlackAlertingArgsEnum.major_outage_alert_threshold.value,
+        description="The number of errors that count as a model/region major outage. ('400' error code is not counted).",
+    )
+    max_outage_alert_list_size: int = Field(
+        default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
+        description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
+    )  # prevent memory leak
+    log_to_console: bool = Field(
+        default=False,
+        description="If true, the alerting payload will be printed to the console.",
+    )
+
+
+class DeploymentMetrics(LiteLLMPydanticObjectBase):
+    """
+    Metrics per deployment, stored in cache
+
+    Used for daily reporting
+    """
+
+    id: str
+    """id of deployment in router model list"""
+
+    failed_request: bool
+    """did it fail the request?"""
+
+    latency_per_output_token: Optional[float]
+    """latency/output token of deployment"""
+
+    updated_at: dt
+    """Current time of deployment being updated"""
+
+
+class SlackAlertingCacheKeys(Enum):
+    """
+    Enum for deployment daily metrics keys - {deployment_id}:{enum}
+    """
+
+    failed_requests_key = "failed_requests_daily_metrics"
+    latency_key = "latency_daily_metrics"
+    report_sent_key = "daily_metrics_report_sent"
+
+
+class AlertType(str, Enum):
+    """
+    Enum for alert types and management event types
+    """
+
+    # LLM-related alerts
+    llm_exceptions = "llm_exceptions"
+    llm_too_slow = "llm_too_slow"
+    llm_requests_hanging = "llm_requests_hanging"
+
+    # Budget and spend alerts
+    budget_alerts = "budget_alerts"
+    spend_reports = "spend_reports"
+    failed_tracking_spend = "failed_tracking_spend"
+
+    # Database alerts
+    db_exceptions = "db_exceptions"
+
+    # Report alerts
+    daily_reports = "daily_reports"
+
+    # Deployment alerts
+    cooldown_deployment = "cooldown_deployment"
+    new_model_added = "new_model_added"
+
+    # Outage alerts
+    outage_alerts = "outage_alerts"
+    region_outage_alerts = "region_outage_alerts"
+
+    # Fallback alerts
+    fallback_reports = "fallback_reports"
+
+    # Virtual Key Events
+    new_virtual_key_created = "new_virtual_key_created"
+    virtual_key_updated = "virtual_key_updated"
+    virtual_key_deleted = "virtual_key_deleted"
+
+    # Team Events
+    new_team_created = "new_team_created"
+    team_updated = "team_updated"
+    team_deleted = "team_deleted"
+
+    # Internal User Events
+    new_internal_user_created = "new_internal_user_created"
+    internal_user_updated = "internal_user_updated"
+    internal_user_deleted = "internal_user_deleted"
+
+
+DEFAULT_ALERT_TYPES: List[AlertType] = [
+    # LLM related alerts
+    AlertType.llm_exceptions,
+    AlertType.llm_too_slow,
+    AlertType.llm_requests_hanging,
+    # Budget and spend alerts
+    AlertType.budget_alerts,
+    AlertType.spend_reports,
+    AlertType.failed_tracking_spend,
+    # Database alerts
+    AlertType.db_exceptions,
+    # Report alerts
+    AlertType.daily_reports,
+    # Deployment alerts
+    AlertType.cooldown_deployment,
+    AlertType.new_model_added,
+    # Outage alerts
+    AlertType.outage_alerts,
+    AlertType.region_outage_alerts,
+    # Fallback alerts
+    AlertType.fallback_reports,
+]
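
A sketch of configuring the alerting args above (not part of the diff; the overridden values are arbitrary):

from litellm.types.integrations.slack_alerting import (
    DEFAULT_ALERT_TYPES,
    AlertType,
    SlackAlertingArgs,
)

args = SlackAlertingArgs(
    daily_report_frequency=6 * 60 * 60,  # every 6 hours instead of the 12h default
    log_to_console=True,
)

assert AlertType.budget_alerts in DEFAULT_ALERT_TYPES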
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py
new file mode 100644
index 00000000..367b2421
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py
@@ -0,0 +1,366 @@
+from typing import Any, Dict, Iterable, List, Optional, Union
+
+from pydantic import BaseModel, validator
+from typing_extensions import Literal, Required, TypedDict
+
+from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock
+
+
+class AnthropicMessagesToolChoice(TypedDict, total=False):
+    type: Required[Literal["auto", "any", "tool"]]
+    name: str
+    disable_parallel_tool_use: bool  # default is false
+
+
+class AnthropicInputSchema(TypedDict, total=False):
+    type: Optional[str]
+    properties: Optional[dict]
+    additionalProperties: Optional[bool]
+
+
+class AnthropicMessagesTool(TypedDict, total=False):
+    name: Required[str]
+    description: str
+    input_schema: Optional[AnthropicInputSchema]
+    type: Literal["custom"]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class AnthropicComputerTool(TypedDict, total=False):
+    display_width_px: Required[int]
+    display_height_px: Required[int]
+    display_number: int
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+    type: Required[str]
+    name: Required[str]
+
+
+class AnthropicHostedTools(TypedDict, total=False):  # for bash_tool and text_editor
+    type: Required[str]
+    name: Required[str]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+AllAnthropicToolsValues = Union[
+    AnthropicComputerTool, AnthropicHostedTools, AnthropicMessagesTool
+]
+
+
+class AnthropicMessagesTextParam(TypedDict, total=False):
+    type: Required[Literal["text"]]
+    text: Required[str]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class AnthropicMessagesToolUseParam(TypedDict):
+    type: Required[Literal["tool_use"]]
+    id: str
+    name: str
+    input: dict
+
+
+AnthropicMessagesAssistantMessageValues = Union[
+    AnthropicMessagesTextParam,
+    AnthropicMessagesToolUseParam,
+    ChatCompletionThinkingBlock,
+]
+
+
+class AnthopicMessagesAssistantMessageParam(TypedDict, total=False):
+    content: Required[Union[str, Iterable[AnthropicMessagesAssistantMessageValues]]]
+    """The contents of the assistant message."""
+
+    role: Required[Literal["assistant"]]
+    """The role of the messages author, in this case `assistant`."""
+
+    name: str
+    """An optional name for the participant.
+
+    Provides the model information to differentiate between participants of the same
+    role.
+    """
+
+
+class AnthropicContentParamSource(TypedDict):
+    type: Literal["base64"]
+    media_type: str
+    data: str
+
+
+class AnthropicMessagesImageParam(TypedDict, total=False):
+    type: Required[Literal["image"]]
+    source: Required[AnthropicContentParamSource]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class CitationsObject(TypedDict):
+    enabled: bool
+
+
+class AnthropicMessagesDocumentParam(TypedDict, total=False):
+    type: Required[Literal["document"]]
+    source: Required[AnthropicContentParamSource]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+    title: str
+    context: str
+    citations: Optional[CitationsObject]
+
+
+class AnthropicMessagesToolResultContent(TypedDict):
+    type: Literal["text"]
+    text: str
+
+
+class AnthropicMessagesToolResultParam(TypedDict, total=False):
+    type: Required[Literal["tool_result"]]
+    tool_use_id: Required[str]
+    is_error: bool
+    content: Union[
+        str,
+        Iterable[
+            Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]
+        ],
+    ]
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+AnthropicMessagesUserMessageValues = Union[
+    AnthropicMessagesTextParam,
+    AnthropicMessagesImageParam,
+    AnthropicMessagesToolResultParam,
+    AnthropicMessagesDocumentParam,
+]
+
+
+class AnthropicMessagesUserMessageParam(TypedDict, total=False):
+    role: Required[Literal["user"]]
+    content: Required[Union[str, Iterable[AnthropicMessagesUserMessageValues]]]
+
+
+class AnthropicMetadata(TypedDict, total=False):
+    user_id: str
+
+
+class AnthropicSystemMessageContent(TypedDict, total=False):
+    type: str
+    text: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+AllAnthropicMessageValues = Union[
+    AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
+]
+
+
+class AnthropicMessageRequestBase(TypedDict, total=False):
+    messages: Required[List[AllAnthropicMessageValues]]
+    max_tokens: Required[int]
+    metadata: AnthropicMetadata
+    stop_sequences: List[str]
+    stream: bool
+    system: Union[str, List]
+    temperature: float
+    tool_choice: AnthropicMessagesToolChoice
+    tools: List[AllAnthropicToolsValues]
+    top_k: int
+    top_p: float
+
+
+class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
+    model: Required[str]
+    # litellm param - used for tracking litellm proxy metadata in the request
+    litellm_metadata: dict
+
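+# Example (illustrative sketch): a minimal AnthropicMessagesRequest payload built
+# from the TypedDicts above; field values are placeholders.
+#
+#   request: AnthropicMessagesRequest = {
+#       "model": "claude-3-sonnet-20240229",
+#       "max_tokens": 256,
+#       "messages": [{"role": "user", "content": "Hello"}],
+#   }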
+
+class ContentTextBlockDelta(TypedDict):
+    """
+    'delta': {'type': 'text_delta', 'text': 'Hello'}
+    """
+
+    type: str
+    text: str
+
+
+class ContentCitationsBlockDelta(TypedDict):
+    type: Literal["citations"]
+    citation: dict
+
+
+class ContentJsonBlockDelta(TypedDict):
+    """
+    "delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}}
+    """
+
+    type: str
+    partial_json: str
+
+
+class ContentBlockDelta(TypedDict):
+    type: Literal["content_block_delta"]
+    index: int
+    delta: Union[
+        ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta
+    ]
+
+
+class ContentBlockStop(TypedDict):
+    type: Literal["content_block_stop"]
+    index: int
+
+
+class ToolUseBlock(TypedDict):
+    """
+    "content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}
+    """
+
+    id: str
+
+    input: dict
+
+    name: str
+
+    type: Literal["tool_use"]
+
+
+class TextBlock(TypedDict):
+    text: str
+
+    type: Literal["text"]
+
+
+class ContentBlockStart(TypedDict):
+    """
+    event: content_block_start
+    data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
+    """
+
+    type: str
+    index: int
+    content_block: Union[ToolUseBlock, TextBlock]
+
+
+class MessageDelta(TypedDict, total=False):
+    stop_reason: Optional[str]
+
+
+class UsageDelta(TypedDict, total=False):
+    input_tokens: int
+    output_tokens: int
+
+
+class MessageBlockDelta(TypedDict):
+    """
+    Anthropic
+    chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+    """
+
+    type: Literal["message_delta"]
+    delta: MessageDelta
+    usage: UsageDelta
+
+
+class MessageChunk(TypedDict, total=False):
+    id: str
+    type: str
+    role: str
+    model: str
+    content: List
+    stop_reason: Optional[str]
+    stop_sequence: Optional[str]
+    usage: UsageDelta
+
+
+class MessageStartBlock(TypedDict):
+    """
+    Anthropic
+    chunk = {
+        "type": "message_start",
+        "message": {
+            "id": "msg_vrtx_011PqREFEMzd3REdCoUFAmdG",
+            "type": "message",
+            "role": "assistant",
+            "model": "claude-3-sonnet-20240229",
+            "content": [],
+            "stop_reason": null,
+            "stop_sequence": null,
+            "usage": {
+                "input_tokens": 270,
+                "output_tokens": 1
+            }
+        }
+    }
+    """
+
+    type: Literal["message_start"]
+    message: MessageChunk
+
+
+class AnthropicResponseContentBlockText(BaseModel):
+    type: Literal["text"]
+    text: str
+
+
+class AnthropicResponseContentBlockToolUse(BaseModel):
+    type: Literal["tool_use"]
+    id: str
+    name: str
+    input: dict
+
+
+class AnthropicResponseUsageBlock(BaseModel):
+    input_tokens: int
+    output_tokens: int
+
+
+AnthropicFinishReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
+
+
+class AnthropicResponse(BaseModel):
+    id: str
+    """Unique object identifier."""
+
+    type: Literal["message"]
+    """For Messages, this is always "message"."""
+
+    role: Literal["assistant"]
+    """Conversational role of the generated message. This will always be "assistant"."""
+
+    content: List[
+        Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
+    ]
+    """Content generated by the model."""
+
+    model: str
+    """The model that handled the request."""
+
+    stop_reason: Optional[AnthropicFinishReason]
+    """The reason that we stopped."""
+
+    stop_sequence: Optional[str]
+    """Which custom stop sequence was generated, if any."""
+
+    usage: AnthropicResponseUsageBlock
+    """Billing and rate-limit usage."""
+
+
+from .openai import ChatCompletionUsageBlock
+
+
+class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
+    cache_creation_input_tokens: int
+    cache_read_input_tokens: int
+
+
+ANTHROPIC_API_HEADERS = {
+    "anthropic-version",
+    "anthropic-beta",
+}
+
+ANTHROPIC_API_ONLY_HEADERS = {  # fails if calling anthropic on vertex ai / bedrock
+    "anthropic-beta",
+}
+
+
+class AnthropicThinkingParam(TypedDict, total=False):
+    type: Literal["enabled"]
+    budget_tokens: int
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py
new file mode 100644
index 00000000..2d597aef
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py
@@ -0,0 +1,17 @@
+from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+from typing_extensions import Required, TypedDict
+
+
+class ImageEmbeddingInput(TypedDict, total=False):
+    image: Required[str]
+    text: str
+
+
+EncodingFormat = Literal["base64", "binary", "float", "int8", "ubinary", "uint8"]
+
+
+class ImageEmbeddingRequest(TypedDict, total=False):
+    input: Required[List[ImageEmbeddingInput]]
+    dimensions: int
+    encoding_format: EncodingFormat
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py
new file mode 100644
index 00000000..57fb04c8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py
@@ -0,0 +1,503 @@
+import json
+from typing import Any, List, Literal, Optional, TypedDict, Union
+
+from typing_extensions import (
+    TYPE_CHECKING,
+    Protocol,
+    Required,
+    Self,
+    TypeGuard,
+    get_origin,
+    override,
+    runtime_checkable,
+)
+
+from .openai import ChatCompletionToolCallChunk
+
+
+class CachePointBlock(TypedDict, total=False):
+    type: Literal["default"]
+
+
+class SystemContentBlock(TypedDict, total=False):
+    text: str
+    cachePoint: CachePointBlock
+
+
+class SourceBlock(TypedDict):
+    bytes: Optional[str]  # base 64 encoded string
+
+
+BedrockImageTypes = Literal["png", "jpeg", "gif", "webp"]
+
+
+class ImageBlock(TypedDict):
+    format: Union[BedrockImageTypes, str]
+    source: SourceBlock
+
+
+BedrockDocumentTypes = Literal[
+    "pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"
+]
+
+
+class DocumentBlock(TypedDict):
+    format: Union[BedrockDocumentTypes, str]
+    source: SourceBlock
+    name: str
+
+
+class ToolResultContentBlock(TypedDict, total=False):
+    image: ImageBlock
+    document: DocumentBlock
+    json: dict
+    text: str
+
+
+class ToolResultBlock(TypedDict, total=False):
+    content: Required[List[ToolResultContentBlock]]
+    toolUseId: Required[str]
+    status: Literal["success", "error"]
+
+
+class ToolUseBlock(TypedDict):
+    input: dict
+    name: str
+    toolUseId: str
+
+
+class BedrockConverseReasoningTextBlock(TypedDict, total=False):
+    text: Required[str]
+    signature: str
+
+
+class BedrockConverseReasoningContentBlock(TypedDict, total=False):
+    reasoningText: BedrockConverseReasoningTextBlock
+    redactedContent: str
+
+
+class BedrockConverseReasoningContentBlockDelta(TypedDict, total=False):
+    signature: str
+    redactedContent: str
+    text: str
+
+
+class ContentBlock(TypedDict, total=False):
+    text: str
+    image: ImageBlock
+    document: DocumentBlock
+    toolResult: ToolResultBlock
+    toolUse: ToolUseBlock
+    cachePoint: CachePointBlock
+    reasoningContent: BedrockConverseReasoningContentBlock
+
+
+class MessageBlock(TypedDict):
+    content: List[ContentBlock]
+    role: Literal["user", "assistant"]
+
+
+class ConverseMetricsBlock(TypedDict):
+    latencyMs: float  # time in ms
+
+
+class ConverseResponseOutputBlock(TypedDict):
+    message: Optional[MessageBlock]
+
+
+class ConverseTokenUsageBlock(TypedDict):
+    inputTokens: int
+    outputTokens: int
+    totalTokens: int
+    cacheReadInputTokenCount: int
+    cacheReadInputTokens: int
+    cacheWriteInputTokenCount: int
+    cacheWriteInputTokens: int
+
+
+class ConverseResponseBlock(TypedDict):
+    additionalModelResponseFields: dict
+    metrics: ConverseMetricsBlock
+    output: ConverseResponseOutputBlock
+    stopReason: (
+        str  # end_turn | tool_use | max_tokens | stop_sequence | content_filtered
+    )
+    usage: ConverseTokenUsageBlock
+
+
+class ToolInputSchemaBlock(TypedDict):
+    json: Optional[dict]
+
+
+class ToolSpecBlock(TypedDict, total=False):
+    inputSchema: Required[ToolInputSchemaBlock]
+    name: Required[str]
+    description: str
+
+
+class ToolBlock(TypedDict):
+    toolSpec: Optional[ToolSpecBlock]
+
+
+class SpecificToolChoiceBlock(TypedDict):
+    name: str
+
+
+class ToolChoiceValuesBlock(TypedDict, total=False):
+    any: dict
+    auto: dict
+    tool: SpecificToolChoiceBlock
+
+
+class ToolConfigBlock(TypedDict, total=False):
+    tools: Required[List[ToolBlock]]
+    toolChoice: Union[str, ToolChoiceValuesBlock]
+
+
+class GuardrailConfigBlock(TypedDict, total=False):
+    guardrailIdentifier: str
+    guardrailVersion: str
+    trace: Literal["enabled", "disabled"]
+
+
+class InferenceConfig(TypedDict, total=False):
+    maxTokens: int
+    stopSequences: List[str]
+    temperature: float
+    topP: float
+    topK: int
+
+
+class ToolBlockDeltaEvent(TypedDict):
+    input: str
+
+
+class ToolUseBlockStartEvent(TypedDict):
+    name: str
+    toolUseId: str
+
+
+class ContentBlockStartEvent(TypedDict, total=False):
+    toolUse: Optional[ToolUseBlockStartEvent]
+
+
+class ContentBlockDeltaEvent(TypedDict, total=False):
+    """
+    Either 'text' or 'toolUse' will be specified for a Converse API streaming response.
+    """
+
+    text: str
+    toolUse: ToolBlockDeltaEvent
+    reasoningContent: BedrockConverseReasoningContentBlockDelta
+
+
+class CommonRequestObject(
+    TypedDict, total=False
+):  # common request object across sync + async flows
+    additionalModelRequestFields: dict
+    additionalModelResponseFieldPaths: List[str]
+    inferenceConfig: InferenceConfig
+    system: List[SystemContentBlock]
+    toolConfig: ToolConfigBlock
+    guardrailConfig: Optional[GuardrailConfigBlock]
+
+
+class RequestObject(CommonRequestObject, total=False):
+    messages: Required[List[MessageBlock]]
+
+
+class BedrockInvokeNovaRequest(TypedDict, total=False):
+    """
+    Request object for sending `nova` requests to `/bedrock/invoke/`
+    """
+
+    messages: List[MessageBlock]
+    inferenceConfig: InferenceConfig
+    system: List[SystemContentBlock]
+    toolConfig: ToolConfigBlock
+    guardrailConfig: Optional[GuardrailConfigBlock]
+
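+# Example (illustrative sketch): a minimal BedrockInvokeNovaRequest body using only
+# field names defined above; values are placeholders.
+#
+#   nova_request: BedrockInvokeNovaRequest = {
+#       "messages": [{"role": "user", "content": [{"text": "Hello"}]}],
+#       "inferenceConfig": {"maxTokens": 256, "temperature": 0.2},
+#   }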
+
+class GenericStreamingChunk(TypedDict):
+    text: Required[str]
+    tool_use: Optional[ChatCompletionToolCallChunk]
+    is_finished: Required[bool]
+    finish_reason: Required[str]
+    usage: Optional[ConverseTokenUsageBlock]
+    index: int
+
+
+class Document(TypedDict):
+    title: str
+    snippet: str
+
+
+class ServerSentEvent:
+    def __init__(
+        self,
+        *,
+        event: Optional[str] = None,
+        data: Optional[str] = None,
+        id: Optional[str] = None,
+        retry: Optional[int] = None,
+    ) -> None:
+        if data is None:
+            data = ""
+
+        self._id = id
+        self._data = data
+        self._event = event or None
+        self._retry = retry
+
+    @property
+    def event(self) -> Optional[str]:
+        return self._event
+
+    @property
+    def id(self) -> Optional[str]:
+        return self._id
+
+    @property
+    def retry(self) -> Optional[int]:
+        return self._retry
+
+    @property
+    def data(self) -> str:
+        return self._data
+
+    def json(self) -> Any:
+        return json.loads(self.data)
+
+    @override
+    def __repr__(self) -> str:
+        return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})"
+
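+# Example (illustrative sketch): decoding an SSE payload with the helper above.
+#
+#   event = ServerSentEvent(event="message_delta", data='{"output_tokens": 10}')
+#   event.json()  # -> {"output_tokens": 10}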
+
+COHERE_EMBEDDING_INPUT_TYPES = Literal[
+    "search_document", "search_query", "classification", "clustering", "image"
+]
+
+
+class CohereEmbeddingRequest(TypedDict, total=False):
+    texts: List[str]
+    images: List[str]
+    input_type: Required[COHERE_EMBEDDING_INPUT_TYPES]
+    truncate: Literal["NONE", "START", "END"]
+    embedding_types: Literal["float", "int8", "uint8", "binary", "ubinary"]
+
+
+class CohereEmbeddingRequestWithModel(CohereEmbeddingRequest):
+    model: Required[str]
+
+
+class CohereEmbeddingResponse(TypedDict):
+    embeddings: List[List[float]]
+    id: str
+    response_type: Literal["embedding_floats"]
+    texts: List[str]
+
+
+class AmazonTitanV2EmbeddingRequest(TypedDict):
+    inputText: str
+    dimensions: int
+    normalize: bool
+
+
+class AmazonTitanV2EmbeddingResponse(TypedDict):
+    embedding: List[float]
+    inputTextTokenCount: int
+
+
+class AmazonTitanG1EmbeddingRequest(TypedDict):
+    inputText: str
+
+
+class AmazonTitanG1EmbeddingResponse(TypedDict):
+    embedding: List[float]
+    inputTextTokenCount: int
+
+
+class AmazonTitanMultimodalEmbeddingConfig(TypedDict):
+    outputEmbeddingLength: Literal[256, 384, 1024]
+
+
+class AmazonTitanMultimodalEmbeddingRequest(TypedDict, total=False):
+    inputText: str
+    inputImage: str
+    embeddingConfig: AmazonTitanMultimodalEmbeddingConfig
+
+
+class AmazonTitanMultimodalEmbeddingResponse(TypedDict):
+    embedding: List[float]
+    inputTextTokenCount: int
+    message: str  # Specifies any errors that occur during generation.
+
+
+AmazonEmbeddingRequest = Union[
+    AmazonTitanMultimodalEmbeddingRequest,
+    AmazonTitanV2EmbeddingRequest,
+    AmazonTitanG1EmbeddingRequest,
+]
+
+
+class AmazonStability3TextToImageRequest(TypedDict, total=False):
+    """
+    Request for Amazon Stability 3 Text to Image API
+
+    Ref here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-diffusion-3-text-image.html
+    """
+
+    prompt: str
+    aspect_ratio: Literal[
+        "16:9", "1:1", "21:9", "2:3", "3:2", "4:5", "5:4", "9:16", "9:21"
+    ]
+    mode: Literal["image-to-image", "text-to-image"]
+    output_format: Literal["JPEG", "PNG"]
+    seed: int
+    negative_prompt: str
+
+
+class AmazonStability3TextToImageResponse(TypedDict, total=False):
+    """
+    Response for Amazon Stability 3 Text to Image API
+
+    Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-diffusion-3-text-image.html
+    """
+
+    images: List[str]
+    seeds: List[str]
+    finish_reasons: List[str]
+
+
+class AmazonNovaCanvasRequestBase(TypedDict, total=False):
+    """
+    Base class for Amazon Nova Canvas API requests
+    """
+
+    pass
+
+
+class AmazonNovaCanvasImageGenerationConfig(TypedDict, total=False):
+    """
+    Config for Amazon Nova Canvas Text to Image API
+
+    Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
+    """
+
+    cfgScale: int
+    seed: int
+    quality: Literal["standard", "premium"]
+    width: int
+    height: int
+    numberOfImages: int
+
+
+class AmazonNovaCanvasTextToImageParams(TypedDict, total=False):
+    """
+    Params for Amazon Nova Canvas Text to Image API
+    """
+
+    text: str
+    negativeText: str
+    controlStrength: float
+    controlMode: Literal["CANNY_EDIT", "SEGMENTATION"]
+    conditionImage: str
+
+
+class AmazonNovaCanvasTextToImageRequest(
+    AmazonNovaCanvasRequestBase, TypedDict, total=False
+):
+    """
+    Request for Amazon Nova Canvas Text to Image API
+
+    Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
+    """
+
+    textToImageParams: AmazonNovaCanvasTextToImageParams
+    taskType: Literal["TEXT_IMAGE"]
+    imageGenerationConfig: AmazonNovaCanvasImageGenerationConfig
+
+
+class AmazonNovaCanvasTextToImageResponse(TypedDict, total=False):
+    """
+    Response for Amazon Nova Canvas Text to Image API
+
+    Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
+    """
+
+    images: List[str]
+
+
+if TYPE_CHECKING:
+    from botocore.awsrequest import AWSPreparedRequest
+else:
+    AWSPreparedRequest = Any
+
+from pydantic import BaseModel
+
+
+class BedrockPreparedRequest(TypedDict):
+    """
+    Internal/Helper class for preparing the request for bedrock image generation
+    """
+
+    endpoint_url: str
+    prepped: AWSPreparedRequest
+    body: bytes
+    data: dict
+
+
+class BedrockRerankTextQuery(TypedDict):
+    text: str
+
+
+class BedrockRerankQuery(TypedDict):
+    textQuery: BedrockRerankTextQuery
+    type: Literal["TEXT"]
+
+
+class BedrockRerankModelConfiguration(TypedDict, total=False):
+    modelArn: Required[str]
+    modelConfiguration: dict
+
+
+class BedrockRerankBedrockRerankingConfiguration(TypedDict):
+    modelConfiguration: BedrockRerankModelConfiguration
+    numberOfResults: int
+
+
+class BedrockRerankConfiguration(TypedDict):
+    bedrockRerankingConfiguration: BedrockRerankBedrockRerankingConfiguration
+    type: Literal["BEDROCK_RERANKING_MODEL"]
+
+
+class BedrockRerankTextDocument(TypedDict, total=False):
+    text: str
+
+
+class BedrockRerankInlineDocumentSource(TypedDict, total=False):
+    jsonDocument: dict
+    textDocument: BedrockRerankTextDocument
+    type: Literal["TEXT", "JSON"]
+
+
+class BedrockRerankSource(TypedDict):
+    inlineDocumentSource: BedrockRerankInlineDocumentSource
+    type: Literal["INLINE"]
+
+
+class BedrockRerankRequest(TypedDict):
+    """
+    Request for Bedrock Rerank API
+    """
+
+    queries: List[BedrockRerankQuery]
+    rerankingConfiguration: BedrockRerankConfiguration
+    sources: List[BedrockRerankSource]
+
+
+class AmazonDeepSeekR1StreamingResponse(TypedDict):
+    generation: str
+    generation_token_count: int
+    stop_reason: Optional[str]
+    prompt_token_count: int
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py
new file mode 100644
index 00000000..7112a242
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py
@@ -0,0 +1,46 @@
+from typing import Iterable, List, Optional, Union
+
+from typing_extensions import Literal, Required, TypedDict
+
+
+class CallObject(TypedDict):
+    name: str
+    parameters: dict
+
+
+class ToolResultObject(TypedDict):
+    call: CallObject
+    outputs: List[dict]
+
+
+class ChatHistoryToolResult(TypedDict, total=False):
+    role: Required[Literal["TOOL"]]
+    tool_results: List[ToolResultObject]
+
+
+class ToolCallObject(TypedDict):
+    name: str
+    parameters: dict
+
+
+class ChatHistoryUser(TypedDict, total=False):
+    role: Required[Literal["USER"]]
+    message: str
+    tool_calls: List[ToolCallObject]
+
+
+class ChatHistorySystem(TypedDict, total=False):
+    role: Required[Literal["SYSTEM"]]
+    message: str
+    tool_calls: List[ToolCallObject]
+
+
+class ChatHistoryChatBot(TypedDict, total=False):
+    role: Required[Literal["CHATBOT"]]
+    message: str
+    tool_calls: List[ToolCallObject]
+
+
+ChatHistory = List[
+    Union[ChatHistorySystem, ChatHistoryChatBot, ChatHistoryUser, ChatHistoryToolResult]
+]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py
new file mode 100644
index 00000000..5eec187d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py
@@ -0,0 +1,24 @@
+import ssl
+from enum import Enum
+from typing import Union
+
+
+class httpxSpecialProvider(str, Enum):
+    """
+    Httpx Clients can be created for these litellm internal providers
+
+    Example:
+    - langsmith logging would need a custom async httpx client
+    - pass through endpoint would need a custom async httpx client
+    """
+
+    LoggingCallback = "logging_callback"
+    GuardrailCallback = "guardrail_callback"
+    Caching = "caching"
+    Oauth2Check = "oauth2_check"
+    SecretManager = "secret_manager"
+    PassThroughEndpoint = "pass_through_endpoint"
+    PromptFactory = "prompt_factory"
+
+
+VerifyTypes = Union[str, bool, ssl.SSLContext]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py
new file mode 100644
index 00000000..d5499a41
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py
@@ -0,0 +1,10 @@
+from typing import List
+
+from typing_extensions import Dict, Required, TypedDict, override
+
+from litellm.llms.custom_llm import CustomLLM
+
+
+class CustomLLMItem(TypedDict):
+    provider: str
+    custom_handler: CustomLLM
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py
new file mode 100644
index 00000000..770e05fe
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py
@@ -0,0 +1,21 @@
+from typing import TypedDict, Any, Union, Optional
+import json
+from typing_extensions import (
+    Self,
+    Protocol,
+    TypeGuard,
+    override,
+    get_origin,
+    runtime_checkable,
+    Required,
+)
+from pydantic import BaseModel
+
+
+class GenericStreamingChunk(TypedDict, total=False):
+    text: Required[str]
+    is_finished: Required[bool]
+    finish_reason: Required[Optional[str]]
+    logprobs: Optional[BaseModel]
+    original_chunk: Optional[BaseModel]
+    usage: Optional[BaseModel]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py
new file mode 100644
index 00000000..e9563a9a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py
@@ -0,0 +1,12 @@
+from typing import List, Literal, Optional, TypedDict, Union
+
+
+class FunctionCall(TypedDict):
+    name: Optional[str]
+    arguments: Optional[Union[str, dict]]
+
+
+class MistralToolCallMessage(TypedDict):
+    id: Optional[str]
+    type: Literal["function"]
+    function: Optional[FunctionCall]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py
new file mode 100644
index 00000000..9d71904c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py
@@ -0,0 +1,29 @@
+import json
+from typing import Any, List, Optional, TypedDict, Union
+
+from pydantic import BaseModel
+from typing_extensions import (
+    Protocol,
+    Required,
+    Self,
+    TypeGuard,
+    get_origin,
+    override,
+    runtime_checkable,
+)
+
+
+class OllamaToolCallFunction(
+    TypedDict
+):  # follows - https://github.com/ollama/ollama/blob/6bd8a4b0a1ac15d5718f52bbe1cd56f827beb694/api/types.go#L148
+    name: str
+    arguments: dict
+
+
+class OllamaToolCall(TypedDict):
+    function: OllamaToolCallFunction
+
+
+class OllamaVisionModelObject(TypedDict):
+    prompt: str
+    images: List[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py
new file mode 100644
index 00000000..4b0be9d5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py
@@ -0,0 +1,1040 @@
+from enum import Enum
+from os import PathLike
+from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
+
+import httpx
+from openai._legacy_response import (
+    HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
+)
+from openai.lib.streaming._assistants import (
+    AssistantEventHandler,
+    AssistantStreamManager,
+    AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
+)
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, EmbeddingCreateParams, FileObject
+from openai.types.beta.assistant import Assistant
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from openai.types.chat import ChatCompletionChunk
+from openai.types.chat.chat_completion_audio_param import ChatCompletionAudioParam
+from openai.types.chat.chat_completion_content_part_input_audio_param import (
+    ChatCompletionContentPartInputAudioParam,
+)
+from openai.types.chat.chat_completion_modality import ChatCompletionModality
+from openai.types.chat.chat_completion_prediction_content_param import (
+    ChatCompletionPredictionContentParam,
+)
+from openai.types.embedding import Embedding as OpenAIEmbedding
+from openai.types.fine_tuning.fine_tuning_job import FineTuningJob
+from openai.types.responses.response import (
+    IncompleteDetails,
+    Response,
+    ResponseOutputItem,
+    ResponseTextConfig,
+    Tool,
+    ToolChoice,
+)
+from openai.types.responses.response_create_params import (
+    Reasoning,
+    ResponseIncludable,
+    ResponseInputParam,
+    ResponseTextConfigParam,
+    ToolChoice,
+    ToolParam,
+)
+from pydantic import BaseModel, Discriminator, Field, PrivateAttr
+from typing_extensions import Annotated, Dict, Required, TypedDict, override
+
+FileContent = Union[IO[bytes], bytes, PathLike]
+
+FileTypes = Union[
+    # file (or bytes)
+    FileContent,
+    # (filename, file (or bytes))
+    Tuple[Optional[str], FileContent],
+    # (filename, file (or bytes), content_type)
+    Tuple[Optional[str], FileContent, Optional[str]],
+    # (filename, file (or bytes), content_type, headers)
+    Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
+]
+
+
+EmbeddingInput = Union[str, List[str]]
+
+
+class HttpxBinaryResponseContent(_HttpxBinaryResponseContent):
+    _hidden_params: dict = {}
+    pass
+
+
+class NotGiven:
+    """
+    A sentinel singleton class used to distinguish omitted keyword arguments
+    from those passed in with the value None (which may have different behavior).
+
+    For example:
+
+    ```py
+    def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
+        ...
+
+
+    get(timeout=1)  # 1s timeout
+    get(timeout=None)  # No timeout
+    get()  # Default timeout behavior, which may not be statically known at the method definition.
+    ```
+    """
+
+    def __bool__(self) -> Literal[False]:
+        return False
+
+    @override
+    def __repr__(self) -> str:
+        return "NOT_GIVEN"
+
+
+NOT_GIVEN = NotGiven()
+
+
+class ToolResourcesCodeInterpreter(TypedDict, total=False):
+    file_ids: List[str]
+    """
+    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made
+    available to the `code_interpreter` tool. There can be a maximum of 20 files
+    associated with the tool.
+    """
+
+
+class ToolResourcesFileSearchVectorStore(TypedDict, total=False):
+    file_ids: List[str]
+    """
+    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to
+    add to the vector store. There can be a maximum of 10000 files in a vector
+    store.
+    """
+
+    metadata: object
+    """Set of 16 key-value pairs that can be attached to a vector store.
+
+    This can be useful for storing additional information about the vector store in
+    a structured format. Keys can be a maximum of 64 characters long and values can
+    be a maximum of 512 characters long.
+    """
+
+
+class ToolResourcesFileSearch(TypedDict, total=False):
+    vector_store_ids: List[str]
+    """
+    The
+    [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+    attached to this thread. There can be a maximum of 1 vector store attached to
+    the thread.
+    """
+
+    vector_stores: Iterable[ToolResourcesFileSearchVectorStore]
+    """
+    A helper to create a
+    [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+    with file_ids and attach it to this thread. There can be a maximum of 1 vector
+    store attached to the thread.
+    """
+
+
+class OpenAICreateThreadParamsToolResources(TypedDict, total=False):
+    code_interpreter: ToolResourcesCodeInterpreter
+
+    file_search: ToolResourcesFileSearch
+
+
+class FileSearchToolParam(TypedDict, total=False):
+    type: Required[Literal["file_search"]]
+    """The type of tool being defined: `file_search`"""
+
+
+class CodeInterpreterToolParam(TypedDict, total=False):
+    type: Required[Literal["code_interpreter"]]
+    """The type of tool being defined: `code_interpreter`"""
+
+
+AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam]
+
+
+class Attachment(TypedDict, total=False):
+    file_id: str
+    """The ID of the file to attach to the message."""
+
+    tools: Iterable[AttachmentTool]
+    """The tools to add this file to."""
+
+
+class ImageFileObject(TypedDict):
+    file_id: Required[str]
+    detail: Optional[str]
+
+
+class ImageURLObject(TypedDict):
+    url: Required[str]
+    detail: Optional[str]
+
+
+class MessageContentTextObject(TypedDict):
+    type: Required[Literal["text"]]
+    text: str
+
+
+class MessageContentImageFileObject(TypedDict):
+    type: Literal["image_file"]
+    image_file: ImageFileObject
+
+
+class MessageContentImageURLObject(TypedDict):
+    type: Required[str]
+    image_url: ImageURLObject
+
+
+class MessageData(TypedDict):
+    role: Literal["user", "assistant"]
+    content: Union[
+        str,
+        List[
+            Union[
+                MessageContentTextObject,
+                MessageContentImageFileObject,
+                MessageContentImageURLObject,
+            ]
+        ],
+    ]
+    attachments: Optional[List[Attachment]]
+    metadata: Optional[dict]
+
+
+class Thread(BaseModel):
+    id: str
+    """The identifier, which can be referenced in API endpoints."""
+
+    created_at: int
+    """The Unix timestamp (in seconds) for when the thread was created."""
+
+    metadata: Optional[object] = None
+    """Set of 16 key-value pairs that can be attached to an object.
+
+    This can be useful for storing additional information about the object in a
+    structured format. Keys can be a maximum of 64 characters long and values can be
+    a maximum of 512 characters long.
+    """
+
+    object: Literal["thread"]
+    """The object type, which is always `thread`."""
+
+
+# OpenAI Files Types
+class CreateFileRequest(TypedDict, total=False):
+    """
+    CreateFileRequest
+    Used by Assistants API, Batches API, and Fine-Tunes API
+
+    Required Params:
+        file: FileTypes
+        purpose: Literal['assistants', 'batch', 'fine-tune']
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file: FileTypes
+    purpose: Literal["assistants", "batch", "fine-tune"]
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class FileContentRequest(TypedDict, total=False):
+    """
+    FileContentRequest
+    Used by Assistants API, Batches API, and Fine-Tunes API
+
+    Required Params:
+        file_id: str
+
+    Optional Params:
+        extra_headers: Optional[Dict[str, str]]
+        extra_body: Optional[Dict[str, str]] = None
+        timeout: Optional[float] = None
+    """
+
+    file_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+# OpenAI Batches Types
+class CreateBatchRequest(TypedDict, total=False):
+    """
+    CreateBatchRequest
+    """
+
+    completion_window: Literal["24h"]
+    endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"]
+    input_file_id: str
+    metadata: Optional[Dict[str, str]]
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class RetrieveBatchRequest(TypedDict, total=False):
+    """
+    RetrieveBatchRequest
+    """
+
+    batch_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class CancelBatchRequest(TypedDict, total=False):
+    """
+    CancelBatchRequest
+    """
+
+    batch_id: str
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+class ListBatchRequest(TypedDict, total=False):
+    """
+    ListBatchRequest - List your organization's batches
+    Calls https://api.openai.com/v1/batches
+    """
+
+    after: Union[str, NotGiven]
+    limit: Union[int, NotGiven]
+    extra_headers: Optional[Dict[str, str]]
+    extra_body: Optional[Dict[str, str]]
+    timeout: Optional[float]
+
+
+BatchJobStatus = Literal[
+    "validating",
+    "failed",
+    "in_progress",
+    "finalizing",
+    "completed",
+    "expired",
+    "cancelling",
+    "cancelled",
+]
+
+
+class ChatCompletionAudioDelta(TypedDict, total=False):
+    data: str
+    transcript: str
+    expires_at: int
+    id: str
+
+
+class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
+    name: Optional[str]
+    arguments: str
+
+
+class ChatCompletionAssistantToolCall(TypedDict):
+    id: Optional[str]
+    type: Literal["function"]
+    function: ChatCompletionToolCallFunctionChunk
+
+
+class ChatCompletionToolCallChunk(TypedDict):  # result of /chat/completions call
+    id: Optional[str]
+    type: Literal["function"]
+    function: ChatCompletionToolCallFunctionChunk
+    index: int
+
+
+class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
+    id: str
+    type: Literal["function"]
+    function: ChatCompletionToolCallFunctionChunk
+    index: int
+
+
+class ChatCompletionCachedContent(TypedDict):
+    type: Literal["ephemeral"]
+
+
+class ChatCompletionThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["thinking"]]
+    thinking: str
+    signature: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class OpenAIChatCompletionTextObject(TypedDict):
+    type: Literal["text"]
+    text: str
+
+
+class ChatCompletionTextObject(
+    OpenAIChatCompletionTextObject, total=False
+):  # litellm wrapper on top of openai object for handling cached content
+    cache_control: ChatCompletionCachedContent
+
+
+class ChatCompletionImageUrlObject(TypedDict, total=False):
+    url: Required[str]
+    detail: str
+    format: str
+
+
+class ChatCompletionImageObject(TypedDict):
+    type: Literal["image_url"]
+    image_url: Union[str, ChatCompletionImageUrlObject]
+
+
+class ChatCompletionVideoUrlObject(TypedDict, total=False):
+    url: Required[str]
+    detail: str
+
+
+class ChatCompletionVideoObject(TypedDict):
+    type: Literal["video_url"]
+    video_url: Union[str, ChatCompletionVideoUrlObject]
+
+
+class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam):
+    pass
+
+
+class DocumentObject(TypedDict):
+    type: Literal["text"]
+    media_type: str
+    data: str
+
+
+class CitationsObject(TypedDict):
+    enabled: bool
+
+
+class ChatCompletionDocumentObject(TypedDict):
+    type: Literal["document"]
+    source: DocumentObject
+    title: str
+    context: str
+    citations: Optional[CitationsObject]
+
+
+class ChatCompletionFileObjectFile(TypedDict):
+    file_data: Optional[str]
+    file_id: Optional[str]
+    filename: Optional[str]
+
+
+class ChatCompletionFileObject(TypedDict):
+    type: Literal["file"]
+    file: ChatCompletionFileObjectFile
+
+
+OpenAIMessageContentListBlock = Union[
+    ChatCompletionTextObject,
+    ChatCompletionImageObject,
+    ChatCompletionAudioObject,
+    ChatCompletionDocumentObject,
+    ChatCompletionVideoObject,
+    ChatCompletionFileObject,
+]
+
+OpenAIMessageContent = Union[
+    str,
+    Iterable[OpenAIMessageContentListBlock],
+]
+
+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+
+
+class OpenAIChatCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: OpenAIMessageContent
+
+
+class OpenAITextCompletionUserMessage(TypedDict):
+    role: Literal["user"]
+    content: AllPromptValues
+
+
+class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
+class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
+    role: Required[Literal["assistant"]]
+    content: Optional[
+        Union[
+            str, Iterable[Union[ChatCompletionTextObject, ChatCompletionThinkingBlock]]
+        ]
+    ]
+    name: Optional[str]
+    tool_calls: Optional[List[ChatCompletionAssistantToolCall]]
+    function_call: Optional[ChatCompletionToolCallFunctionChunk]
+
+
+class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+
+
+class ChatCompletionToolMessage(TypedDict):
+    role: Literal["tool"]
+    content: Union[str, Iterable[ChatCompletionTextObject]]
+    tool_call_id: str
+
+
+class ChatCompletionFunctionMessage(TypedDict):
+    role: Literal["function"]
+    content: Optional[Union[str, Iterable[ChatCompletionTextObject]]]
+    name: str
+    tool_call_id: Optional[str]
+
+
+class OpenAIChatCompletionSystemMessage(TypedDict, total=False):
+    role: Required[Literal["system"]]
+    content: Required[Union[str, List]]
+    name: str
+
+
+class OpenAIChatCompletionDeveloperMessage(TypedDict, total=False):
+    role: Required[Literal["developer"]]
+    content: Required[Union[str, List]]
+    name: str
+
+
+class ChatCompletionSystemMessage(OpenAIChatCompletionSystemMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
+class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
+ValidUserMessageContentTypes = [
+    "text",
+    "image_url",
+    "input_audio",
+    "document",
+    "video_url",
+    "file",
+]  # used for validating user messages. Prevent users from accidentally sending anthropic messages.
+
+AllMessageValues = Union[
+    ChatCompletionUserMessage,
+    ChatCompletionAssistantMessage,
+    ChatCompletionToolMessage,
+    ChatCompletionSystemMessage,
+    ChatCompletionFunctionMessage,
+    ChatCompletionDeveloperMessage,
+]
+
+
+class ChatCompletionToolChoiceFunctionParam(TypedDict):
+    name: str
+
+
+class ChatCompletionToolChoiceObjectParam(TypedDict):
+    type: Literal["function"]
+    function: ChatCompletionToolChoiceFunctionParam
+
+
+ChatCompletionToolChoiceStringValues = Literal["none", "auto", "required"]
+
+ChatCompletionToolChoiceValues = Union[
+    ChatCompletionToolChoiceStringValues, ChatCompletionToolChoiceObjectParam
+]
+
+
+class ChatCompletionToolParamFunctionChunk(TypedDict, total=False):
+    name: Required[str]
+    description: str
+    parameters: dict
+
+
+class OpenAIChatCompletionToolParam(TypedDict):
+    type: Union[Literal["function"], str]
+    function: ChatCompletionToolParamFunctionChunk
+
+
+class ChatCompletionToolParam(OpenAIChatCompletionToolParam, total=False):
+    cache_control: ChatCompletionCachedContent
+
+
+class Function(TypedDict, total=False):
+    name: Required[str]
+    """The name of the function to call."""
+
+
+class ChatCompletionNamedToolChoiceParam(TypedDict, total=False):
+    function: Required[Function]
+
+    type: Required[Literal["function"]]
+    """The type of the tool. Currently, only `function` is supported."""
+
+
+class ChatCompletionRequest(TypedDict, total=False):
+    model: Required[str]
+    messages: Required[List[AllMessageValues]]
+    frequency_penalty: float
+    logit_bias: dict
+    logprobs: bool
+    top_logprobs: int
+    max_tokens: int
+    n: int
+    presence_penalty: float
+    response_format: dict
+    seed: int
+    service_tier: str
+    stop: Union[str, List[str]]
+    stream_options: dict
+    temperature: float
+    top_p: float
+    tools: List[ChatCompletionToolParam]
+    tool_choice: ChatCompletionToolChoiceValues
+    parallel_tool_calls: bool
+    function_call: Union[str, dict]
+    functions: List
+    user: str
+    metadata: dict  # litellm specific param
+
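+# Example (illustrative sketch): a minimal ChatCompletionRequest dict using the
+# message types above; values are placeholders.
+#
+#   req: ChatCompletionRequest = {
+#       "model": "gpt-3.5-turbo",
+#       "messages": [{"role": "user", "content": "Hello"}],
+#       "temperature": 0.7,
+#   }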
+
+class ChatCompletionDeltaChunk(TypedDict, total=False):
+    content: Optional[str]
+    tool_calls: List[ChatCompletionDeltaToolCallChunk]
+    role: str
+
+
+ChatCompletionAssistantContentValue = (
+    str  # keep as var, used in stream_chunk_builder as well
+)
+
+
+class ChatCompletionResponseMessage(TypedDict, total=False):
+    content: Optional[ChatCompletionAssistantContentValue]
+    tool_calls: Optional[List[ChatCompletionToolCallChunk]]
+    role: Literal["assistant"]
+    function_call: Optional[ChatCompletionToolCallFunctionChunk]
+    provider_specific_fields: Optional[dict]
+    reasoning_content: Optional[str]
+    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+
+
+class ChatCompletionUsageBlock(TypedDict):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+class OpenAIChatCompletionChunk(ChatCompletionChunk):
+    def __init__(self, **kwargs):
+        # Set the 'object' kwarg to 'chat.completion.chunk'
+        kwargs["object"] = "chat.completion.chunk"
+        super().__init__(**kwargs)
+
+
+class Hyperparameters(BaseModel):
+    batch_size: Optional[Union[str, int]] = None  # "Number of examples in each batch."
+    learning_rate_multiplier: Optional[Union[str, float]] = (
+        None  # Scaling factor for the learning rate
+    )
+    n_epochs: Optional[Union[str, int]] = (
+        None  # "The number of epochs to train the model for"
+    )
+
+
+class FineTuningJobCreate(BaseModel):
+    """
+    FineTuningJobCreate - Create a fine-tuning job
+
+    Example Request
+    ```
+    {
+        "model": "gpt-3.5-turbo",
+        "training_file": "file-abc123",
+        "hyperparameters": {
+            "batch_size": "auto",
+            "learning_rate_multiplier": 0.1,
+            "n_epochs": 3
+        },
+        "suffix": "custom-model-name",
+        "validation_file": "file-xyz789",
+        "integrations": ["slack"],
+        "seed": 42
+    }
+    ```
+    """
+
+    model: str  # "The name of the model to fine-tune."
+    training_file: str  # "The ID of an uploaded file that contains training data."
+    hyperparameters: Optional[Hyperparameters] = (
+        None  # "The hyperparameters used for the fine-tuning job."
+    )
+    suffix: Optional[str] = (
+        None  # "A string of up to 18 characters that will be added to your fine-tuned model name."
+    )
+    validation_file: Optional[str] = (
+        None  # "The ID of an uploaded file that contains validation data."
+    )
+    integrations: Optional[List[str]] = (
+        None  # "A list of integrations to enable for your fine-tuning job."
+    )
+    seed: Optional[int] = None  # "The seed controls the reproducibility of the job."
+
+
+class LiteLLMFineTuningJobCreate(FineTuningJobCreate):
+    custom_llm_provider: Literal["openai", "azure", "vertex_ai"]
+
+    class Config:
+        extra = "allow"  # This allows the model to accept additional fields
+
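+# Example (illustrative sketch): constructing the litellm fine-tuning request model
+# above; field values are placeholders.
+#
+#   job = LiteLLMFineTuningJobCreate(
+#       model="gpt-3.5-turbo",
+#       training_file="file-abc123",
+#       custom_llm_provider="openai",
+#   )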
+
+AllEmbeddingInputValues = Union[str, List[str], List[int], List[List[int]]]
+
+OpenAIAudioTranscriptionOptionalParams = Literal[
+    "language", "prompt", "temperature", "response_format", "timestamp_granularities"
+]
+
+
+OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"]
+
+
+class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
+    """TypedDict for Optional parameters supported by the responses API."""
+
+    include: Optional[List[ResponseIncludable]]
+    instructions: Optional[str]
+    max_output_tokens: Optional[int]
+    metadata: Optional[Dict[str, Any]]
+    parallel_tool_calls: Optional[bool]
+    previous_response_id: Optional[str]
+    reasoning: Optional[Reasoning]
+    store: Optional[bool]
+    stream: Optional[bool]
+    temperature: Optional[float]
+    text: Optional[ResponseTextConfigParam]
+    tool_choice: Optional[ToolChoice]
+    tools: Optional[Iterable[ToolParam]]
+    top_p: Optional[float]
+    truncation: Optional[Literal["auto", "disabled"]]
+    user: Optional[str]
+
+
+class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
+    """TypedDict for request parameters supported by the responses API."""
+
+    input: Union[str, ResponseInputParam]
+    model: str
+
+
+class BaseLiteLLMOpenAIResponseObject(BaseModel):
+    def __getitem__(self, key):
+        return self.__dict__[key]
+
+    def get(self, key, default=None):
+        return self.__dict__.get(key, default)
+
+    def __contains__(self, key):
+        return key in self.__dict__
+
+    def items(self):
+        return self.__dict__.items()
+
+
+class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
+    reasoning_tokens: int
+
+    model_config = {"extra": "allow"}
+
+
+class ResponseAPIUsage(BaseLiteLLMOpenAIResponseObject):
+    input_tokens: int
+    """The number of input tokens."""
+
+    output_tokens: int
+    """The number of output tokens."""
+
+    output_tokens_details: Optional[OutputTokensDetails]
+    """A detailed breakdown of the output tokens."""
+
+    total_tokens: int
+    """The total number of tokens used."""
+
+    model_config = {"extra": "allow"}
+
+
+class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
+    id: str
+    created_at: float
+    error: Optional[dict]
+    incomplete_details: Optional[IncompleteDetails]
+    instructions: Optional[str]
+    metadata: Optional[Dict]
+    model: Optional[str]
+    object: Optional[str]
+    output: List[ResponseOutputItem]
+    parallel_tool_calls: bool
+    temperature: Optional[float]
+    tool_choice: ToolChoice
+    tools: List[Tool]
+    top_p: Optional[float]
+    max_output_tokens: Optional[int]
+    previous_response_id: Optional[str]
+    reasoning: Optional[Reasoning]
+    status: Optional[str]
+    text: Optional[ResponseTextConfig]
+    truncation: Optional[Literal["auto", "disabled"]]
+    usage: Optional[ResponseAPIUsage]
+    user: Optional[str]
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class ResponsesAPIStreamEvents(str, Enum):
+    """
+    Enum representing all supported OpenAI stream event types for the Responses API.
+
+    Inherits from str to allow direct string comparison and usage as dictionary keys.
+    """
+
+    # Response lifecycle events
+    RESPONSE_CREATED = "response.created"
+    RESPONSE_IN_PROGRESS = "response.in_progress"
+    RESPONSE_COMPLETED = "response.completed"
+    RESPONSE_FAILED = "response.failed"
+    RESPONSE_INCOMPLETE = "response.incomplete"
+
+    # Output item events
+    OUTPUT_ITEM_ADDED = "response.output_item.added"
+    OUTPUT_ITEM_DONE = "response.output_item.done"
+
+    # Content part events
+    CONTENT_PART_ADDED = "response.content_part.added"
+    CONTENT_PART_DONE = "response.content_part.done"
+
+    # Output text events
+    OUTPUT_TEXT_DELTA = "response.output_text.delta"
+    OUTPUT_TEXT_ANNOTATION_ADDED = "response.output_text.annotation.added"
+    OUTPUT_TEXT_DONE = "response.output_text.done"
+
+    # Refusal events
+    REFUSAL_DELTA = "response.refusal.delta"
+    REFUSAL_DONE = "response.refusal.done"
+
+    # Function call events
+    FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
+    FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"
+
+    # File search events
+    FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress"
+    FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching"
+    FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed"
+
+    # Web search events
+    WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress"
+    WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching"
+    WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed"
+
+    # Error event
+    ERROR = "error"
+
+
+class ResponseCreatedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED]
+    response: ResponsesAPIResponse
+
+
+class ResponseInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS]
+    response: ResponsesAPIResponse
+
+
+class ResponseCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED]
+    response: ResponsesAPIResponse
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class ResponseFailedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED]
+    response: ResponsesAPIResponse
+
+
+class ResponseIncompleteEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE]
+    response: ResponsesAPIResponse
+
+
+class OutputItemAddedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED]
+    output_index: int
+    item: dict
+
+
+class OutputItemDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE]
+    output_index: int
+    item: dict
+
+
+class ContentPartAddedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED]
+    item_id: str
+    output_index: int
+    content_index: int
+    part: dict
+
+
+class ContentPartDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE]
+    item_id: str
+    output_index: int
+    content_index: int
+    part: dict
+
+
+class OutputTextDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA]
+    item_id: str
+    output_index: int
+    content_index: int
+    delta: str
+
+
+class OutputTextAnnotationAddedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED]
+    item_id: str
+    output_index: int
+    content_index: int
+    annotation_index: int
+    annotation: dict
+
+
+class OutputTextDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE]
+    item_id: str
+    output_index: int
+    content_index: int
+    text: str
+
+
+class RefusalDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA]
+    item_id: str
+    output_index: int
+    content_index: int
+    delta: str
+
+
+class RefusalDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE]
+    item_id: str
+    output_index: int
+    content_index: int
+    refusal: str
+
+
+class FunctionCallArgumentsDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA]
+    item_id: str
+    output_index: int
+    delta: str
+
+
+class FunctionCallArgumentsDoneEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE]
+    item_id: str
+    output_index: int
+    arguments: str
+
+
+class FileSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS]
+    output_index: int
+    item_id: str
+
+
+class FileSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING]
+    output_index: int
+    item_id: str
+
+
+class FileSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED]
+    output_index: int
+    item_id: str
+
+
+class WebSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS]
+    output_index: int
+    item_id: str
+
+
+class WebSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING]
+    output_index: int
+    item_id: str
+
+
+class WebSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED]
+    output_index: int
+    item_id: str
+
+
+class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
+    type: Literal[ResponsesAPIStreamEvents.ERROR]
+    code: Optional[str]
+    message: str
+    param: Optional[str]
+
+
+# Union type for all possible streaming responses
+ResponsesAPIStreamingResponse = Annotated[
+    Union[
+        ResponseCreatedEvent,
+        ResponseInProgressEvent,
+        ResponseCompletedEvent,
+        ResponseFailedEvent,
+        ResponseIncompleteEvent,
+        OutputItemAddedEvent,
+        OutputItemDoneEvent,
+        ContentPartAddedEvent,
+        ContentPartDoneEvent,
+        OutputTextDeltaEvent,
+        OutputTextAnnotationAddedEvent,
+        OutputTextDoneEvent,
+        RefusalDeltaEvent,
+        RefusalDoneEvent,
+        FunctionCallArgumentsDeltaEvent,
+        FunctionCallArgumentsDoneEvent,
+        FileSearchCallInProgressEvent,
+        FileSearchCallSearchingEvent,
+        FileSearchCallCompletedEvent,
+        WebSearchCallInProgressEvent,
+        WebSearchCallSearchingEvent,
+        WebSearchCallCompletedEvent,
+        ErrorEvent,
+    ],
+    Discriminator("type"),
+]
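+# Example (illustrative sketch): the discriminated union above can be materialized
+# with pydantic's TypeAdapter, which dispatches on the "type" field (assumes
+# pydantic v2 semantics).
+#
+#   from pydantic import TypeAdapter
+#
+#   adapter = TypeAdapter(ResponsesAPIStreamingResponse)
+#   event = adapter.validate_python(
+#       {
+#           "type": "response.output_text.delta",
+#           "item_id": "msg_123",
+#           "output_index": 0,
+#           "content_index": 0,
+#           "delta": "Hello",
+#       }
+#   )
+#   isinstance(event, OutputTextDeltaEvent)  # -> True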
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py
new file mode 100644
index 00000000..f781af88
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py
@@ -0,0 +1,19 @@
+import json
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
+
+from typing_extensions import (
+    Protocol,
+    Required,
+    Self,
+    TypeGuard,
+    get_origin,
+    override,
+    runtime_checkable,
+)
+
+
+class InfinityRerankResult(TypedDict):
+    index: int
+    relevance_score: float
+    document: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py
new file mode 100644
index 00000000..7024909a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py
@@ -0,0 +1,486 @@
+import json
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
+
+from typing_extensions import (
+    Protocol,
+    Required,
+    Self,
+    TypeGuard,
+    get_origin,
+    override,
+    runtime_checkable,
+)
+
+
+class FunctionResponse(TypedDict):
+    name: str
+    response: Optional[dict]
+
+
+class FunctionCall(TypedDict):
+    name: str
+    args: Optional[dict]
+
+
+class FileDataType(TypedDict):
+    mime_type: str
+    file_uri: str  # the cloud storage uri where this file is stored
+
+
+class BlobType(TypedDict):
+    mime_type: Required[str]
+    data: Required[str]
+
+
+class PartType(TypedDict, total=False):
+    text: str
+    inline_data: BlobType
+    file_data: FileDataType
+    function_call: FunctionCall
+    function_response: FunctionResponse
+
+
+class HttpxFunctionCall(TypedDict):
+    name: str
+    args: dict
+
+
+class HttpxExecutableCode(TypedDict):
+    code: str
+    language: str
+
+
+class HttpxCodeExecutionResult(TypedDict):
+    outcome: str
+    output: str
+
+
+class HttpxPartType(TypedDict, total=False):
+    text: str
+    inline_data: BlobType
+    file_data: FileDataType
+    functionCall: HttpxFunctionCall
+    function_response: FunctionResponse
+    executableCode: HttpxExecutableCode
+    codeExecutionResult: HttpxCodeExecutionResult
+
+
+class HttpxContentType(TypedDict, total=False):
+    role: Literal["user", "model"]
+    parts: List[HttpxPartType]
+
+
+class ContentType(TypedDict, total=False):
+    role: Literal["user", "model"]
+    parts: Required[List[PartType]]
+
+
+class SystemInstructions(TypedDict):
+    parts: Required[List[PartType]]
+
+
+class Schema(TypedDict, total=False):
+    type: Literal["STRING", "INTEGER", "BOOLEAN", "NUMBER", "ARRAY", "OBJECT"]
+    description: str
+    enum: List[str]
+    items: List["Schema"]
+    properties: "Schema"
+    required: List[str]
+    nullable: bool
+
+
+class FunctionDeclaration(TypedDict, total=False):
+    name: Required[str]
+    description: str
+    parameters: Union[Schema, dict]
+    response: Schema
+
+
+class VertexAISearch(TypedDict, total=False):
+    datastore: Required[str]
+
+
+class Retrieval(TypedDict):
+    source: VertexAISearch
+
+
+class FunctionCallingConfig(TypedDict, total=False):
+    mode: Literal["ANY", "AUTO", "NONE"]
+    allowed_function_names: List[str]
+
+
+HarmCategory = Literal[
+    "HARM_CATEGORY_UNSPECIFIED",
+    "HARM_CATEGORY_HATE_SPEECH",
+    "HARM_CATEGORY_DANGEROUS_CONTENT",
+    "HARM_CATEGORY_HARASSMENT",
+    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+]
+HarmBlockThreshold = Literal[
+    "HARM_BLOCK_THRESHOLD_UNSPECIFIED",
+    "BLOCK_LOW_AND_ABOVE",
+    "BLOCK_MEDIUM_AND_ABOVE",
+    "BLOCK_ONLY_HIGH",
+    "BLOCK_NONE",
+]
+HarmBlockMethod = Literal["HARM_BLOCK_METHOD_UNSPECIFIED", "SEVERITY", "PROBABILITY"]
+
+HarmProbability = Literal[
+    "HARM_PROBABILITY_UNSPECIFIED", "NEGLIGIBLE", "LOW", "MEDIUM", "HIGH"
+]
+
+HarmSeverity = Literal[
+    "HARM_SEVERITY_UNSPECIFIED",
+    "HARM_SEVERITY_NEGLIGIBLE",
+    "HARM_SEVERITY_LOW",
+    "HARM_SEVERITY_MEDIUM",
+    "HARM_SEVERITY_HIGH",
+]
+
+
+class SafetSettingsConfig(TypedDict, total=False):
+    category: HarmCategory
+    threshold: HarmBlockThreshold
+    max_influential_terms: int
+    method: HarmBlockMethod
+
+
+class GenerationConfig(TypedDict, total=False):
+    temperature: float
+    top_p: float
+    top_k: float
+    candidate_count: int
+    max_output_tokens: int
+    stop_sequences: List[str]
+    presence_penalty: float
+    frequency_penalty: float
+    response_mime_type: Literal["text/plain", "application/json"]
+    response_schema: dict
+    seed: int
+    responseLogprobs: bool
+    logprobs: int
+
+
+class Tools(TypedDict, total=False):
+    function_declarations: List[FunctionDeclaration]
+    googleSearch: dict
+    googleSearchRetrieval: dict
+    code_execution: dict
+    retrieval: Retrieval
+
+
+class ToolConfig(TypedDict):
+    functionCallingConfig: FunctionCallingConfig
+
+
+class TTL(TypedDict, total=False):
+    seconds: Required[float]
+    nano: float
+
+
+class UsageMetadata(TypedDict, total=False):
+    promptTokenCount: int
+    totalTokenCount: int
+    candidatesTokenCount: int
+    cachedContentTokenCount: int
+
+
+class CachedContent(TypedDict, total=False):
+    ttl: TTL
+    expire_time: str
+    contents: List[ContentType]
+    tools: List[Tools]
+    createTime: str  # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z"
+    updateTime: str  # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z"
+    usageMetadata: UsageMetadata
+    expireTime: str  # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z"
+    name: str
+    displayName: str
+    model: str
+    systemInstruction: ContentType
+    toolConfig: ToolConfig
+
+
+class RequestBody(TypedDict, total=False):
+    contents: Required[List[ContentType]]
+    system_instruction: SystemInstructions
+    tools: Tools
+    toolConfig: ToolConfig
+    safetySettings: List[SafetSettingsConfig]
+    generationConfig: GenerationConfig
+    cachedContent: str
+
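+# Illustrative sketch (not part of the upstream module): a minimal generateContent
+# RequestBody assembled from the TypedDicts above. The prompt text, function name, and
+# schema are made-up placeholders.
+#
+#     request: RequestBody = {
+#         "contents": [
+#             {"role": "user", "parts": [{"text": "What's the weather in Paris?"}]}
+#         ],
+#         "tools": {
+#             "function_declarations": [
+#                 {
+#                     "name": "get_weather",
+#                     "description": "Look up current weather",
+#                     "parameters": {"type": "OBJECT", "required": ["city"]},
+#                 }
+#             ]
+#         },
+#         "generationConfig": {"temperature": 0.2, "max_output_tokens": 256},
+#     }
+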
+
+class CachedContentRequestBody(TypedDict, total=False):
+    contents: Required[List[ContentType]]
+    system_instruction: SystemInstructions
+    tools: Tools
+    toolConfig: ToolConfig
+    model: Required[str]  # Format: models/{model}
+    ttl: str  # ending in 's' - Example: "3.5s".
+    displayName: str
+
+
+class CachedContentListAllResponseBody(TypedDict, total=False):
+    cachedContents: List[CachedContent]
+    nextPageToken: str
+
+
+class SafetyRatings(TypedDict):
+    category: HarmCategory
+    probability: HarmProbability
+    probabilityScore: int
+    severity: HarmSeverity
+    blocked: bool
+
+
+class Date(TypedDict):
+    year: int
+    month: int
+    date: int
+
+
+class Citation(TypedDict):
+    startIndex: int
+    endIndex: int
+    uri: str
+    title: str
+    license: str
+    publicationDate: Date
+
+
+class CitationMetadata(TypedDict):
+    citations: List[Citation]
+
+
+class SearchEntryPoint(TypedDict, total=False):
+    renderedContent: str
+    sdkBlob: str
+
+
+class GroundingMetadata(TypedDict, total=False):
+    webSearchQueries: List[str]
+    searchEntryPoint: SearchEntryPoint
+    groundingAttributions: List[dict]
+
+
+class LogprobsCandidate(TypedDict):
+    token: str
+    tokenId: int
+    logProbability: float
+
+
+class LogprobsTopCandidate(TypedDict):
+    candidates: List[LogprobsCandidate]
+
+
+class LogprobsResult(TypedDict, total=False):
+    topCandidates: List[LogprobsTopCandidate]
+    chosenCandidates: List[LogprobsCandidate]
+
+
+class Candidates(TypedDict, total=False):
+    index: int
+    content: HttpxContentType
+    finishReason: Literal[
+        "FINISH_REASON_UNSPECIFIED",
+        "STOP",
+        "MAX_TOKENS",
+        "SAFETY",
+        "RECITATION",
+        "OTHER",
+        "BLOCKLIST",
+        "PROHIBITED_CONTENT",
+        "SPII",
+    ]
+    safetyRatings: List[SafetyRatings]
+    citationMetadata: CitationMetadata
+    groundingMetadata: GroundingMetadata
+    finishMessage: str
+    logprobsResult: LogprobsResult
+
+
+class PromptFeedback(TypedDict):
+    blockReason: str
+    safetyRatings: List[SafetyRatings]
+    blockReasonMessage: str
+
+
+class GenerateContentResponseBody(TypedDict, total=False):
+    candidates: List[Candidates]
+    promptFeedback: PromptFeedback
+    usageMetadata: Required[UsageMetadata]
+
+
+class FineTuneHyperparameters(TypedDict, total=False):
+    epoch_count: Optional[int]
+    learning_rate_multiplier: Optional[float]
+    adapter_size: Optional[
+        Literal[
+            "ADAPTER_SIZE_UNSPECIFIED",
+            "ADAPTER_SIZE_ONE",
+            "ADAPTER_SIZE_FOUR",
+            "ADAPTER_SIZE_EIGHT",
+            "ADAPTER_SIZE_SIXTEEN",
+        ]
+    ]
+
+
+class FineTunesupervisedTuningSpec(TypedDict, total=False):
+    training_dataset_uri: str
+    validation_dataset: Optional[str]
+    tuned_model_display_name: Optional[str]
+    hyperParameters: Optional[FineTuneHyperparameters]
+
+
+class FineTuneJobCreate(TypedDict, total=False):
+    baseModel: str
+    supervisedTuningSpec: FineTunesupervisedTuningSpec
+    tunedModelDisplayName: Optional[str]
+
+
+class ResponseSupervisedTuningSpec(TypedDict, total=False):
+    trainingDatasetUri: Optional[str]
+    hyperParameters: Optional[FineTuneHyperparameters]
+
+
+class ResponseTuningJob(TypedDict):
+    name: Optional[str]
+    tunedModelDisplayName: Optional[str]
+    baseModel: Optional[str]
+    supervisedTuningSpec: Optional[ResponseSupervisedTuningSpec]
+    state: Optional[
+        Literal[
+            "JOB_STATE_PENDING",
+            "JOB_STATE_RUNNING",
+            "JOB_STATE_SUCCEEDED",
+            "JOB_STATE_FAILED",
+            "JOB_STATE_CANCELLED",
+        ]
+    ]
+    createTime: Optional[str]
+    updateTime: Optional[str]
+
+
+class InstanceVideo(TypedDict, total=False):
+    gcsUri: str
+    videoSegmentConfig: Tuple[float, float, float]
+
+
+class InstanceImage(TypedDict, total=False):
+    gcsUri: Optional[str]
+    bytesBase64Encoded: Optional[str]
+    mimeType: Optional[str]
+
+
+class Instance(TypedDict, total=False):
+    text: str
+    image: InstanceImage
+    video: InstanceVideo
+
+
+class VertexMultimodalEmbeddingRequest(TypedDict, total=False):
+    instances: List[Instance]
+
+
+class VideoEmbedding(TypedDict):
+    startOffsetSec: int
+    endOffsetSec: int
+    embedding: List[float]
+
+
+class MultimodalPrediction(TypedDict, total=False):
+    textEmbedding: List[float]
+    imageEmbedding: List[float]
+    videoEmbeddings: List[VideoEmbedding]
+
+
+class MultimodalPredictions(TypedDict, total=False):
+    predictions: List[MultimodalPrediction]
+
+
+class VertexAICachedContentResponseObject(TypedDict):
+    name: str
+    model: str
+
+
+class TaskTypeEnum(Enum):
+    TASK_TYPE_UNSPECIFIED = "TASK_TYPE_UNSPECIFIED"
+    RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
+    RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
+    SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
+    CLASSIFICATION = "CLASSIFICATION"
+    CLUSTERING = "CLUSTERING"
+    QUESTION_ANSWERING = "QUESTION_ANSWERING"
+    FACT_VERIFICATION = "FACT_VERIFICATION"
+
+
+class VertexAITextEmbeddingsRequestBody(TypedDict, total=False):
+    content: Required[ContentType]
+    taskType: TaskTypeEnum
+    title: str
+    outputDimensionality: int
+
+
+class ContentEmbeddings(TypedDict):
+    values: List[float]
+
+
+class VertexAITextEmbeddingsResponseObject(TypedDict):
+    embedding: ContentEmbeddings
+
+
+class EmbedContentRequest(VertexAITextEmbeddingsRequestBody):
+    model: Required[str]
+
+
+class VertexAIBatchEmbeddingsRequestBody(TypedDict, total=False):
+    requests: List[EmbedContentRequest]
+
+
+class VertexAIBatchEmbeddingsResponseObject(TypedDict):
+    embeddings: List[ContentEmbeddings]
+
+
+# Vertex AI Batch Prediction
+
+
+class GcsSource(TypedDict):
+    uris: str
+
+
+class InputConfig(TypedDict):
+    instancesFormat: str
+    gcsSource: GcsSource
+
+
+class GcsDestination(TypedDict):
+    outputUriPrefix: str
+
+
+class OutputConfig(TypedDict, total=False):
+    predictionsFormat: str
+    gcsDestination: GcsDestination
+
+
+class VertexAIBatchPredictionJob(TypedDict):
+    displayName: str
+    model: str
+    inputConfig: InputConfig
+    outputConfig: OutputConfig
+
+
+class VertexBatchPredictionResponse(TypedDict, total=False):
+    name: str
+    displayName: str
+    model: str
+    inputConfig: InputConfig
+    outputConfig: OutputConfig
+    state: str
+    createTime: str
+    updateTime: str
+    modelVersionId: str
+
+
+VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py
new file mode 100644
index 00000000..7dee2836
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py
@@ -0,0 +1,33 @@
+import json
+from enum import Enum
+from typing import Any, List, Optional, TypedDict, Union
+
+from pydantic import BaseModel
+
+
+class WatsonXAPIParams(TypedDict):
+    project_id: str
+    space_id: Optional[str]
+    region_name: Optional[str]
+
+
+class WatsonXCredentials(TypedDict):
+    api_key: str
+    api_base: str
+    token: Optional[str]
+
+
+class WatsonXAIEndpoint(str, Enum):
+    TEXT_GENERATION = "/ml/v1/text/generation"
+    TEXT_GENERATION_STREAM = "/ml/v1/text/generation_stream"
+    CHAT = "/ml/v1/text/chat"
+    CHAT_STREAM = "/ml/v1/text/chat_stream"
+    DEPLOYMENT_TEXT_GENERATION = "/ml/v1/deployments/{deployment_id}/text/generation"
+    DEPLOYMENT_TEXT_GENERATION_STREAM = (
+        "/ml/v1/deployments/{deployment_id}/text/generation_stream"
+    )
+    DEPLOYMENT_CHAT = "/ml/v1/deployments/{deployment_id}/text/chat"
+    DEPLOYMENT_CHAT_STREAM = "/ml/v1/deployments/{deployment_id}/text/chat_stream"
+    EMBEDDINGS = "/ml/v1/text/embeddings"
+    PROMPTS = "/ml/v1/prompts"
+    AVAILABLE_MODELS = "/ml/v1/foundation_model_specs"
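+
+
+# Illustrative sketch (not part of the upstream module): deployment endpoints embed a
+# `{deployment_id}` placeholder that callers fill in before issuing the request, e.g.:
+#
+#     url = WatsonXAIEndpoint.DEPLOYMENT_CHAT.value.format(deployment_id="my-deployment")
+#     # -> "/ml/v1/deployments/my-deployment/text/chat"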
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py
new file mode 100644
index 00000000..90871198
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py
@@ -0,0 +1,20 @@
+"""
+Used for /vertex_ai/ pass through endpoints
+"""
+
+from typing import Optional
+
+from pydantic import BaseModel
+
+from ..llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+
+
+class VertexPassThroughCredentials(BaseModel):
+    # Example: vertex_project = "my-project-123"
+    vertex_project: Optional[str] = None
+
+    # Example: vertex_location = "us-central1"
+    vertex_location: Optional[str] = None
+
+    # Example: vertex_credentials = "/path/to/credentials.json" or "os.environ/GOOGLE_CREDS"
+    vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/rerank.py b/.venv/lib/python3.12/site-packages/litellm/types/rerank.py
new file mode 100644
index 00000000..8e2a8cc3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/rerank.py
@@ -0,0 +1,78 @@
+"""
+LiteLLM follows the Cohere API format for the rerank API
+https://docs.cohere.com/reference/rerank
+
+"""
+
+from typing import List, Optional, Union
+
+from pydantic import BaseModel, PrivateAttr
+from typing_extensions import Required, TypedDict
+
+
+class RerankRequest(BaseModel):
+    model: str
+    query: str
+    top_n: Optional[int] = None
+    documents: List[Union[str, dict]]
+    rank_fields: Optional[List[str]] = None
+    return_documents: Optional[bool] = None
+    max_chunks_per_doc: Optional[int] = None
+    max_tokens_per_doc: Optional[int] = None
+
+
+class OptionalRerankParams(TypedDict, total=False):
+    query: str
+    top_n: Optional[int]
+    documents: List[Union[str, dict]]
+    rank_fields: Optional[List[str]]
+    return_documents: Optional[bool]
+    max_chunks_per_doc: Optional[int]
+    max_tokens_per_doc: Optional[int]
+
+
+class RerankBilledUnits(TypedDict, total=False):
+    search_units: Optional[int]
+    total_tokens: Optional[int]
+
+
+class RerankTokens(TypedDict, total=False):
+    input_tokens: Optional[int]
+    output_tokens: Optional[int]
+
+
+class RerankResponseMeta(TypedDict, total=False):
+    api_version: Optional[dict]
+    billed_units: Optional[RerankBilledUnits]
+    tokens: Optional[RerankTokens]
+
+
+class RerankResponseDocument(TypedDict):
+    text: str
+
+
+class RerankResponseResult(TypedDict, total=False):
+    index: Required[int]
+    relevance_score: Required[float]
+    document: RerankResponseDocument
+
+
+class RerankResponse(BaseModel):
+    id: Optional[str] = None
+    results: Optional[List[RerankResponseResult]] = (
+        None  # Contains index and relevance_score
+    )
+    meta: Optional[RerankResponseMeta] = None  # Contains api_version and billed_units
+
+    # Define private attributes using PrivateAttr
+    _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+    def __getitem__(self, key):
+        return self.__dict__[key]
+
+    def get(self, key, default=None):
+        return self.__dict__.get(key, default)
+
+    def __contains__(self, key):
+        return key in self.__dict__
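+
+
+# Illustrative sketch (not part of the upstream module): RerankResponse mirrors the Cohere
+# response shape and supports dict-style access via __getitem__/get; values are placeholders.
+#
+#     response = RerankResponse(
+#         id="rerank-123",
+#         results=[{"index": 0, "relevance_score": 0.97}],
+#         meta={"billed_units": {"search_units": 1}},
+#     )
+#     response["results"][0]["relevance_score"]  # 0.97
+#     response.get("meta")["billed_units"]       # {'search_units': 1}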
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/router.py b/.venv/lib/python3.12/site-packages/litellm/types/router.py
new file mode 100644
index 00000000..e34366aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/router.py
@@ -0,0 +1,707 @@
+"""
+litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
+"""
+
+import datetime
+import enum
+import uuid
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union, get_type_hints
+
+import httpx
+from httpx import AsyncClient, Client
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+from pydantic import BaseModel, ConfigDict, Field
+from typing_extensions import Required, TypedDict
+
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+
+from ..exceptions import RateLimitError
+from .completion import CompletionRequest
+from .embedding import EmbeddingRequest
+from .llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+from .utils import ModelResponse, ProviderSpecificModelInfo
+
+
+class ConfigurableClientsideParamsCustomAuth(TypedDict):
+    api_base: str
+
+
+CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = Optional[
+    List[Union[str, ConfigurableClientsideParamsCustomAuth]]
+]
+
+
+class ModelConfig(BaseModel):
+    model_name: str
+    litellm_params: Union[CompletionRequest, EmbeddingRequest]
+    tpm: int
+    rpm: int
+
+    model_config = ConfigDict(protected_namespaces=())
+
+
+class RouterConfig(BaseModel):
+    model_list: List[ModelConfig]
+
+    redis_url: Optional[str] = None
+    redis_host: Optional[str] = None
+    redis_port: Optional[int] = None
+    redis_password: Optional[str] = None
+
+    cache_responses: Optional[bool] = False
+    cache_kwargs: Optional[Dict] = {}
+    caching_groups: Optional[List[Tuple[str, List[str]]]] = None
+    client_ttl: Optional[int] = 3600
+    num_retries: Optional[int] = 0
+    timeout: Optional[float] = None
+    default_litellm_params: Optional[Dict[str, str]] = {}
+    set_verbose: Optional[bool] = False
+    fallbacks: Optional[List] = []
+    allowed_fails: Optional[int] = None
+    context_window_fallbacks: Optional[List] = []
+    model_group_alias: Optional[Dict[str, List[str]]] = {}
+    retry_after: Optional[int] = 0
+    routing_strategy: Literal[
+        "simple-shuffle",
+        "least-busy",
+        "usage-based-routing",
+        "latency-based-routing",
+    ] = "simple-shuffle"
+
+    model_config = ConfigDict(protected_namespaces=())
+
+
+class UpdateRouterConfig(BaseModel):
+    """
+    Set of params that you can modify via `router.update_settings()`.
+    """
+
+    routing_strategy_args: Optional[dict] = None
+    routing_strategy: Optional[str] = None
+    model_group_retry_policy: Optional[dict] = None
+    allowed_fails: Optional[int] = None
+    cooldown_time: Optional[float] = None
+    num_retries: Optional[int] = None
+    timeout: Optional[float] = None
+    max_retries: Optional[int] = None
+    retry_after: Optional[float] = None
+    fallbacks: Optional[List[dict]] = None
+    context_window_fallbacks: Optional[List[dict]] = None
+
+    model_config = ConfigDict(protected_namespaces=())
+
+
+class ModelInfo(BaseModel):
+    id: Optional[
+        str
+    ]  # Allow id to be optional on input, but it will always be present as a str in the model instance
+    db_model: bool = (
+        False  # used for proxy - to separate models which are stored in the db vs. config.
+    )
+    updated_at: Optional[datetime.datetime] = None
+    updated_by: Optional[str] = None
+
+    created_at: Optional[datetime.datetime] = None
+    created_by: Optional[str] = None
+
+    base_model: Optional[str] = (
+        None  # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
+    )
+    tier: Optional[Literal["free", "paid"]] = None
+
+    """
+    Team Model Specific Fields
+    """
+    # the team id that this model belongs to
+    team_id: Optional[str] = None
+
+    # the model_name that can be used by the team when making LLM calls
+    team_public_model_name: Optional[str] = None
+
+    def __init__(self, id: Optional[Union[str, int]] = None, **params):
+        if id is None:
+            id = str(uuid.uuid4())  # Generate a UUID if id is None or not provided
+        elif isinstance(id, int):
+            id = str(id)
+        super().__init__(id=id, **params)
+
+    model_config = ConfigDict(extra="allow")
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
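+# Illustrative sketch (not part of the upstream module): ModelInfo auto-generates a uuid4
+# string when no id is passed, stringifies integer ids, and supports dict-style access.
+# "team-1" is a placeholder value.
+#
+#     info = ModelInfo(team_id="team-1")
+#     isinstance(info.id, str)   # True - generated uuid4 string
+#     info["team_id"]            # "team-1"
+#     "base_model" in info       # True - `in` checks attribute presence
+#     ModelInfo(id=42).id        # "42"
+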
+
+class CredentialLiteLLMParams(BaseModel):
+    api_key: Optional[str] = None
+    api_base: Optional[str] = None
+    api_version: Optional[str] = None
+    ## VERTEX AI ##
+    vertex_project: Optional[str] = None
+    vertex_location: Optional[str] = None
+    vertex_credentials: Optional[Union[str, dict]] = None
+    ## UNIFIED PROJECT/REGION ##
+    region_name: Optional[str] = None
+
+    ## AWS BEDROCK / SAGEMAKER ##
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    aws_region_name: Optional[str] = None
+    ## IBM WATSONX ##
+    watsonx_region_name: Optional[str] = None
+
+
+class GenericLiteLLMParams(CredentialLiteLLMParams):
+    """
+    LiteLLM Params without 'model' arg (used across completion / assistants api)
+    """
+
+    custom_llm_provider: Optional[str] = None
+    tpm: Optional[int] = None
+    rpm: Optional[int] = None
+    timeout: Optional[Union[float, str, httpx.Timeout]] = (
+        None  # if str, pass in as os.environ/
+    )
+    stream_timeout: Optional[Union[float, str]] = (
+        None  # timeout when making stream=True calls, if str, pass in as os.environ/
+    )
+    max_retries: Optional[int] = None
+    organization: Optional[str] = None  # for openai orgs
+    configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None
+
+    ## LOGGING PARAMS ##
+    litellm_trace_id: Optional[str] = None
+
+    ## CUSTOM PRICING ##
+    input_cost_per_token: Optional[float] = None
+    output_cost_per_token: Optional[float] = None
+    input_cost_per_second: Optional[float] = None
+    output_cost_per_second: Optional[float] = None
+
+    max_file_size_mb: Optional[float] = None
+
+    # Deployment budgets
+    max_budget: Optional[float] = None
+    budget_duration: Optional[str] = None
+    use_in_pass_through: Optional[bool] = False
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+    merge_reasoning_content_in_choices: Optional[bool] = False
+    model_info: Optional[Dict] = None
+
+    def __init__(
+        self,
+        custom_llm_provider: Optional[str] = None,
+        max_retries: Optional[Union[int, str]] = None,
+        tpm: Optional[int] = None,
+        rpm: Optional[int] = None,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
+        timeout: Optional[Union[float, str]] = None,  # if str, pass in as os.environ/
+        stream_timeout: Optional[Union[float, str]] = (
+            None  # timeout when making stream=True calls, if str, pass in as os.environ/
+        ),
+        organization: Optional[str] = None,  # for openai orgs
+        ## LOGGING PARAMS ##
+        litellm_trace_id: Optional[str] = None,
+        ## UNIFIED PROJECT/REGION ##
+        region_name: Optional[str] = None,
+        ## VERTEX AI ##
+        vertex_project: Optional[str] = None,
+        vertex_location: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
+        ## AWS BEDROCK / SAGEMAKER ##
+        aws_access_key_id: Optional[str] = None,
+        aws_secret_access_key: Optional[str] = None,
+        aws_region_name: Optional[str] = None,
+        ## IBM WATSONX ##
+        watsonx_region_name: Optional[str] = None,
+        input_cost_per_token: Optional[float] = None,
+        output_cost_per_token: Optional[float] = None,
+        input_cost_per_second: Optional[float] = None,
+        output_cost_per_second: Optional[float] = None,
+        max_file_size_mb: Optional[float] = None,
+        # Deployment budgets
+        max_budget: Optional[float] = None,
+        budget_duration: Optional[str] = None,
+        # Pass through params
+        use_in_pass_through: Optional[bool] = False,
+        # This will merge the reasoning content in the choices
+        merge_reasoning_content_in_choices: Optional[bool] = False,
+        model_info: Optional[Dict] = None,
+        **params,
+    ):
+        args = locals()
+        args.pop("max_retries", None)
+        args.pop("self", None)
+        args.pop("params", None)
+        args.pop("__class__", None)
+        if max_retries is not None and isinstance(max_retries, str):
+            max_retries = int(max_retries)  # cast to int
+        # We need to keep max_retries in args since it's a parameter of GenericLiteLLMParams
+        args["max_retries"] = (
+            max_retries  # Put max_retries back in args after popping it
+        )
+        super().__init__(**args, **params)
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+
+class LiteLLM_Params(GenericLiteLLMParams):
+    """
+    LiteLLM Params with 'model' requirement - used for completions
+    """
+
+    model: str
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+
+    def __init__(
+        self,
+        model: str,
+        custom_llm_provider: Optional[str] = None,
+        max_retries: Optional[Union[int, str]] = None,
+        tpm: Optional[int] = None,
+        rpm: Optional[int] = None,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
+        timeout: Optional[Union[float, str]] = None,  # if str, pass in as os.environ/
+        stream_timeout: Optional[Union[float, str]] = (
+            None  # timeout when making stream=True calls, if str, pass in as os.environ/
+        ),
+        organization: Optional[str] = None,  # for openai orgs
+        ## VERTEX AI ##
+        vertex_project: Optional[str] = None,
+        vertex_location: Optional[str] = None,
+        ## AWS BEDROCK / SAGEMAKER ##
+        aws_access_key_id: Optional[str] = None,
+        aws_secret_access_key: Optional[str] = None,
+        aws_region_name: Optional[str] = None,
+        # OpenAI / Azure Whisper
+        # set a max-size of file that can be passed to litellm proxy
+        max_file_size_mb: Optional[float] = None,
+        # will use deployment on pass-through endpoints if True
+        use_in_pass_through: Optional[bool] = False,
+        **params,
+    ):
+        args = locals()
+        args.pop("max_retries", None)
+        args.pop("self", None)
+        args.pop("params", None)
+        args.pop("__class__", None)
+        if max_retries is not None and isinstance(max_retries, str):
+            max_retries = int(max_retries)  # cast to int
+        super().__init__(max_retries=max_retries, **args, **params)
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
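+# Illustrative sketch (not part of the upstream module): LiteLLM_Params requires `model`,
+# casts a string max_retries to int, and keeps provider-specific extras because the model
+# config allows extra fields. The endpoint and deployment names are placeholders.
+#
+#     params = LiteLLM_Params(
+#         model="azure/gpt-4o-mini",
+#         api_base="https://example-resource.openai.azure.com",
+#         max_retries="3",                  # cast to int in __init__
+#         deployment_id="my-deployment",    # extra field, preserved via extra="allow"
+#     )
+#     params.max_retries   # 3
+#     params["model"]      # "azure/gpt-4o-mini"
+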
+
+class updateLiteLLMParams(GenericLiteLLMParams):
+    # This class is used to update the LiteLLM_Params
+    # the only difference is that model is optional
+    model: Optional[str] = None
+
+
+class updateDeployment(BaseModel):
+    model_name: Optional[str] = None
+    litellm_params: Optional[updateLiteLLMParams] = None
+    model_info: Optional[ModelInfo] = None
+
+    model_config = ConfigDict(protected_namespaces=())
+
+
+class LiteLLMParamsTypedDict(TypedDict, total=False):
+    model: str
+    custom_llm_provider: Optional[str]
+    tpm: Optional[int]
+    rpm: Optional[int]
+    order: Optional[int]
+    weight: Optional[int]
+    max_parallel_requests: Optional[int]
+    api_key: Optional[str]
+    api_base: Optional[str]
+    api_version: Optional[str]
+    timeout: Optional[Union[float, str, httpx.Timeout]]
+    stream_timeout: Optional[Union[float, str]]
+    max_retries: Optional[int]
+    organization: Optional[Union[List, str]]  # for openai orgs
+    configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS  # for allowing api base switching on finetuned models
+    ## DROP PARAMS ##
+    drop_params: Optional[bool]
+    ## UNIFIED PROJECT/REGION ##
+    region_name: Optional[str]
+    ## VERTEX AI ##
+    vertex_project: Optional[str]
+    vertex_location: Optional[str]
+    ## AWS BEDROCK / SAGEMAKER ##
+    aws_access_key_id: Optional[str]
+    aws_secret_access_key: Optional[str]
+    aws_region_name: Optional[str]
+    ## IBM WATSONX ##
+    watsonx_region_name: Optional[str]
+    ## CUSTOM PRICING ##
+    input_cost_per_token: Optional[float]
+    output_cost_per_token: Optional[float]
+    input_cost_per_second: Optional[float]
+    output_cost_per_second: Optional[float]
+    num_retries: Optional[int]
+    ## MOCK RESPONSES ##
+    mock_response: Optional[Union[str, ModelResponse, Exception]]
+
+    # routing params
+    # use this for tag-based routing
+    tags: Optional[List[str]]
+
+    # deployment budgets
+    max_budget: Optional[float]
+    budget_duration: Optional[str]
+
+
+class DeploymentTypedDict(TypedDict, total=False):
+    model_name: Required[str]
+    litellm_params: Required[LiteLLMParamsTypedDict]
+    model_info: dict
+
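+# Illustrative sketch (not part of the upstream module): one entry of a router model_list
+# expressed with the TypedDicts above; the deployment and key references are placeholders.
+#
+#     deployment: DeploymentTypedDict = {
+#         "model_name": "gpt-4o",
+#         "litellm_params": {
+#             "model": "azure/my-gpt-4o-deployment",
+#             "api_key": "os.environ/AZURE_API_KEY",
+#             "rpm": 100,
+#         },
+#         "model_info": {"id": "deployment-1"},
+#     }
+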
+
+SPECIAL_MODEL_INFO_PARAMS = [
+    "input_cost_per_token",
+    "output_cost_per_token",
+    "input_cost_per_character",
+    "output_cost_per_character",
+]
+
+
+class Deployment(BaseModel):
+    model_name: str
+    litellm_params: LiteLLM_Params
+    model_info: ModelInfo
+
+    model_config = ConfigDict(extra="allow", protected_namespaces=())
+
+    def __init__(
+        self,
+        model_name: str,
+        litellm_params: LiteLLM_Params,
+        model_info: Optional[Union[ModelInfo, dict]] = None,
+        **params,
+    ):
+        if model_info is None:
+            model_info = ModelInfo()
+        elif isinstance(model_info, dict):
+            model_info = ModelInfo(**model_info)
+
+        # ensures custom pricing info is consistently stored in 'model_info'
+        for key in SPECIAL_MODEL_INFO_PARAMS:
+            field = getattr(litellm_params, key, None)
+            if field is not None:
+                setattr(model_info, key, field)
+
+        super().__init__(
+            model_info=model_info,
+            model_name=model_name,
+            litellm_params=litellm_params,
+            **params,
+        )
+
+    def to_json(self, **kwargs):
+        try:
+            return self.model_dump(**kwargs)  # noqa
+        except Exception as e:
+            # if using pydantic v1
+            return self.dict(**kwargs)
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
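+# Illustrative sketch (not part of the upstream module): Deployment.__init__ promotes the
+# custom-pricing fields listed in SPECIAL_MODEL_INFO_PARAMS from litellm_params onto
+# model_info, so cost tracking reads them from one place. Values are placeholders.
+#
+#     dep = Deployment(
+#         model_name="gpt-4o",
+#         litellm_params=LiteLLM_Params(
+#             model="openai/gpt-4o", input_cost_per_token=0.000005
+#         ),
+#     )
+#     dep.model_info.input_cost_per_token  # 0.000005, copied from litellm_params
+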
+
+class RouterErrors(enum.Enum):
+    """
+    Enum for router-specific errors with common codes
+    """
+
+    user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
+    no_deployments_available = "No deployments available for selected model"
+    no_deployments_with_tag_routing = (
+        "Not allowed to access model due to tags configuration"
+    )
+    no_deployments_with_provider_budget_routing = (
+        "No deployments available - crossed budget"
+    )
+
+
+class AllowedFailsPolicy(BaseModel):
+    """
+    Use this to set a custom number of allowed fails per minute before cooling down a deployment.
+    If `AuthenticationErrorAllowedFails = 1000`, then 1000 AuthenticationErrors will be allowed before the deployment is cooled down.
+
+    Mapping of Exception type to allowed_fails for each exception
+    https://docs.litellm.ai/docs/exception_mapping
+    """
+
+    BadRequestErrorAllowedFails: Optional[int] = None
+    AuthenticationErrorAllowedFails: Optional[int] = None
+    TimeoutErrorAllowedFails: Optional[int] = None
+    RateLimitErrorAllowedFails: Optional[int] = None
+    ContentPolicyViolationErrorAllowedFails: Optional[int] = None
+    InternalServerErrorAllowedFails: Optional[int] = None
+
+
+class RetryPolicy(BaseModel):
+    """
+    Use this to set a custom number of retries per exception type
+    If RateLimitErrorRetries = 3, then 3 retries will be made for RateLimitError
+    Mapping of Exception type to number of retries
+    https://docs.litellm.ai/docs/exception_mapping
+    """
+
+    BadRequestErrorRetries: Optional[int] = None
+    AuthenticationErrorRetries: Optional[int] = None
+    TimeoutErrorRetries: Optional[int] = None
+    RateLimitErrorRetries: Optional[int] = None
+    ContentPolicyViolationErrorRetries: Optional[int] = None
+    InternalServerErrorRetries: Optional[int] = None
+
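+# Illustrative sketch (not part of the upstream module): a lenient allowed-fails policy for
+# auth errors paired with per-exception retry counts; the numbers are placeholders.
+#
+#     allowed_fails_policy = AllowedFailsPolicy(
+#         AuthenticationErrorAllowedFails=1000,
+#         RateLimitErrorAllowedFails=10,
+#     )
+#     retry_policy = RetryPolicy(
+#         RateLimitErrorRetries=3,
+#         TimeoutErrorRetries=2,
+#     )
+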
+
+class AlertingConfig(BaseModel):
+    """
+    Use this to configure alerting for the router. Receive alerts on the following events:
+    - LLM API Exceptions
+    - LLM Responses Too Slow
+    - LLM Requests Hanging
+
+    Args:
+        webhook_url: str - webhook url for alerting; Slack provides a webhook url to send alerts to
+        alerting_threshold: Optional[float] = 300 - threshold for slow / hanging llm responses (in seconds)
+    """
+
+    webhook_url: str
+    alerting_threshold: Optional[float] = 300
+
+
+class ModelGroupInfo(BaseModel):
+    model_group: str
+    providers: List[str]
+    max_input_tokens: Optional[float] = None
+    max_output_tokens: Optional[float] = None
+    input_cost_per_token: Optional[float] = None
+    output_cost_per_token: Optional[float] = None
+    mode: Optional[
+        Union[
+            str,
+            Literal[
+                "chat",
+                "embedding",
+                "completion",
+                "image_generation",
+                "audio_transcription",
+                "rerank",
+                "moderations",
+            ],
+        ]
+    ] = Field(default="chat")
+    tpm: Optional[int] = None
+    rpm: Optional[int] = None
+    supports_parallel_function_calling: bool = Field(default=False)
+    supports_vision: bool = Field(default=False)
+    supports_function_calling: bool = Field(default=False)
+    supported_openai_params: Optional[List[str]] = Field(default=[])
+    configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None
+
+    def __init__(self, **data):
+        for field_name, field_type in get_type_hints(self.__class__).items():
+            if field_type == bool and data.get(field_name) is None:
+                data[field_name] = False
+        super().__init__(**data)
+
+
+class AssistantsTypedDict(TypedDict):
+    custom_llm_provider: Literal["azure", "openai"]
+    litellm_params: LiteLLMParamsTypedDict
+
+
+class FineTuningConfig(BaseModel):
+
+    custom_llm_provider: Literal["azure", "openai"]
+
+
+class CustomRoutingStrategyBase:
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
+        pass
+
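+# Illustrative sketch (not part of the upstream module): a minimal custom strategy that
+# returns the first deployment whose model_name matches. `model_list` is a hypothetical
+# list of DeploymentTypedDict entries supplied by the caller.
+#
+#     class FirstMatchRoutingStrategy(CustomRoutingStrategyBase):
+#         def __init__(self, model_list):
+#             self.model_list = model_list
+#
+#         def get_available_deployment(
+#             self, model, messages=None, input=None,
+#             specific_deployment=False, request_kwargs=None,
+#         ):
+#             for deployment in self.model_list:
+#                 if deployment["model_name"] == model:
+#                     return deployment
+#             return None
+#
+#         async def async_get_available_deployment(
+#             self, model, messages=None, input=None,
+#             specific_deployment=False, request_kwargs=None,
+#         ):
+#             return self.get_available_deployment(
+#                 model, messages, input, specific_deployment, request_kwargs
+#             )
+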
+
+class RouterGeneralSettings(BaseModel):
+    async_only_mode: bool = Field(
+        default=False
+    )  # this will only initialize async clients. Good for reducing memory utilization
+    pass_through_all_models: bool = Field(
+        default=False
+    )  # if passed a model not in the llm_router model list, pass the request through to litellm.acompletion/embedding
+
+
+class RouterRateLimitErrorBasic(ValueError):
+    """
+    Raise a basic error inside helper functions.
+    """
+
+    def __init__(
+        self,
+        model: str,
+    ):
+        self.model = model
+        _message = f"{RouterErrors.no_deployments_available.value}."
+        super().__init__(_message)
+
+
+class RouterRateLimitError(ValueError):
+    def __init__(
+        self,
+        model: str,
+        cooldown_time: float,
+        enable_pre_call_checks: bool,
+        cooldown_list: List,
+    ):
+        self.model = model
+        self.cooldown_time = cooldown_time
+        self.enable_pre_call_checks = enable_pre_call_checks
+        self.cooldown_list = cooldown_list
+        _message = f"{RouterErrors.no_deployments_available.value}, Try again in {cooldown_time} seconds. Passed model={model}. pre-call-checks={enable_pre_call_checks}, cooldown_list={cooldown_list}"
+        super().__init__(_message)
+
+
+class RouterModelGroupAliasItem(TypedDict):
+    model: str
+    hidden: bool  # if 'True', don't return on `.get_model_list`
+
+
+VALID_LITELLM_ENVIRONMENTS = [
+    "development",
+    "staging",
+    "production",
+]
+
+
+class RoutingStrategy(enum.Enum):
+    LEAST_BUSY = "least-busy"
+    LATENCY_BASED = "latency-based-routing"
+    COST_BASED = "cost-based-routing"
+    USAGE_BASED_ROUTING_V2 = "usage-based-routing-v2"
+    USAGE_BASED_ROUTING = "usage-based-routing"
+    PROVIDER_BUDGET_LIMITING = "provider-budget-routing"
+
+
+class RouterCacheEnum(enum.Enum):
+    TPM = "global_router:{id}:{model}:tpm:{current_minute}"
+    RPM = "global_router:{id}:{model}:rpm:{current_minute}"
+
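+# Illustrative sketch (not part of the upstream module): the enum values are format strings
+# keyed per router id, model and minute; the concrete values below are placeholders.
+#
+#     key = RouterCacheEnum.TPM.value.format(
+#         id="router-1", model="gpt-4o", current_minute="2024-01-01-00-00"
+#     )
+#     # -> "global_router:router-1:gpt-4o:tpm:2024-01-01-00-00"
+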
+
+class GenericBudgetWindowDetails(BaseModel):
+    """Details about a provider's budget window"""
+
+    budget_start: float
+    spend_key: str
+    start_time_key: str
+    ttl_seconds: int
+
+
+OptionalPreCallChecks = List[Literal["prompt_caching", "router_budget_limiting"]]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/services.py b/.venv/lib/python3.12/site-packages/litellm/types/services.py
new file mode 100644
index 00000000..3eb283db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/services.py
@@ -0,0 +1,39 @@
+import enum
+import uuid
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class ServiceTypes(str, enum.Enum):
+    """
+    Enum for litellm + litellm-adjacent services (redis/postgres/etc.)
+    """
+
+    REDIS = "redis"
+    DB = "postgres"
+    BATCH_WRITE_TO_DB = "batch_write_to_db"
+    RESET_BUDGET_JOB = "reset_budget_job"
+    LITELLM = "self"
+    ROUTER = "router"
+    AUTH = "auth"
+    PROXY_PRE_CALL = "proxy_pre_call"
+
+
+class ServiceLoggerPayload(BaseModel):
+    """
+    The payload logged during service success/failure
+    """
+
+    is_error: bool = Field(description="did an error occur")
+    error: Optional[str] = Field(None, description="what was the error")
+    service: ServiceTypes = Field(description="who is this for? - postgres/redis")
+    duration: float = Field(description="How long did the request take?")
+    call_type: str = Field(description="The type of service call being made")
+
+    def to_json(self, **kwargs):
+        try:
+            return self.model_dump(**kwargs)  # noqa
+        except Exception as e:
+            # if using pydantic v1
+            return self.dict(**kwargs)
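+
+
+# Illustrative sketch (not part of the upstream module): a success payload for a redis call;
+# the duration and call_type values are placeholders.
+#
+#     payload = ServiceLoggerPayload(
+#         is_error=False,
+#         error=None,
+#         service=ServiceTypes.REDIS,
+#         duration=0.012,
+#         call_type="async_set_cache",
+#     )
+#     payload.to_json()  # plain dict of the fields above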
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/utils.py b/.venv/lib/python3.12/site-packages/litellm/types/utils.py
new file mode 100644
index 00000000..a6654285
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/utils.py
@@ -0,0 +1,2081 @@
+import json
+import time
+import uuid
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+
+from aiohttp import FormData
+from openai._models import BaseModel as OpenAIObject
+from openai.types.audio.transcription_create_params import FileTypes  # type: ignore
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
+from openai.types.moderation import (
+    Categories,
+    CategoryAppliedInputTypes,
+    CategoryScores,
+)
+from openai.types.moderation_create_response import Moderation, ModerationCreateResponse
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
+from typing_extensions import Callable, Dict, Required, TypedDict, override
+
+import litellm
+
+from ..litellm_core_utils.core_helpers import map_finish_reason
+from .guardrails import GuardrailEventHooks
+from .llms.openai import (
+    Batch,
+    ChatCompletionThinkingBlock,
+    ChatCompletionToolCallChunk,
+    ChatCompletionUsageBlock,
+    OpenAIChatCompletionChunk,
+)
+from .rerank import RerankResponse
+
+
+def _generate_id():  # private helper function
+    return "chatcmpl-" + str(uuid.uuid4())
+
+
+class LiteLLMPydanticObjectBase(BaseModel):
+    """
+    Implements default functions that all pydantic objects should have.
+    """
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump(**kwargs)  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict(**kwargs)
+
+    def fields_set(self):
+        try:
+            return self.model_fields_set  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.__fields_set__
+
+    model_config = ConfigDict(protected_namespaces=())
+
+
+class LiteLLMCommonStrings(Enum):
+    redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
+    llm_provider_not_provided = "Unmapped LLM provider for this endpoint. You passed model={model}, custom_llm_provider={custom_llm_provider}. Check supported provider and route: https://docs.litellm.ai/docs/providers"
+
+
+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
+class CostPerToken(TypedDict):
+    input_cost_per_token: float
+    output_cost_per_token: float
+
+
+class ProviderField(TypedDict):
+    field_name: str
+    field_type: Literal["string"]
+    field_description: str
+    field_value: str
+
+
+class ProviderSpecificModelInfo(TypedDict, total=False):
+    supports_system_messages: Optional[bool]
+    supports_response_schema: Optional[bool]
+    supports_vision: Optional[bool]
+    supports_function_calling: Optional[bool]
+    supports_tool_choice: Optional[bool]
+    supports_assistant_prefill: Optional[bool]
+    supports_prompt_caching: Optional[bool]
+    supports_audio_input: Optional[bool]
+    supports_embedding_image_input: Optional[bool]
+    supports_audio_output: Optional[bool]
+    supports_pdf_input: Optional[bool]
+    supports_native_streaming: Optional[bool]
+    supports_parallel_function_calling: Optional[bool]
+
+
+class ModelInfoBase(ProviderSpecificModelInfo, total=False):
+    key: Required[str]  # the key in litellm.model_cost which is returned
+
+    max_tokens: Required[Optional[int]]
+    max_input_tokens: Required[Optional[int]]
+    max_output_tokens: Required[Optional[int]]
+    input_cost_per_token: Required[float]
+    cache_creation_input_token_cost: Optional[float]
+    cache_read_input_token_cost: Optional[float]
+    input_cost_per_character: Optional[float]  # only for vertex ai models
+    input_cost_per_audio_token: Optional[float]
+    input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
+    input_cost_per_character_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    input_cost_per_query: Optional[float]  # only for rerank models
+    input_cost_per_image: Optional[float]  # only for vertex ai models
+    input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+    input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
+    input_cost_per_second: Optional[float]  # for OpenAI Speech models
+    input_cost_per_token_batches: Optional[float]
+    output_cost_per_token_batches: Optional[float]
+    output_cost_per_token: Required[float]
+    output_cost_per_character: Optional[float]  # only for vertex ai models
+    output_cost_per_audio_token: Optional[float]
+    output_cost_per_token_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    output_cost_per_character_above_128k_tokens: Optional[
+        float
+    ]  # only for vertex ai models
+    output_cost_per_image: Optional[float]
+    output_vector_size: Optional[int]
+    output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
+    output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+    output_cost_per_second: Optional[float]  # for OpenAI Speech models
+
+    litellm_provider: Required[str]
+    mode: Required[
+        Literal[
+            "completion", "embedding", "image_generation", "chat", "audio_transcription"
+        ]
+    ]
+    tpm: Optional[int]
+    rpm: Optional[int]
+
+
+class ModelInfo(ModelInfoBase, total=False):
+    """
+    Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
+    """
+
+    supported_openai_params: Required[Optional[List[str]]]
+
+
+class GenericStreamingChunk(TypedDict, total=False):
+    text: Required[str]
+    tool_use: Optional[ChatCompletionToolCallChunk]
+    is_finished: Required[bool]
+    finish_reason: Required[str]
+    usage: Required[Optional[ChatCompletionUsageBlock]]
+    index: int
+
+    # use this dict if you want to return any provider specific fields in the response
+    provider_specific_fields: Optional[Dict[str, Any]]
+
+
+class CallTypes(Enum):
+    embedding = "embedding"
+    aembedding = "aembedding"
+    completion = "completion"
+    acompletion = "acompletion"
+    atext_completion = "atext_completion"
+    text_completion = "text_completion"
+    image_generation = "image_generation"
+    aimage_generation = "aimage_generation"
+    moderation = "moderation"
+    amoderation = "amoderation"
+    atranscription = "atranscription"
+    transcription = "transcription"
+    aspeech = "aspeech"
+    speech = "speech"
+    rerank = "rerank"
+    arerank = "arerank"
+    arealtime = "_arealtime"
+    create_batch = "create_batch"
+    acreate_batch = "acreate_batch"
+    aretrieve_batch = "aretrieve_batch"
+    retrieve_batch = "retrieve_batch"
+    pass_through = "pass_through_endpoint"
+    anthropic_messages = "anthropic_messages"
+    get_assistants = "get_assistants"
+    aget_assistants = "aget_assistants"
+    create_assistants = "create_assistants"
+    acreate_assistants = "acreate_assistants"
+    delete_assistant = "delete_assistant"
+    adelete_assistant = "adelete_assistant"
+    acreate_thread = "acreate_thread"
+    create_thread = "create_thread"
+    aget_thread = "aget_thread"
+    get_thread = "get_thread"
+    a_add_message = "a_add_message"
+    add_message = "add_message"
+    aget_messages = "aget_messages"
+    get_messages = "get_messages"
+    arun_thread = "arun_thread"
+    run_thread = "run_thread"
+    arun_thread_stream = "arun_thread_stream"
+    run_thread_stream = "run_thread_stream"
+    afile_retrieve = "afile_retrieve"
+    file_retrieve = "file_retrieve"
+    afile_delete = "afile_delete"
+    file_delete = "file_delete"
+    afile_list = "afile_list"
+    file_list = "file_list"
+    acreate_file = "acreate_file"
+    create_file = "create_file"
+    afile_content = "afile_content"
+    file_content = "file_content"
+    create_fine_tuning_job = "create_fine_tuning_job"
+    acreate_fine_tuning_job = "acreate_fine_tuning_job"
+    acancel_fine_tuning_job = "acancel_fine_tuning_job"
+    cancel_fine_tuning_job = "cancel_fine_tuning_job"
+    alist_fine_tuning_jobs = "alist_fine_tuning_jobs"
+    list_fine_tuning_jobs = "list_fine_tuning_jobs"
+    aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
+    retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
+    responses = "responses"
+    aresponses = "aresponses"
+
+
+CallTypesLiteral = Literal[
+    "embedding",
+    "aembedding",
+    "completion",
+    "acompletion",
+    "atext_completion",
+    "text_completion",
+    "image_generation",
+    "aimage_generation",
+    "moderation",
+    "amoderation",
+    "atranscription",
+    "transcription",
+    "aspeech",
+    "speech",
+    "rerank",
+    "arerank",
+    "_arealtime",
+    "create_batch",
+    "acreate_batch",
+    "pass_through_endpoint",
+    "anthropic_messages",
+    "aretrieve_batch",
+    "retrieve_batch",
+]
+
+
+class PassthroughCallTypes(Enum):
+    passthrough_image_generation = "passthrough-image-generation"
+
+
+class TopLogprob(OpenAIObject):
+    token: str
+    """The token."""
+
+    bytes: Optional[List[int]] = None
+    """A list of integers representing the UTF-8 bytes representation of the token.
+
+    Useful in instances where characters are represented by multiple tokens and
+    their byte representations must be combined to generate the correct text
+    representation. Can be `null` if there is no bytes representation for the token.
+    """
+
+    logprob: float
+    """The log probability of this token, if it is within the top 20 most likely
+    tokens.
+
+    Otherwise, the value `-9999.0` is used to signify that the token is very
+    unlikely.
+    """
+
+
+class ChatCompletionTokenLogprob(OpenAIObject):
+    token: str
+    """The token."""
+
+    bytes: Optional[List[int]] = None
+    """A list of integers representing the UTF-8 bytes representation of the token.
+
+    Useful in instances where characters are represented by multiple tokens and
+    their byte representations must be combined to generate the correct text
+    representation. Can be `null` if there is no bytes representation for the token.
+    """
+
+    logprob: float
+    """The log probability of this token, if it is within the top 20 most likely
+    tokens.
+
+    Otherwise, the value `-9999.0` is used to signify that the token is very
+    unlikely.
+    """
+
+    top_logprobs: List[TopLogprob]
+    """List of the most likely tokens and their log probability, at this token
+    position.
+
+    In rare cases, there may be fewer than the number of requested `top_logprobs`
+    returned.
+    """
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+
+class ChoiceLogprobs(OpenAIObject):
+    content: Optional[List[ChatCompletionTokenLogprob]] = None
+    """A list of message content tokens with log probability information."""
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+
+class FunctionCall(OpenAIObject):
+    arguments: str
+    name: Optional[str] = None
+
+
+class Function(OpenAIObject):
+    arguments: str
+    name: Optional[
+        str
+    ]  # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None)
+
+    def __init__(
+        self,
+        arguments: Optional[Union[Dict, str]],
+        name: Optional[str] = None,
+        **params,
+    ):
+        if arguments is None:
+            arguments = ""
+        elif isinstance(arguments, Dict):
+            arguments = json.dumps(arguments)
+
+        # Build a dictionary with the structure your BaseModel expects
+        data = {"arguments": arguments, "name": name, **params}
+
+        super(Function, self).__init__(**data)
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
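+# Illustrative sketch (not upstream code): Function normalizes its `arguments`
+# input - a dict is JSON-encoded and None becomes "". The tool name below
+# ("get_weather") is a hypothetical example.
+#
+#   >>> fn = Function(arguments={"location": "SF"}, name="get_weather")
+#   >>> fn.arguments
+#   '{"location": "SF"}'
+#   >>> Function(arguments=None).arguments
+#   ''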
+
+class ChatCompletionDeltaToolCall(OpenAIObject):
+    id: Optional[str] = None
+    function: Function
+    type: Optional[str] = None
+    index: int
+
+
+class HiddenParams(OpenAIObject):
+    original_response: Optional[Union[str, Any]] = None
+    model_id: Optional[str] = None  # used in Router for individual deployments
+    api_base: Optional[str] = None  # returns api base used for making completion call
+
+    model_config = ConfigDict(extra="allow", protected_namespaces=())
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+class ChatCompletionMessageToolCall(OpenAIObject):
+    def __init__(
+        self,
+        function: Union[Dict, Function],
+        id: Optional[str] = None,
+        type: Optional[str] = None,
+        **params,
+    ):
+        super(ChatCompletionMessageToolCall, self).__init__(**params)
+        if isinstance(function, Dict):
+            self.function = Function(**function)
+        else:
+            self.function = function
+
+        if id is not None:
+            self.id = id
+        else:
+            self.id = f"{uuid.uuid4()}"
+
+        if type is not None:
+            self.type = type
+        else:
+            self.type = "function"
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
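+# Illustrative sketch (not upstream code): when built from a plain dict,
+# ChatCompletionMessageToolCall coerces `function` into a Function and fills in
+# defaults for `id` (a fresh UUID string) and `type` ("function"). The tool
+# name is hypothetical.
+#
+#   >>> tc = ChatCompletionMessageToolCall(
+#   ...     function={"arguments": '{"q": "litellm"}', "name": "search"}
+#   ... )
+#   >>> tc.type
+#   'function'
+#   >>> isinstance(tc.function, Function)
+#   True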
+
+from openai.types.chat.chat_completion_audio import ChatCompletionAudio
+
+
+class ChatCompletionAudioResponse(ChatCompletionAudio):
+
+    def __init__(
+        self,
+        data: str,
+        expires_at: int,
+        transcript: str,
+        id: Optional[str] = None,
+        **params,
+    ):
+        if id is None:
+            id = f"{uuid.uuid4()}"
+        super(ChatCompletionAudioResponse, self).__init__(
+            data=data, expires_at=expires_at, transcript=transcript, id=id, **params
+        )
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+
+"""
+Reference:
+ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None))
+"""
+
+
+def add_provider_specific_fields(
+    object: BaseModel, provider_specific_fields: Optional[Dict[str, Any]]
+):
+    if not provider_specific_fields:  # nothing to set if provider_specific_fields is empty
+        return
+    setattr(object, "provider_specific_fields", provider_specific_fields)
+
+
+class Message(OpenAIObject):
+    content: Optional[str]
+    role: Literal["assistant", "user", "system", "tool", "function"]
+    tool_calls: Optional[List[ChatCompletionMessageToolCall]]
+    function_call: Optional[FunctionCall]
+    audio: Optional[ChatCompletionAudioResponse] = None
+    reasoning_content: Optional[str] = None
+    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    provider_specific_fields: Optional[Dict[str, Any]] = Field(
+        default=None, exclude=True
+    )
+
+    def __init__(
+        self,
+        content: Optional[str] = None,
+        role: Literal["assistant"] = "assistant",
+        function_call=None,
+        tool_calls: Optional[list] = None,
+        audio: Optional[ChatCompletionAudioResponse] = None,
+        provider_specific_fields: Optional[Dict[str, Any]] = None,
+        reasoning_content: Optional[str] = None,
+        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        **params,
+    ):
+        init_values: Dict[str, Any] = {
+            "content": content,
+            "role": role or "assistant",  # handle null input
+            "function_call": (
+                FunctionCall(**function_call) if function_call is not None else None
+            ),
+            "tool_calls": (
+                [
+                    (
+                        ChatCompletionMessageToolCall(**tool_call)
+                        if isinstance(tool_call, dict)
+                        else tool_call
+                    )
+                    for tool_call in tool_calls
+                ]
+                if tool_calls is not None and len(tool_calls) > 0
+                else None
+            ),
+        }
+
+        if audio is not None:
+            init_values["audio"] = audio
+
+        if thinking_blocks is not None:
+            init_values["thinking_blocks"] = thinking_blocks
+
+        if reasoning_content is not None:
+            init_values["reasoning_content"] = reasoning_content
+
+        super(Message, self).__init__(
+            **init_values,  # type: ignore
+            **params,
+        )
+
+        if audio is None:
+            # delete audio from self
+            # OpenAI compatible APIs like mistral API will raise an error if audio is passed in
+            del self.audio
+
+        if reasoning_content is None:
+            # ensure default response matches OpenAI spec
+            del self.reasoning_content
+
+        if thinking_blocks is None:
+            # ensure default response matches OpenAI spec
+            del self.thinking_blocks
+
+        add_provider_specific_fields(self, provider_specific_fields)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
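+# Illustrative sketch (not upstream code): Message defaults `role` to
+# "assistant" and deletes the optional audio/reasoning fields when they are not
+# supplied, so the serialized message stays OpenAI-spec compatible.
+#
+#   >>> m = Message(content="Hello!")
+#   >>> m.role
+#   'assistant'
+#   # audio / reasoning_content / thinking_blocks were not supplied, so they are
+#   # removed above and should not appear in m.json()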
+
+class Delta(OpenAIObject):
+    reasoning_content: Optional[str] = None
+    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
+
+    def __init__(
+        self,
+        content=None,
+        role=None,
+        function_call=None,
+        tool_calls=None,
+        audio: Optional[ChatCompletionAudioResponse] = None,
+        reasoning_content: Optional[str] = None,
+        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        **params,
+    ):
+        super(Delta, self).__init__(**params)
+        add_provider_specific_fields(self, params.get("provider_specific_fields", {}))
+        self.content = content
+        self.role = role
+        # Set default values and correct types
+        self.function_call: Optional[Union[FunctionCall, Any]] = None
+        self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
+        self.audio: Optional[ChatCompletionAudioResponse] = None
+
+        if reasoning_content is not None:
+            self.reasoning_content = reasoning_content
+        else:
+            # ensure default response matches OpenAI spec
+            del self.reasoning_content
+
+        if thinking_blocks is not None:
+            self.thinking_blocks = thinking_blocks
+        else:
+            # ensure default response matches OpenAI spec
+            del self.thinking_blocks
+
+        if function_call is not None and isinstance(function_call, dict):
+            self.function_call = FunctionCall(**function_call)
+        else:
+            self.function_call = function_call
+        if tool_calls is not None and isinstance(tool_calls, list):
+            self.tool_calls = []
+            for tool_call in tool_calls:
+                if isinstance(tool_call, dict):
+                    if tool_call.get("index", None) is None:
+                        tool_call["index"] = 0
+                    self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
+                elif isinstance(tool_call, ChatCompletionDeltaToolCall):
+                    self.tool_calls.append(tool_call)
+        else:
+            self.tool_calls = tool_calls
+
+        self.audio = audio
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
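+# Illustrative sketch (not upstream code): Delta wraps dict tool calls in
+# ChatCompletionDeltaToolCall and fills in a default `index` of 0; the tool
+# name below is hypothetical.
+#
+#   >>> d = Delta(tool_calls=[{"function": {"arguments": "{", "name": "search"}}])
+#   >>> d.tool_calls[0].index
+#   0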
+
+class Choices(OpenAIObject):
+    def __init__(
+        self,
+        finish_reason=None,
+        index=0,
+        message: Optional[Union[Message, dict]] = None,
+        logprobs=None,
+        enhancements=None,
+        **params,
+    ):
+        super(Choices, self).__init__(**params)
+        if finish_reason is not None:
+            self.finish_reason = map_finish_reason(
+                finish_reason
+            )  # set finish_reason for all responses
+        else:
+            self.finish_reason = "stop"
+        self.index = index
+        if message is None:
+            self.message = Message()
+        else:
+            if isinstance(message, Message):
+                self.message = message
+            elif isinstance(message, dict):
+                self.message = Message(**message)
+        if logprobs is not None:
+            if isinstance(logprobs, dict):
+                self.logprobs = ChoiceLogprobs(**logprobs)
+            else:
+                self.logprobs = logprobs
+        if enhancements is not None:
+            self.enhancements = enhancements
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+
+class CompletionTokensDetailsWrapper(
+    CompletionTokensDetails
+):  # wrapper for older openai versions
+    text_tokens: Optional[int] = None
+    """Text tokens generated by the model."""
+
+
+class PromptTokensDetailsWrapper(
+    PromptTokensDetails
+):  # wrapper for older openai versions
+    text_tokens: Optional[int] = None
+    """Text tokens sent to the model."""
+
+    image_tokens: Optional[int] = None
+    """Image tokens sent to the model."""
+
+
+class Usage(CompletionUsage):
+    _cache_creation_input_tokens: int = PrivateAttr(
+        0
+    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.
+    _cache_read_input_tokens: int = PrivateAttr(
+        0
+    )  # hidden param for prompt caching. Might change, once openai introduces their equivalent.
+
+    def __init__(
+        self,
+        prompt_tokens: Optional[int] = None,
+        completion_tokens: Optional[int] = None,
+        total_tokens: Optional[int] = None,
+        reasoning_tokens: Optional[int] = None,
+        prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None,
+        completion_tokens_details: Optional[
+            Union[CompletionTokensDetailsWrapper, dict]
+        ] = None,
+        **params,
+    ):
+        # handle reasoning_tokens
+        _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None
+        if reasoning_tokens:
+            completion_tokens_details = CompletionTokensDetailsWrapper(
+                reasoning_tokens=reasoning_tokens
+            )
+
+        # Ensure completion_tokens_details is properly handled
+        if completion_tokens_details:
+            if isinstance(completion_tokens_details, dict):
+                _completion_tokens_details = CompletionTokensDetailsWrapper(
+                    **completion_tokens_details
+                )
+            elif isinstance(completion_tokens_details, CompletionTokensDetails):
+                _completion_tokens_details = completion_tokens_details
+
+        ## DEEPSEEK MAPPING ##
+        if "prompt_cache_hit_tokens" in params and isinstance(
+            params["prompt_cache_hit_tokens"], int
+        ):
+            if prompt_tokens_details is None:
+                prompt_tokens_details = PromptTokensDetailsWrapper(
+                    cached_tokens=params["prompt_cache_hit_tokens"]
+                )
+
+        ## ANTHROPIC MAPPING ##
+        if "cache_read_input_tokens" in params and isinstance(
+            params["cache_read_input_tokens"], int
+        ):
+            if prompt_tokens_details is None:
+                prompt_tokens_details = PromptTokensDetailsWrapper(
+                    cached_tokens=params["cache_read_input_tokens"]
+                )
+
+        # handle prompt_tokens_details
+        _prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+        if prompt_tokens_details:
+            if isinstance(prompt_tokens_details, dict):
+                _prompt_tokens_details = PromptTokensDetailsWrapper(
+                    **prompt_tokens_details
+                )
+            elif isinstance(prompt_tokens_details, PromptTokensDetails):
+                _prompt_tokens_details = prompt_tokens_details
+
+        super().__init__(
+            prompt_tokens=prompt_tokens or 0,
+            completion_tokens=completion_tokens or 0,
+            total_tokens=total_tokens or 0,
+            completion_tokens_details=_completion_tokens_details or None,
+            prompt_tokens_details=_prompt_tokens_details or None,
+        )
+
+        ## ANTHROPIC MAPPING ##
+        if "cache_creation_input_tokens" in params and isinstance(
+            params["cache_creation_input_tokens"], int
+        ):
+            self._cache_creation_input_tokens = params["cache_creation_input_tokens"]
+
+        if "cache_read_input_tokens" in params and isinstance(
+            params["cache_read_input_tokens"], int
+        ):
+            self._cache_read_input_tokens = params["cache_read_input_tokens"]
+
+        ## DEEPSEEK MAPPING ##
+        if "prompt_cache_hit_tokens" in params and isinstance(
+            params["prompt_cache_hit_tokens"], int
+        ):
+            self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]
+
+        for k, v in params.items():
+            setattr(self, k, v)
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
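+# Illustrative sketch (not upstream code): provider-specific cache counters are
+# mapped onto the OpenAI-style `prompt_tokens_details` block, e.g. an
+# Anthropic-style `cache_read_input_tokens` kwarg becomes `cached_tokens`:
+#
+#   >>> u = Usage(prompt_tokens=11, completion_tokens=4, total_tokens=15,
+#   ...           cache_read_input_tokens=3)
+#   >>> u.prompt_tokens_details.cached_tokens
+#   3
+#   >>> u._cache_read_input_tokens
+#   3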
+
+class StreamingChoices(OpenAIObject):
+    def __init__(
+        self,
+        finish_reason=None,
+        index=0,
+        delta: Optional[Delta] = None,
+        logprobs=None,
+        enhancements=None,
+        **params,
+    ):
+        # Perplexity returns both "delta" and "message", which causes clients like
+        # OpenWebUI to render the same text twice - drop "message" here
+        # https://github.com/BerriAI/litellm/issues/8455
+        params.pop("message", None)
+        super(StreamingChoices, self).__init__(**params)
+        if finish_reason:
+            self.finish_reason = map_finish_reason(finish_reason)
+        else:
+            self.finish_reason = None
+        self.index = index
+        if delta is not None:
+
+            if isinstance(delta, Delta):
+                self.delta = delta
+            elif isinstance(delta, dict):
+                self.delta = Delta(**delta)
+        else:
+            self.delta = Delta()
+        if enhancements is not None:
+            self.enhancements = enhancements
+
+        if logprobs is not None and isinstance(logprobs, dict):
+            self.logprobs = ChoiceLogprobs(**logprobs)
+        else:
+            self.logprobs = logprobs  # type: ignore
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+
+class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
+    def __init__(self, **kwargs):
+
+        new_choices = []
+        for choice in kwargs["choices"]:
+            new_choice = StreamingChoices(**choice).model_dump()
+            new_choices.append(new_choice)
+        kwargs["choices"] = new_choices
+
+        super().__init__(**kwargs)
+
+
+from openai.types.chat import ChatCompletionChunk
+
+
+class ModelResponseBase(OpenAIObject):
+    id: str
+    """A unique identifier for the completion."""
+
+    created: int
+    """The Unix timestamp (in seconds) of when the completion was created."""
+
+    model: Optional[str] = None
+    """The model used for completion."""
+
+    object: str
+    """The object type, e.g. "chat.completion" or "chat.completion.chunk"."""
+
+    system_fingerprint: Optional[str] = None
+    """This fingerprint represents the backend configuration that the model runs with.
+
+    Can be used in conjunction with the `seed` request parameter to understand when
+    backend changes have been made that might impact determinism.
+    """
+
+    _hidden_params: dict = {}
+
+    _response_headers: Optional[dict] = None
+
+
+class ModelResponseStream(ModelResponseBase):
+    choices: List[StreamingChoices]
+    provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
+
+    def __init__(
+        self,
+        choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None,
+        id: Optional[str] = None,
+        created: Optional[int] = None,
+        provider_specific_fields: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ):
+        if choices is not None and isinstance(choices, list):
+            new_choices = []
+            for choice in choices:
+                _new_choice = None
+                if isinstance(choice, StreamingChoices):
+                    _new_choice = choice
+                elif isinstance(choice, dict):
+                    _new_choice = StreamingChoices(**choice)
+                elif isinstance(choice, BaseModel):
+                    _new_choice = StreamingChoices(**choice.model_dump())
+                new_choices.append(_new_choice)
+            kwargs["choices"] = new_choices
+        else:
+            kwargs["choices"] = [StreamingChoices()]
+
+        if id is None:
+            id = _generate_id()
+        if created is None:
+            created = int(time.time())
+
+        if (
+            "usage" in kwargs
+            and kwargs["usage"] is not None
+            and isinstance(kwargs["usage"], dict)
+        ):
+            kwargs["usage"] = Usage(**kwargs["usage"])
+
+        kwargs["id"] = id
+        kwargs["created"] = created
+        kwargs["object"] = "chat.completion.chunk"
+        kwargs["provider_specific_fields"] = provider_specific_fields
+
+        super().__init__(**kwargs)
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+class ModelResponse(ModelResponseBase):
+    choices: List[Union[Choices, StreamingChoices]]
+    """The list of completion choices the model generated for the input prompt."""
+
+    def __init__(
+        self,
+        id=None,
+        choices=None,
+        created=None,
+        model=None,
+        object=None,
+        system_fingerprint=None,
+        usage=None,
+        stream=None,
+        stream_options=None,
+        response_ms=None,
+        hidden_params=None,
+        _response_headers=None,
+        **params,
+    ) -> None:
+        if stream is not None and stream is True:
+            object = "chat.completion.chunk"
+            if choices is not None and isinstance(choices, list):
+                new_choices = []
+                for choice in choices:
+                    _new_choice = None
+                    if isinstance(choice, StreamingChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = StreamingChoices(**choice)
+                    elif isinstance(choice, BaseModel):
+                        _new_choice = StreamingChoices(**choice.model_dump())
+                    new_choices.append(_new_choice)
+                choices = new_choices
+            else:
+                choices = [StreamingChoices()]
+        else:
+            object = "chat.completion"
+            if choices is not None and isinstance(choices, list):
+                new_choices = []
+                for choice in choices:
+                    if isinstance(choice, Choices):
+                        _new_choice = choice  # type: ignore
+                    elif isinstance(choice, dict):
+                        _new_choice = Choices(**choice)  # type: ignore
+                    else:
+                        _new_choice = choice
+                    new_choices.append(_new_choice)
+                choices = new_choices
+            else:
+                choices = [Choices()]
+        if id is None:
+            id = _generate_id()
+        if created is None:
+            created = int(time.time())
+        if usage is not None:
+            if isinstance(usage, dict):
+                usage = Usage(**usage)
+        elif stream is None or stream is False:
+            usage = Usage()
+        if hidden_params:
+            self._hidden_params = hidden_params
+
+        if _response_headers:
+            self._response_headers = _response_headers
+
+        init_values = {
+            "id": id,
+            "choices": choices,
+            "created": created,
+            "model": model,
+            "object": object,
+            "system_fingerprint": system_fingerprint,
+        }
+
+        if usage is not None:
+            init_values["usage"] = usage
+
+        super().__init__(
+            **init_values,
+            **params,
+        )
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
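+# Illustrative sketch (not upstream code): ModelResponse coerces dict choices
+# into Choices (or StreamingChoices when stream=True), generates `id`/`created`
+# when omitted, and defaults `usage` for non-streaming responses. The model
+# name below is only an example.
+#
+#   >>> r = ModelResponse(
+#   ...     model="gpt-4o-mini",
+#   ...     choices=[{"message": {"role": "assistant", "content": "Hi"},
+#   ...               "finish_reason": "stop"}],
+#   ... )
+#   >>> r.object
+#   'chat.completion'
+#   >>> isinstance(r.choices[0], Choices)
+#   True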
+
+class Embedding(OpenAIObject):
+    embedding: Union[list, str] = []
+    index: int
+    object: Literal["embedding"]
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+
+class EmbeddingResponse(OpenAIObject):
+    model: Optional[str] = None
+    """The model used for embedding."""
+
+    data: List
+    """The actual embedding value"""
+
+    object: Literal["list"]
+    """The object type, which is always "list" """
+
+    usage: Optional[Usage] = None
+    """Usage statistics for the embedding request."""
+
+    _hidden_params: dict = {}
+    _response_headers: Optional[Dict] = None
+    _response_ms: Optional[float] = None
+
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        usage: Optional[Usage] = None,
+        response_ms=None,
+        data: Optional[Union[List, List[Embedding]]] = None,
+        hidden_params=None,
+        _response_headers=None,
+        **params,
+    ):
+        object = "list"
+        if response_ms:
+            _response_ms = response_ms
+        else:
+            _response_ms = None
+        if not data:
+            data = []
+
+        if not usage:
+            usage = Usage()
+
+        if _response_headers:
+            self._response_headers = _response_headers
+
+        super().__init__(model=model, object=object, data=data, usage=usage)  # type: ignore
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+class Logprobs(OpenAIObject):
+    text_offset: Optional[List[int]]
+    token_logprobs: Optional[List[Union[float, None]]]
+    tokens: Optional[List[str]]
+    top_logprobs: Optional[List[Union[Dict[str, float], None]]]
+
+
+class TextChoices(OpenAIObject):
+    def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
+        super(TextChoices, self).__init__(**params)
+        if finish_reason:
+            self.finish_reason = map_finish_reason(finish_reason)
+        else:
+            self.finish_reason = None
+        self.index = index
+        self.text = text
+        if logprobs is None:
+            self.logprobs = None
+        else:
+            if isinstance(logprobs, dict):
+                self.logprobs = Logprobs(**logprobs)
+            else:
+                self.logprobs = logprobs
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+class TextCompletionResponse(OpenAIObject):
+    """
+    {
+        "id": response["id"],
+        "object": "text_completion",
+        "created": response["created"],
+        "model": response["model"],
+        "choices": [
+        {
+            "text": response["choices"][0]["message"]["content"],
+            "index": response["choices"][0]["index"],
+            "logprobs": transformed_logprobs,
+            "finish_reason": response["choices"][0]["finish_reason"]
+        }
+        ],
+        "usage": response["usage"]
+    }
+    """
+
+    id: str
+    object: str
+    created: int
+    model: Optional[str]
+    choices: List[TextChoices]
+    usage: Optional[Usage]
+    _response_ms: Optional[int] = None
+    _hidden_params: HiddenParams
+
+    def __init__(
+        self,
+        id=None,
+        choices=None,
+        created=None,
+        model=None,
+        usage=None,
+        stream=False,
+        response_ms=None,
+        object=None,
+        **params,
+    ):
+        if stream:
+            object = "text_completion.chunk"
+            choices = [TextChoices()]
+        else:
+            object = "text_completion"
+            if choices is not None and isinstance(choices, list):
+                new_choices = []
+                for choice in choices:
+                    _new_choice = None
+                    if isinstance(choice, TextChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = TextChoices(**choice)
+                    new_choices.append(_new_choice)
+                choices = new_choices
+            else:
+                choices = [TextChoices()]
+        if id is None:
+            id = _generate_id()
+        if created is None:
+            created = int(time.time())
+
+        if not usage:
+            usage = Usage()
+
+        super(TextCompletionResponse, self).__init__(
+            id=id,  # type: ignore
+            object=object,  # type: ignore
+            created=created,  # type: ignore
+            model=model,  # type: ignore
+            choices=choices,  # type: ignore
+            usage=usage,  # type: ignore
+            **params,
+        )
+
+        if response_ms:
+            self._response_ms = response_ms
+        else:
+            self._response_ms = None
+        self._hidden_params = HiddenParams()
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+
+from openai.types.images_response import Image as OpenAIImage
+
+
+class ImageObject(OpenAIImage):
+    """
+    Represents the url or the content of an image generated by the OpenAI API.
+
+    Attributes:
+    b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
+    url: The URL of the generated image, if response_format is url (default).
+    revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.
+
+    https://platform.openai.com/docs/api-reference/images/object
+    """
+
+    b64_json: Optional[str] = None
+    url: Optional[str] = None
+    revised_prompt: Optional[str] = None
+
+    def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs):
+        super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)  # type: ignore
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+from openai.types.images_response import ImagesResponse as OpenAIImageResponse
+
+
+class ImageResponse(OpenAIImageResponse):
+    _hidden_params: dict = {}
+    usage: Usage
+
+    def __init__(
+        self,
+        created: Optional[int] = None,
+        data: Optional[List[ImageObject]] = None,
+        response_ms=None,
+        usage: Optional[Usage] = None,
+        hidden_params: Optional[dict] = None,
+    ):
+        if response_ms:
+            _response_ms = response_ms
+        else:
+            _response_ms = None
+        if not data:
+            data = []
+
+        if not created:
+            created = int(time.time())
+
+        _data: List[OpenAIImage] = []
+        for d in data:
+            if isinstance(d, dict):
+                _data.append(ImageObject(**d))
+            elif isinstance(d, BaseModel):
+                _data.append(ImageObject(**d.model_dump()))
+        _usage = usage or Usage(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+        )
+        super().__init__(created=created, data=_data, usage=_usage)  # type: ignore
+        self._hidden_params = hidden_params or {}
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
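+# Illustrative sketch (not upstream code): ImageResponse coerces dicts or
+# BaseModels in `data` into ImageObject and defaults `usage` to an all-zero
+# Usage block; the URL below is hypothetical.
+#
+#   >>> img = ImageResponse(data=[{"url": "https://example.com/cat.png"}])
+#   >>> img.usage.total_tokens
+#   0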
+
+class TranscriptionResponse(OpenAIObject):
+    text: Optional[str] = None
+
+    _hidden_params: dict = {}
+    _response_headers: Optional[dict] = None
+
+    def __init__(self, text=None):
+        super().__init__(text=text)  # type: ignore
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def __setitem__(self, key, value):
+        # Allow dictionary-style assignment of attributes
+        setattr(self, key, value)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+class GenericImageParsingChunk(TypedDict):
+    type: str
+    media_type: str
+    data: str
+
+
+class ResponseFormatChunk(TypedDict, total=False):
+    type: Required[Literal["json_object", "text"]]
+    response_schema: dict
+
+
+class LoggedLiteLLMParams(TypedDict, total=False):
+    force_timeout: Optional[float]
+    custom_llm_provider: Optional[str]
+    api_base: Optional[str]
+    litellm_call_id: Optional[str]
+    model_alias_map: Optional[dict]
+    metadata: Optional[dict]
+    model_info: Optional[dict]
+    proxy_server_request: Optional[dict]
+    acompletion: Optional[bool]
+    preset_cache_key: Optional[str]
+    no_log: Optional[bool]
+    input_cost_per_second: Optional[float]
+    input_cost_per_token: Optional[float]
+    output_cost_per_token: Optional[float]
+    output_cost_per_second: Optional[float]
+    cooldown_time: Optional[float]
+
+
+class AdapterCompletionStreamWrapper:
+    def __init__(self, completion_stream):
+        self.completion_stream = completion_stream
+
+    def __iter__(self):
+        return self
+
+    def __aiter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            for chunk in self.completion_stream:
+                if chunk == "None" or chunk is None:
+                    raise Exception
+                return chunk
+            raise StopIteration
+        except StopIteration:
+            raise StopIteration
+        except Exception as e:
+            print(f"AdapterCompletionStreamWrapper - {e}")  # noqa
+
+    async def __anext__(self):
+        try:
+            async for chunk in self.completion_stream:
+                if chunk == "None" or chunk is None:
+                    raise Exception
+                return chunk
+            raise StopIteration
+        except StopIteration:
+            raise StopAsyncIteration
+
+
+class StandardLoggingUserAPIKeyMetadata(TypedDict):
+    user_api_key_hash: Optional[str]  # hash of the litellm virtual key used
+    user_api_key_alias: Optional[str]
+    user_api_key_org_id: Optional[str]
+    user_api_key_team_id: Optional[str]
+    user_api_key_user_id: Optional[str]
+    user_api_key_user_email: Optional[str]
+    user_api_key_team_alias: Optional[str]
+    user_api_key_end_user_id: Optional[str]
+
+
+class StandardLoggingPromptManagementMetadata(TypedDict):
+    prompt_id: str
+    prompt_variables: Optional[dict]
+    prompt_integration: str
+
+
+class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
+    """
+    Specific metadata k,v pairs logged to integration for easier cost tracking and prompt management
+    """
+
+    spend_logs_metadata: Optional[
+        dict
+    ]  # special param to log k,v pairs to spendlogs for a call
+    requester_ip_address: Optional[str]
+    requester_metadata: Optional[dict]
+    prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
+    applied_guardrails: Optional[List[str]]
+
+
+class StandardLoggingAdditionalHeaders(TypedDict, total=False):
+    x_ratelimit_limit_requests: int
+    x_ratelimit_limit_tokens: int
+    x_ratelimit_remaining_requests: int
+    x_ratelimit_remaining_tokens: int
+
+
+class StandardLoggingHiddenParams(TypedDict):
+    model_id: Optional[
+        str
+    ]  # id of the model in the router, separates multiple models with the same name but different credentials
+    cache_key: Optional[str]
+    api_base: Optional[str]
+    response_cost: Optional[str]
+    litellm_overhead_time_ms: Optional[float]
+    additional_headers: Optional[StandardLoggingAdditionalHeaders]
+    batch_models: Optional[List[str]]
+    litellm_model_name: Optional[str]  # the model name sent to the provider by litellm
+
+
+class StandardLoggingModelInformation(TypedDict):
+    model_map_key: str
+    model_map_value: Optional[ModelInfo]
+
+
+class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False):
+    """
+    Debug information, if cost tracking fails.
+
+    Avoid logging sensitive information like response or optional params
+    """
+
+    error_str: Required[str]
+    traceback_str: Required[str]
+    model: str
+    cache_hit: Optional[bool]
+    custom_llm_provider: Optional[str]
+    base_model: Optional[str]
+    call_type: str
+    custom_pricing: Optional[bool]
+
+
+class StandardLoggingPayloadErrorInformation(TypedDict, total=False):
+    error_code: Optional[str]
+    error_class: Optional[str]
+    llm_provider: Optional[str]
+    traceback: Optional[str]
+    error_message: Optional[str]
+
+
+class StandardLoggingGuardrailInformation(TypedDict, total=False):
+    guardrail_name: Optional[str]
+    guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]]
+    guardrail_response: Optional[Union[dict, str]]
+    guardrail_status: Literal["success", "failure"]
+
+
+StandardLoggingPayloadStatus = Literal["success", "failure"]
+
+
+class StandardLoggingPayload(TypedDict):
+    id: str
+    trace_id: str  # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
+    call_type: str
+    stream: Optional[bool]
+    response_cost: float
+    response_cost_failure_debug_info: Optional[
+        StandardLoggingModelCostFailureDebugInformation
+    ]
+    status: StandardLoggingPayloadStatus
+    custom_llm_provider: Optional[str]
+    total_tokens: int
+    prompt_tokens: int
+    completion_tokens: int
+    startTime: float  # Note: making this camelCase was a mistake, everything should be snake case
+    endTime: float
+    completionStartTime: float
+    response_time: float
+    model_map_information: StandardLoggingModelInformation
+    model: str
+    model_id: Optional[str]
+    model_group: Optional[str]
+    api_base: str
+    metadata: StandardLoggingMetadata
+    cache_hit: Optional[bool]
+    cache_key: Optional[str]
+    saved_cache_cost: float
+    request_tags: list
+    end_user: Optional[str]
+    requester_ip_address: Optional[str]
+    messages: Optional[Union[str, list, dict]]
+    response: Optional[Union[str, list, dict]]
+    error_str: Optional[str]
+    error_information: Optional[StandardLoggingPayloadErrorInformation]
+    model_parameters: dict
+    hidden_params: StandardLoggingHiddenParams
+    guardrail_information: Optional[StandardLoggingGuardrailInformation]
+
+
+from typing import AsyncIterator, Iterator
+
+
+class CustomStreamingDecoder:
+    async def aiter_bytes(
+        self, iterator: AsyncIterator[bytes]
+    ) -> AsyncIterator[
+        Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]
+    ]:
+        raise NotImplementedError
+
+    def iter_bytes(
+        self, iterator: Iterator[bytes]
+    ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]:
+        raise NotImplementedError
+
+
+class StandardPassThroughResponseObject(TypedDict):
+    response: str
+
+
+OPENAI_RESPONSE_HEADERS = [
+    "x-ratelimit-remaining-requests",
+    "x-ratelimit-remaining-tokens",
+    "x-ratelimit-limit-requests",
+    "x-ratelimit-limit-tokens",
+    "x-ratelimit-reset-requests",
+    "x-ratelimit-reset-tokens",
+]
+
+
+class StandardCallbackDynamicParams(TypedDict, total=False):
+    # Langfuse dynamic params
+    langfuse_public_key: Optional[str]
+    langfuse_secret: Optional[str]
+    langfuse_secret_key: Optional[str]
+    langfuse_host: Optional[str]
+
+    # GCS dynamic params
+    gcs_bucket_name: Optional[str]
+    gcs_path_service_account: Optional[str]
+
+    # Langsmith dynamic params
+    langsmith_api_key: Optional[str]
+    langsmith_project: Optional[str]
+    langsmith_base_url: Optional[str]
+
+    # Humanloop dynamic params
+    humanloop_api_key: Optional[str]
+
+    # Arize dynamic params
+    arize_api_key: Optional[str]
+    arize_space_key: Optional[str]
+
+    # Logging settings
+    turn_off_message_logging: Optional[bool]  # when true will not log messages
+
+
+all_litellm_params = [
+    "metadata",
+    "litellm_metadata",
+    "litellm_trace_id",
+    "tags",
+    "acompletion",
+    "aimg_generation",
+    "atext_completion",
+    "text_completion",
+    "caching",
+    "mock_response",
+    "mock_timeout",
+    "disable_add_transform_inline_image_block",
+    "api_key",
+    "api_version",
+    "prompt_id",
+    "provider_specific_header",
+    "prompt_variables",
+    "api_base",
+    "force_timeout",
+    "logger_fn",
+    "verbose",
+    "custom_llm_provider",
+    "litellm_logging_obj",
+    "litellm_call_id",
+    "use_client",
+    "id",
+    "fallbacks",
+    "azure",
+    "headers",
+    "model_list",
+    "num_retries",
+    "context_window_fallback_dict",
+    "retry_policy",
+    "retry_strategy",
+    "roles",
+    "final_prompt_value",
+    "bos_token",
+    "eos_token",
+    "request_timeout",
+    "complete_response",
+    "self",
+    "client",
+    "rpm",
+    "tpm",
+    "max_parallel_requests",
+    "input_cost_per_token",
+    "output_cost_per_token",
+    "input_cost_per_second",
+    "output_cost_per_second",
+    "hf_model_name",
+    "model_info",
+    "proxy_server_request",
+    "preset_cache_key",
+    "caching_groups",
+    "ttl",
+    "cache",
+    "no-log",
+    "base_model",
+    "stream_timeout",
+    "supports_system_message",
+    "region_name",
+    "allowed_model_region",
+    "model_config",
+    "fastest_response",
+    "cooldown_time",
+    "cache_key",
+    "max_retries",
+    "azure_ad_token_provider",
+    "tenant_id",
+    "client_id",
+    "azure_username",
+    "azure_password",
+    "client_secret",
+    "user_continue_message",
+    "configurable_clientside_auth_params",
+    "weight",
+    "ensure_alternating_roles",
+    "assistant_continue_message",
+    "user_continue_message",
+    "fallback_depth",
+    "max_fallbacks",
+    "max_budget",
+    "budget_duration",
+    "use_in_pass_through",
+    "merge_reasoning_content_in_choices",
+    "litellm_credential_name",
+] + list(StandardCallbackDynamicParams.__annotations__.keys())
+
+
+class KeyGenerationConfig(TypedDict, total=False):
+    required_params: List[
+        str
+    ]  # specify params that must be present in the key generation request
+
+
+class TeamUIKeyGenerationConfig(KeyGenerationConfig):
+    allowed_team_member_roles: List[str]
+
+
+class PersonalUIKeyGenerationConfig(KeyGenerationConfig):
+    allowed_user_roles: List[str]
+
+
+class StandardKeyGenerationConfig(TypedDict, total=False):
+    team_key_generation: TeamUIKeyGenerationConfig
+    personal_key_generation: PersonalUIKeyGenerationConfig
+
+
+class BudgetConfig(BaseModel):
+    max_budget: Optional[float] = None
+    budget_duration: Optional[str] = None
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
+
+    def __init__(self, **data: Any) -> None:
+        # Map time_period to budget_duration if present
+        if "time_period" in data:
+            data["budget_duration"] = data.pop("time_period")
+
+        # Map budget_limit to max_budget if present
+        if "budget_limit" in data:
+            data["max_budget"] = data.pop("budget_limit")
+
+        super().__init__(**data)
+
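+# Illustrative sketch (not upstream code): BudgetConfig also accepts the
+# alternate `time_period` / `budget_limit` keys and maps them onto the current
+# field names before validation.
+#
+#   >>> cfg = BudgetConfig(time_period="30d", budget_limit=100.0)
+#   >>> cfg.budget_duration, cfg.max_budget
+#   ('30d', 100.0)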
+
+GenericBudgetConfigType = Dict[str, BudgetConfig]
+
+
+class LlmProviders(str, Enum):
+    OPENAI = "openai"
+    OPENAI_LIKE = "openai_like"  # embedding only
+    JINA_AI = "jina_ai"
+    XAI = "xai"
+    CUSTOM_OPENAI = "custom_openai"
+    TEXT_COMPLETION_OPENAI = "text-completion-openai"
+    COHERE = "cohere"
+    COHERE_CHAT = "cohere_chat"
+    CLARIFAI = "clarifai"
+    ANTHROPIC = "anthropic"
+    ANTHROPIC_TEXT = "anthropic_text"
+    REPLICATE = "replicate"
+    HUGGINGFACE = "huggingface"
+    TOGETHER_AI = "together_ai"
+    OPENROUTER = "openrouter"
+    VERTEX_AI = "vertex_ai"
+    VERTEX_AI_BETA = "vertex_ai_beta"
+    GEMINI = "gemini"
+    AI21 = "ai21"
+    BASETEN = "baseten"
+    AZURE = "azure"
+    AZURE_TEXT = "azure_text"
+    AZURE_AI = "azure_ai"
+    SAGEMAKER = "sagemaker"
+    SAGEMAKER_CHAT = "sagemaker_chat"
+    BEDROCK = "bedrock"
+    VLLM = "vllm"
+    NLP_CLOUD = "nlp_cloud"
+    PETALS = "petals"
+    OOBABOOGA = "oobabooga"
+    OLLAMA = "ollama"
+    OLLAMA_CHAT = "ollama_chat"
+    DEEPINFRA = "deepinfra"
+    PERPLEXITY = "perplexity"
+    MISTRAL = "mistral"
+    GROQ = "groq"
+    NVIDIA_NIM = "nvidia_nim"
+    CEREBRAS = "cerebras"
+    AI21_CHAT = "ai21_chat"
+    VOLCENGINE = "volcengine"
+    CODESTRAL = "codestral"
+    TEXT_COMPLETION_CODESTRAL = "text-completion-codestral"
+    DEEPSEEK = "deepseek"
+    SAMBANOVA = "sambanova"
+    MARITALK = "maritalk"
+    VOYAGE = "voyage"
+    CLOUDFLARE = "cloudflare"
+    XINFERENCE = "xinference"
+    FIREWORKS_AI = "fireworks_ai"
+    FRIENDLIAI = "friendliai"
+    WATSONX = "watsonx"
+    WATSONX_TEXT = "watsonx_text"
+    TRITON = "triton"
+    PREDIBASE = "predibase"
+    DATABRICKS = "databricks"
+    EMPOWER = "empower"
+    GITHUB = "github"
+    CUSTOM = "custom"
+    LITELLM_PROXY = "litellm_proxy"
+    HOSTED_VLLM = "hosted_vllm"
+    LM_STUDIO = "lm_studio"
+    GALADRIEL = "galadriel"
+    INFINITY = "infinity"
+    DEEPGRAM = "deepgram"
+    AIOHTTP_OPENAI = "aiohttp_openai"
+    LANGFUSE = "langfuse"
+    HUMANLOOP = "humanloop"
+    TOPAZ = "topaz"
+    ASSEMBLYAI = "assemblyai"
+    SNOWFLAKE = "snowflake"
+
+
+# Create a set of all provider values for quick lookup
+LlmProvidersSet = {provider.value for provider in LlmProviders}
+
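+# Illustrative sketch (not upstream code): the enum carries the canonical
+# provider string, and LlmProvidersSet allows O(1) membership checks against a
+# raw provider name.
+#
+#   >>> LlmProviders.ANTHROPIC.value
+#   'anthropic'
+#   >>> "openai" in LlmProvidersSet
+#   True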
+
+class LiteLLMLoggingBaseClass:
+    """
+    Base class for logging pre and post call
+
+    Meant to simplify type checking for logging obj.
+    """
+
+    def pre_call(self, input, api_key, model=None, additional_args={}):
+        pass
+
+    def post_call(
+        self, original_response, input=None, api_key=None, additional_args={}
+    ):
+        pass
+
+
+class CustomHuggingfaceTokenizer(TypedDict):
+    identifier: str
+    revision: str  # usually 'main'
+    auth_token: Optional[str]
+
+
+class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum):
+    """
+    Try using an enum for endpoints. This should make it easier to track what provider is supported for what endpoint.
+    """
+
+    OPENAI = LlmProviders.OPENAI.value
+    TOPAZ = LlmProviders.TOPAZ.value
+
+
+class HttpHandlerRequestFields(TypedDict, total=False):
+    data: dict  # request body
+    params: dict  # query params
+    files: dict  # file uploads
+    content: Any  # raw content
+
+
+class ProviderSpecificHeader(TypedDict):
+    custom_llm_provider: str
+    extra_headers: dict
+
+
+class SelectTokenizerResponse(TypedDict):
+    type: Literal["openai_tokenizer", "huggingface_tokenizer"]
+    tokenizer: Any
+
+
+class LiteLLMBatch(Batch):
+    _hidden_params: dict = {}
+    usage: Optional[Usage] = None
+
+    def __contains__(self, key):
+        # Define custom behavior for the 'in' operator
+        return hasattr(self, key)
+
+    def get(self, key, default=None):
+        # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+        return getattr(self, key, default)
+
+    def __getitem__(self, key):
+        # Allow dictionary-style access to attributes
+        return getattr(self, key)
+
+    def json(self, **kwargs):  # type: ignore
+        try:
+            return self.model_dump()  # noqa
+        except Exception:
+            # if using pydantic v1
+            return self.dict()
+
+
+class RawRequestTypedDict(TypedDict, total=False):
+    raw_request_api_base: Optional[str]
+    raw_request_body: Optional[dict]
+    raw_request_headers: Optional[dict]
+    error: Optional[str]
+
+
+class CredentialBase(BaseModel):
+    credential_name: str
+    credential_info: dict
+
+
+class CredentialItem(CredentialBase):
+    credential_values: dict
+
+
+class CreateCredentialItem(CredentialBase):
+    credential_values: Optional[dict] = None
+    model_id: Optional[str] = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_credential_params(cls, values):
+        if not values.get("credential_values") and not values.get("model_id"):
+            raise ValueError("Either credential_values or model_id must be set")
+        return values
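+
+
+# Illustrative sketch (not upstream code): the validator requires at least one
+# of `credential_values` / `model_id`; the credential name is hypothetical.
+#
+#   # raises a pydantic ValidationError, since neither credential_values nor
+#   # model_id is set:
+#   >>> CreateCredentialItem(credential_name="db", credential_info={})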