author    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/types
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-master.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/types')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/adapter.py                              10
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/caching.py                              78
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/completion.py                          193
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/embedding.py                            21
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/files.py                               283
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py                           5
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/guardrails.py                          168
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py                 21
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py                   15
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py            9
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py        6
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py                 29
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py         54
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py              28
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py                 7
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py               61
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py               62
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py             294
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py         186
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py                      366
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py                        17
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py                        503
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py                          46
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py                     24
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py                      10
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py                      21
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py                         12
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py                          29
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/openai.py                        1040
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py                          19
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py                      486
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py                         33
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py      20
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/rerank.py                               78
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/router.py                              707
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/services.py                             39
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/types/utils.py                              2081
37 files changed, 7061 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/adapter.py b/.venv/lib/python3.12/site-packages/litellm/types/adapter.py
new file mode 100644
index 00000000..2995cfbc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/adapter.py
@@ -0,0 +1,10 @@
+from typing import List
+
+from typing_extensions import Dict, Required, TypedDict, override
+
+from litellm.integrations.custom_logger import CustomLogger
+
+
+class AdapterItem(TypedDict):
+ id: str
+ adapter: CustomLogger
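
AdapterItem simply pairs a string id with a CustomLogger instance. A minimal usage sketch, where MyAdapter is a hypothetical subclass and the id value is illustrative:

from litellm.integrations.custom_logger import CustomLogger
from litellm.types.adapter import AdapterItem


class MyAdapter(CustomLogger):
    # Hypothetical adapter; a real adapter would override whichever
    # request/response translation hooks it needs.
    pass


item: AdapterItem = {"id": "my-adapter", "adapter": MyAdapter()}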
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/caching.py b/.venv/lib/python3.12/site-packages/litellm/types/caching.py
new file mode 100644
index 00000000..c15d4d15
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/caching.py
@@ -0,0 +1,78 @@
+from enum import Enum
+from typing import Any, Dict, Literal, Optional, TypedDict, Union
+
+from pydantic import BaseModel
+
+
+class LiteLLMCacheType(str, Enum):
+ LOCAL = "local"
+ REDIS = "redis"
+ REDIS_SEMANTIC = "redis-semantic"
+ S3 = "s3"
+ DISK = "disk"
+ QDRANT_SEMANTIC = "qdrant-semantic"
+
+
+CachingSupportedCallTypes = Literal[
+ "completion",
+ "acompletion",
+ "embedding",
+ "aembedding",
+ "atranscription",
+ "transcription",
+ "atext_completion",
+ "text_completion",
+ "arerank",
+ "rerank",
+]
+
+
+class RedisPipelineIncrementOperation(TypedDict):
+ """
+ TypedDict for a single Redis pipeline increment operation
+ """
+
+ key: str
+ increment_value: float
+ ttl: Optional[int]
+
+
+DynamicCacheControl = TypedDict(
+ "DynamicCacheControl",
+ {
+ # Will cache the response for the user-defined amount of time (in seconds).
+ "ttl": Optional[int],
+ # Namespace to use for caching
+ "namespace": Optional[str],
+ # Max Age to use for caching
+ "s-maxage": Optional[int],
+ "s-max-age": Optional[int],
+ # Will not return a cached response, but instead call the actual endpoint.
+ "no-cache": Optional[bool],
+ # Will not store the response in the cache.
+ "no-store": Optional[bool],
+ },
+)
+
+
+class CachePingResponse(BaseModel):
+ status: str
+ cache_type: str
+ ping_response: Optional[bool] = None
+ set_cache_response: Optional[str] = None
+ litellm_cache_params: Optional[str] = None
+
+ # intentionally a dict, since we run masker.mask_dict() on HealthCheckCacheParams
+ health_check_cache_params: Optional[dict] = None
+
+
+class HealthCheckCacheParams(BaseModel):
+ """
+ Cache Params returned on /cache/ping call
+ """
+
+ host: Optional[str] = None
+ port: Optional[Union[str, int]] = None
+ redis_kwargs: Optional[Dict[str, Any]] = None
+ namespace: Optional[str] = None
+ redis_version: Optional[Union[str, int, float]] = None
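
DynamicCacheControl uses the functional TypedDict form because several of its keys are hyphenated. A minimal sketch of constructing it alongside the cache-type enum (all values are illustrative):

from litellm.types.caching import DynamicCacheControl, LiteLLMCacheType

# Hyphenated keys force the dict-literal form; every key is declared Optional,
# so unused directives can be passed as None.
cache_controls: DynamicCacheControl = {
    "ttl": 300,            # cache the response for 5 minutes
    "namespace": "team-a",
    "s-maxage": None,
    "s-max-age": None,
    "no-cache": None,
    "no-store": None,
}

# The cache-type enum members are plain strings.
assert LiteLLMCacheType.REDIS_SEMANTIC.value == "redis-semantic"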
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/completion.py b/.venv/lib/python3.12/site-packages/litellm/types/completion.py
new file mode 100644
index 00000000..b06bb733
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/completion.py
@@ -0,0 +1,193 @@
+from typing import Iterable, List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict
+from typing_extensions import Literal, Required, TypedDict
+
+
+class ChatCompletionSystemMessageParam(TypedDict, total=False):
+ content: Required[str]
+ """The contents of the system message."""
+
+ role: Required[Literal["system"]]
+ """The role of the messages author, in this case `system`."""
+
+ name: str
+ """An optional name for the participant.
+
+ Provides the model information to differentiate between participants of the same
+ role.
+ """
+
+
+class ChatCompletionContentPartTextParam(TypedDict, total=False):
+ text: Required[str]
+ """The text content."""
+
+ type: Required[Literal["text"]]
+ """The type of the content part."""
+
+
+class ImageURL(TypedDict, total=False):
+ url: Required[str]
+ """Either a URL of the image or the base64 encoded image data."""
+
+ detail: Literal["auto", "low", "high"]
+ """Specifies the detail level of the image.
+
+ Learn more in the
+ [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding).
+ """
+
+
+class ChatCompletionContentPartImageParam(TypedDict, total=False):
+ image_url: Required[ImageURL]
+
+ type: Required[Literal["image_url"]]
+ """The type of the content part."""
+
+
+ChatCompletionContentPartParam = Union[
+ ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam
+]
+
+
+class ChatCompletionUserMessageParam(TypedDict, total=False):
+ content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
+ """The contents of the user message."""
+
+ role: Required[Literal["user"]]
+ """The role of the messages author, in this case `user`."""
+
+ name: str
+ """An optional name for the participant.
+
+ Provides the model information to differentiate between participants of the same
+ role.
+ """
+
+
+class FunctionCall(TypedDict, total=False):
+ arguments: Required[str]
+ """
+ The arguments to call the function with, as generated by the model in JSON
+ format. Note that the model does not always generate valid JSON, and may
+ hallucinate parameters not defined by your function schema. Validate the
+ arguments in your code before calling your function.
+ """
+
+ name: Required[str]
+ """The name of the function to call."""
+
+
+class Function(TypedDict, total=False):
+ arguments: Required[str]
+ """
+ The arguments to call the function with, as generated by the model in JSON
+ format. Note that the model does not always generate valid JSON, and may
+ hallucinate parameters not defined by your function schema. Validate the
+ arguments in your code before calling your function.
+ """
+
+ name: Required[str]
+ """The name of the function to call."""
+
+
+class ChatCompletionToolMessageParam(TypedDict, total=False):
+ content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
+ """The contents of the tool message."""
+
+ role: Required[Literal["tool"]]
+ """The role of the messages author, in this case `tool`."""
+
+ tool_call_id: Required[str]
+ """Tool call that this message is responding to."""
+
+
+class ChatCompletionFunctionMessageParam(TypedDict, total=False):
+ content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
+ """The contents of the function message."""
+
+ name: Required[str]
+ """The name of the function to call."""
+
+ role: Required[Literal["function"]]
+ """The role of the messages author, in this case `function`."""
+
+
+class ChatCompletionMessageToolCallParam(TypedDict, total=False):
+ id: Required[str]
+ """The ID of the tool call."""
+
+ function: Required[Function]
+ """The function that the model called."""
+
+ type: Required[Literal["function"]]
+ """The type of the tool. Currently, only `function` is supported."""
+
+
+class ChatCompletionAssistantMessageParam(TypedDict, total=False):
+ role: Required[Literal["assistant"]]
+ """The role of the messages author, in this case `assistant`."""
+
+ content: Optional[str]
+ """The contents of the assistant message.
+
+ Required unless `tool_calls` or `function_call` is specified.
+ """
+
+ function_call: FunctionCall
+ """Deprecated and replaced by `tool_calls`.
+
+ The name and arguments of a function that should be called, as generated by the
+ model.
+ """
+
+ name: str
+ """An optional name for the participant.
+
+ Provides the model information to differentiate between participants of the same
+ role.
+ """
+
+ tool_calls: Iterable[ChatCompletionMessageToolCallParam]
+ """The tool calls generated by the model, such as function calls."""
+
+
+ChatCompletionMessageParam = Union[
+ ChatCompletionSystemMessageParam,
+ ChatCompletionUserMessageParam,
+ ChatCompletionAssistantMessageParam,
+ ChatCompletionFunctionMessageParam,
+ ChatCompletionToolMessageParam,
+]
+
+
+class CompletionRequest(BaseModel):
+ model: str
+ messages: List[str] = []
+ timeout: Optional[Union[float, int]] = None
+ temperature: Optional[float] = None
+ top_p: Optional[float] = None
+ n: Optional[int] = None
+ stream: Optional[bool] = None
+ stop: Optional[dict] = None
+ max_tokens: Optional[int] = None
+ presence_penalty: Optional[float] = None
+ frequency_penalty: Optional[float] = None
+ logit_bias: Optional[dict] = None
+ user: Optional[str] = None
+ response_format: Optional[dict] = None
+ seed: Optional[int] = None
+ tools: Optional[List[str]] = None
+ tool_choice: Optional[str] = None
+ logprobs: Optional[bool] = None
+ top_logprobs: Optional[int] = None
+ deployment_id: Optional[str] = None
+ functions: Optional[List[str]] = None
+ function_call: Optional[str] = None
+ base_url: Optional[str] = None
+ api_version: Optional[str] = None
+ api_key: Optional[str] = None
+ model_list: Optional[List[str]] = None
+
+ model_config = ConfigDict(protected_namespaces=(), extra="allow")
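
The message TypedDicts above mirror the OpenAI chat format, so request payloads are plain dicts. A minimal sketch of a system message plus a multimodal user message (the URL and wording are illustrative):

from litellm.types.completion import (
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)

system_msg: ChatCompletionSystemMessageParam = {
    "role": "system",
    "content": "You are a terse assistant.",
}

# content may be a plain string or an iterable of text/image parts
user_msg: ChatCompletionUserMessageParam = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/cat.png", "detail": "low"},
        },
    ],
}

messages = [system_msg, user_msg]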
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/embedding.py b/.venv/lib/python3.12/site-packages/litellm/types/embedding.py
new file mode 100644
index 00000000..f8fdebc5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/embedding.py
@@ -0,0 +1,21 @@
+from typing import List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict
+
+
+class EmbeddingRequest(BaseModel):
+ model: str
+ input: List[str] = []
+ timeout: int = 600
+ api_base: Optional[str] = None
+ api_version: Optional[str] = None
+ api_key: Optional[str] = None
+ api_type: Optional[str] = None
+ caching: bool = False
+ user: Optional[str] = None
+ custom_llm_provider: Optional[Union[str, dict]] = None
+ litellm_call_id: Optional[str] = None
+ litellm_logging_obj: Optional[dict] = None
+ logger_fn: Optional[str] = None
+
+ model_config = ConfigDict(extra="allow")
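
EmbeddingRequest is a pydantic model with extra="allow", so undeclared provider-specific fields pass through. A minimal sketch; encoding_format is an assumed extra field, not one declared above:

from litellm.types.embedding import EmbeddingRequest

req = EmbeddingRequest(
    model="text-embedding-3-small",
    input=["hello world"],
    timeout=60,
    caching=True,
    encoding_format="float",  # accepted only because extra="allow"
)
print(req.model_dump(exclude_none=True))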
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/files.py b/.venv/lib/python3.12/site-packages/litellm/types/files.py
new file mode 100644
index 00000000..600ad806
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/files.py
@@ -0,0 +1,283 @@
+from enum import Enum
+from types import MappingProxyType
+from typing import List, Set, Mapping
+
+"""
+Base Enums/Consts
+"""
+
+
+class FileType(Enum):
+ AAC = "AAC"
+ CSV = "CSV"
+ DOC = "DOC"
+ DOCX = "DOCX"
+ FLAC = "FLAC"
+ FLV = "FLV"
+ GIF = "GIF"
+ GOOGLE_DOC = "GOOGLE_DOC"
+ GOOGLE_DRAWINGS = "GOOGLE_DRAWINGS"
+ GOOGLE_SHEETS = "GOOGLE_SHEETS"
+ GOOGLE_SLIDES = "GOOGLE_SLIDES"
+ HEIC = "HEIC"
+ HEIF = "HEIF"
+ HTML = "HTML"
+ JPEG = "JPEG"
+ JSON = "JSON"
+ M4A = "M4A"
+ M4V = "M4V"
+ MOV = "MOV"
+ MP3 = "MP3"
+ MP4 = "MP4"
+ MPEG = "MPEG"
+ MPEGPS = "MPEGPS"
+ MPG = "MPG"
+ MPA = "MPA"
+ MPGA = "MPGA"
+ OGG = "OGG"
+ OPUS = "OPUS"
+ PDF = "PDF"
+ PCM = "PCM"
+ PNG = "PNG"
+ PPT = "PPT"
+ PPTX = "PPTX"
+ RTF = "RTF"
+ THREE_GPP = "3GPP"
+ TXT = "TXT"
+ WAV = "WAV"
+ WEBM = "WEBM"
+ WEBP = "WEBP"
+ WMV = "WMV"
+ XLS = "XLS"
+ XLSX = "XLSX"
+
+
+FILE_EXTENSIONS: Mapping[FileType, List[str]] = MappingProxyType(
+ {
+ FileType.AAC: ["aac"],
+ FileType.CSV: ["csv"],
+ FileType.DOC: ["doc"],
+ FileType.DOCX: ["docx"],
+ FileType.FLAC: ["flac"],
+ FileType.FLV: ["flv"],
+ FileType.GIF: ["gif"],
+ FileType.GOOGLE_DOC: ["gdoc"],
+ FileType.GOOGLE_DRAWINGS: ["gdraw"],
+ FileType.GOOGLE_SHEETS: ["gsheet"],
+ FileType.GOOGLE_SLIDES: ["gslides"],
+ FileType.HEIC: ["heic"],
+ FileType.HEIF: ["heif"],
+ FileType.HTML: ["html", "htm"],
+ FileType.JPEG: ["jpeg", "jpg"],
+ FileType.JSON: ["json"],
+ FileType.M4A: ["m4a"],
+ FileType.M4V: ["m4v"],
+ FileType.MOV: ["mov"],
+ FileType.MP3: ["mp3"],
+ FileType.MP4: ["mp4"],
+ FileType.MPEG: ["mpeg"],
+ FileType.MPEGPS: ["mpegps"],
+ FileType.MPG: ["mpg"],
+ FileType.MPA: ["mpa"],
+ FileType.MPGA: ["mpga"],
+ FileType.OGG: ["ogg"],
+ FileType.OPUS: ["opus"],
+ FileType.PDF: ["pdf"],
+ FileType.PCM: ["pcm"],
+ FileType.PNG: ["png"],
+ FileType.PPT: ["ppt"],
+ FileType.PPTX: ["pptx"],
+ FileType.RTF: ["rtf"],
+ FileType.THREE_GPP: ["3gpp"],
+ FileType.TXT: ["txt"],
+ FileType.WAV: ["wav"],
+ FileType.WEBM: ["webm"],
+ FileType.WEBP: ["webp"],
+ FileType.WMV: ["wmv"],
+ FileType.XLS: ["xls"],
+ FileType.XLSX: ["xlsx"],
+ }
+)
+
+FILE_MIME_TYPES: Mapping[FileType, str] = MappingProxyType(
+ {
+ FileType.AAC: "audio/aac",
+ FileType.CSV: "text/csv",
+ FileType.DOC: "application/msword",
+ FileType.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ FileType.FLAC: "audio/flac",
+ FileType.FLV: "video/x-flv",
+ FileType.GIF: "image/gif",
+ FileType.GOOGLE_DOC: "application/vnd.google-apps.document",
+ FileType.GOOGLE_DRAWINGS: "application/vnd.google-apps.drawing",
+ FileType.GOOGLE_SHEETS: "application/vnd.google-apps.spreadsheet",
+ FileType.GOOGLE_SLIDES: "application/vnd.google-apps.presentation",
+ FileType.HEIC: "image/heic",
+ FileType.HEIF: "image/heif",
+ FileType.HTML: "text/html",
+ FileType.JPEG: "image/jpeg",
+ FileType.JSON: "application/json",
+ FileType.M4A: "audio/x-m4a",
+ FileType.M4V: "video/x-m4v",
+ FileType.MOV: "video/quicktime",
+ FileType.MP3: "audio/mpeg",
+ FileType.MP4: "video/mp4",
+ FileType.MPEG: "video/mpeg",
+ FileType.MPEGPS: "video/mpegps",
+ FileType.MPG: "video/mpg",
+ FileType.MPA: "audio/m4a",
+ FileType.MPGA: "audio/mpga",
+ FileType.OGG: "audio/ogg",
+ FileType.OPUS: "audio/opus",
+ FileType.PDF: "application/pdf",
+ FileType.PCM: "audio/pcm",
+ FileType.PNG: "image/png",
+ FileType.PPT: "application/vnd.ms-powerpoint",
+ FileType.PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+ FileType.RTF: "application/rtf",
+ FileType.THREE_GPP: "video/3gpp",
+ FileType.TXT: "text/plain",
+ FileType.WAV: "audio/wav",
+ FileType.WEBM: "video/webm",
+ FileType.WEBP: "image/webp",
+ FileType.WMV: "video/wmv",
+ FileType.XLS: "application/vnd.ms-excel",
+ FileType.XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ }
+)
+
+"""
+Util Functions
+"""
+
+
+def get_file_extension_from_mime_type(mime_type: str) -> str:
+ for file_type, mime in FILE_MIME_TYPES.items():
+ if mime.lower() == mime_type.lower():
+ return FILE_EXTENSIONS[file_type][0]
+ raise ValueError(f"Unknown extension for mime type: {mime_type}")
+
+
+def get_file_type_from_extension(extension: str) -> FileType:
+ for file_type, extensions in FILE_EXTENSIONS.items():
+ if extension.lower() in extensions:
+ return file_type
+
+ raise ValueError(f"Unknown file type for extension: {extension}")
+
+
+def get_file_extension_for_file_type(file_type: FileType) -> str:
+ return FILE_EXTENSIONS[file_type][0]
+
+
+def get_file_mime_type_for_file_type(file_type: FileType) -> str:
+ return FILE_MIME_TYPES[file_type]
+
+
+def get_file_mime_type_from_extension(extension: str) -> str:
+ file_type = get_file_type_from_extension(extension)
+ return get_file_mime_type_for_file_type(file_type)
+
+
+"""
+FileType Type Groupings (Videos, Images, etc)
+"""
+
+# Images
+IMAGE_FILE_TYPES = {
+ FileType.PNG,
+ FileType.JPEG,
+ FileType.GIF,
+ FileType.WEBP,
+ FileType.HEIC,
+ FileType.HEIF,
+}
+
+
+def is_image_file_type(file_type):
+ return file_type in IMAGE_FILE_TYPES
+
+
+# Videos
+VIDEO_FILE_TYPES = {
+ FileType.MOV,
+ FileType.MP4,
+ FileType.MPEG,
+ FileType.M4V,
+ FileType.FLV,
+ FileType.MPEGPS,
+ FileType.MPG,
+ FileType.WEBM,
+ FileType.WMV,
+ FileType.THREE_GPP,
+}
+
+
+def is_video_file_type(file_type):
+ return file_type in VIDEO_FILE_TYPES
+
+
+# Audio
+AUDIO_FILE_TYPES = {
+ FileType.AAC,
+ FileType.FLAC,
+ FileType.MP3,
+ FileType.MPA,
+ FileType.MPGA,
+ FileType.OPUS,
+ FileType.PCM,
+ FileType.WAV,
+}
+
+
+def is_audio_file_type(file_type):
+ return file_type in AUDIO_FILE_TYPES
+
+
+# Text
+TEXT_FILE_TYPES = {FileType.CSV, FileType.HTML, FileType.RTF, FileType.TXT}
+
+
+def is_text_file_type(file_type):
+ return file_type in TEXT_FILE_TYPES
+
+
+"""
+Other FileType Groupings
+"""
+# Accepted file types for GEMINI 1.5 through Vertex AI
+# https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-images-nodejs
+GEMINI_1_5_ACCEPTED_FILE_TYPES: Set[FileType] = {
+ # Image
+ FileType.PNG,
+ FileType.JPEG,
+ FileType.WEBP,
+ # Audio
+ FileType.AAC,
+ FileType.FLAC,
+ FileType.MP3,
+ FileType.MPA,
+ FileType.MPEG,
+ FileType.MPGA,
+ FileType.OPUS,
+ FileType.PCM,
+ FileType.WAV,
+ FileType.WEBM,
+ # Video
+ FileType.FLV,
+ FileType.MOV,
+ FileType.MPEG,
+ FileType.MPEGPS,
+ FileType.MPG,
+ FileType.MP4,
+ FileType.WEBM,
+ FileType.WMV,
+ FileType.THREE_GPP,
+ # PDF
+ FileType.PDF,
+ FileType.TXT,
+}
+
+
+def is_gemini_1_5_accepted_file_type(file_type: FileType) -> bool:
+ return file_type in GEMINI_1_5_ACCEPTED_FILE_TYPES
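
A short usage sketch of the lookup helpers above; extension matching is case-insensitive and unknown extensions raise ValueError:

from litellm.types.files import (
    FileType,
    get_file_mime_type_from_extension,
    get_file_type_from_extension,
    is_gemini_1_5_accepted_file_type,
)

ft = get_file_type_from_extension("jpg")          # FileType.JPEG
mime = get_file_mime_type_from_extension("JPG")   # "image/jpeg"
assert ft is FileType.JPEG and mime == "image/jpeg"
assert is_gemini_1_5_accepted_file_type(FileType.PDF)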
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py b/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py
new file mode 100644
index 00000000..af99d88c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py
@@ -0,0 +1,5 @@
+from openai.types.fine_tuning.fine_tuning_job import Hyperparameters
+
+
+class OpenAIFineTuningHyperparameters(Hyperparameters):
+ model_config = {"extra": "allow"}
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py b/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py
new file mode 100644
index 00000000..b7018fe2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py
@@ -0,0 +1,168 @@
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
+
+from pydantic import BaseModel, ConfigDict, Field, SecretStr
+from typing_extensions import Required, TypedDict
+
+"""
+Pydantic object defining how to set guardrails on litellm proxy
+
+guardrails:
+ - guardrail_name: "bedrock-pre-guard"
+ litellm_params:
+ guardrail: bedrock # supported values: "aporia", "bedrock", "lakera"
+ mode: "during_call"
+ guardrailIdentifier: ff6ujrregl1q
+ guardrailVersion: "DRAFT"
+ default_on: true
+"""
+
+
+class SupportedGuardrailIntegrations(Enum):
+ APORIA = "aporia"
+ BEDROCK = "bedrock"
+ GURDRAILS_AI = "guardrails_ai"
+ LAKERA = "lakera"
+ PRESIDIO = "presidio"
+ HIDE_SECRETS = "hide-secrets"
+ AIM = "aim"
+
+
+class Role(Enum):
+ SYSTEM = "system"
+ ASSISTANT = "assistant"
+ USER = "user"
+
+
+default_roles = [Role.SYSTEM, Role.ASSISTANT, Role.USER]
+
+
+class GuardrailItemSpec(TypedDict, total=False):
+ callbacks: Required[List[str]]
+ default_on: bool
+ logging_only: Optional[bool]
+ enabled_roles: Optional[List[Role]]
+ callback_args: Dict[str, Dict]
+
+
+class GuardrailItem(BaseModel):
+ callbacks: List[str]
+ default_on: bool
+ logging_only: Optional[bool]
+ guardrail_name: str
+ callback_args: Dict[str, Dict]
+ enabled_roles: Optional[List[Role]]
+
+ model_config = ConfigDict(use_enum_values=True)
+
+ def __init__(
+ self,
+ callbacks: List[str],
+ guardrail_name: str,
+ default_on: bool = False,
+ logging_only: Optional[bool] = None,
+ enabled_roles: Optional[List[Role]] = default_roles,
+ callback_args: Dict[str, Dict] = {},
+ ):
+ super().__init__(
+ callbacks=callbacks,
+ default_on=default_on,
+ logging_only=logging_only,
+ guardrail_name=guardrail_name,
+ enabled_roles=enabled_roles,
+ callback_args=callback_args,
+ )
+
+
+# Define the TypedDicts
+class LakeraCategoryThresholds(TypedDict, total=False):
+ prompt_injection: float
+ jailbreak: float
+
+
+class LitellmParams(TypedDict):
+ guardrail: str
+ mode: str
+ api_key: Optional[str]
+ api_base: Optional[str]
+
+ # Lakera specific params
+ category_thresholds: Optional[LakeraCategoryThresholds]
+
+ # Bedrock specific params
+ guardrailIdentifier: Optional[str]
+ guardrailVersion: Optional[str]
+
+ # Presidio params
+ output_parse_pii: Optional[bool]
+ presidio_ad_hoc_recognizers: Optional[str]
+ mock_redacted_text: Optional[dict]
+
+ # hide secrets params
+ detect_secrets_config: Optional[dict]
+
+ # guardrails ai params
+ guard_name: Optional[str]
+ default_on: Optional[bool]
+
+
+class Guardrail(TypedDict, total=False):
+ guardrail_name: str
+ litellm_params: LitellmParams
+ guardrail_info: Optional[Dict]
+
+
+class guardrailConfig(TypedDict):
+ guardrails: List[Guardrail]
+
+
+class GuardrailEventHooks(str, Enum):
+ pre_call = "pre_call"
+ post_call = "post_call"
+ during_call = "during_call"
+ logging_only = "logging_only"
+
+
+class BedrockTextContent(TypedDict, total=False):
+ text: str
+
+
+class BedrockContentItem(TypedDict, total=False):
+ text: BedrockTextContent
+
+
+class BedrockRequest(TypedDict, total=False):
+ source: Literal["INPUT", "OUTPUT"]
+ content: List[BedrockContentItem]
+
+
+class DynamicGuardrailParams(TypedDict):
+ extra_body: Dict[str, Any]
+
+
+class GuardrailLiteLLMParamsResponse(BaseModel):
+ """The returned LiteLLM Params object for /guardrails/list"""
+
+ guardrail: str
+ mode: Union[str, List[str]]
+ default_on: bool = Field(default=False)
+
+ def __init__(self, **kwargs):
+ default_on = kwargs.get("default_on")
+ if default_on is None:
+ default_on = False
+
+ super().__init__(**kwargs)
+
+
+class GuardrailInfoResponse(BaseModel):
+ guardrail_name: str
+ litellm_params: GuardrailLiteLLMParamsResponse
+ guardrail_info: Optional[Dict]
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+
+class ListGuardrailsResponse(BaseModel):
+ guardrails: List[GuardrailInfoResponse]
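
Guardrail and LitellmParams mirror the YAML block in the module docstring. A minimal sketch of the same bedrock guardrail expressed as dicts; the identifier is copied from the docstring example, and the nested dict is partial at runtime even though LitellmParams declares every key:

from litellm.types.guardrails import Guardrail, GuardrailEventHooks

bedrock_guardrail: Guardrail = {
    "guardrail_name": "bedrock-pre-guard",
    "litellm_params": {  # partial dict; a strict type checker expects all LitellmParams keys
        "guardrail": "bedrock",
        "mode": GuardrailEventHooks.during_call.value,
        "guardrailIdentifier": "ff6ujrregl1q",
        "guardrailVersion": "DRAFT",
        "default_on": True,
    },
}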
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py
new file mode 100644
index 00000000..6c0de762
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py
@@ -0,0 +1,21 @@
+import os
+from datetime import datetime as dt
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Set, TypedDict
+
+
+class ArgillaItem(TypedDict):
+ fields: Dict[str, Any]
+
+
+class ArgillaPayload(TypedDict):
+ items: List[ArgillaItem]
+
+
+class ArgillaCredentialsObject(TypedDict):
+ ARGILLA_API_KEY: str
+ ARGILLA_DATASET_NAME: str
+ ARGILLA_BASE_URL: str
+
+
+SUPPORTED_PAYLOAD_FIELDS = ["messages", "response"]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py
new file mode 100644
index 00000000..e1ec1755
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py
@@ -0,0 +1,15 @@
+from typing import TYPE_CHECKING, Any, Literal, Optional
+
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+ Protocol = Literal["otlp_grpc", "otlp_http"]
+else:
+ Protocol = Any
+
+
+class ArizeConfig(BaseModel):
+ space_key: Optional[str] = None
+ api_key: Optional[str] = None
+ protocol: Protocol
+ endpoint: str
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py
new file mode 100644
index 00000000..4566022d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py
@@ -0,0 +1,9 @@
+from typing import TYPE_CHECKING, Literal, Optional
+
+from pydantic import BaseModel
+from .arize import Protocol
+
+class ArizePhoenixConfig(BaseModel):
+ otlp_auth_headers: Optional[str] = None
+ protocol: Protocol
+ endpoint: str
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py
new file mode 100644
index 00000000..b69529d1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py
@@ -0,0 +1,6 @@
+from typing import Literal, Optional, TypedDict
+
+
+class IntegrationHealthCheckStatus(TypedDict):
+ status: Literal["healthy", "unhealthy"]
+ error_message: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py
new file mode 100644
index 00000000..79d4eded
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py
@@ -0,0 +1,29 @@
+from enum import Enum
+from typing import Optional, TypedDict
+
+
+class DataDogStatus(str, Enum):
+ INFO = "info"
+ WARN = "warning"
+ ERROR = "error"
+
+
+class DatadogPayload(TypedDict, total=False):
+ ddsource: str
+ ddtags: str
+ hostname: str
+ message: str
+ service: str
+ status: str
+
+
+class DD_ERRORS(Enum):
+ DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`"
+
+
+class DatadogProxyFailureHookJsonMessage(TypedDict, total=False):
+ exception: str
+ error_class: str
+ status_code: Optional[int]
+ traceback: str
+ user_api_key_dict: dict
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py
new file mode 100644
index 00000000..9298b157
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py
@@ -0,0 +1,54 @@
+"""
+Payloads for Datadog LLM Observability Service (LLMObs)
+
+API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=example#api-standards
+"""
+
+from typing import Any, Dict, List, Literal, Optional, TypedDict
+
+
+class InputMeta(TypedDict):
+ messages: List[Any]
+
+
+class OutputMeta(TypedDict):
+ messages: List[Any]
+
+
+class Meta(TypedDict):
+ # The span kind: "agent", "workflow", "llm", "tool", "task", "embedding", or "retrieval".
+ kind: Literal["llm", "tool", "task", "embedding", "retrieval"]
+ input: InputMeta # The span’s input information.
+ output: OutputMeta # The span’s output information.
+ metadata: Dict[str, Any]
+
+
+class LLMMetrics(TypedDict, total=False):
+ input_tokens: float
+ output_tokens: float
+ total_tokens: float
+ time_to_first_token: float
+ time_per_output_token: float
+
+
+class LLMObsPayload(TypedDict):
+ parent_id: str
+ trace_id: str
+ span_id: str
+ name: str
+ meta: Meta
+ start_ns: int
+ duration: int
+ metrics: LLMMetrics
+ tags: List
+
+
+class DDSpanAttributes(TypedDict):
+ ml_app: str
+ tags: List[str]
+ spans: List[LLMObsPayload]
+
+
+class DDIntakePayload(TypedDict):
+ type: str
+ attributes: DDSpanAttributes
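
A minimal sketch of assembling an intake payload from these TypedDicts; the ids, tag values, and the top-level "span" type string are illustrative assumptions, not taken from this file:

import time

from litellm.types.integrations.datadog_llm_obs import DDIntakePayload, LLMObsPayload

span: LLMObsPayload = {
    "parent_id": "undefined",
    "trace_id": "trace-123",
    "span_id": "span-456",
    "name": "litellm_llm_call",
    "meta": {
        "kind": "llm",
        "input": {"messages": [{"role": "user", "content": "hi"}]},
        "output": {"messages": [{"role": "assistant", "content": "hello"}]},
        "metadata": {},
    },
    "start_ns": time.time_ns(),
    "duration": 250_000_000,  # nanoseconds
    "metrics": {"input_tokens": 5, "output_tokens": 7, "total_tokens": 12},
    "tags": ["env:dev"],
}

payload: DDIntakePayload = {
    "type": "span",  # assumed intake type value
    "attributes": {"ml_app": "my-llm-app", "tags": ["service:litellm"], "spans": [span]},
}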
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py
new file mode 100644
index 00000000..a4fd8a6a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py
@@ -0,0 +1,28 @@
+from typing import TYPE_CHECKING, Any, Dict, Optional, TypedDict
+
+from litellm.types.utils import StandardLoggingPayload
+
+if TYPE_CHECKING:
+ from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+else:
+ VertexBase = Any
+
+
+class GCSLoggingConfig(TypedDict):
+ """
+ Internal LiteLLM Config for GCS Bucket logging
+ """
+
+ bucket_name: str
+ vertex_instance: VertexBase
+ path_service_account: Optional[str]
+
+
+class GCSLogQueueItem(TypedDict):
+ """
+ Internal Type, used for queueing logs to be sent to GCS Bucket
+ """
+
+ payload: StandardLoggingPayload
+ kwargs: Dict[str, Any]
+ response_obj: Optional[Any]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py
new file mode 100644
index 00000000..ecf42d8c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py
@@ -0,0 +1,7 @@
+from typing import Optional, TypedDict
+
+
+class LangfuseLoggingConfig(TypedDict):
+ langfuse_secret: Optional[str]
+ langfuse_public_key: Optional[str]
+ langfuse_host: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py
new file mode 100644
index 00000000..48c8e2e0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py
@@ -0,0 +1,61 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Dict, List, NamedTuple, Optional, TypedDict
+
+from pydantic import BaseModel
+
+
+class LangsmithInputs(BaseModel):
+ model: Optional[str] = None
+ messages: Optional[List[Any]] = None
+ stream: Optional[bool] = None
+ call_type: Optional[str] = None
+ litellm_call_id: Optional[str] = None
+ completion_start_time: Optional[datetime] = None
+ temperature: Optional[float] = None
+ max_tokens: Optional[int] = None
+ custom_llm_provider: Optional[str] = None
+ input: Optional[List[Any]] = None
+ log_event_type: Optional[str] = None
+ original_response: Optional[Any] = None
+ response_cost: Optional[float] = None
+
+ # LiteLLM Virtual Key specific fields
+ user_api_key: Optional[str] = None
+ user_api_key_user_id: Optional[str] = None
+ user_api_key_team_alias: Optional[str] = None
+
+
+class LangsmithCredentialsObject(TypedDict):
+ LANGSMITH_API_KEY: str
+ LANGSMITH_PROJECT: str
+ LANGSMITH_BASE_URL: str
+
+
+class LangsmithQueueObject(TypedDict):
+ """
+ Langsmith Queue Object - this is what gets stored in the internal system queue before flushing to Langsmith
+
+ We need to store:
+ - data[Dict] - data that should get logged on langsmith
+ - credentials[LangsmithCredentialsObject] - credentials to use for logging to langsmith
+ """
+
+ data: Dict
+ credentials: LangsmithCredentialsObject
+
+
+class CredentialsKey(NamedTuple):
+ """Immutable key for grouping credentials"""
+
+ api_key: str
+ project: str
+ base_url: str
+
+
+@dataclass
+class BatchGroup:
+ """Groups credentials with their associated queue objects"""
+
+ credentials: LangsmithCredentialsObject
+ queue_objects: List[LangsmithQueueObject]
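
A minimal sketch of queueing a log entry and deriving the grouping key used when batching by credentials; all credential values are placeholders:

from litellm.types.integrations.langsmith import (
    CredentialsKey,
    LangsmithCredentialsObject,
    LangsmithQueueObject,
)

creds: LangsmithCredentialsObject = {
    "LANGSMITH_API_KEY": "ls-...",  # placeholder
    "LANGSMITH_PROJECT": "litellm-logs",
    "LANGSMITH_BASE_URL": "https://api.smith.langchain.com",
}

queued: LangsmithQueueObject = {"data": {"run_type": "llm"}, "credentials": creds}

# Immutable NamedTuple key, usable to group queue objects sharing credentials
key = CredentialsKey(
    api_key=creds["LANGSMITH_API_KEY"],
    project=creds["LANGSMITH_PROJECT"],
    base_url=creds["LANGSMITH_BASE_URL"],
)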
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py
new file mode 100644
index 00000000..22fd1665
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py
@@ -0,0 +1,62 @@
+from datetime import datetime
+from typing import List, Literal, Optional, TypedDict, Union
+
+from litellm.types.utils import StandardLoggingUserAPIKeyMetadata
+
+
+class LinkDict(TypedDict, total=False):
+ href: str
+ text: Optional[str]
+
+
+class ImageDict(TypedDict, total=False):
+ src: str
+ href: Optional[str]
+ alt: Optional[str]
+
+
+class PagerDutyPayload(TypedDict, total=False):
+ summary: str
+ timestamp: Optional[str] # ISO 8601 date-time format
+ severity: Literal["critical", "warning", "error", "info"]
+ source: str
+ component: Optional[str]
+ group: Optional[str]
+ class_: Optional[str] # Using class_ since 'class' is a reserved keyword
+ custom_details: Optional[dict]
+
+
+class PagerDutyRequestBody(TypedDict, total=False):
+ payload: PagerDutyPayload
+ routing_key: str
+ event_action: Literal["trigger", "acknowledge", "resolve"]
+ dedup_key: Optional[str]
+ client: Optional[str]
+ client_url: Optional[str]
+ links: Optional[List[LinkDict]]
+ images: Optional[List[ImageDict]]
+
+
+class AlertingConfig(TypedDict, total=False):
+ """
+ Config for alerting thresholds
+ """
+
+ # Requests failing threshold
+ failure_threshold: int # Number of requests failing in a window
+ failure_threshold_window_seconds: int # Window in seconds
+
+ # Requests hanging threshold
+ hanging_threshold_seconds: float # Number of seconds of waiting for a response before a request is considered hanging
+ hanging_threshold_fails: int # Number of requests hanging in a window
+ hanging_threshold_window_seconds: int # Window in seconds
+
+
+class PagerDutyInternalEvent(StandardLoggingUserAPIKeyMetadata, total=False):
+ """Simple structure to hold timestamp and error info."""
+
+ failure_event_type: Literal["failed_response", "hanging_response"]
+ timestamp: datetime
+ error_class: Optional[str]
+ error_code: Optional[str]
+ error_llm_provider: Optional[str]
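
A minimal sketch of a PagerDuty Events v2 trigger body built from these TypedDicts; the routing key and summary text are placeholders:

from datetime import datetime, timezone

from litellm.types.integrations.pagerduty import PagerDutyPayload, PagerDutyRequestBody

payload: PagerDutyPayload = {
    "summary": "LLM requests hanging on deployment gpt-4o",
    "timestamp": datetime.now(timezone.utc).isoformat(),  # ISO 8601
    "severity": "critical",
    "source": "litellm-proxy",
    "component": "router",
}

event: PagerDutyRequestBody = {
    "payload": payload,
    "routing_key": "YOUR_PAGERDUTY_ROUTING_KEY",  # placeholder
    "event_action": "trigger",
}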
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py
new file mode 100644
index 00000000..8fdcce4c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py
@@ -0,0 +1,294 @@
+from enum import Enum
+from typing import Dict, List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+import litellm
+
+REQUESTED_MODEL = "requested_model"
+EXCEPTION_STATUS = "exception_status"
+EXCEPTION_CLASS = "exception_class"
+STATUS_CODE = "status_code"
+EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
+LATENCY_BUCKETS = (
+ 0.005,
+ 0.00625,
+ 0.0125,
+ 0.025,
+ 0.05,
+ 0.1,
+ 0.5,
+ 1.0,
+ 1.5,
+ 2.0,
+ 2.5,
+ 3.0,
+ 3.5,
+ 4.0,
+ 4.5,
+ 5.0,
+ 5.5,
+ 6.0,
+ 6.5,
+ 7.0,
+ 7.5,
+ 8.0,
+ 8.5,
+ 9.0,
+ 9.5,
+ 10.0,
+ 15.0,
+ 20.0,
+ 25.0,
+ 30.0,
+ 60.0,
+ 120.0,
+ 180.0,
+ 240.0,
+ 300.0,
+ float("inf"),
+)
+
+
+class UserAPIKeyLabelNames(Enum):
+ END_USER = "end_user"
+ USER = "user"
+ USER_EMAIL = "user_email"
+ API_KEY_HASH = "hashed_api_key"
+ API_KEY_ALIAS = "api_key_alias"
+ TEAM = "team"
+ TEAM_ALIAS = "team_alias"
+ REQUESTED_MODEL = REQUESTED_MODEL
+ v1_LITELLM_MODEL_NAME = "model"
+ v2_LITELLM_MODEL_NAME = "litellm_model_name"
+ TAG = "tag"
+ MODEL_ID = "model_id"
+ API_BASE = "api_base"
+ API_PROVIDER = "api_provider"
+ EXCEPTION_STATUS = EXCEPTION_STATUS
+ EXCEPTION_CLASS = EXCEPTION_CLASS
+ STATUS_CODE = "status_code"
+ FALLBACK_MODEL = "fallback_model"
+
+
+DEFINED_PROMETHEUS_METRICS = Literal[
+ "litellm_llm_api_latency_metric",
+ "litellm_request_total_latency_metric",
+ "litellm_proxy_total_requests_metric",
+ "litellm_proxy_failed_requests_metric",
+ "litellm_deployment_latency_per_output_token",
+ "litellm_requests_metric",
+ "litellm_input_tokens_metric",
+ "litellm_output_tokens_metric",
+ "litellm_deployment_successful_fallbacks",
+ "litellm_deployment_failed_fallbacks",
+ "litellm_remaining_team_budget_metric",
+ "litellm_team_max_budget_metric",
+ "litellm_team_budget_remaining_hours_metric",
+ "litellm_remaining_api_key_budget_metric",
+ "litellm_api_key_max_budget_metric",
+ "litellm_api_key_budget_remaining_hours_metric",
+]
+
+
+class PrometheusMetricLabels:
+ litellm_llm_api_latency_metric = [
+ UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.USER.value,
+ ]
+
+ litellm_request_total_latency_metric = [
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.USER.value,
+ UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+ ]
+
+ litellm_proxy_total_requests_metric = [
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.USER.value,
+ UserAPIKeyLabelNames.STATUS_CODE.value,
+ UserAPIKeyLabelNames.USER_EMAIL.value,
+ ]
+
+ litellm_proxy_failed_requests_metric = [
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.USER.value,
+ UserAPIKeyLabelNames.EXCEPTION_STATUS.value,
+ UserAPIKeyLabelNames.EXCEPTION_CLASS.value,
+ ]
+
+ litellm_deployment_latency_per_output_token = [
+ UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value,
+ UserAPIKeyLabelNames.MODEL_ID.value,
+ UserAPIKeyLabelNames.API_BASE.value,
+ UserAPIKeyLabelNames.API_PROVIDER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ ]
+
+ litellm_requests_metric = [
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.USER.value,
+ UserAPIKeyLabelNames.USER_EMAIL.value,
+ ]
+
+ litellm_input_tokens_metric = [
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.USER.value,
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ ]
+
+ litellm_output_tokens_metric = [
+ UserAPIKeyLabelNames.END_USER.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.USER.value,
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ ]
+
+ litellm_deployment_successful_fallbacks = [
+ UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+ UserAPIKeyLabelNames.FALLBACK_MODEL.value,
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ UserAPIKeyLabelNames.EXCEPTION_STATUS.value,
+ UserAPIKeyLabelNames.EXCEPTION_CLASS.value,
+ ]
+
+ litellm_deployment_failed_fallbacks = litellm_deployment_successful_fallbacks
+
+ litellm_remaining_team_budget_metric = [
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ ]
+
+ litellm_team_max_budget_metric = [
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ ]
+
+ litellm_team_budget_remaining_hours_metric = [
+ UserAPIKeyLabelNames.TEAM.value,
+ UserAPIKeyLabelNames.TEAM_ALIAS.value,
+ ]
+
+ litellm_remaining_api_key_budget_metric = [
+ UserAPIKeyLabelNames.API_KEY_HASH.value,
+ UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+ ]
+
+ litellm_api_key_max_budget_metric = litellm_remaining_api_key_budget_metric
+
+ litellm_api_key_budget_remaining_hours_metric = (
+ litellm_remaining_api_key_budget_metric
+ )
+
+ @staticmethod
+ def get_labels(label_name: DEFINED_PROMETHEUS_METRICS) -> List[str]:
+ default_labels = getattr(PrometheusMetricLabels, label_name)
+ return default_labels + [
+ metric.replace(".", "_")
+ for metric in litellm.custom_prometheus_metadata_labels
+ ]
+
+
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class UserAPIKeyLabelValues(BaseModel):
+ end_user: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.END_USER.value)
+ ] = None
+ user: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.USER.value)
+ ] = None
+ user_email: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.USER_EMAIL.value)
+ ] = None
+ hashed_api_key: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_KEY_HASH.value)
+ ] = None
+ api_key_alias: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_KEY_ALIAS.value)
+ ] = None
+ team: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.TEAM.value)
+ ] = None
+ team_alias: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.TEAM_ALIAS.value)
+ ] = None
+ requested_model: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.REQUESTED_MODEL.value)
+ ] = None
+ model: Annotated[
+ Optional[str],
+ Field(..., alias=UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value),
+ ] = None
+ litellm_model_name: Annotated[
+ Optional[str],
+ Field(..., alias=UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value),
+ ] = None
+ tags: List[str] = []
+ custom_metadata_labels: Dict[str, str] = {}
+ model_id: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.MODEL_ID.value)
+ ] = None
+ api_base: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_BASE.value)
+ ] = None
+ api_provider: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_PROVIDER.value)
+ ] = None
+ exception_status: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.EXCEPTION_STATUS.value)
+ ] = None
+ exception_class: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.EXCEPTION_CLASS.value)
+ ] = None
+ status_code: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.STATUS_CODE.value)
+ ] = None
+ fallback_model: Annotated[
+ Optional[str], Field(..., alias=UserAPIKeyLabelNames.FALLBACK_MODEL.value)
+ ] = None
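
get_labels appends any configured custom metadata labels, with "." rewritten to "_", onto a metric's default label set. A minimal sketch, assuming the custom_prometheus_metadata_labels module attribute is set directly for illustration:

import litellm
from litellm.types.integrations.prometheus import PrometheusMetricLabels

# Custom metadata labels get appended to every metric's default labels,
# dots replaced with underscores for Prometheus compatibility.
litellm.custom_prometheus_metadata_labels = ["metadata.customer_tier"]

labels = PrometheusMetricLabels.get_labels("litellm_llm_api_latency_metric")
# -> the default latency labels plus "metadata_customer_tier"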
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py
new file mode 100644
index 00000000..9019b098
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py
@@ -0,0 +1,186 @@
+import os
+from datetime import datetime as dt
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Set, TypedDict
+
+from pydantic import BaseModel, Field
+
+from litellm.types.utils import LiteLLMPydanticObjectBase
+
+
+class BaseOutageModel(TypedDict):
+ alerts: List[int]
+ minor_alert_sent: bool
+ major_alert_sent: bool
+ last_updated_at: float
+
+
+class OutageModel(BaseOutageModel):
+ model_id: str
+
+
+class ProviderRegionOutageModel(BaseOutageModel):
+ provider_region_id: str
+ deployment_ids: Set[str]
+
+
+# We use this for the email header. If you change it, please send a test email and verify it looks good.
+LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+LITELLM_SUPPORT_CONTACT = "support@berri.ai"
+
+
+class SlackAlertingArgsEnum(Enum):
+ daily_report_frequency = 12 * 60 * 60
+ report_check_interval = 5 * 60
+ budget_alert_ttl = 24 * 60 * 60
+ outage_alert_ttl = 1 * 60
+ region_outage_alert_ttl = 1 * 60
+ minor_outage_alert_threshold = 1 * 5
+ major_outage_alert_threshold = 1 * 10
+ max_outage_alert_list_size = 1 * 10
+
+
+class SlackAlertingArgs(LiteLLMPydanticObjectBase):
+ daily_report_frequency: int = Field(
+ default=int(
+ os.getenv(
+ "SLACK_DAILY_REPORT_FREQUENCY",
+ int(SlackAlertingArgsEnum.daily_report_frequency.value),
+ )
+ ),
+ description="Frequency of receiving deployment latency/failure reports. Default is 12hours. Value is in seconds.",
+ )
+ report_check_interval: int = Field(
+ default=SlackAlertingArgsEnum.report_check_interval.value,
+ description="Frequency of checking cache if report should be sent. Background process. Default is once per hour. Value is in seconds.",
+ ) # 5 minutes
+ budget_alert_ttl: int = Field(
+ default=SlackAlertingArgsEnum.budget_alert_ttl.value,
+ description="Cache ttl for budgets alerts. Prevents spamming same alert, each time budget is crossed. Value is in seconds.",
+ ) # 24 hours
+ outage_alert_ttl: int = Field(
+ default=SlackAlertingArgsEnum.outage_alert_ttl.value,
+ description="Cache ttl for model outage alerts. Sets time-window for errors. Default is 1 minute. Value is in seconds.",
+ ) # 1 minute ttl
+ region_outage_alert_ttl: int = Field(
+ default=SlackAlertingArgsEnum.region_outage_alert_ttl.value,
+ description="Cache ttl for provider-region based outage alerts. Alert sent if 2+ models in same region report errors. Sets time-window for errors. Default is 1 minute. Value is in seconds.",
+ ) # 1 minute ttl
+ minor_outage_alert_threshold: int = Field(
+ default=SlackAlertingArgsEnum.minor_outage_alert_threshold.value,
+ description="The number of errors that count as a model/region minor outage. ('400' error code is not counted).",
+ )
+ major_outage_alert_threshold: int = Field(
+ default=SlackAlertingArgsEnum.major_outage_alert_threshold.value,
+ description="The number of errors that countas a model/region major outage. ('400' error code is not counted).",
+ )
+ max_outage_alert_list_size: int = Field(
+ default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
+ description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
+ ) # prevent memory leak
+ log_to_console: bool = Field(
+ default=False,
+ description="If true, the alerting payload will be printed to the console.",
+ )
+
+
+class DeploymentMetrics(LiteLLMPydanticObjectBase):
+ """
+ Metrics per deployment, stored in cache
+
+ Used for daily reporting
+ """
+
+ id: str
+ """id of deployment in router model list"""
+
+ failed_request: bool
+ """did it fail the request?"""
+
+ latency_per_output_token: Optional[float]
+ """latency/output token of deployment"""
+
+ updated_at: dt
+ """Current time of deployment being updated"""
+
+
+class SlackAlertingCacheKeys(Enum):
+ """
+ Enum for deployment daily metrics keys - {deployment_id}:{enum}
+ """
+
+ failed_requests_key = "failed_requests_daily_metrics"
+ latency_key = "latency_daily_metrics"
+ report_sent_key = "daily_metrics_report_sent"
+
+
+class AlertType(str, Enum):
+ """
+ Enum for alert types and management event types
+ """
+
+ # LLM-related alerts
+ llm_exceptions = "llm_exceptions"
+ llm_too_slow = "llm_too_slow"
+ llm_requests_hanging = "llm_requests_hanging"
+
+ # Budget and spend alerts
+ budget_alerts = "budget_alerts"
+ spend_reports = "spend_reports"
+ failed_tracking_spend = "failed_tracking_spend"
+
+ # Database alerts
+ db_exceptions = "db_exceptions"
+
+ # Report alerts
+ daily_reports = "daily_reports"
+
+ # Deployment alerts
+ cooldown_deployment = "cooldown_deployment"
+ new_model_added = "new_model_added"
+
+ # Outage alerts
+ outage_alerts = "outage_alerts"
+ region_outage_alerts = "region_outage_alerts"
+
+ # Fallback alerts
+ fallback_reports = "fallback_reports"
+
+ # Virtual Key Events
+ new_virtual_key_created = "new_virtual_key_created"
+ virtual_key_updated = "virtual_key_updated"
+ virtual_key_deleted = "virtual_key_deleted"
+
+ # Team Events
+ new_team_created = "new_team_created"
+ team_updated = "team_updated"
+ team_deleted = "team_deleted"
+
+ # Internal User Events
+ new_internal_user_created = "new_internal_user_created"
+ internal_user_updated = "internal_user_updated"
+ internal_user_deleted = "internal_user_deleted"
+
+
+DEFAULT_ALERT_TYPES: List[AlertType] = [
+ # LLM related alerts
+ AlertType.llm_exceptions,
+ AlertType.llm_too_slow,
+ AlertType.llm_requests_hanging,
+ # Budget and spend alerts
+ AlertType.budget_alerts,
+ AlertType.spend_reports,
+ AlertType.failed_tracking_spend,
+ # Database alerts
+ AlertType.db_exceptions,
+ # Report alerts
+ AlertType.daily_reports,
+ # Deployment alerts
+ AlertType.cooldown_deployment,
+ AlertType.new_model_added,
+ # Outage alerts
+ AlertType.outage_alerts,
+ AlertType.region_outage_alerts,
+ # Fallback alerts
+ AlertType.fallback_reports,
+]
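
A minimal sketch of overriding a couple of the alerting thresholds above; unspecified fields keep their enum-backed defaults:

from litellm.types.integrations.slack_alerting import AlertType, SlackAlertingArgs

# Shorten the budget-alert ttl to 6 hours and echo alert payloads to the console.
args = SlackAlertingArgs(budget_alert_ttl=6 * 60 * 60, log_to_console=True)

enabled_alerts = [AlertType.llm_exceptions, AlertType.budget_alerts]
assert AlertType.budget_alerts.value == "budget_alerts"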
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py
new file mode 100644
index 00000000..367b2421
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py
@@ -0,0 +1,366 @@
+from typing import Any, Dict, Iterable, List, Optional, Union
+
+from pydantic import BaseModel, validator
+from typing_extensions import Literal, Required, TypedDict
+
+from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock
+
+
+class AnthropicMessagesToolChoice(TypedDict, total=False):
+ type: Required[Literal["auto", "any", "tool"]]
+ name: str
+ disable_parallel_tool_use: bool # default is false
+
+
+class AnthropicInputSchema(TypedDict, total=False):
+ type: Optional[str]
+ properties: Optional[dict]
+ additionalProperties: Optional[bool]
+
+
+class AnthropicMessagesTool(TypedDict, total=False):
+ name: Required[str]
+ description: str
+ input_schema: Optional[AnthropicInputSchema]
+ type: Literal["custom"]
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class AnthropicComputerTool(TypedDict, total=False):
+ display_width_px: Required[int]
+ display_height_px: Required[int]
+ display_number: int
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+ type: Required[str]
+ name: Required[str]
+
+
+class AnthropicHostedTools(TypedDict, total=False): # for bash_tool and text_editor
+ type: Required[str]
+ name: Required[str]
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+AllAnthropicToolsValues = Union[
+ AnthropicComputerTool, AnthropicHostedTools, AnthropicMessagesTool
+]
+
+
+class AnthropicMessagesTextParam(TypedDict, total=False):
+ type: Required[Literal["text"]]
+ text: Required[str]
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class AnthropicMessagesToolUseParam(TypedDict):
+ type: Required[Literal["tool_use"]]
+ id: str
+ name: str
+ input: dict
+
+
+AnthropicMessagesAssistantMessageValues = Union[
+ AnthropicMessagesTextParam,
+ AnthropicMessagesToolUseParam,
+ ChatCompletionThinkingBlock,
+]
+
+
+class AnthopicMessagesAssistantMessageParam(TypedDict, total=False):
+ content: Required[Union[str, Iterable[AnthropicMessagesAssistantMessageValues]]]
+ """The contents of the system message."""
+
+ role: Required[Literal["assistant"]]
+ """The role of the messages author, in this case `author`."""
+
+ name: str
+ """An optional name for the participant.
+
+ Provides the model information to differentiate between participants of the same
+ role.
+ """
+
+
+class AnthropicContentParamSource(TypedDict):
+ type: Literal["base64"]
+ media_type: str
+ data: str
+
+
+class AnthropicMessagesImageParam(TypedDict, total=False):
+ type: Required[Literal["image"]]
+ source: Required[AnthropicContentParamSource]
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class CitationsObject(TypedDict):
+ enabled: bool
+
+
+class AnthropicMessagesDocumentParam(TypedDict, total=False):
+ type: Required[Literal["document"]]
+ source: Required[AnthropicContentParamSource]
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+ title: str
+ context: str
+ citations: Optional[CitationsObject]
+
+
+class AnthropicMessagesToolResultContent(TypedDict):
+ type: Literal["text"]
+ text: str
+
+
+class AnthropicMessagesToolResultParam(TypedDict, total=False):
+ type: Required[Literal["tool_result"]]
+ tool_use_id: Required[str]
+ is_error: bool
+ content: Union[
+ str,
+ Iterable[
+ Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]
+ ],
+ ]
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+AnthropicMessagesUserMessageValues = Union[
+ AnthropicMessagesTextParam,
+ AnthropicMessagesImageParam,
+ AnthropicMessagesToolResultParam,
+ AnthropicMessagesDocumentParam,
+]
+
+
+class AnthropicMessagesUserMessageParam(TypedDict, total=False):
+ role: Required[Literal["user"]]
+ content: Required[Union[str, Iterable[AnthropicMessagesUserMessageValues]]]
+
+
+class AnthropicMetadata(TypedDict, total=False):
+ user_id: str
+
+
+class AnthropicSystemMessageContent(TypedDict, total=False):
+ type: str
+ text: str
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+AllAnthropicMessageValues = Union[
+ AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam
+]
+
+
+class AnthropicMessageRequestBase(TypedDict, total=False):
+ messages: Required[List[AllAnthropicMessageValues]]
+ max_tokens: Required[int]
+ metadata: AnthropicMetadata
+ stop_sequences: List[str]
+ stream: bool
+ system: Union[str, List]
+ temperature: float
+ tool_choice: AnthropicMessagesToolChoice
+ tools: List[AllAnthropicToolsValues]
+ top_k: int
+ top_p: float
+
+
+class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False):
+ model: Required[str]
+ # litellm param - used for tracking litellm proxy metadata in the request
+ litellm_metadata: dict
+
+
+class ContentTextBlockDelta(TypedDict):
+ """
+ 'delta': {'type': 'text_delta', 'text': 'Hello'}
+ """
+
+ type: str
+ text: str
+
+
+class ContentCitationsBlockDelta(TypedDict):
+ type: Literal["citations"]
+ citation: dict
+
+
+class ContentJsonBlockDelta(TypedDict):
+ """
+ "delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}}
+ """
+
+ type: str
+ partial_json: str
+
+
+class ContentBlockDelta(TypedDict):
+ type: Literal["content_block_delta"]
+ index: int
+ delta: Union[
+ ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta
+ ]
+
+
+class ContentBlockStop(TypedDict):
+ type: Literal["content_block_stop"]
+ index: int
+
+
+class ToolUseBlock(TypedDict):
+ """
+ "content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}
+ """
+
+ id: str
+
+ input: dict
+
+ name: str
+
+ type: Literal["tool_use"]
+
+
+class TextBlock(TypedDict):
+ text: str
+
+ type: Literal["text"]
+
+
+class ContentBlockStart(TypedDict):
+ """
+ event: content_block_start
+ data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
+ """
+
+ type: str
+ index: int
+ content_block: Union[ToolUseBlock, TextBlock]
+
+
+class MessageDelta(TypedDict, total=False):
+ stop_reason: Optional[str]
+
+
+class UsageDelta(TypedDict, total=False):
+ input_tokens: int
+ output_tokens: int
+
+
+class MessageBlockDelta(TypedDict):
+ """
+ Anthropic
+ chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+ """
+
+ type: Literal["message_delta"]
+ delta: MessageDelta
+ usage: UsageDelta
+
+
+class MessageChunk(TypedDict, total=False):
+ id: str
+ type: str
+ role: str
+ model: str
+ content: List
+ stop_reason: Optional[str]
+ stop_sequence: Optional[str]
+ usage: UsageDelta
+
+
+class MessageStartBlock(TypedDict):
+ """
+ Anthropic
+ chunk = {
+ "type": "message_start",
+ "message": {
+ "id": "msg_vrtx_011PqREFEMzd3REdCoUFAmdG",
+ "type": "message",
+ "role": "assistant",
+ "model": "claude-3-sonnet-20240229",
+ "content": [],
+ "stop_reason": null,
+ "stop_sequence": null,
+ "usage": {
+ "input_tokens": 270,
+ "output_tokens": 1
+ }
+ }
+ }
+ """
+
+ type: Literal["message_start"]
+ message: MessageChunk
+
+
+class AnthropicResponseContentBlockText(BaseModel):
+ type: Literal["text"]
+ text: str
+
+
+class AnthropicResponseContentBlockToolUse(BaseModel):
+ type: Literal["tool_use"]
+ id: str
+ name: str
+ input: dict
+
+
+class AnthropicResponseUsageBlock(BaseModel):
+ input_tokens: int
+ output_tokens: int
+
+
+AnthropicFinishReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]
+
+
+class AnthropicResponse(BaseModel):
+ id: str
+ """Unique object identifier."""
+
+ type: Literal["message"]
+ """For Messages, this is always "message"."""
+
+ role: Literal["assistant"]
+ """Conversational role of the generated message. This will always be "assistant"."""
+
+ content: List[
+ Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
+ ]
+ """Content generated by the model."""
+
+ model: str
+ """The model that handled the request."""
+
+ stop_reason: Optional[AnthropicFinishReason]
+ """The reason that we stopped."""
+
+ stop_sequence: Optional[str]
+ """Which custom stop sequence was generated, if any."""
+
+ usage: AnthropicResponseUsageBlock
+ """Billing and rate-limit usage."""
+
+
+from .openai import ChatCompletionUsageBlock
+
+
+class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False):
+ cache_creation_input_tokens: int
+ cache_read_input_tokens: int
+
+
+ANTHROPIC_API_HEADERS = {
+ "anthropic-version",
+ "anthropic-beta",
+}
+
+ANTHROPIC_API_ONLY_HEADERS = { # fails if calling anthropic on vertex ai / bedrock
+ "anthropic-beta",
+}
+
+
+class AnthropicThinkingParam(TypedDict, total=False):
+ type: Literal["enabled"]
+ budget_tokens: int
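A minimal usage sketch of the Anthropic request types defined above (hypothetical values; assumes the module imports as litellm.types.llms.anthropic, per the file path in this diff):

from litellm.types.llms.anthropic import (
    AnthropicMessagesRequest,
    AnthropicMessagesUserMessageParam,
)

# A single user turn; `content` may be a plain string or a list of content blocks.
user_msg: AnthropicMessagesUserMessageParam = {
    "role": "user",
    "content": "What is the weather in San Francisco?",
}

# `model`, `messages`, and `max_tokens` are the required keys; the rest are optional.
request: AnthropicMessagesRequest = {
    "model": "claude-3-sonnet-20240229",
    "messages": [user_msg],
    "max_tokens": 256,
    "temperature": 0.7,
}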
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py
new file mode 100644
index 00000000..2d597aef
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py
@@ -0,0 +1,17 @@
+from typing import Any, Dict, Iterable, List, Literal, Optional, Union
+
+from typing_extensions import Required, TypedDict
+
+
+class ImageEmbeddingInput(TypedDict, total=False):
+ image: Required[str]
+ text: str
+
+
+EncodingFormat = Literal["base64", "binary", "float", "int8", "ubinary", "uint8"]
+
+
+class ImageEmbeddingRequest(TypedDict, total=False):
+ input: Required[List[ImageEmbeddingInput]]
+ dimensions: int
+ encoding_format: EncodingFormat
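A short sketch of an image-embedding request built from these types (placeholder image payload; assumes module path litellm.types.llms.azure_ai):

from litellm.types.llms.azure_ai import ImageEmbeddingInput, ImageEmbeddingRequest

# `image` is the only required key on ImageEmbeddingInput; the value here is a placeholder.
item: ImageEmbeddingInput = {"image": "<image-data>", "text": "a photo of a cat"}

request: ImageEmbeddingRequest = {
    "input": [item],
    "dimensions": 1024,
    "encoding_format": "float",
}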
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py
new file mode 100644
index 00000000..57fb04c8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py
@@ -0,0 +1,503 @@
+import json
+from typing import Any, List, Literal, Optional, TypedDict, Union
+
+from typing_extensions import (
+ TYPE_CHECKING,
+ Protocol,
+ Required,
+ Self,
+ TypeGuard,
+ get_origin,
+ override,
+ runtime_checkable,
+)
+
+from .openai import ChatCompletionToolCallChunk
+
+
+class CachePointBlock(TypedDict, total=False):
+ type: Literal["default"]
+
+
+class SystemContentBlock(TypedDict, total=False):
+ text: str
+ cachePoint: CachePointBlock
+
+
+class SourceBlock(TypedDict):
+ bytes: Optional[str] # base 64 encoded string
+
+
+BedrockImageTypes = Literal["png", "jpeg", "gif", "webp"]
+
+
+class ImageBlock(TypedDict):
+ format: Union[BedrockImageTypes, str]
+ source: SourceBlock
+
+
+BedrockDocumentTypes = Literal[
+ "pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"
+]
+
+
+class DocumentBlock(TypedDict):
+ format: Union[BedrockDocumentTypes, str]
+ source: SourceBlock
+ name: str
+
+
+class ToolResultContentBlock(TypedDict, total=False):
+ image: ImageBlock
+ document: DocumentBlock
+ json: dict
+ text: str
+
+
+class ToolResultBlock(TypedDict, total=False):
+ content: Required[List[ToolResultContentBlock]]
+ toolUseId: Required[str]
+ status: Literal["success", "error"]
+
+
+class ToolUseBlock(TypedDict):
+ input: dict
+ name: str
+ toolUseId: str
+
+
+class BedrockConverseReasoningTextBlock(TypedDict, total=False):
+ text: Required[str]
+ signature: str
+
+
+class BedrockConverseReasoningContentBlock(TypedDict, total=False):
+ reasoningText: BedrockConverseReasoningTextBlock
+ redactedContent: str
+
+
+class BedrockConverseReasoningContentBlockDelta(TypedDict, total=False):
+ signature: str
+ redactedContent: str
+ text: str
+
+
+class ContentBlock(TypedDict, total=False):
+ text: str
+ image: ImageBlock
+ document: DocumentBlock
+ toolResult: ToolResultBlock
+ toolUse: ToolUseBlock
+ cachePoint: CachePointBlock
+ reasoningContent: BedrockConverseReasoningContentBlock
+
+
+class MessageBlock(TypedDict):
+ content: List[ContentBlock]
+ role: Literal["user", "assistant"]
+
+
+class ConverseMetricsBlock(TypedDict):
+ latencyMs: float # time in ms
+
+
+class ConverseResponseOutputBlock(TypedDict):
+ message: Optional[MessageBlock]
+
+
+class ConverseTokenUsageBlock(TypedDict):
+ inputTokens: int
+ outputTokens: int
+ totalTokens: int
+ cacheReadInputTokenCount: int
+ cacheReadInputTokens: int
+ cacheWriteInputTokenCount: int
+ cacheWriteInputTokens: int
+
+
+class ConverseResponseBlock(TypedDict):
+ additionalModelResponseFields: dict
+ metrics: ConverseMetricsBlock
+ output: ConverseResponseOutputBlock
+ stopReason: (
+ str # end_turn | tool_use | max_tokens | stop_sequence | content_filtered
+ )
+ usage: ConverseTokenUsageBlock
+
+
+class ToolInputSchemaBlock(TypedDict):
+ json: Optional[dict]
+
+
+class ToolSpecBlock(TypedDict, total=False):
+ inputSchema: Required[ToolInputSchemaBlock]
+ name: Required[str]
+ description: str
+
+
+class ToolBlock(TypedDict):
+ toolSpec: Optional[ToolSpecBlock]
+
+
+class SpecificToolChoiceBlock(TypedDict):
+ name: str
+
+
+class ToolChoiceValuesBlock(TypedDict, total=False):
+ any: dict
+ auto: dict
+ tool: SpecificToolChoiceBlock
+
+
+class ToolConfigBlock(TypedDict, total=False):
+ tools: Required[List[ToolBlock]]
+ toolChoice: Union[str, ToolChoiceValuesBlock]
+
+
+class GuardrailConfigBlock(TypedDict, total=False):
+ guardrailIdentifier: str
+ guardrailVersion: str
+ trace: Literal["enabled", "disabled"]
+
+
+class InferenceConfig(TypedDict, total=False):
+ maxTokens: int
+ stopSequences: List[str]
+ temperature: float
+ topP: float
+ topK: int
+
+
+class ToolBlockDeltaEvent(TypedDict):
+ input: str
+
+
+class ToolUseBlockStartEvent(TypedDict):
+ name: str
+ toolUseId: str
+
+
+class ContentBlockStartEvent(TypedDict, total=False):
+ toolUse: Optional[ToolUseBlockStartEvent]
+
+
+class ContentBlockDeltaEvent(TypedDict, total=False):
+ """
+ Either 'text' or 'toolUse' will be specified in a Converse API streaming response.
+ """
+
+ text: str
+ toolUse: ToolBlockDeltaEvent
+ reasoningContent: BedrockConverseReasoningContentBlockDelta
+
+
+class CommonRequestObject(
+ TypedDict, total=False
+): # common request object across sync + async flows
+ additionalModelRequestFields: dict
+ additionalModelResponseFieldPaths: List[str]
+ inferenceConfig: InferenceConfig
+ system: List[SystemContentBlock]
+ toolConfig: ToolConfigBlock
+ guardrailConfig: Optional[GuardrailConfigBlock]
+
+
+class RequestObject(CommonRequestObject, total=False):
+ messages: Required[List[MessageBlock]]
+
+
+class BedrockInvokeNovaRequest(TypedDict, total=False):
+ """
+ Request object for sending `nova` requests to `/bedrock/invoke/`
+ """
+
+ messages: List[MessageBlock]
+ inferenceConfig: InferenceConfig
+ system: List[SystemContentBlock]
+ toolConfig: ToolConfigBlock
+ guardrailConfig: Optional[GuardrailConfigBlock]
+
+
+class GenericStreamingChunk(TypedDict):
+ text: Required[str]
+ tool_use: Optional[ChatCompletionToolCallChunk]
+ is_finished: Required[bool]
+ finish_reason: Required[str]
+ usage: Optional[ConverseTokenUsageBlock]
+ index: int
+
+
+class Document(TypedDict):
+ title: str
+ snippet: str
+
+
+class ServerSentEvent:
+ def __init__(
+ self,
+ *,
+ event: Optional[str] = None,
+ data: Optional[str] = None,
+ id: Optional[str] = None,
+ retry: Optional[int] = None,
+ ) -> None:
+ if data is None:
+ data = ""
+
+ self._id = id
+ self._data = data
+ self._event = event or None
+ self._retry = retry
+
+ @property
+ def event(self) -> Optional[str]:
+ return self._event
+
+ @property
+ def id(self) -> Optional[str]:
+ return self._id
+
+ @property
+ def retry(self) -> Optional[int]:
+ return self._retry
+
+ @property
+ def data(self) -> str:
+ return self._data
+
+ def json(self) -> Any:
+ return json.loads(self.data)
+
+ @override
+ def __repr__(self) -> str:
+ return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})"
+
+
+COHERE_EMBEDDING_INPUT_TYPES = Literal[
+ "search_document", "search_query", "classification", "clustering", "image"
+]
+
+
+class CohereEmbeddingRequest(TypedDict, total=False):
+ texts: List[str]
+ images: List[str]
+ input_type: Required[COHERE_EMBEDDING_INPUT_TYPES]
+ truncate: Literal["NONE", "START", "END"]
+ embedding_types: Literal["float", "int8", "uint8", "binary", "ubinary"]
+
+
+class CohereEmbeddingRequestWithModel(CohereEmbeddingRequest):
+ model: Required[str]
+
+
+class CohereEmbeddingResponse(TypedDict):
+ embeddings: List[List[float]]
+ id: str
+ response_type: Literal["embedding_floats"]
+ texts: List[str]
+
+
+class AmazonTitanV2EmbeddingRequest(TypedDict):
+ inputText: str
+ dimensions: int
+ normalize: bool
+
+
+class AmazonTitanV2EmbeddingResponse(TypedDict):
+ embedding: List[float]
+ inputTextTokenCount: int
+
+
+class AmazonTitanG1EmbeddingRequest(TypedDict):
+ inputText: str
+
+
+class AmazonTitanG1EmbeddingResponse(TypedDict):
+ embedding: List[float]
+ inputTextTokenCount: int
+
+
+class AmazonTitanMultimodalEmbeddingConfig(TypedDict):
+ outputEmbeddingLength: Literal[256, 384, 1024]
+
+
+class AmazonTitanMultimodalEmbeddingRequest(TypedDict, total=False):
+ inputText: str
+ inputImage: str
+ embeddingConfig: AmazonTitanMultimodalEmbeddingConfig
+
+
+class AmazonTitanMultimodalEmbeddingResponse(TypedDict):
+ embedding: List[float]
+ inputTextTokenCount: int
+ message: str # Specifies any errors that occur during generation.
+
+
+AmazonEmbeddingRequest = Union[
+ AmazonTitanMultimodalEmbeddingRequest,
+ AmazonTitanV2EmbeddingRequest,
+ AmazonTitanG1EmbeddingRequest,
+]
+
+
+class AmazonStability3TextToImageRequest(TypedDict, total=False):
+ """
+ Request for Amazon Stability 3 Text to Image API
+
+ Ref here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-diffusion-3-text-image.html
+ """
+
+ prompt: str
+ aspect_ratio: Literal[
+ "16:9", "1:1", "21:9", "2:3", "3:2", "4:5", "5:4", "9:16", "9:21"
+ ]
+ mode: Literal["image-to-image", "text-to-image"]
+ output_format: Literal["JPEG", "PNG"]
+ seed: int
+ negative_prompt: str
+
+
+class AmazonStability3TextToImageResponse(TypedDict, total=False):
+ """
+ Response for Amazon Stability 3 Text to Image API
+
+ Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-diffusion-3-text-image.html
+ """
+
+ images: List[str]
+ seeds: List[str]
+ finish_reasons: List[str]
+
+
+class AmazonNovaCanvasRequestBase(TypedDict, total=False):
+ """
+ Base class for Amazon Nova Canvas API requests
+ """
+
+ pass
+
+
+class AmazonNovaCanvasImageGenerationConfig(TypedDict, total=False):
+ """
+ Config for Amazon Nova Canvas Text to Image API
+
+ Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
+ """
+
+ cfgScale: int
+ seed: int
+ quality: Literal["standard", "premium"]
+ width: int
+ height: int
+ numberOfImages: int
+
+
+class AmazonNovaCanvasTextToImageParams(TypedDict, total=False):
+ """
+ Params for Amazon Nova Canvas Text to Image API
+ """
+
+ text: str
+ negativeText: str
+ controlStrength: float
+ controlMode: Literal["CANNY_EDIT", "SEGMENTATION"]
+ conditionImage: str
+
+
+class AmazonNovaCanvasTextToImageRequest(
+ AmazonNovaCanvasRequestBase, TypedDict, total=False
+):
+ """
+ Request for Amazon Nova Canvas Text to Image API
+
+ Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
+ """
+
+ textToImageParams: AmazonNovaCanvasTextToImageParams
+ taskType: Literal["TEXT_IMAGE"]
+ imageGenerationConfig: AmazonNovaCanvasImageGenerationConfig
+
+
+class AmazonNovaCanvasTextToImageResponse(TypedDict, total=False):
+ """
+ Response for Amazon Nova Canvas Text to Image API
+
+ Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
+ """
+
+ images: List[str]
+
+
+if TYPE_CHECKING:
+ from botocore.awsrequest import AWSPreparedRequest
+else:
+ AWSPreparedRequest = Any
+
+from pydantic import BaseModel
+
+
+class BedrockPreparedRequest(TypedDict):
+ """
+ Internal/Helper class for preparing the request for bedrock image generation
+ """
+
+ endpoint_url: str
+ prepped: AWSPreparedRequest
+ body: bytes
+ data: dict
+
+
+class BedrockRerankTextQuery(TypedDict):
+ text: str
+
+
+class BedrockRerankQuery(TypedDict):
+ textQuery: BedrockRerankTextQuery
+ type: Literal["TEXT"]
+
+
+class BedrockRerankModelConfiguration(TypedDict, total=False):
+ modelArn: Required[str]
+ modelConfiguration: dict
+
+
+class BedrockRerankBedrockRerankingConfiguration(TypedDict):
+ modelConfiguration: BedrockRerankModelConfiguration
+ numberOfResults: int
+
+
+class BedrockRerankConfiguration(TypedDict):
+ bedrockRerankingConfiguration: BedrockRerankBedrockRerankingConfiguration
+ type: Literal["BEDROCK_RERANKING_MODEL"]
+
+
+class BedrockRerankTextDocument(TypedDict, total=False):
+ text: str
+
+
+class BedrockRerankInlineDocumentSource(TypedDict, total=False):
+ jsonDocument: dict
+ textDocument: BedrockRerankTextDocument
+ type: Literal["TEXT", "JSON"]
+
+
+class BedrockRerankSource(TypedDict):
+ inlineDocumentSource: BedrockRerankInlineDocumentSource
+ type: Literal["INLINE"]
+
+
+class BedrockRerankRequest(TypedDict):
+ """
+ Request for Bedrock Rerank API
+ """
+
+ queries: List[BedrockRerankQuery]
+ rerankingConfiguration: BedrockRerankConfiguration
+ sources: List[BedrockRerankSource]
+
+
+class AmazonDeepSeekR1StreamingResponse(TypedDict):
+ generation: str
+ generation_token_count: int
+ stop_reason: Optional[str]
+ prompt_token_count: int
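An illustrative sketch of a Bedrock Converse request assembled from these TypedDicts (hypothetical prompt and settings; assumes module path litellm.types.llms.bedrock):

from litellm.types.llms.bedrock import (
    ContentBlock,
    InferenceConfig,
    MessageBlock,
    RequestObject,
    SystemContentBlock,
)

user_turn: MessageBlock = {
    "role": "user",
    "content": [ContentBlock(text="Summarize the key risks in this report.")],
}

# Only `messages` is required on RequestObject; system prompt and inference config are optional.
request: RequestObject = {
    "messages": [user_turn],
    "system": [SystemContentBlock(text="You are a concise assistant.")],
    "inferenceConfig": InferenceConfig(maxTokens=512, temperature=0.2),
}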
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py
new file mode 100644
index 00000000..7112a242
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py
@@ -0,0 +1,46 @@
+from typing import Iterable, List, Optional, Union
+
+from typing_extensions import Literal, Required, TypedDict
+
+
+class CallObject(TypedDict):
+ name: str
+ parameters: dict
+
+
+class ToolResultObject(TypedDict):
+ call: CallObject
+ outputs: List[dict]
+
+
+class ChatHistoryToolResult(TypedDict, total=False):
+ role: Required[Literal["TOOL"]]
+ tool_results: List[ToolResultObject]
+
+
+class ToolCallObject(TypedDict):
+ name: str
+ parameters: dict
+
+
+class ChatHistoryUser(TypedDict, total=False):
+ role: Required[Literal["USER"]]
+ message: str
+ tool_calls: List[ToolCallObject]
+
+
+class ChatHistorySystem(TypedDict, total=False):
+ role: Required[Literal["SYSTEM"]]
+ message: str
+ tool_calls: List[ToolCallObject]
+
+
+class ChatHistoryChatBot(TypedDict, total=False):
+ role: Required[Literal["CHATBOT"]]
+ message: str
+ tool_calls: List[ToolCallObject]
+
+
+ChatHistory = List[
+ Union[ChatHistorySystem, ChatHistoryChatBot, ChatHistoryUser, ChatHistoryToolResult]
+]
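A brief sketch of a Cohere chat history assembled from these role-specific TypedDicts (hypothetical messages; assumes module path litellm.types.llms.cohere):

from litellm.types.llms.cohere import ChatHistory

chat_history: ChatHistory = [
    {"role": "SYSTEM", "message": "You are a helpful assistant."},
    {"role": "USER", "message": "What is the capital of France?"},
    {"role": "CHATBOT", "message": "The capital of France is Paris."},
]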
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py
new file mode 100644
index 00000000..5eec187d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py
@@ -0,0 +1,24 @@
+import ssl
+from enum import Enum
+from typing import Union
+
+
+class httpxSpecialProvider(str, Enum):
+ """
+ Httpx Clients can be created for these litellm internal providers
+
+ Example:
+ - langsmith logging would need a custom async httpx client
+ - pass through endpoint would need a custom async httpx client
+ """
+
+ LoggingCallback = "logging_callback"
+ GuardrailCallback = "guardrail_callback"
+ Caching = "caching"
+ Oauth2Check = "oauth2_check"
+ SecretManager = "secret_manager"
+ PassThroughEndpoint = "pass_through_endpoint"
+ PromptFactory = "prompt_factory"
+
+
+VerifyTypes = Union[str, bool, ssl.SSLContext]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py
new file mode 100644
index 00000000..d5499a41
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py
@@ -0,0 +1,10 @@
+from typing import List
+
+from typing_extensions import Dict, Required, TypedDict, override
+
+from litellm.llms.custom_llm import CustomLLM
+
+
+class CustomLLMItem(TypedDict):
+ provider: str
+ custom_handler: CustomLLM
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py
new file mode 100644
index 00000000..770e05fe
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py
@@ -0,0 +1,21 @@
+from typing import TypedDict, Any, Union, Optional
+import json
+from typing_extensions import (
+ Self,
+ Protocol,
+ TypeGuard,
+ override,
+ get_origin,
+ runtime_checkable,
+ Required,
+)
+from pydantic import BaseModel
+
+
+class GenericStreamingChunk(TypedDict, total=False):
+ text: Required[str]
+ is_finished: Required[bool]
+ finish_reason: Required[Optional[str]]
+ logprobs: Optional[BaseModel]
+ original_chunk: Optional[BaseModel]
+ usage: Optional[BaseModel]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py
new file mode 100644
index 00000000..e9563a9a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py
@@ -0,0 +1,12 @@
+from typing import List, Literal, Optional, TypedDict, Union
+
+
+class FunctionCall(TypedDict):
+ name: Optional[str]
+ arguments: Optional[Union[str, dict]]
+
+
+class MistralToolCallMessage(TypedDict):
+ id: Optional[str]
+ type: Literal["function"]
+ function: Optional[FunctionCall]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py
new file mode 100644
index 00000000..9d71904c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py
@@ -0,0 +1,29 @@
+import json
+from typing import Any, List, Optional, TypedDict, Union
+
+from pydantic import BaseModel
+from typing_extensions import (
+ Protocol,
+ Required,
+ Self,
+ TypeGuard,
+ get_origin,
+ override,
+ runtime_checkable,
+)
+
+
+class OllamaToolCallFunction(
+ TypedDict
+): # follows - https://github.com/ollama/ollama/blob/6bd8a4b0a1ac15d5718f52bbe1cd56f827beb694/api/types.go#L148
+ name: str
+ arguments: dict
+
+
+class OllamaToolCall(TypedDict):
+ function: OllamaToolCallFunction
+
+
+class OllamaVisionModelObject(TypedDict):
+ prompt: str
+ images: List[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py
new file mode 100644
index 00000000..4b0be9d5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py
@@ -0,0 +1,1040 @@
+from enum import Enum
+from os import PathLike
+from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
+
+import httpx
+from openai._legacy_response import (
+ HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
+)
+from openai.lib.streaming._assistants import (
+ AssistantEventHandler,
+ AssistantStreamManager,
+ AsyncAssistantEventHandler,
+ AsyncAssistantStreamManager,
+)
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, EmbeddingCreateParams, FileObject
+from openai.types.beta.assistant import Assistant
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+ Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from openai.types.chat import ChatCompletionChunk
+from openai.types.chat.chat_completion_audio_param import ChatCompletionAudioParam
+from openai.types.chat.chat_completion_content_part_input_audio_param import (
+ ChatCompletionContentPartInputAudioParam,
+)
+from openai.types.chat.chat_completion_modality import ChatCompletionModality
+from openai.types.chat.chat_completion_prediction_content_param import (
+ ChatCompletionPredictionContentParam,
+)
+from openai.types.embedding import Embedding as OpenAIEmbedding
+from openai.types.fine_tuning.fine_tuning_job import FineTuningJob
+from openai.types.responses.response import (
+ IncompleteDetails,
+ Response,
+ ResponseOutputItem,
+ ResponseTextConfig,
+ Tool,
+ ToolChoice,
+)
+from openai.types.responses.response_create_params import (
+ Reasoning,
+ ResponseIncludable,
+ ResponseInputParam,
+ ResponseTextConfigParam,
+ ToolChoice,
+ ToolParam,
+)
+from pydantic import BaseModel, Discriminator, Field, PrivateAttr
+from typing_extensions import Annotated, Dict, Required, TypedDict, override
+
+FileContent = Union[IO[bytes], bytes, PathLike]
+
+FileTypes = Union[
+ # file (or bytes)
+ FileContent,
+ # (filename, file (or bytes))
+ Tuple[Optional[str], FileContent],
+ # (filename, file (or bytes), content_type)
+ Tuple[Optional[str], FileContent, Optional[str]],
+ # (filename, file (or bytes), content_type, headers)
+ Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
+]
+
+
+EmbeddingInput = Union[str, List[str]]
+
+
+class HttpxBinaryResponseContent(_HttpxBinaryResponseContent):
+ _hidden_params: dict = {}
+ pass
+
+
+class NotGiven:
+ """
+ A sentinel singleton class used to distinguish omitted keyword arguments
+ from those passed in with the value None (which may have different behavior).
+
+ For example:
+
+ ```py
+ def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
+ ...
+
+
+ get(timeout=1) # 1s timeout
+ get(timeout=None) # No timeout
+ get() # Default timeout behavior, which may not be statically known at the method definition.
+ ```
+ """
+
+ def __bool__(self) -> Literal[False]:
+ return False
+
+ @override
+ def __repr__(self) -> str:
+ return "NOT_GIVEN"
+
+
+NOT_GIVEN = NotGiven()
+
+
+class ToolResourcesCodeInterpreter(TypedDict, total=False):
+ file_ids: List[str]
+ """
+ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made
+ available to the `code_interpreter` tool. There can be a maximum of 20 files
+ associated with the tool.
+ """
+
+
+class ToolResourcesFileSearchVectorStore(TypedDict, total=False):
+ file_ids: List[str]
+ """
+ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to
+ add to the vector store. There can be a maximum of 10000 files in a vector
+ store.
+ """
+
+ metadata: object
+ """Set of 16 key-value pairs that can be attached to a vector store.
+
+ This can be useful for storing additional information about the vector store in
+ a structured format. Keys can be a maximum of 64 characters long and values can
+ be a maximum of 512 characters long.
+ """
+
+
+class ToolResourcesFileSearch(TypedDict, total=False):
+ vector_store_ids: List[str]
+ """
+ The
+ [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+ attached to this thread. There can be a maximum of 1 vector store attached to
+ the thread.
+ """
+
+ vector_stores: Iterable[ToolResourcesFileSearchVectorStore]
+ """
+ A helper to create a
+ [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+ with file_ids and attach it to this thread. There can be a maximum of 1 vector
+ store attached to the thread.
+ """
+
+
+class OpenAICreateThreadParamsToolResources(TypedDict, total=False):
+ code_interpreter: ToolResourcesCodeInterpreter
+
+ file_search: ToolResourcesFileSearch
+
+
+class FileSearchToolParam(TypedDict, total=False):
+ type: Required[Literal["file_search"]]
+ """The type of tool being defined: `file_search`"""
+
+
+class CodeInterpreterToolParam(TypedDict, total=False):
+ type: Required[Literal["code_interpreter"]]
+ """The type of tool being defined: `code_interpreter`"""
+
+
+AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam]
+
+
+class Attachment(TypedDict, total=False):
+ file_id: str
+ """The ID of the file to attach to the message."""
+
+ tools: Iterable[AttachmentTool]
+ """The tools to add this file to."""
+
+
+class ImageFileObject(TypedDict):
+ file_id: Required[str]
+ detail: Optional[str]
+
+
+class ImageURLObject(TypedDict):
+ url: Required[str]
+ detail: Optional[str]
+
+
+class MessageContentTextObject(TypedDict):
+ type: Required[Literal["text"]]
+ text: str
+
+
+class MessageContentImageFileObject(TypedDict):
+ type: Literal["image_file"]
+ image_file: ImageFileObject
+
+
+class MessageContentImageURLObject(TypedDict):
+ type: Required[str]
+ image_url: ImageURLObject
+
+
+class MessageData(TypedDict):
+ role: Literal["user", "assistant"]
+ content: Union[
+ str,
+ List[
+ Union[
+ MessageContentTextObject,
+ MessageContentImageFileObject,
+ MessageContentImageURLObject,
+ ]
+ ],
+ ]
+ attachments: Optional[List[Attachment]]
+ metadata: Optional[dict]
+
+
+class Thread(BaseModel):
+ id: str
+ """The identifier, which can be referenced in API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the thread was created."""
+
+ metadata: Optional[object] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format. Keys can be a maximum of 64 characters long and values can be
+ a maximum of 512 characters long.
+ """
+
+ object: Literal["thread"]
+ """The object type, which is always `thread`."""
+
+
+# OpenAI Files Types
+class CreateFileRequest(TypedDict, total=False):
+ """
+ CreateFileRequest
+ Used by Assistants API, Batches API, and Fine-Tunes API
+
+ Required Params:
+ file: FileTypes
+ purpose: Literal['assistants', 'batch', 'fine-tune']
+
+ Optional Params:
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]] = None
+ timeout: Optional[float] = None
+ """
+
+ file: FileTypes
+ purpose: Literal["assistants", "batch", "fine-tune"]
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]]
+ timeout: Optional[float]
+
+
+class FileContentRequest(TypedDict, total=False):
+ """
+ FileContentRequest
+ Used by Assistants API, Batches API, and Fine-Tunes API
+
+ Required Params:
+ file_id: str
+
+ Optional Params:
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]] = None
+ timeout: Optional[float] = None
+ """
+
+ file_id: str
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]]
+ timeout: Optional[float]
+
+
+# OpenAI Batches Types
+class CreateBatchRequest(TypedDict, total=False):
+ """
+ CreateBatchRequest
+ """
+
+ completion_window: Literal["24h"]
+ endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"]
+ input_file_id: str
+ metadata: Optional[Dict[str, str]]
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]]
+ timeout: Optional[float]
+
+
+class RetrieveBatchRequest(TypedDict, total=False):
+ """
+ RetrieveBatchRequest
+ """
+
+ batch_id: str
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]]
+ timeout: Optional[float]
+
+
+class CancelBatchRequest(TypedDict, total=False):
+ """
+ CancelBatchRequest
+ """
+
+ batch_id: str
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]]
+ timeout: Optional[float]
+
+
+class ListBatchRequest(TypedDict, total=False):
+ """
+ ListBatchRequest - List your organization's batches
+ Calls https://api.openai.com/v1/batches
+ """
+
+ after: Union[str, NotGiven]
+ limit: Union[int, NotGiven]
+ extra_headers: Optional[Dict[str, str]]
+ extra_body: Optional[Dict[str, str]]
+ timeout: Optional[float]
+
+
+BatchJobStatus = Literal[
+ "validating",
+ "failed",
+ "in_progress",
+ "finalizing",
+ "completed",
+ "expired",
+ "cancelling",
+ "cancelled",
+]
+
+
+class ChatCompletionAudioDelta(TypedDict, total=False):
+ data: str
+ transcript: str
+ expires_at: int
+ id: str
+
+
+class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
+ name: Optional[str]
+ arguments: str
+
+
+class ChatCompletionAssistantToolCall(TypedDict):
+ id: Optional[str]
+ type: Literal["function"]
+ function: ChatCompletionToolCallFunctionChunk
+
+
+class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call
+ id: Optional[str]
+ type: Literal["function"]
+ function: ChatCompletionToolCallFunctionChunk
+ index: int
+
+
+class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
+ id: str
+ type: Literal["function"]
+ function: ChatCompletionToolCallFunctionChunk
+ index: int
+
+
+class ChatCompletionCachedContent(TypedDict):
+ type: Literal["ephemeral"]
+
+
+class ChatCompletionThinkingBlock(TypedDict, total=False):
+ type: Required[Literal["thinking"]]
+ thinking: str
+ signature: str
+ cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
+class OpenAIChatCompletionTextObject(TypedDict):
+ type: Literal["text"]
+ text: str
+
+
+class ChatCompletionTextObject(
+ OpenAIChatCompletionTextObject, total=False
+): # litellm wrapper on top of openai object for handling cached content
+ cache_control: ChatCompletionCachedContent
+
+
+class ChatCompletionImageUrlObject(TypedDict, total=False):
+ url: Required[str]
+ detail: str
+ format: str
+
+
+class ChatCompletionImageObject(TypedDict):
+ type: Literal["image_url"]
+ image_url: Union[str, ChatCompletionImageUrlObject]
+
+
+class ChatCompletionVideoUrlObject(TypedDict, total=False):
+ url: Required[str]
+ detail: str
+
+
+class ChatCompletionVideoObject(TypedDict):
+ type: Literal["video_url"]
+ video_url: Union[str, ChatCompletionVideoUrlObject]
+
+
+class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam):
+ pass
+
+
+class DocumentObject(TypedDict):
+ type: Literal["text"]
+ media_type: str
+ data: str
+
+
+class CitationsObject(TypedDict):
+ enabled: bool
+
+
+class ChatCompletionDocumentObject(TypedDict):
+ type: Literal["document"]
+ source: DocumentObject
+ title: str
+ context: str
+ citations: Optional[CitationsObject]
+
+
+class ChatCompletionFileObjectFile(TypedDict):
+ file_data: Optional[str]
+ file_id: Optional[str]
+ filename: Optional[str]
+
+
+class ChatCompletionFileObject(TypedDict):
+ type: Literal["file"]
+ file: ChatCompletionFileObjectFile
+
+
+OpenAIMessageContentListBlock = Union[
+ ChatCompletionTextObject,
+ ChatCompletionImageObject,
+ ChatCompletionAudioObject,
+ ChatCompletionDocumentObject,
+ ChatCompletionVideoObject,
+ ChatCompletionFileObject,
+]
+
+OpenAIMessageContent = Union[
+ str,
+ Iterable[OpenAIMessageContentListBlock],
+]
+
+# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]
+
+
+class OpenAIChatCompletionUserMessage(TypedDict):
+ role: Literal["user"]
+ content: OpenAIMessageContent
+
+
+class OpenAITextCompletionUserMessage(TypedDict):
+ role: Literal["user"]
+ content: AllPromptValues
+
+
+class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False):
+ cache_control: ChatCompletionCachedContent
+
+
+class OpenAIChatCompletionAssistantMessage(TypedDict, total=False):
+ role: Required[Literal["assistant"]]
+ content: Optional[
+ Union[
+ str, Iterable[Union[ChatCompletionTextObject, ChatCompletionThinkingBlock]]
+ ]
+ ]
+ name: Optional[str]
+ tool_calls: Optional[List[ChatCompletionAssistantToolCall]]
+ function_call: Optional[ChatCompletionToolCallFunctionChunk]
+
+
+class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False):
+ cache_control: ChatCompletionCachedContent
+ thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+
+
+class ChatCompletionToolMessage(TypedDict):
+ role: Literal["tool"]
+ content: Union[str, Iterable[ChatCompletionTextObject]]
+ tool_call_id: str
+
+
+class ChatCompletionFunctionMessage(TypedDict):
+ role: Literal["function"]
+ content: Optional[Union[str, Iterable[ChatCompletionTextObject]]]
+ name: str
+ tool_call_id: Optional[str]
+
+
+class OpenAIChatCompletionSystemMessage(TypedDict, total=False):
+ role: Required[Literal["system"]]
+ content: Required[Union[str, List]]
+ name: str
+
+
+class OpenAIChatCompletionDeveloperMessage(TypedDict, total=False):
+ role: Required[Literal["developer"]]
+ content: Required[Union[str, List]]
+ name: str
+
+
+class ChatCompletionSystemMessage(OpenAIChatCompletionSystemMessage, total=False):
+ cache_control: ChatCompletionCachedContent
+
+
+class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total=False):
+ cache_control: ChatCompletionCachedContent
+
+
+ValidUserMessageContentTypes = [
+ "text",
+ "image_url",
+ "input_audio",
+ "document",
+ "video_url",
+ "file",
+] # used for validating user messages. Prevent users from accidentally sending anthropic messages.
+
+AllMessageValues = Union[
+ ChatCompletionUserMessage,
+ ChatCompletionAssistantMessage,
+ ChatCompletionToolMessage,
+ ChatCompletionSystemMessage,
+ ChatCompletionFunctionMessage,
+ ChatCompletionDeveloperMessage,
+]
+
+
+class ChatCompletionToolChoiceFunctionParam(TypedDict):
+ name: str
+
+
+class ChatCompletionToolChoiceObjectParam(TypedDict):
+ type: Literal["function"]
+ function: ChatCompletionToolChoiceFunctionParam
+
+
+ChatCompletionToolChoiceStringValues = Literal["none", "auto", "required"]
+
+ChatCompletionToolChoiceValues = Union[
+ ChatCompletionToolChoiceStringValues, ChatCompletionToolChoiceObjectParam
+]
+
+
+class ChatCompletionToolParamFunctionChunk(TypedDict, total=False):
+ name: Required[str]
+ description: str
+ parameters: dict
+
+
+class OpenAIChatCompletionToolParam(TypedDict):
+ type: Union[Literal["function"], str]
+ function: ChatCompletionToolParamFunctionChunk
+
+
+class ChatCompletionToolParam(OpenAIChatCompletionToolParam, total=False):
+ cache_control: ChatCompletionCachedContent
+
+
+class Function(TypedDict, total=False):
+ name: Required[str]
+ """The name of the function to call."""
+
+
+class ChatCompletionNamedToolChoiceParam(TypedDict, total=False):
+ function: Required[Function]
+
+ type: Required[Literal["function"]]
+ """The type of the tool. Currently, only `function` is supported."""
+
+
+class ChatCompletionRequest(TypedDict, total=False):
+ model: Required[str]
+ messages: Required[List[AllMessageValues]]
+ frequency_penalty: float
+ logit_bias: dict
+ logprobs: bool
+ top_logprobs: int
+ max_tokens: int
+ n: int
+ presence_penalty: float
+ response_format: dict
+ seed: int
+ service_tier: str
+ stop: Union[str, List[str]]
+ stream_options: dict
+ temperature: float
+ top_p: float
+ tools: List[ChatCompletionToolParam]
+ tool_choice: ChatCompletionToolChoiceValues
+ parallel_tool_calls: bool
+ function_call: Union[str, dict]
+ functions: List
+ user: str
+ metadata: dict # litellm specific param
+
+
+class ChatCompletionDeltaChunk(TypedDict, total=False):
+ content: Optional[str]
+ tool_calls: List[ChatCompletionDeltaToolCallChunk]
+ role: str
+
+
+ChatCompletionAssistantContentValue = (
+ str # keep as var, used in stream_chunk_builder as well
+)
+
+
+class ChatCompletionResponseMessage(TypedDict, total=False):
+ content: Optional[ChatCompletionAssistantContentValue]
+ tool_calls: Optional[List[ChatCompletionToolCallChunk]]
+ role: Literal["assistant"]
+ function_call: Optional[ChatCompletionToolCallFunctionChunk]
+ provider_specific_fields: Optional[dict]
+ reasoning_content: Optional[str]
+ thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+
+
+class ChatCompletionUsageBlock(TypedDict):
+ prompt_tokens: int
+ completion_tokens: int
+ total_tokens: int
+
+
+class OpenAIChatCompletionChunk(ChatCompletionChunk):
+ def __init__(self, **kwargs):
+ # Set the 'object' kwarg to 'chat.completion.chunk'
+ kwargs["object"] = "chat.completion.chunk"
+ super().__init__(**kwargs)
+
+
+class Hyperparameters(BaseModel):
+ batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch."
+ learning_rate_multiplier: Optional[Union[str, float]] = (
+ None # Scaling factor for the learning rate
+ )
+ n_epochs: Optional[Union[str, int]] = (
+ None # "The number of epochs to train the model for"
+ )
+
+
+class FineTuningJobCreate(BaseModel):
+ """
+ FineTuningJobCreate - Create a fine-tuning job
+
+ Example Request
+ ```
+ {
+ "model": "gpt-3.5-turbo",
+ "training_file": "file-abc123",
+ "hyperparameters": {
+ "batch_size": "auto",
+ "learning_rate_multiplier": 0.1,
+ "n_epochs": 3
+ },
+ "suffix": "custom-model-name",
+ "validation_file": "file-xyz789",
+ "integrations": ["slack"],
+ "seed": 42
+ }
+ ```
+ """
+
+ model: str # "The name of the model to fine-tune."
+ training_file: str # "The ID of an uploaded file that contains training data."
+ hyperparameters: Optional[Hyperparameters] = (
+ None # "The hyperparameters used for the fine-tuning job."
+ )
+ suffix: Optional[str] = (
+ None # "A string of up to 18 characters that will be added to your fine-tuned model name."
+ )
+ validation_file: Optional[str] = (
+ None # "The ID of an uploaded file that contains validation data."
+ )
+ integrations: Optional[List[str]] = (
+ None # "A list of integrations to enable for your fine-tuning job."
+ )
+ seed: Optional[int] = None # "The seed controls the reproducibility of the job."
+
+
+class LiteLLMFineTuningJobCreate(FineTuningJobCreate):
+ custom_llm_provider: Literal["openai", "azure", "vertex_ai"]
+
+ class Config:
+ extra = "allow" # This allows the model to accept additional fields
+
+
+AllEmbeddingInputValues = Union[str, List[str], List[int], List[List[int]]]
+
+OpenAIAudioTranscriptionOptionalParams = Literal[
+ "language", "prompt", "temperature", "response_format", "timestamp_granularities"
+]
+
+
+OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"]
+
+
+class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
+ """TypedDict for Optional parameters supported by the responses API."""
+
+ include: Optional[List[ResponseIncludable]]
+ instructions: Optional[str]
+ max_output_tokens: Optional[int]
+ metadata: Optional[Dict[str, Any]]
+ parallel_tool_calls: Optional[bool]
+ previous_response_id: Optional[str]
+ reasoning: Optional[Reasoning]
+ store: Optional[bool]
+ stream: Optional[bool]
+ temperature: Optional[float]
+ text: Optional[ResponseTextConfigParam]
+ tool_choice: Optional[ToolChoice]
+ tools: Optional[Iterable[ToolParam]]
+ top_p: Optional[float]
+ truncation: Optional[Literal["auto", "disabled"]]
+ user: Optional[str]
+
+
+class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
+ """TypedDict for request parameters supported by the responses API."""
+
+ input: Union[str, ResponseInputParam]
+ model: str
+
+
+class BaseLiteLLMOpenAIResponseObject(BaseModel):
+ def __getitem__(self, key):
+ return self.__dict__[key]
+
+ def get(self, key, default=None):
+ return self.__dict__.get(key, default)
+
+ def __contains__(self, key):
+ return key in self.__dict__
+
+ def items(self):
+ return self.__dict__.items()
+
+
+class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
+ reasoning_tokens: int
+
+ model_config = {"extra": "allow"}
+
+
+class ResponseAPIUsage(BaseLiteLLMOpenAIResponseObject):
+ input_tokens: int
+ """The number of input tokens."""
+
+ output_tokens: int
+ """The number of output tokens."""
+
+ output_tokens_details: Optional[OutputTokensDetails]
+ """A detailed breakdown of the output tokens."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+ model_config = {"extra": "allow"}
+
+
+class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
+ id: str
+ created_at: float
+ error: Optional[dict]
+ incomplete_details: Optional[IncompleteDetails]
+ instructions: Optional[str]
+ metadata: Optional[Dict]
+ model: Optional[str]
+ object: Optional[str]
+ output: List[ResponseOutputItem]
+ parallel_tool_calls: bool
+ temperature: Optional[float]
+ tool_choice: ToolChoice
+ tools: List[Tool]
+ top_p: Optional[float]
+ max_output_tokens: Optional[int]
+ previous_response_id: Optional[str]
+ reasoning: Optional[Reasoning]
+ status: Optional[str]
+ text: Optional[ResponseTextConfig]
+ truncation: Optional[Literal["auto", "disabled"]]
+ usage: Optional[ResponseAPIUsage]
+ user: Optional[str]
+ # Define private attributes using PrivateAttr
+ _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class ResponsesAPIStreamEvents(str, Enum):
+ """
+ Enum representing all supported OpenAI stream event types for the Responses API.
+
+ Inherits from str to allow direct string comparison and usage as dictionary keys.
+ """
+
+ # Response lifecycle events
+ RESPONSE_CREATED = "response.created"
+ RESPONSE_IN_PROGRESS = "response.in_progress"
+ RESPONSE_COMPLETED = "response.completed"
+ RESPONSE_FAILED = "response.failed"
+ RESPONSE_INCOMPLETE = "response.incomplete"
+
+ # Output item events
+ OUTPUT_ITEM_ADDED = "response.output_item.added"
+ OUTPUT_ITEM_DONE = "response.output_item.done"
+
+ # Content part events
+ CONTENT_PART_ADDED = "response.content_part.added"
+ CONTENT_PART_DONE = "response.content_part.done"
+
+ # Output text events
+ OUTPUT_TEXT_DELTA = "response.output_text.delta"
+ OUTPUT_TEXT_ANNOTATION_ADDED = "response.output_text.annotation.added"
+ OUTPUT_TEXT_DONE = "response.output_text.done"
+
+ # Refusal events
+ REFUSAL_DELTA = "response.refusal.delta"
+ REFUSAL_DONE = "response.refusal.done"
+
+ # Function call events
+ FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
+ FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"
+
+ # File search events
+ FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress"
+ FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching"
+ FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed"
+
+ # Web search events
+ WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress"
+ WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching"
+ WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed"
+
+ # Error event
+ ERROR = "error"
+
+
+class ResponseCreatedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED]
+ response: ResponsesAPIResponse
+
+
+class ResponseInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS]
+ response: ResponsesAPIResponse
+
+
+class ResponseCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED]
+ response: ResponsesAPIResponse
+ _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+
+class ResponseFailedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED]
+ response: ResponsesAPIResponse
+
+
+class ResponseIncompleteEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE]
+ response: ResponsesAPIResponse
+
+
+class OutputItemAddedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED]
+ output_index: int
+ item: dict
+
+
+class OutputItemDoneEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE]
+ output_index: int
+ item: dict
+
+
+class ContentPartAddedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED]
+ item_id: str
+ output_index: int
+ content_index: int
+ part: dict
+
+
+class ContentPartDoneEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE]
+ item_id: str
+ output_index: int
+ content_index: int
+ part: dict
+
+
+class OutputTextDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA]
+ item_id: str
+ output_index: int
+ content_index: int
+ delta: str
+
+
+class OutputTextAnnotationAddedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED]
+ item_id: str
+ output_index: int
+ content_index: int
+ annotation_index: int
+ annotation: dict
+
+
+class OutputTextDoneEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE]
+ item_id: str
+ output_index: int
+ content_index: int
+ text: str
+
+
+class RefusalDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA]
+ item_id: str
+ output_index: int
+ content_index: int
+ delta: str
+
+
+class RefusalDoneEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE]
+ item_id: str
+ output_index: int
+ content_index: int
+ refusal: str
+
+
+class FunctionCallArgumentsDeltaEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA]
+ item_id: str
+ output_index: int
+ delta: str
+
+
+class FunctionCallArgumentsDoneEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE]
+ item_id: str
+ output_index: int
+ arguments: str
+
+
+class FileSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS]
+ output_index: int
+ item_id: str
+
+
+class FileSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING]
+ output_index: int
+ item_id: str
+
+
+class FileSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED]
+ output_index: int
+ item_id: str
+
+
+class WebSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS]
+ output_index: int
+ item_id: str
+
+
+class WebSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING]
+ output_index: int
+ item_id: str
+
+
+class WebSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED]
+ output_index: int
+ item_id: str
+
+
+class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
+ type: Literal[ResponsesAPIStreamEvents.ERROR]
+ code: Optional[str]
+ message: str
+ param: Optional[str]
+
+
+# Union type for all possible streaming responses
+ResponsesAPIStreamingResponse = Annotated[
+ Union[
+ ResponseCreatedEvent,
+ ResponseInProgressEvent,
+ ResponseCompletedEvent,
+ ResponseFailedEvent,
+ ResponseIncompleteEvent,
+ OutputItemAddedEvent,
+ OutputItemDoneEvent,
+ ContentPartAddedEvent,
+ ContentPartDoneEvent,
+ OutputTextDeltaEvent,
+ OutputTextAnnotationAddedEvent,
+ OutputTextDoneEvent,
+ RefusalDeltaEvent,
+ RefusalDoneEvent,
+ FunctionCallArgumentsDeltaEvent,
+ FunctionCallArgumentsDoneEvent,
+ FileSearchCallInProgressEvent,
+ FileSearchCallSearchingEvent,
+ FileSearchCallCompletedEvent,
+ WebSearchCallInProgressEvent,
+ WebSearchCallSearchingEvent,
+ WebSearchCallCompletedEvent,
+ ErrorEvent,
+ ],
+ Discriminator("type"),
+]
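A compact sketch of a chat completion request assembled from the message and request TypedDicts above (hypothetical values; assumes module path litellm.types.llms.openai):

from typing import List

from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionRequest,
    ChatCompletionSystemMessage,
    ChatCompletionUserMessage,
)

messages: List[AllMessageValues] = [
    ChatCompletionSystemMessage(role="system", content="You are a helpful assistant."),
    ChatCompletionUserMessage(role="user", content="Write a haiku about type hints."),
]

# `model` and `messages` are the required keys; everything else is optional.
request: ChatCompletionRequest = {
    "model": "gpt-3.5-turbo",
    "messages": messages,
    "temperature": 0.7,
    "max_tokens": 64,
}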
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py
new file mode 100644
index 00000000..f781af88
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py
@@ -0,0 +1,19 @@
+import json
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
+
+from typing_extensions import (
+ Protocol,
+ Required,
+ Self,
+ TypeGuard,
+ get_origin,
+ override,
+ runtime_checkable,
+)
+
+
+class InfinityRerankResult(TypedDict):
+ index: int
+ relevance_score: float
+ document: Optional[str]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py
new file mode 100644
index 00000000..7024909a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py
@@ -0,0 +1,486 @@
+import json
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
+
+from typing_extensions import (
+ Protocol,
+ Required,
+ Self,
+ TypeGuard,
+ get_origin,
+ override,
+ runtime_checkable,
+)
+
+
+class FunctionResponse(TypedDict):
+ name: str
+ response: Optional[dict]
+
+
+class FunctionCall(TypedDict):
+ name: str
+ args: Optional[dict]
+
+
+class FileDataType(TypedDict):
+ mime_type: str
+ file_uri: str # the cloud storage uri of storing this file
+
+
+class BlobType(TypedDict):
+ mime_type: Required[str]
+ data: Required[str]
+
+
+class PartType(TypedDict, total=False):
+ text: str
+ inline_data: BlobType
+ file_data: FileDataType
+ function_call: FunctionCall
+ function_response: FunctionResponse
+
+
+class HttpxFunctionCall(TypedDict):
+ name: str
+ args: dict
+
+
+class HttpxExecutableCode(TypedDict):
+ code: str
+ language: str
+
+
+class HttpxCodeExecutionResult(TypedDict):
+ outcome: str
+ output: str
+
+
+class HttpxPartType(TypedDict, total=False):
+ text: str
+ inline_data: BlobType
+ file_data: FileDataType
+ functionCall: HttpxFunctionCall
+ function_response: FunctionResponse
+ executableCode: HttpxExecutableCode
+ codeExecutionResult: HttpxCodeExecutionResult
+
+
+class HttpxContentType(TypedDict, total=False):
+ role: Literal["user", "model"]
+ parts: List[HttpxPartType]
+
+
+class ContentType(TypedDict, total=False):
+ role: Literal["user", "model"]
+ parts: Required[List[PartType]]
+
+
+class SystemInstructions(TypedDict):
+ parts: Required[List[PartType]]
+
+
+class Schema(TypedDict, total=False):
+ type: Literal["STRING", "INTEGER", "BOOLEAN", "NUMBER", "ARRAY", "OBJECT"]
+ description: str
+ enum: List[str]
+ items: List["Schema"]
+ properties: "Schema"
+ required: List[str]
+ nullable: bool
+
+
+class FunctionDeclaration(TypedDict, total=False):
+ name: Required[str]
+ description: str
+ parameters: Union[Schema, dict]
+ response: Schema
+
+
+class VertexAISearch(TypedDict, total=False):
+ datastore: Required[str]
+
+
+class Retrieval(TypedDict):
+ source: VertexAISearch
+
+
+class FunctionCallingConfig(TypedDict, total=False):
+ mode: Literal["ANY", "AUTO", "NONE"]
+ allowed_function_names: List[str]
+
+
+HarmCategory = Literal[
+ "HARM_CATEGORY_UNSPECIFIED",
+ "HARM_CATEGORY_HATE_SPEECH",
+ "HARM_CATEGORY_DANGEROUS_CONTENT",
+ "HARM_CATEGORY_HARASSMENT",
+ "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+]
+HarmBlockThreshold = Literal[
+ "HARM_BLOCK_THRESHOLD_UNSPECIFIED",
+ "BLOCK_LOW_AND_ABOVE",
+ "BLOCK_MEDIUM_AND_ABOVE",
+ "BLOCK_ONLY_HIGH",
+ "BLOCK_NONE",
+]
+HarmBlockMethod = Literal["HARM_BLOCK_METHOD_UNSPECIFIED", "SEVERITY", "PROBABILITY"]
+
+HarmProbability = Literal[
+ "HARM_PROBABILITY_UNSPECIFIED", "NEGLIGIBLE", "LOW", "MEDIUM", "HIGH"
+]
+
+HarmSeverity = Literal[
+ "HARM_SEVERITY_UNSPECIFIED",
+ "HARM_SEVERITY_NEGLIGIBLE",
+ "HARM_SEVERITY_LOW",
+ "HARM_SEVERITY_MEDIUM",
+ "HARM_SEVERITY_HIGH",
+]
+
+
+class SafetSettingsConfig(TypedDict, total=False):
+ category: HarmCategory
+ threshold: HarmBlockThreshold
+ max_influential_terms: int
+ method: HarmBlockMethod
+
+
+class GenerationConfig(TypedDict, total=False):
+ temperature: float
+ top_p: float
+ top_k: float
+ candidate_count: int
+ max_output_tokens: int
+ stop_sequences: List[str]
+ presence_penalty: float
+ frequency_penalty: float
+ response_mime_type: Literal["text/plain", "application/json"]
+ response_schema: dict
+ seed: int
+ responseLogprobs: bool
+ logprobs: int
+
+
+class Tools(TypedDict, total=False):
+ function_declarations: List[FunctionDeclaration]
+ googleSearch: dict
+ googleSearchRetrieval: dict
+ code_execution: dict
+ retrieval: Retrieval
+
+
+class ToolConfig(TypedDict):
+ functionCallingConfig: FunctionCallingConfig
+
+
+class TTL(TypedDict, total=False):
+ seconds: Required[float]
+ nano: float
+
+
+class UsageMetadata(TypedDict, total=False):
+ promptTokenCount: int
+ totalTokenCount: int
+ candidatesTokenCount: int
+ cachedContentTokenCount: int
+
+
+class CachedContent(TypedDict, total=False):
+ ttl: TTL
+ expire_time: str
+ contents: List[ContentType]
+ tools: List[Tools]
+ createTime: str # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z"
+ updateTime: str # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z"
+ usageMetadata: UsageMetadata
+ expireTime: str # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z"
+ name: str
+ displayName: str
+ model: str
+ systemInstruction: ContentType
+ toolConfig: ToolConfig
+
+
+class RequestBody(TypedDict, total=False):
+ contents: Required[List[ContentType]]
+ system_instruction: SystemInstructions
+ tools: Tools
+ toolConfig: ToolConfig
+ safetySettings: List[SafetSettingsConfig]
+ generationConfig: GenerationConfig
+ cachedContent: str
+
+
+class CachedContentRequestBody(TypedDict, total=False):
+ contents: Required[List[ContentType]]
+ system_instruction: SystemInstructions
+ tools: Tools
+ toolConfig: ToolConfig
+ model: Required[str] # Format: models/{model}
+ ttl: str # ending in 's' - Example: "3.5s".
+ displayName: str
+
+
+class CachedContentListAllResponseBody(TypedDict, total=False):
+ cachedContents: List[CachedContent]
+ nextPageToken: str
+
+
+class SafetyRatings(TypedDict):
+ category: HarmCategory
+ probability: HarmProbability
+ probabilityScore: int
+ severity: HarmSeverity
+ blocked: bool
+
+
+class Date(TypedDict):
+ year: int
+ month: int
+ date: int
+
+
+class Citation(TypedDict):
+ startIndex: int
+ endIndex: int
+ uri: str
+ title: str
+ license: str
+ publicationDate: Date
+
+
+class CitationMetadata(TypedDict):
+ citations: List[Citation]
+
+
+class SearchEntryPoint(TypedDict, total=False):
+ renderedContent: str
+ sdkBlob: str
+
+
+class GroundingMetadata(TypedDict, total=False):
+ webSearchQueries: List[str]
+ searchEntryPoint: SearchEntryPoint
+ groundingAttributions: List[dict]
+
+
+class LogprobsCandidate(TypedDict):
+ token: str
+ tokenId: int
+ logProbability: float
+
+
+class LogprobsTopCandidate(TypedDict):
+ candidates: List[LogprobsCandidate]
+
+
+class LogprobsResult(TypedDict, total=False):
+ topCandidates: List[LogprobsTopCandidate]
+ chosenCandidates: List[LogprobsCandidate]
+
+
+class Candidates(TypedDict, total=False):
+ index: int
+ content: HttpxContentType
+ finishReason: Literal[
+ "FINISH_REASON_UNSPECIFIED",
+ "STOP",
+ "MAX_TOKENS",
+ "SAFETY",
+ "RECITATION",
+ "OTHER",
+ "BLOCKLIST",
+ "PROHIBITED_CONTENT",
+ "SPII",
+ ]
+ safetyRatings: List[SafetyRatings]
+ citationMetadata: CitationMetadata
+ groundingMetadata: GroundingMetadata
+ finishMessage: str
+ logprobsResult: LogprobsResult
+
+
+class PromptFeedback(TypedDict):
+ blockReason: str
+ safetyRatings: List[SafetyRatings]
+ blockReasonMessage: str
+
+
+class GenerateContentResponseBody(TypedDict, total=False):
+ candidates: List[Candidates]
+ promptFeedback: PromptFeedback
+ usageMetadata: Required[UsageMetadata]
+
+
+class FineTuneHyperparameters(TypedDict, total=False):
+ epoch_count: Optional[int]
+ learning_rate_multiplier: Optional[float]
+ adapter_size: Optional[
+ Literal[
+ "ADAPTER_SIZE_UNSPECIFIED",
+ "ADAPTER_SIZE_ONE",
+ "ADAPTER_SIZE_FOUR",
+ "ADAPTER_SIZE_EIGHT",
+ "ADAPTER_SIZE_SIXTEEN",
+ ]
+ ]
+
+
+class FineTunesupervisedTuningSpec(TypedDict, total=False):
+ training_dataset_uri: str
+ validation_dataset: Optional[str]
+ tuned_model_display_name: Optional[str]
+ hyperParameters: Optional[FineTuneHyperparameters]
+
+
+class FineTuneJobCreate(TypedDict, total=False):
+ baseModel: str
+ supervisedTuningSpec: FineTunesupervisedTuningSpec
+ tunedModelDisplayName: Optional[str]
+
+
+class ResponseSupervisedTuningSpec(TypedDict, total=False):
+ trainingDatasetUri: Optional[str]
+ hyperParameters: Optional[FineTuneHyperparameters]
+
+
+class ResponseTuningJob(TypedDict):
+ name: Optional[str]
+ tunedModelDisplayName: Optional[str]
+ baseModel: Optional[str]
+ supervisedTuningSpec: Optional[ResponseSupervisedTuningSpec]
+ state: Optional[
+ Literal[
+ "JOB_STATE_PENDING",
+ "JOB_STATE_RUNNING",
+ "JOB_STATE_SUCCEEDED",
+ "JOB_STATE_FAILED",
+ "JOB_STATE_CANCELLED",
+ ]
+ ]
+ createTime: Optional[str]
+ updateTime: Optional[str]
+
+
+class InstanceVideo(TypedDict, total=False):
+ gcsUri: str
+ videoSegmentConfig: Tuple[float, float, float]
+
+
+class InstanceImage(TypedDict, total=False):
+ gcsUri: Optional[str]
+ bytesBase64Encoded: Optional[str]
+ mimeType: Optional[str]
+
+
+class Instance(TypedDict, total=False):
+ text: str
+ image: InstanceImage
+ video: InstanceVideo
+
+
+class VertexMultimodalEmbeddingRequest(TypedDict, total=False):
+ instances: List[Instance]
+
+
+class VideoEmbedding(TypedDict):
+ startOffsetSec: int
+ endOffsetSec: int
+ embedding: List[float]
+
+
+class MultimodalPrediction(TypedDict, total=False):
+ textEmbedding: List[float]
+ imageEmbedding: List[float]
+ videoEmbeddings: List[VideoEmbedding]
+
+
+class MultimodalPredictions(TypedDict, total=False):
+ predictions: List[MultimodalPrediction]
+
+
+class VertexAICachedContentResponseObject(TypedDict):
+ name: str
+ model: str
+
+
+class TaskTypeEnum(Enum):
+ TASK_TYPE_UNSPECIFIED = "TASK_TYPE_UNSPECIFIED"
+ RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
+ RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
+ SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
+ CLASSIFICATION = "CLASSIFICATION"
+ CLUSTERING = "CLUSTERING"
+ QUESTION_ANSWERING = "QUESTION_ANSWERING"
+ FACT_VERIFICATION = "FACT_VERIFICATION"
+
+
+class VertexAITextEmbeddingsRequestBody(TypedDict, total=False):
+ content: Required[ContentType]
+ taskType: TaskTypeEnum
+ title: str
+ outputDimensionality: int
+
+
+class ContentEmbeddings(TypedDict):
+ values: List[int]
+
+
+class VertexAITextEmbeddingsResponseObject(TypedDict):
+ embedding: ContentEmbeddings
+
+
+class EmbedContentRequest(VertexAITextEmbeddingsRequestBody):
+ model: Required[str]
+
+
+class VertexAIBatchEmbeddingsRequestBody(TypedDict, total=False):
+ requests: List[EmbedContentRequest]
+
+
+class VertexAIBatchEmbeddingsResponseObject(TypedDict):
+ embeddings: List[ContentEmbeddings]
+
+
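+# NOTE: illustrative sketch, not part of the upstream litellm file. Shows how a
+# single embed request and a batch request fit together, assuming ContentType's
+# "parts" layout from earlier in this module; the model id and text are
+# hypothetical.
+_example_embed_request: EmbedContentRequest = {
+    "model": "models/text-embedding-004",
+    "content": {"parts": [{"text": "embed me"}]},
+    "taskType": TaskTypeEnum.RETRIEVAL_DOCUMENT,
+}
+_example_batch_embed_request: VertexAIBatchEmbeddingsRequestBody = {
+    "requests": [_example_embed_request]
+}
+
+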
+# Vertex AI Batch Prediction
+
+
+class GcsSource(TypedDict):
+ uris: str
+
+
+class InputConfig(TypedDict):
+ instancesFormat: str
+ gcsSource: GcsSource
+
+
+class GcsDestination(TypedDict):
+ outputUriPrefix: str
+
+
+class OutputConfig(TypedDict, total=False):
+ predictionsFormat: str
+ gcsDestination: GcsDestination
+
+
+class VertexAIBatchPredictionJob(TypedDict):
+ displayName: str
+ model: str
+ inputConfig: InputConfig
+ outputConfig: OutputConfig
+
+
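+# NOTE: illustrative sketch, not part of the upstream litellm file. Shows a batch
+# prediction job payload with JSONL input/output on GCS; the bucket paths and
+# model resource name are hypothetical.
+_example_batch_prediction_job: VertexAIBatchPredictionJob = {
+    "displayName": "nightly-batch",
+    "model": "publishers/google/models/gemini-1.5-pro",
+    "inputConfig": {
+        "instancesFormat": "jsonl",
+        "gcsSource": {"uris": "gs://my-bucket/input.jsonl"},
+    },
+    "outputConfig": {
+        "predictionsFormat": "jsonl",
+        "gcsDestination": {"outputUriPrefix": "gs://my-bucket/output/"},
+    },
+}
+
+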
+class VertexBatchPredictionResponse(TypedDict, total=False):
+ name: str
+ displayName: str
+ model: str
+ inputConfig: InputConfig
+ outputConfig: OutputConfig
+ state: str
+ createTime: str
+ updateTime: str
+ modelVersionId: str
+
+
+VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py
new file mode 100644
index 00000000..7dee2836
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py
@@ -0,0 +1,33 @@
+import json
+from enum import Enum
+from typing import Any, List, Optional, TypedDict, Union
+
+from pydantic import BaseModel
+
+
+class WatsonXAPIParams(TypedDict):
+ project_id: str
+ space_id: Optional[str]
+ region_name: Optional[str]
+
+
+class WatsonXCredentials(TypedDict):
+ api_key: str
+ api_base: str
+ token: Optional[str]
+
+
+class WatsonXAIEndpoint(str, Enum):
+ TEXT_GENERATION = "/ml/v1/text/generation"
+ TEXT_GENERATION_STREAM = "/ml/v1/text/generation_stream"
+ CHAT = "/ml/v1/text/chat"
+ CHAT_STREAM = "/ml/v1/text/chat_stream"
+ DEPLOYMENT_TEXT_GENERATION = "/ml/v1/deployments/{deployment_id}/text/generation"
+ DEPLOYMENT_TEXT_GENERATION_STREAM = (
+ "/ml/v1/deployments/{deployment_id}/text/generation_stream"
+ )
+ DEPLOYMENT_CHAT = "/ml/v1/deployments/{deployment_id}/text/chat"
+ DEPLOYMENT_CHAT_STREAM = "/ml/v1/deployments/{deployment_id}/text/chat_stream"
+ EMBEDDINGS = "/ml/v1/text/embeddings"
+ PROMPTS = "/ml/v1/prompts"
+ AVAILABLE_MODELS = "/ml/v1/foundation_model_specs"
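+
+
+# NOTE: illustrative sketch, not part of the upstream litellm file. Deployment
+# scoped endpoints embed a "{deployment_id}" placeholder that callers format in
+# before issuing the request; the deployment id below is hypothetical.
+_example_deployment_chat_path = WatsonXAIEndpoint.DEPLOYMENT_CHAT.value.format(
+    deployment_id="my-deployment-id"
+)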
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py
new file mode 100644
index 00000000..90871198
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py
@@ -0,0 +1,20 @@
+"""
+Used for /vertex_ai/ pass through endpoints
+"""
+
+from typing import Optional
+
+from pydantic import BaseModel
+
+from ..llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+
+
+class VertexPassThroughCredentials(BaseModel):
+ # Example: vertex_project = "my-project-123"
+ vertex_project: Optional[str] = None
+
+ # Example: vertex_location = "us-central1"
+ vertex_location: Optional[str] = None
+
+ # Example: vertex_credentials = "/path/to/credentials.json" or "os.environ/GOOGLE_CREDS"
+ vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/rerank.py b/.venv/lib/python3.12/site-packages/litellm/types/rerank.py
new file mode 100644
index 00000000..8e2a8cc3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/rerank.py
@@ -0,0 +1,78 @@
+"""
+LiteLLM follows the Cohere API format for the rerank API
+https://docs.cohere.com/reference/rerank
+
+"""
+
+from typing import List, Optional, Union
+
+from pydantic import BaseModel, PrivateAttr
+from typing_extensions import Required, TypedDict
+
+
+class RerankRequest(BaseModel):
+ model: str
+ query: str
+ top_n: Optional[int] = None
+ documents: List[Union[str, dict]]
+ rank_fields: Optional[List[str]] = None
+ return_documents: Optional[bool] = None
+ max_chunks_per_doc: Optional[int] = None
+ max_tokens_per_doc: Optional[int] = None
+
+
+class OptionalRerankParams(TypedDict, total=False):
+ query: str
+ top_n: Optional[int]
+ documents: List[Union[str, dict]]
+ rank_fields: Optional[List[str]]
+ return_documents: Optional[bool]
+ max_chunks_per_doc: Optional[int]
+ max_tokens_per_doc: Optional[int]
+
+
+class RerankBilledUnits(TypedDict, total=False):
+ search_units: Optional[int]
+ total_tokens: Optional[int]
+
+
+class RerankTokens(TypedDict, total=False):
+ input_tokens: Optional[int]
+ output_tokens: Optional[int]
+
+
+class RerankResponseMeta(TypedDict, total=False):
+ api_version: Optional[dict]
+ billed_units: Optional[RerankBilledUnits]
+ tokens: Optional[RerankTokens]
+
+
+class RerankResponseDocument(TypedDict):
+ text: str
+
+
+class RerankResponseResult(TypedDict, total=False):
+ index: Required[int]
+ relevance_score: Required[float]
+ document: RerankResponseDocument
+
+
+class RerankResponse(BaseModel):
+ id: Optional[str] = None
+ results: Optional[List[RerankResponseResult]] = (
+ None # Contains index and relevance_score
+ )
+ meta: Optional[RerankResponseMeta] = None # Contains api_version and billed_units
+
+ # Define private attributes using PrivateAttr
+ _hidden_params: dict = PrivateAttr(default_factory=dict)
+
+ def __getitem__(self, key):
+ return self.__dict__[key]
+
+ def get(self, key, default=None):
+ return self.__dict__.get(key, default)
+
+ def __contains__(self, key):
+ return key in self.__dict__
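+
+
+# NOTE: illustrative sketch, not part of the upstream litellm file. RerankResponse
+# also supports dict-style access via __getitem__ / get; all values are made up.
+_example_rerank_response = RerankResponse(
+    id="rerank-123",
+    results=[{"index": 0, "relevance_score": 0.92}],
+)
+_example_top_result = _example_rerank_response["results"][0]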
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/router.py b/.venv/lib/python3.12/site-packages/litellm/types/router.py
new file mode 100644
index 00000000..e34366aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/router.py
@@ -0,0 +1,707 @@
+"""
+litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc
+"""
+
+import datetime
+import enum
+import uuid
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union, get_type_hints
+
+import httpx
+from httpx import AsyncClient, Client
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+from pydantic import BaseModel, ConfigDict, Field
+from typing_extensions import Required, TypedDict
+
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+
+from ..exceptions import RateLimitError
+from .completion import CompletionRequest
+from .embedding import EmbeddingRequest
+from .llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+from .utils import ModelResponse, ProviderSpecificModelInfo
+
+
+class ConfigurableClientsideParamsCustomAuth(TypedDict):
+ api_base: str
+
+
+CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = Optional[
+ List[Union[str, ConfigurableClientsideParamsCustomAuth]]
+]
+
+
+class ModelConfig(BaseModel):
+ model_name: str
+ litellm_params: Union[CompletionRequest, EmbeddingRequest]
+ tpm: int
+ rpm: int
+
+ model_config = ConfigDict(protected_namespaces=())
+
+
+class RouterConfig(BaseModel):
+ model_list: List[ModelConfig]
+
+ redis_url: Optional[str] = None
+ redis_host: Optional[str] = None
+ redis_port: Optional[int] = None
+ redis_password: Optional[str] = None
+
+ cache_responses: Optional[bool] = False
+ cache_kwargs: Optional[Dict] = {}
+ caching_groups: Optional[List[Tuple[str, List[str]]]] = None
+ client_ttl: Optional[int] = 3600
+ num_retries: Optional[int] = 0
+ timeout: Optional[float] = None
+ default_litellm_params: Optional[Dict[str, str]] = {}
+ set_verbose: Optional[bool] = False
+ fallbacks: Optional[List] = []
+ allowed_fails: Optional[int] = None
+ context_window_fallbacks: Optional[List] = []
+ model_group_alias: Optional[Dict[str, List[str]]] = {}
+ retry_after: Optional[int] = 0
+ routing_strategy: Literal[
+ "simple-shuffle",
+ "least-busy",
+ "usage-based-routing",
+ "latency-based-routing",
+ ] = "simple-shuffle"
+
+ model_config = ConfigDict(protected_namespaces=())
+
+
+class UpdateRouterConfig(BaseModel):
+ """
+ Set of params that you can modify via `router.update_settings()`.
+ """
+
+ routing_strategy_args: Optional[dict] = None
+ routing_strategy: Optional[str] = None
+ model_group_retry_policy: Optional[dict] = None
+ allowed_fails: Optional[int] = None
+ cooldown_time: Optional[float] = None
+ num_retries: Optional[int] = None
+ timeout: Optional[float] = None
+ max_retries: Optional[int] = None
+ retry_after: Optional[float] = None
+ fallbacks: Optional[List[dict]] = None
+ context_window_fallbacks: Optional[List[dict]] = None
+
+ model_config = ConfigDict(protected_namespaces=())
+
+
+class ModelInfo(BaseModel):
+ id: Optional[
+ str
+ ] # Allow id to be optional on input, but it will always be present as a str in the model instance
+ db_model: bool = (
+ False # used for proxy - to separate models which are stored in the db vs. config.
+ )
+ updated_at: Optional[datetime.datetime] = None
+ updated_by: Optional[str] = None
+
+ created_at: Optional[datetime.datetime] = None
+ created_by: Optional[str] = None
+
+ base_model: Optional[str] = (
+ None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking
+ )
+ tier: Optional[Literal["free", "paid"]] = None
+
+ """
+ Team Model Specific Fields
+ """
+ # the team id that this model belongs to
+ team_id: Optional[str] = None
+
+ # the model_name that can be used by the team when making LLM calls
+ team_public_model_name: Optional[str] = None
+
+ def __init__(self, id: Optional[Union[str, int]] = None, **params):
+ if id is None:
+ id = str(uuid.uuid4()) # Generate a UUID if id is None or not provided
+ elif isinstance(id, int):
+ id = str(id)
+ super().__init__(id=id, **params)
+
+ model_config = ConfigDict(extra="allow")
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
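+# NOTE: illustrative sketch, not part of the upstream litellm file. `id` is
+# auto-generated as a UUID string when omitted, and integer ids are coerced to
+# strings; the team id below is hypothetical.
+_example_model_info = ModelInfo(team_id="team-1")  # .id is now a UUID string
+
+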
+class CredentialLiteLLMParams(BaseModel):
+ api_key: Optional[str] = None
+ api_base: Optional[str] = None
+ api_version: Optional[str] = None
+ ## VERTEX AI ##
+ vertex_project: Optional[str] = None
+ vertex_location: Optional[str] = None
+ vertex_credentials: Optional[Union[str, dict]] = None
+ ## UNIFIED PROJECT/REGION ##
+ region_name: Optional[str] = None
+
+ ## AWS BEDROCK / SAGEMAKER ##
+ aws_access_key_id: Optional[str] = None
+ aws_secret_access_key: Optional[str] = None
+ aws_region_name: Optional[str] = None
+ ## IBM WATSONX ##
+ watsonx_region_name: Optional[str] = None
+
+
+class GenericLiteLLMParams(CredentialLiteLLMParams):
+ """
+ LiteLLM Params without 'model' arg (used across completion / assistants api)
+ """
+
+ custom_llm_provider: Optional[str] = None
+ tpm: Optional[int] = None
+ rpm: Optional[int] = None
+ timeout: Optional[Union[float, str, httpx.Timeout]] = (
+ None # if str, pass in as os.environ/
+ )
+ stream_timeout: Optional[Union[float, str]] = (
+ None # timeout when making stream=True calls, if str, pass in as os.environ/
+ )
+ max_retries: Optional[int] = None
+ organization: Optional[str] = None # for openai orgs
+ configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None
+
+ ## LOGGING PARAMS ##
+ litellm_trace_id: Optional[str] = None
+
+ ## CUSTOM PRICING ##
+ input_cost_per_token: Optional[float] = None
+ output_cost_per_token: Optional[float] = None
+ input_cost_per_second: Optional[float] = None
+ output_cost_per_second: Optional[float] = None
+
+ max_file_size_mb: Optional[float] = None
+
+ # Deployment budgets
+ max_budget: Optional[float] = None
+ budget_duration: Optional[str] = None
+ use_in_pass_through: Optional[bool] = False
+ model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+ merge_reasoning_content_in_choices: Optional[bool] = False
+ model_info: Optional[Dict] = None
+
+ def __init__(
+ self,
+ custom_llm_provider: Optional[str] = None,
+ max_retries: Optional[Union[int, str]] = None,
+ tpm: Optional[int] = None,
+ rpm: Optional[int] = None,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ timeout: Optional[Union[float, str]] = None, # if str, pass in as os.environ/
+ stream_timeout: Optional[Union[float, str]] = (
+ None # timeout when making stream=True calls, if str, pass in as os.environ/
+ ),
+ organization: Optional[str] = None, # for openai orgs
+ ## LOGGING PARAMS ##
+ litellm_trace_id: Optional[str] = None,
+ ## UNIFIED PROJECT/REGION ##
+ region_name: Optional[str] = None,
+ ## VERTEX AI ##
+ vertex_project: Optional[str] = None,
+ vertex_location: Optional[str] = None,
+ vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
+ ## AWS BEDROCK / SAGEMAKER ##
+ aws_access_key_id: Optional[str] = None,
+ aws_secret_access_key: Optional[str] = None,
+ aws_region_name: Optional[str] = None,
+ ## IBM WATSONX ##
+ watsonx_region_name: Optional[str] = None,
+ input_cost_per_token: Optional[float] = None,
+ output_cost_per_token: Optional[float] = None,
+ input_cost_per_second: Optional[float] = None,
+ output_cost_per_second: Optional[float] = None,
+ max_file_size_mb: Optional[float] = None,
+ # Deployment budgets
+ max_budget: Optional[float] = None,
+ budget_duration: Optional[str] = None,
+ # Pass through params
+ use_in_pass_through: Optional[bool] = False,
+ # This will merge the reasoning content in the choices
+ merge_reasoning_content_in_choices: Optional[bool] = False,
+ model_info: Optional[Dict] = None,
+ **params,
+ ):
+ args = locals()
+ args.pop("max_retries", None)
+ args.pop("self", None)
+ args.pop("params", None)
+ args.pop("__class__", None)
+ if max_retries is not None and isinstance(max_retries, str):
+ max_retries = int(max_retries) # cast to int
+ # We need to keep max_retries in args since it's a parameter of GenericLiteLLMParams
+ args["max_retries"] = (
+ max_retries # Put max_retries back in args after popping it
+ )
+ super().__init__(**args, **params)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class LiteLLM_Params(GenericLiteLLMParams):
+ """
+ LiteLLM Params with 'model' requirement - used for completions
+ """
+
+ model: str
+ model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+
+ def __init__(
+ self,
+ model: str,
+ custom_llm_provider: Optional[str] = None,
+ max_retries: Optional[Union[int, str]] = None,
+ tpm: Optional[int] = None,
+ rpm: Optional[int] = None,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ timeout: Optional[Union[float, str]] = None, # if str, pass in as os.environ/
+ stream_timeout: Optional[Union[float, str]] = (
+ None # timeout when making stream=True calls, if str, pass in as os.environ/
+ ),
+ organization: Optional[str] = None, # for openai orgs
+ ## VERTEX AI ##
+ vertex_project: Optional[str] = None,
+ vertex_location: Optional[str] = None,
+ ## AWS BEDROCK / SAGEMAKER ##
+ aws_access_key_id: Optional[str] = None,
+ aws_secret_access_key: Optional[str] = None,
+ aws_region_name: Optional[str] = None,
+ # OpenAI / Azure Whisper
+ # set a max-size of file that can be passed to litellm proxy
+ max_file_size_mb: Optional[float] = None,
+ # will use deployment on pass-through endpoints if True
+ use_in_pass_through: Optional[bool] = False,
+ **params,
+ ):
+ args = locals()
+ args.pop("max_retries", None)
+ args.pop("self", None)
+ args.pop("params", None)
+ args.pop("__class__", None)
+ if max_retries is not None and isinstance(max_retries, str):
+ max_retries = int(max_retries) # cast to int
+ super().__init__(max_retries=max_retries, **args, **params)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class updateLiteLLMParams(GenericLiteLLMParams):
+ # This class is used to update the LiteLLM_Params
+    # the only difference is that model is optional
+ model: Optional[str] = None
+
+
+class updateDeployment(BaseModel):
+ model_name: Optional[str] = None
+ litellm_params: Optional[updateLiteLLMParams] = None
+ model_info: Optional[ModelInfo] = None
+
+ model_config = ConfigDict(protected_namespaces=())
+
+
+class LiteLLMParamsTypedDict(TypedDict, total=False):
+ model: str
+ custom_llm_provider: Optional[str]
+ tpm: Optional[int]
+ rpm: Optional[int]
+ order: Optional[int]
+ weight: Optional[int]
+ max_parallel_requests: Optional[int]
+ api_key: Optional[str]
+ api_base: Optional[str]
+ api_version: Optional[str]
+ timeout: Optional[Union[float, str, httpx.Timeout]]
+ stream_timeout: Optional[Union[float, str]]
+ max_retries: Optional[int]
+ organization: Optional[Union[List, str]] # for openai orgs
+ configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS # for allowing api base switching on finetuned models
+ ## DROP PARAMS ##
+ drop_params: Optional[bool]
+ ## UNIFIED PROJECT/REGION ##
+ region_name: Optional[str]
+ ## VERTEX AI ##
+ vertex_project: Optional[str]
+ vertex_location: Optional[str]
+ ## AWS BEDROCK / SAGEMAKER ##
+ aws_access_key_id: Optional[str]
+ aws_secret_access_key: Optional[str]
+ aws_region_name: Optional[str]
+ ## IBM WATSONX ##
+ watsonx_region_name: Optional[str]
+ ## CUSTOM PRICING ##
+ input_cost_per_token: Optional[float]
+ output_cost_per_token: Optional[float]
+ input_cost_per_second: Optional[float]
+ output_cost_per_second: Optional[float]
+ num_retries: Optional[int]
+ ## MOCK RESPONSES ##
+ mock_response: Optional[Union[str, ModelResponse, Exception]]
+
+ # routing params
+ # use this for tag-based routing
+ tags: Optional[List[str]]
+
+ # deployment budgets
+ max_budget: Optional[float]
+ budget_duration: Optional[str]
+
+
+class DeploymentTypedDict(TypedDict, total=False):
+ model_name: Required[str]
+ litellm_params: Required[LiteLLMParamsTypedDict]
+ model_info: dict
+
+
+SPECIAL_MODEL_INFO_PARAMS = [
+ "input_cost_per_token",
+ "output_cost_per_token",
+ "input_cost_per_character",
+ "output_cost_per_character",
+]
+
+
+class Deployment(BaseModel):
+ model_name: str
+ litellm_params: LiteLLM_Params
+ model_info: ModelInfo
+
+ model_config = ConfigDict(extra="allow", protected_namespaces=())
+
+ def __init__(
+ self,
+ model_name: str,
+ litellm_params: LiteLLM_Params,
+ model_info: Optional[Union[ModelInfo, dict]] = None,
+ **params,
+ ):
+ if model_info is None:
+ model_info = ModelInfo()
+ elif isinstance(model_info, dict):
+ model_info = ModelInfo(**model_info)
+
+ for (
+ key
+ ) in (
+ SPECIAL_MODEL_INFO_PARAMS
+ ): # ensures custom pricing info is consistently in 'model_info'
+ field = getattr(litellm_params, key, None)
+ if field is not None:
+ setattr(model_info, key, field)
+
+ super().__init__(
+ model_info=model_info,
+ model_name=model_name,
+ litellm_params=litellm_params,
+ **params,
+ )
+
+ def to_json(self, **kwargs):
+ try:
+ return self.model_dump(**kwargs) # noqa
+ except Exception as e:
+ # if using pydantic v1
+ return self.dict(**kwargs)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
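+# NOTE: illustrative sketch, not part of the upstream litellm file. Custom pricing
+# set on litellm_params is mirrored into model_info (see SPECIAL_MODEL_INFO_PARAMS
+# above) so cost tracking reads it from one place; the model name and price are
+# hypothetical.
+_example_deployment = Deployment(
+    model_name="gpt-4o-alias",
+    litellm_params=LiteLLM_Params(model="gpt-4o", input_cost_per_token=1e-06),
+)
+
+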
+class RouterErrors(enum.Enum):
+ """
+ Enum for router specific errors with common codes
+ """
+
+ user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
+ no_deployments_available = "No deployments available for selected model"
+ no_deployments_with_tag_routing = (
+ "Not allowed to access model due to tags configuration"
+ )
+ no_deployments_with_provider_budget_routing = (
+ "No deployments available - crossed budget"
+ )
+
+
+class AllowedFailsPolicy(BaseModel):
+ """
+    Use this to set a custom number of allowed fails per minute before cooling down a deployment.
+    If `AuthenticationErrorAllowedFails = 1000`, then 1000 AuthenticationErrors will be allowed before cooling down a deployment.
+
+ Mapping of Exception type to allowed_fails for each exception
+ https://docs.litellm.ai/docs/exception_mapping
+ """
+
+ BadRequestErrorAllowedFails: Optional[int] = None
+ AuthenticationErrorAllowedFails: Optional[int] = None
+ TimeoutErrorAllowedFails: Optional[int] = None
+ RateLimitErrorAllowedFails: Optional[int] = None
+ ContentPolicyViolationErrorAllowedFails: Optional[int] = None
+ InternalServerErrorAllowedFails: Optional[int] = None
+
+
+class RetryPolicy(BaseModel):
+ """
+ Use this to set a custom number of retries per exception type
+ If RateLimitErrorRetries = 3, then 3 retries will be made for RateLimitError
+ Mapping of Exception type to number of retries
+ https://docs.litellm.ai/docs/exception_mapping
+ """
+
+ BadRequestErrorRetries: Optional[int] = None
+ AuthenticationErrorRetries: Optional[int] = None
+ TimeoutErrorRetries: Optional[int] = None
+ RateLimitErrorRetries: Optional[int] = None
+ ContentPolicyViolationErrorRetries: Optional[int] = None
+ InternalServerErrorRetries: Optional[int] = None
+
+
+class AlertingConfig(BaseModel):
+ """
+    Use this to configure alerting for the router. Receive alerts on the following events:
+ - LLM API Exceptions
+ - LLM Responses Too Slow
+ - LLM Requests Hanging
+
+ Args:
+        webhook_url: str - webhook url for alerting; Slack provides a webhook url to send alerts to
+ alerting_threshold: Optional[float] = None - threshold for slow / hanging llm responses (in seconds)
+ """
+
+ webhook_url: str
+ alerting_threshold: Optional[float] = 300
+
+
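+# NOTE: illustrative sketch, not part of the upstream litellm file. Shows how the
+# per-exception retry / allowed-fails policies and the alerting config are
+# assembled; the webhook URL and counts are hypothetical.
+_example_retry_policy = RetryPolicy(RateLimitErrorRetries=3, TimeoutErrorRetries=2)
+_example_allowed_fails_policy = AllowedFailsPolicy(AuthenticationErrorAllowedFails=10)
+_example_alerting_config = AlertingConfig(
+    webhook_url="https://hooks.slack.com/services/XXX",
+    alerting_threshold=300,
+)
+
+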
+class ModelGroupInfo(BaseModel):
+ model_group: str
+ providers: List[str]
+ max_input_tokens: Optional[float] = None
+ max_output_tokens: Optional[float] = None
+ input_cost_per_token: Optional[float] = None
+ output_cost_per_token: Optional[float] = None
+ mode: Optional[
+ Union[
+ str,
+ Literal[
+ "chat",
+ "embedding",
+ "completion",
+ "image_generation",
+ "audio_transcription",
+ "rerank",
+ "moderations",
+ ],
+ ]
+ ] = Field(default="chat")
+ tpm: Optional[int] = None
+ rpm: Optional[int] = None
+ supports_parallel_function_calling: bool = Field(default=False)
+ supports_vision: bool = Field(default=False)
+ supports_function_calling: bool = Field(default=False)
+ supported_openai_params: Optional[List[str]] = Field(default=[])
+ configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None
+
+ def __init__(self, **data):
+ for field_name, field_type in get_type_hints(self.__class__).items():
+ if field_type == bool and data.get(field_name) is None:
+ data[field_name] = False
+ super().__init__(**data)
+
+
+class AssistantsTypedDict(TypedDict):
+ custom_llm_provider: Literal["azure", "openai"]
+ litellm_params: LiteLLMParamsTypedDict
+
+
+class FineTuningConfig(BaseModel):
+
+ custom_llm_provider: Literal["azure", "openai"]
+
+
+class CustomRoutingStrategyBase:
+ async def async_get_available_deployment(
+ self,
+ model: str,
+ messages: Optional[List[Dict[str, str]]] = None,
+ input: Optional[Union[str, List]] = None,
+ specific_deployment: Optional[bool] = False,
+ request_kwargs: Optional[Dict] = None,
+ ):
+ """
+ Asynchronously retrieves the available deployment based on the given parameters.
+
+ Args:
+ model (str): The name of the model.
+ messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+ input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+ specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+ request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+ Returns:
+ Returns an element from litellm.router.model_list
+
+ """
+ pass
+
+ def get_available_deployment(
+ self,
+ model: str,
+ messages: Optional[List[Dict[str, str]]] = None,
+ input: Optional[Union[str, List]] = None,
+ specific_deployment: Optional[bool] = False,
+ request_kwargs: Optional[Dict] = None,
+ ):
+ """
+ Synchronously retrieves the available deployment based on the given parameters.
+
+ Args:
+ model (str): The name of the model.
+ messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+ input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+ specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+ request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+ Returns:
+ Returns an element from litellm.router.model_list
+
+ """
+ pass
+
+
+class RouterGeneralSettings(BaseModel):
+ async_only_mode: bool = Field(
+ default=False
+    ) # this will only initialize async clients. Good for memory utilization
+ pass_through_all_models: bool = Field(
+ default=False
+    ) # if passed a model not in the llm_router model list, pass the request through to litellm.acompletion/embedding
+
+
+class RouterRateLimitErrorBasic(ValueError):
+ """
+ Raise a basic error inside helper functions.
+ """
+
+ def __init__(
+ self,
+ model: str,
+ ):
+ self.model = model
+ _message = f"{RouterErrors.no_deployments_available.value}."
+ super().__init__(_message)
+
+
+class RouterRateLimitError(ValueError):
+ def __init__(
+ self,
+ model: str,
+ cooldown_time: float,
+ enable_pre_call_checks: bool,
+ cooldown_list: List,
+ ):
+ self.model = model
+ self.cooldown_time = cooldown_time
+ self.enable_pre_call_checks = enable_pre_call_checks
+ self.cooldown_list = cooldown_list
+ _message = f"{RouterErrors.no_deployments_available.value}, Try again in {cooldown_time} seconds. Passed model={model}. pre-call-checks={enable_pre_call_checks}, cooldown_list={cooldown_list}"
+ super().__init__(_message)
+
+
+class RouterModelGroupAliasItem(TypedDict):
+ model: str
+ hidden: bool # if 'True', don't return on `.get_model_list`
+
+
+VALID_LITELLM_ENVIRONMENTS = [
+ "development",
+ "staging",
+ "production",
+]
+
+
+class RoutingStrategy(enum.Enum):
+ LEAST_BUSY = "least-busy"
+ LATENCY_BASED = "latency-based-routing"
+ COST_BASED = "cost-based-routing"
+ USAGE_BASED_ROUTING_V2 = "usage-based-routing-v2"
+ USAGE_BASED_ROUTING = "usage-based-routing"
+ PROVIDER_BUDGET_LIMITING = "provider-budget-routing"
+
+
+class RouterCacheEnum(enum.Enum):
+ TPM = "global_router:{id}:{model}:tpm:{current_minute}"
+ RPM = "global_router:{id}:{model}:rpm:{current_minute}"
+
+
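+# NOTE: illustrative sketch, not part of the upstream litellm file. Cache keys are
+# format templates; the router id, model and minute values below are hypothetical.
+_example_tpm_cache_key = RouterCacheEnum.TPM.value.format(
+    id="router-1", model="gpt-4o", current_minute="2025-01-01-00-00"
+)
+
+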
+class GenericBudgetWindowDetails(BaseModel):
+ """Details about a provider's budget window"""
+
+ budget_start: float
+ spend_key: str
+ start_time_key: str
+ ttl_seconds: int
+
+
+OptionalPreCallChecks = List[Literal["prompt_caching", "router_budget_limiting"]]
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/services.py b/.venv/lib/python3.12/site-packages/litellm/types/services.py
new file mode 100644
index 00000000..3eb283db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/services.py
@@ -0,0 +1,39 @@
+import enum
+import uuid
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class ServiceTypes(str, enum.Enum):
+ """
+ Enum for litellm + litellm-adjacent services (redis/postgres/etc.)
+ """
+
+ REDIS = "redis"
+ DB = "postgres"
+ BATCH_WRITE_TO_DB = "batch_write_to_db"
+ RESET_BUDGET_JOB = "reset_budget_job"
+ LITELLM = "self"
+ ROUTER = "router"
+ AUTH = "auth"
+ PROXY_PRE_CALL = "proxy_pre_call"
+
+
+class ServiceLoggerPayload(BaseModel):
+ """
+ The payload logged during service success/failure
+ """
+
+ is_error: bool = Field(description="did an error occur")
+ error: Optional[str] = Field(None, description="what was the error")
+ service: ServiceTypes = Field(description="who is this for? - postgres/redis")
+ duration: float = Field(description="How long did the request take?")
+    call_type: str = Field(description="The type of call being made to the service")
+
+ def to_json(self, **kwargs):
+ try:
+ return self.model_dump(**kwargs) # noqa
+ except Exception as e:
+ # if using pydantic v1
+ return self.dict(**kwargs)
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/utils.py b/.venv/lib/python3.12/site-packages/litellm/types/utils.py
new file mode 100644
index 00000000..a6654285
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/types/utils.py
@@ -0,0 +1,2081 @@
+import json
+import time
+import uuid
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, Union
+
+from aiohttp import FormData
+from openai._models import BaseModel as OpenAIObject
+from openai.types.audio.transcription_create_params import FileTypes # type: ignore
+from openai.types.completion_usage import (
+ CompletionTokensDetails,
+ CompletionUsage,
+ PromptTokensDetails,
+)
+from openai.types.moderation import (
+ Categories,
+ CategoryAppliedInputTypes,
+ CategoryScores,
+)
+from openai.types.moderation_create_response import Moderation, ModerationCreateResponse
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator
+from typing_extensions import Callable, Dict, Required, TypedDict, override
+
+import litellm
+
+from ..litellm_core_utils.core_helpers import map_finish_reason
+from .guardrails import GuardrailEventHooks
+from .llms.openai import (
+ Batch,
+ ChatCompletionThinkingBlock,
+ ChatCompletionToolCallChunk,
+ ChatCompletionUsageBlock,
+ OpenAIChatCompletionChunk,
+)
+from .rerank import RerankResponse
+
+
+def _generate_id(): # private helper function
+ return "chatcmpl-" + str(uuid.uuid4())
+
+
+class LiteLLMPydanticObjectBase(BaseModel):
+ """
+    Implements default functions that all pydantic objects should have.
+ """
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump(**kwargs) # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict(**kwargs)
+
+ def fields_set(self):
+ try:
+ return self.model_fields_set # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.__fields_set__
+
+ model_config = ConfigDict(protected_namespaces=())
+
+
+class LiteLLMCommonStrings(Enum):
+ redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'"
+ llm_provider_not_provided = "Unmapped LLM provider for this endpoint. You passed model={model}, custom_llm_provider={custom_llm_provider}. Check supported provider and route: https://docs.litellm.ai/docs/providers"
+
+
+SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"]
+
+
+class CostPerToken(TypedDict):
+ input_cost_per_token: float
+ output_cost_per_token: float
+
+
+class ProviderField(TypedDict):
+ field_name: str
+ field_type: Literal["string"]
+ field_description: str
+ field_value: str
+
+
+class ProviderSpecificModelInfo(TypedDict, total=False):
+ supports_system_messages: Optional[bool]
+ supports_response_schema: Optional[bool]
+ supports_vision: Optional[bool]
+ supports_function_calling: Optional[bool]
+ supports_tool_choice: Optional[bool]
+ supports_assistant_prefill: Optional[bool]
+ supports_prompt_caching: Optional[bool]
+ supports_audio_input: Optional[bool]
+ supports_embedding_image_input: Optional[bool]
+ supports_audio_output: Optional[bool]
+ supports_pdf_input: Optional[bool]
+ supports_native_streaming: Optional[bool]
+ supports_parallel_function_calling: Optional[bool]
+
+
+class ModelInfoBase(ProviderSpecificModelInfo, total=False):
+ key: Required[str] # the key in litellm.model_cost which is returned
+
+ max_tokens: Required[Optional[int]]
+ max_input_tokens: Required[Optional[int]]
+ max_output_tokens: Required[Optional[int]]
+ input_cost_per_token: Required[float]
+ cache_creation_input_token_cost: Optional[float]
+ cache_read_input_token_cost: Optional[float]
+ input_cost_per_character: Optional[float] # only for vertex ai models
+ input_cost_per_audio_token: Optional[float]
+ input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models
+ input_cost_per_character_above_128k_tokens: Optional[
+ float
+ ] # only for vertex ai models
+ input_cost_per_query: Optional[float] # only for rerank models
+ input_cost_per_image: Optional[float] # only for vertex ai models
+ input_cost_per_audio_per_second: Optional[float] # only for vertex ai models
+ input_cost_per_video_per_second: Optional[float] # only for vertex ai models
+ input_cost_per_second: Optional[float] # for OpenAI Speech models
+ input_cost_per_token_batches: Optional[float]
+ output_cost_per_token_batches: Optional[float]
+ output_cost_per_token: Required[float]
+ output_cost_per_character: Optional[float] # only for vertex ai models
+ output_cost_per_audio_token: Optional[float]
+ output_cost_per_token_above_128k_tokens: Optional[
+ float
+ ] # only for vertex ai models
+ output_cost_per_character_above_128k_tokens: Optional[
+ float
+ ] # only for vertex ai models
+ output_cost_per_image: Optional[float]
+ output_vector_size: Optional[int]
+ output_cost_per_video_per_second: Optional[float] # only for vertex ai models
+ output_cost_per_audio_per_second: Optional[float] # only for vertex ai models
+ output_cost_per_second: Optional[float] # for OpenAI Speech models
+
+ litellm_provider: Required[str]
+ mode: Required[
+ Literal[
+ "completion", "embedding", "image_generation", "chat", "audio_transcription"
+ ]
+ ]
+ tpm: Optional[int]
+ rpm: Optional[int]
+
+
+class ModelInfo(ModelInfoBase, total=False):
+ """
+ Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
+ """
+
+ supported_openai_params: Required[Optional[List[str]]]
+
+
+class GenericStreamingChunk(TypedDict, total=False):
+ text: Required[str]
+ tool_use: Optional[ChatCompletionToolCallChunk]
+ is_finished: Required[bool]
+ finish_reason: Required[str]
+ usage: Required[Optional[ChatCompletionUsageBlock]]
+ index: int
+
+ # use this dict if you want to return any provider specific fields in the response
+ provider_specific_fields: Optional[Dict[str, Any]]
+
+
+from enum import Enum
+
+
+class CallTypes(Enum):
+ embedding = "embedding"
+ aembedding = "aembedding"
+ completion = "completion"
+ acompletion = "acompletion"
+ atext_completion = "atext_completion"
+ text_completion = "text_completion"
+ image_generation = "image_generation"
+ aimage_generation = "aimage_generation"
+ moderation = "moderation"
+ amoderation = "amoderation"
+ atranscription = "atranscription"
+ transcription = "transcription"
+ aspeech = "aspeech"
+ speech = "speech"
+ rerank = "rerank"
+ arerank = "arerank"
+ arealtime = "_arealtime"
+ create_batch = "create_batch"
+ acreate_batch = "acreate_batch"
+ aretrieve_batch = "aretrieve_batch"
+ retrieve_batch = "retrieve_batch"
+ pass_through = "pass_through_endpoint"
+ anthropic_messages = "anthropic_messages"
+ get_assistants = "get_assistants"
+ aget_assistants = "aget_assistants"
+ create_assistants = "create_assistants"
+ acreate_assistants = "acreate_assistants"
+ delete_assistant = "delete_assistant"
+ adelete_assistant = "adelete_assistant"
+ acreate_thread = "acreate_thread"
+ create_thread = "create_thread"
+ aget_thread = "aget_thread"
+ get_thread = "get_thread"
+ a_add_message = "a_add_message"
+ add_message = "add_message"
+ aget_messages = "aget_messages"
+ get_messages = "get_messages"
+ arun_thread = "arun_thread"
+ run_thread = "run_thread"
+ arun_thread_stream = "arun_thread_stream"
+ run_thread_stream = "run_thread_stream"
+ afile_retrieve = "afile_retrieve"
+ file_retrieve = "file_retrieve"
+ afile_delete = "afile_delete"
+ file_delete = "file_delete"
+ afile_list = "afile_list"
+ file_list = "file_list"
+ acreate_file = "acreate_file"
+ create_file = "create_file"
+ afile_content = "afile_content"
+ file_content = "file_content"
+ create_fine_tuning_job = "create_fine_tuning_job"
+ acreate_fine_tuning_job = "acreate_fine_tuning_job"
+ acancel_fine_tuning_job = "acancel_fine_tuning_job"
+ cancel_fine_tuning_job = "cancel_fine_tuning_job"
+ alist_fine_tuning_jobs = "alist_fine_tuning_jobs"
+ list_fine_tuning_jobs = "list_fine_tuning_jobs"
+ aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
+ retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
+ responses = "responses"
+ aresponses = "aresponses"
+
+
+CallTypesLiteral = Literal[
+ "embedding",
+ "aembedding",
+ "completion",
+ "acompletion",
+ "atext_completion",
+ "text_completion",
+ "image_generation",
+ "aimage_generation",
+ "moderation",
+ "amoderation",
+ "atranscription",
+ "transcription",
+ "aspeech",
+ "speech",
+ "rerank",
+ "arerank",
+ "_arealtime",
+ "create_batch",
+ "acreate_batch",
+ "pass_through_endpoint",
+ "anthropic_messages",
+ "aretrieve_batch",
+ "retrieve_batch",
+]
+
+
+class PassthroughCallTypes(Enum):
+ passthrough_image_generation = "passthrough-image-generation"
+
+
+class TopLogprob(OpenAIObject):
+ token: str
+ """The token."""
+
+ bytes: Optional[List[int]] = None
+ """A list of integers representing the UTF-8 bytes representation of the token.
+
+ Useful in instances where characters are represented by multiple tokens and
+ their byte representations must be combined to generate the correct text
+ representation. Can be `null` if there is no bytes representation for the token.
+ """
+
+ logprob: float
+ """The log probability of this token, if it is within the top 20 most likely
+ tokens.
+
+ Otherwise, the value `-9999.0` is used to signify that the token is very
+ unlikely.
+ """
+
+
+class ChatCompletionTokenLogprob(OpenAIObject):
+ token: str
+ """The token."""
+
+ bytes: Optional[List[int]] = None
+ """A list of integers representing the UTF-8 bytes representation of the token.
+
+ Useful in instances where characters are represented by multiple tokens and
+ their byte representations must be combined to generate the correct text
+ representation. Can be `null` if there is no bytes representation for the token.
+ """
+
+ logprob: float
+ """The log probability of this token, if it is within the top 20 most likely
+ tokens.
+
+ Otherwise, the value `-9999.0` is used to signify that the token is very
+ unlikely.
+ """
+
+ top_logprobs: List[TopLogprob]
+ """List of the most likely tokens and their log probability, at this token
+ position.
+
+ In rare cases, there may be fewer than the number of requested `top_logprobs`
+ returned.
+ """
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+
+class ChoiceLogprobs(OpenAIObject):
+ content: Optional[List[ChatCompletionTokenLogprob]] = None
+ """A list of message content tokens with log probability information."""
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+
+class FunctionCall(OpenAIObject):
+ arguments: str
+ name: Optional[str] = None
+
+
+class Function(OpenAIObject):
+ arguments: str
+ name: Optional[
+ str
+ ] # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)
+
+ def __init__(
+ self,
+ arguments: Optional[Union[Dict, str]],
+ name: Optional[str] = None,
+ **params,
+ ):
+ if arguments is None:
+ arguments = ""
+ elif isinstance(arguments, Dict):
+ arguments = json.dumps(arguments)
+ else:
+ arguments = arguments
+
+ name = name
+
+ # Build a dictionary with the structure your BaseModel expects
+ data = {"arguments": arguments, "name": name, **params}
+
+ super(Function, self).__init__(**data)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class ChatCompletionDeltaToolCall(OpenAIObject):
+ id: Optional[str] = None
+ function: Function
+ type: Optional[str] = None
+ index: int
+
+
+class HiddenParams(OpenAIObject):
+ original_response: Optional[Union[str, Any]] = None
+ model_id: Optional[str] = None # used in Router for individual deployments
+ api_base: Optional[str] = None # returns api base used for making completion call
+
+ model_config = ConfigDict(extra="allow", protected_namespaces=())
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
+
+class ChatCompletionMessageToolCall(OpenAIObject):
+ def __init__(
+ self,
+ function: Union[Dict, Function],
+ id: Optional[str] = None,
+ type: Optional[str] = None,
+ **params,
+ ):
+ super(ChatCompletionMessageToolCall, self).__init__(**params)
+ if isinstance(function, Dict):
+ self.function = Function(**function)
+ else:
+ self.function = function
+
+ if id is not None:
+ self.id = id
+ else:
+ self.id = f"{uuid.uuid4()}"
+
+ if type is not None:
+ self.type = type
+ else:
+ self.type = "function"
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+from openai.types.chat.chat_completion_audio import ChatCompletionAudio
+
+
+class ChatCompletionAudioResponse(ChatCompletionAudio):
+
+ def __init__(
+ self,
+ data: str,
+ expires_at: int,
+ transcript: str,
+ id: Optional[str] = None,
+ **params,
+ ):
+ if id is not None:
+ id = id
+ else:
+ id = f"{uuid.uuid4()}"
+ super(ChatCompletionAudioResponse, self).__init__(
+ data=data, expires_at=expires_at, transcript=transcript, id=id, **params
+ )
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+"""
+Reference:
+ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None))
+"""
+
+
+def add_provider_specific_fields(
+ object: BaseModel, provider_specific_fields: Optional[Dict[str, Any]]
+):
+    if not provider_specific_fields:  # only set if provider_specific_fields is not empty
+ return
+ setattr(object, "provider_specific_fields", provider_specific_fields)
+
+
+class Message(OpenAIObject):
+ content: Optional[str]
+ role: Literal["assistant", "user", "system", "tool", "function"]
+ tool_calls: Optional[List[ChatCompletionMessageToolCall]]
+ function_call: Optional[FunctionCall]
+ audio: Optional[ChatCompletionAudioResponse] = None
+ reasoning_content: Optional[str] = None
+ thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+ provider_specific_fields: Optional[Dict[str, Any]] = Field(
+ default=None, exclude=True
+ )
+
+ def __init__(
+ self,
+ content: Optional[str] = None,
+ role: Literal["assistant"] = "assistant",
+ function_call=None,
+ tool_calls: Optional[list] = None,
+ audio: Optional[ChatCompletionAudioResponse] = None,
+ provider_specific_fields: Optional[Dict[str, Any]] = None,
+ reasoning_content: Optional[str] = None,
+ thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+ **params,
+ ):
+ init_values: Dict[str, Any] = {
+ "content": content,
+ "role": role or "assistant", # handle null input
+ "function_call": (
+ FunctionCall(**function_call) if function_call is not None else None
+ ),
+ "tool_calls": (
+ [
+ (
+ ChatCompletionMessageToolCall(**tool_call)
+ if isinstance(tool_call, dict)
+ else tool_call
+ )
+ for tool_call in tool_calls
+ ]
+ if tool_calls is not None and len(tool_calls) > 0
+ else None
+ ),
+ }
+
+ if audio is not None:
+ init_values["audio"] = audio
+
+ if thinking_blocks is not None:
+ init_values["thinking_blocks"] = thinking_blocks
+
+ if reasoning_content is not None:
+ init_values["reasoning_content"] = reasoning_content
+
+ super(Message, self).__init__(
+ **init_values, # type: ignore
+ **params,
+ )
+
+ if audio is None:
+ # delete audio from self
+ # OpenAI compatible APIs like mistral API will raise an error if audio is passed in
+ del self.audio
+
+ if reasoning_content is None:
+ # ensure default response matches OpenAI spec
+ del self.reasoning_content
+
+ if thinking_blocks is None:
+ # ensure default response matches OpenAI spec
+ del self.thinking_blocks
+
+ add_provider_specific_fields(self, provider_specific_fields)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
+
+class Delta(OpenAIObject):
+ reasoning_content: Optional[str] = None
+ thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+ provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
+
+ def __init__(
+ self,
+ content=None,
+ role=None,
+ function_call=None,
+ tool_calls=None,
+ audio: Optional[ChatCompletionAudioResponse] = None,
+ reasoning_content: Optional[str] = None,
+ thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+ **params,
+ ):
+ super(Delta, self).__init__(**params)
+ add_provider_specific_fields(self, params.get("provider_specific_fields", {}))
+ self.content = content
+ self.role = role
+ # Set default values and correct types
+ self.function_call: Optional[Union[FunctionCall, Any]] = None
+ self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
+ self.audio: Optional[ChatCompletionAudioResponse] = None
+
+ if reasoning_content is not None:
+ self.reasoning_content = reasoning_content
+ else:
+ # ensure default response matches OpenAI spec
+ del self.reasoning_content
+
+ if thinking_blocks is not None:
+ self.thinking_blocks = thinking_blocks
+ else:
+ # ensure default response matches OpenAI spec
+ del self.thinking_blocks
+
+ if function_call is not None and isinstance(function_call, dict):
+ self.function_call = FunctionCall(**function_call)
+ else:
+ self.function_call = function_call
+ if tool_calls is not None and isinstance(tool_calls, list):
+ self.tool_calls = []
+ for tool_call in tool_calls:
+ if isinstance(tool_call, dict):
+ if tool_call.get("index", None) is None:
+ tool_call["index"] = 0
+ self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
+ elif isinstance(tool_call, ChatCompletionDeltaToolCall):
+ self.tool_calls.append(tool_call)
+ else:
+ self.tool_calls = tool_calls
+
+ self.audio = audio
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class Choices(OpenAIObject):
+ def __init__(
+ self,
+ finish_reason=None,
+ index=0,
+ message: Optional[Union[Message, dict]] = None,
+ logprobs=None,
+ enhancements=None,
+ **params,
+ ):
+ super(Choices, self).__init__(**params)
+ if finish_reason is not None:
+ self.finish_reason = map_finish_reason(
+ finish_reason
+ ) # set finish_reason for all responses
+ else:
+ self.finish_reason = "stop"
+ self.index = index
+ if message is None:
+ self.message = Message()
+ else:
+ if isinstance(message, Message):
+ self.message = message
+ elif isinstance(message, dict):
+ self.message = Message(**message)
+ if logprobs is not None:
+ if isinstance(logprobs, dict):
+ self.logprobs = ChoiceLogprobs(**logprobs)
+ else:
+ self.logprobs = logprobs
+ if enhancements is not None:
+ self.enhancements = enhancements
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class CompletionTokensDetailsWrapper(
+ CompletionTokensDetails
+): # wrapper for older openai versions
+ text_tokens: Optional[int] = None
+ """Text tokens generated by the model."""
+
+
+class PromptTokensDetailsWrapper(
+ PromptTokensDetails
+): # wrapper for older openai versions
+ text_tokens: Optional[int] = None
+ """Text tokens sent to the model."""
+
+ image_tokens: Optional[int] = None
+ """Image tokens sent to the model."""
+
+
+class Usage(CompletionUsage):
+ _cache_creation_input_tokens: int = PrivateAttr(
+ 0
+ ) # hidden param for prompt caching. Might change, once openai introduces their equivalent.
+ _cache_read_input_tokens: int = PrivateAttr(
+ 0
+ ) # hidden param for prompt caching. Might change, once openai introduces their equivalent.
+
+ def __init__(
+ self,
+ prompt_tokens: Optional[int] = None,
+ completion_tokens: Optional[int] = None,
+ total_tokens: Optional[int] = None,
+ reasoning_tokens: Optional[int] = None,
+ prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None,
+ completion_tokens_details: Optional[
+ Union[CompletionTokensDetailsWrapper, dict]
+ ] = None,
+ **params,
+ ):
+ # handle reasoning_tokens
+ _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None
+ if reasoning_tokens:
+ completion_tokens_details = CompletionTokensDetailsWrapper(
+ reasoning_tokens=reasoning_tokens
+ )
+
+ # Ensure completion_tokens_details is properly handled
+ if completion_tokens_details:
+ if isinstance(completion_tokens_details, dict):
+ _completion_tokens_details = CompletionTokensDetailsWrapper(
+ **completion_tokens_details
+ )
+ elif isinstance(completion_tokens_details, CompletionTokensDetails):
+ _completion_tokens_details = completion_tokens_details
+
+ ## DEEPSEEK MAPPING ##
+ if "prompt_cache_hit_tokens" in params and isinstance(
+ params["prompt_cache_hit_tokens"], int
+ ):
+ if prompt_tokens_details is None:
+ prompt_tokens_details = PromptTokensDetailsWrapper(
+ cached_tokens=params["prompt_cache_hit_tokens"]
+ )
+
+ ## ANTHROPIC MAPPING ##
+ if "cache_read_input_tokens" in params and isinstance(
+ params["cache_read_input_tokens"], int
+ ):
+ if prompt_tokens_details is None:
+ prompt_tokens_details = PromptTokensDetailsWrapper(
+ cached_tokens=params["cache_read_input_tokens"]
+ )
+
+ # handle prompt_tokens_details
+ _prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
+ if prompt_tokens_details:
+ if isinstance(prompt_tokens_details, dict):
+ _prompt_tokens_details = PromptTokensDetailsWrapper(
+ **prompt_tokens_details
+ )
+ elif isinstance(prompt_tokens_details, PromptTokensDetails):
+ _prompt_tokens_details = prompt_tokens_details
+
+ super().__init__(
+ prompt_tokens=prompt_tokens or 0,
+ completion_tokens=completion_tokens or 0,
+ total_tokens=total_tokens or 0,
+ completion_tokens_details=_completion_tokens_details or None,
+ prompt_tokens_details=_prompt_tokens_details or None,
+ )
+
+ ## ANTHROPIC MAPPING ##
+ if "cache_creation_input_tokens" in params and isinstance(
+ params["cache_creation_input_tokens"], int
+ ):
+ self._cache_creation_input_tokens = params["cache_creation_input_tokens"]
+
+ if "cache_read_input_tokens" in params and isinstance(
+ params["cache_read_input_tokens"], int
+ ):
+ self._cache_read_input_tokens = params["cache_read_input_tokens"]
+
+ ## DEEPSEEK MAPPING ##
+ if "prompt_cache_hit_tokens" in params and isinstance(
+ params["prompt_cache_hit_tokens"], int
+ ):
+ self._cache_read_input_tokens = params["prompt_cache_hit_tokens"]
+
+ for k, v in params.items():
+ setattr(self, k, v)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
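+ # Illustrative usage sketch (hypothetical token counts): how a provider-specific cache
+ # param is expected to map onto Usage, per the Anthropic/DeepSeek handling above.
+ #
+ #   usage = Usage(
+ #       prompt_tokens=120,
+ #       completion_tokens=30,
+ #       total_tokens=150,
+ #       cache_read_input_tokens=100,  # Anthropic-style cache param
+ #   )
+ #   assert usage.prompt_tokens_details.cached_tokens == 100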
+
+class StreamingChoices(OpenAIObject):
+ def __init__(
+ self,
+ finish_reason=None,
+ index=0,
+ delta: Optional[Delta] = None,
+ logprobs=None,
+ enhancements=None,
+ **params,
+ ):
+ # Fix: Perplexity can return both `delta` and `message` in a streaming choice,
+ # which caused OpenWebUI to render the `message` text instead of the delta.
+ # https://github.com/BerriAI/litellm/issues/8455
+ params.pop("message", None)
+ super(StreamingChoices, self).__init__(**params)
+ if finish_reason:
+ self.finish_reason = map_finish_reason(finish_reason)
+ else:
+ self.finish_reason = None
+ self.index = index
+ if delta is not None:
+ if isinstance(delta, Delta):
+ self.delta = delta
+ elif isinstance(delta, dict):
+ self.delta = Delta(**delta)
+ else:
+ self.delta = Delta()
+ if enhancements is not None:
+ self.enhancements = enhancements
+
+ if logprobs is not None and isinstance(logprobs, dict):
+ self.logprobs = ChoiceLogprobs(**logprobs)
+ else:
+ self.logprobs = logprobs # type: ignore
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
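+ # Illustrative sketch (hypothetical values): StreamingChoices normalizes a raw provider
+ # chunk choice; a dict delta is coerced into a Delta and finish_reason goes through
+ # map_finish_reason.
+ #
+ #   choice = StreamingChoices(index=0, delta={"role": "assistant", "content": "Hel"})
+ #   assert isinstance(choice.delta, Delta)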
+
+class StreamingChatCompletionChunk(OpenAIChatCompletionChunk):
+ def __init__(self, **kwargs):
+ new_choices = []
+ for choice in kwargs["choices"]:
+ new_choice = StreamingChoices(**choice).model_dump()
+ new_choices.append(new_choice)
+ kwargs["choices"] = new_choices
+
+ super().__init__(**kwargs)
+
+
+from openai.types.chat import ChatCompletionChunk
+
+
+class ModelResponseBase(OpenAIObject):
+ id: str
+ """A unique identifier for the completion."""
+
+ created: int
+ """The Unix timestamp (in seconds) of when the completion was created."""
+
+ model: Optional[str] = None
+ """The model used for completion."""
+
+ object: str
+ """The object type, which is always "text_completion" """
+
+ system_fingerprint: Optional[str] = None
+ """This fingerprint represents the backend configuration that the model runs with.
+
+ Can be used in conjunction with the `seed` request parameter to understand when
+ backend changes have been made that might impact determinism.
+ """
+
+ _hidden_params: dict = {}
+
+ _response_headers: Optional[dict] = None
+
+
+class ModelResponseStream(ModelResponseBase):
+ choices: List[StreamingChoices]
+ provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
+
+ def __init__(
+ self,
+ choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None,
+ id: Optional[str] = None,
+ created: Optional[int] = None,
+ provider_specific_fields: Optional[Dict[str, Any]] = None,
+ **kwargs,
+ ):
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ _new_choice = None
+ if isinstance(choice, StreamingChoices):
+ _new_choice = choice
+ elif isinstance(choice, dict):
+ _new_choice = StreamingChoices(**choice)
+ elif isinstance(choice, BaseModel):
+ _new_choice = StreamingChoices(**choice.model_dump())
+ new_choices.append(_new_choice)
+ kwargs["choices"] = new_choices
+ else:
+ kwargs["choices"] = [StreamingChoices()]
+
+ if id is None:
+ id = _generate_id()
+ if created is None:
+ created = int(time.time())
+
+ if (
+ "usage" in kwargs
+ and kwargs["usage"] is not None
+ and isinstance(kwargs["usage"], dict)
+ ):
+ kwargs["usage"] = Usage(**kwargs["usage"])
+
+ kwargs["id"] = id
+ kwargs["created"] = created
+ kwargs["object"] = "chat.completion.chunk"
+ kwargs["provider_specific_fields"] = provider_specific_fields
+
+ super().__init__(**kwargs)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
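+ # Illustrative sketch (hypothetical values): a provider-agnostic streaming chunk. The
+ # object type is forced to "chat.completion.chunk" and id/created are auto-filled when
+ # not supplied.
+ #
+ #   chunk = ModelResponseStream(choices=[{"index": 0, "delta": {"content": "Hi"}}])
+ #   assert chunk.object == "chat.completion.chunk"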
+
+class ModelResponse(ModelResponseBase):
+ choices: List[Union[Choices, StreamingChoices]]
+ """The list of completion choices the model generated for the input prompt."""
+
+ def __init__(
+ self,
+ id=None,
+ choices=None,
+ created=None,
+ model=None,
+ object=None,
+ system_fingerprint=None,
+ usage=None,
+ stream=None,
+ stream_options=None,
+ response_ms=None,
+ hidden_params=None,
+ _response_headers=None,
+ **params,
+ ) -> None:
+ if stream is not None and stream is True:
+ object = "chat.completion.chunk"
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ _new_choice = None
+ if isinstance(choice, StreamingChoices):
+ _new_choice = choice
+ elif isinstance(choice, dict):
+ _new_choice = StreamingChoices(**choice)
+ elif isinstance(choice, BaseModel):
+ _new_choice = StreamingChoices(**choice.model_dump())
+ new_choices.append(_new_choice)
+ choices = new_choices
+ else:
+ choices = [StreamingChoices()]
+ else:
+ object = "chat.completion"
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ if isinstance(choice, Choices):
+ _new_choice = choice # type: ignore
+ elif isinstance(choice, dict):
+ _new_choice = Choices(**choice) # type: ignore
+ else:
+ _new_choice = choice
+ new_choices.append(_new_choice)
+ choices = new_choices
+ else:
+ choices = [Choices()]
+ if id is None:
+ id = _generate_id()
+ if created is None:
+ created = int(time.time())
+ if usage is not None:
+ if isinstance(usage, dict):
+ usage = Usage(**usage)
+ else:
+ usage = usage
+ elif stream is None or stream is False:
+ usage = Usage()
+ if hidden_params:
+ self._hidden_params = hidden_params
+
+ if _response_headers:
+ self._response_headers = _response_headers
+
+ init_values = {
+ "id": id,
+ "choices": choices,
+ "created": created,
+ "model": model,
+ "object": object,
+ "system_fingerprint": system_fingerprint,
+ }
+
+ if usage is not None:
+ init_values["usage"] = usage
+
+ super().__init__(
+ **init_values,
+ **params,
+ )
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
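+ # Illustrative sketch (hypothetical values): non-streaming construction coerces dict
+ # choices into Choices and defaults usage to an empty Usage(); with stream=True the
+ # object type and choice type switch to their streaming variants instead.
+ #
+ #   resp = ModelResponse(
+ #       model="gpt-4o-mini",
+ #       choices=[{"index": 0, "message": {"role": "assistant", "content": "Hello"}}],
+ #   )
+ #   assert resp.object == "chat.completion"
+ #   assert isinstance(resp.choices[0], Choices)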
+
+class Embedding(OpenAIObject):
+ embedding: Union[list, str] = []
+ index: int
+ object: Literal["embedding"]
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class EmbeddingResponse(OpenAIObject):
+ model: Optional[str] = None
+ """The model used for embedding."""
+
+ data: List
+ """The actual embedding value"""
+
+ object: Literal["list"]
+ """The object type, which is always "list" """
+
+ usage: Optional[Usage] = None
+ """Usage statistics for the embedding request."""
+
+ _hidden_params: dict = {}
+ _response_headers: Optional[Dict] = None
+ _response_ms: Optional[float] = None
+
+ def __init__(
+ self,
+ model: Optional[str] = None,
+ usage: Optional[Usage] = None,
+ response_ms=None,
+ data: Optional[Union[List, List[Embedding]]] = None,
+ hidden_params=None,
+ _response_headers=None,
+ **params,
+ ):
+ object = "list"
+ if response_ms:
+ _response_ms = response_ms
+ else:
+ _response_ms = None
+ if not data:
+ data = []
+
+ if not usage:
+ usage = Usage()
+
+ if _response_headers:
+ self._response_headers = _response_headers
+
+ super().__init__(model=model, object=object, data=data, usage=usage) # type: ignore
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
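+ # Illustrative sketch (hypothetical values): EmbeddingResponse mirrors the OpenAI
+ # embedding list shape; data defaults to [] and usage to an empty Usage().
+ #
+ #   resp = EmbeddingResponse(
+ #       model="text-embedding-3-small",
+ #       data=[Embedding(embedding=[0.1, 0.2], index=0, object="embedding")],
+ #   )
+ #   assert resp.object == "list" and resp.usage.total_tokens == 0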
+
+class Logprobs(OpenAIObject):
+ text_offset: Optional[List[int]]
+ token_logprobs: Optional[List[Union[float, None]]]
+ tokens: Optional[List[str]]
+ top_logprobs: Optional[List[Union[Dict[str, float], None]]]
+
+
+class TextChoices(OpenAIObject):
+ def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
+ super(TextChoices, self).__init__(**params)
+ if finish_reason:
+ self.finish_reason = map_finish_reason(finish_reason)
+ else:
+ self.finish_reason = None
+ self.index = index
+ if text is not None:
+ self.text = text
+ else:
+ self.text = None
+ if logprobs is None:
+ self.logprobs = None
+ else:
+ if isinstance(logprobs, dict):
+ self.logprobs = Logprobs(**logprobs)
+ else:
+ self.logprobs = logprobs
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
+
+class TextCompletionResponse(OpenAIObject):
+ """
+ {
+ "id": response["id"],
+ "object": "text_completion",
+ "created": response["created"],
+ "model": response["model"],
+ "choices": [
+ {
+ "text": response["choices"][0]["message"]["content"],
+ "index": response["choices"][0]["index"],
+ "logprobs": transformed_logprobs,
+ "finish_reason": response["choices"][0]["finish_reason"]
+ }
+ ],
+ "usage": response["usage"]
+ }
+ """
+
+ id: str
+ object: str
+ created: int
+ model: Optional[str]
+ choices: List[TextChoices]
+ usage: Optional[Usage]
+ _response_ms: Optional[int] = None
+ _hidden_params: HiddenParams
+
+ def __init__(
+ self,
+ id=None,
+ choices=None,
+ created=None,
+ model=None,
+ usage=None,
+ stream=False,
+ response_ms=None,
+ object=None,
+ **params,
+ ):
+ if stream:
+ object = "text_completion.chunk"
+ choices = [TextChoices()]
+ else:
+ object = "text_completion"
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ _new_choice = None
+ if isinstance(choice, TextChoices):
+ _new_choice = choice
+ elif isinstance(choice, dict):
+ _new_choice = TextChoices(**choice)
+ new_choices.append(_new_choice)
+ choices = new_choices
+ else:
+ choices = [TextChoices()]
+ if id is None:
+ id = _generate_id()
+ if created is None:
+ created = int(time.time())
+
+ if not usage:
+ usage = Usage()
+
+ super(TextCompletionResponse, self).__init__(
+ id=id, # type: ignore
+ object=object, # type: ignore
+ created=created, # type: ignore
+ model=model, # type: ignore
+ choices=choices, # type: ignore
+ usage=usage, # type: ignore
+ **params,
+ )
+
+ if response_ms:
+ self._response_ms = response_ms
+ else:
+ self._response_ms = None
+ self._hidden_params = HiddenParams()
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+from openai.types.images_response import Image as OpenAIImage
+
+
+class ImageObject(OpenAIImage):
+ """
+ Represents the url or the content of an image generated by the OpenAI API.
+
+ Attributes:
+ b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
+ url: The URL of the generated image, if response_format is url (default).
+ revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.
+
+ https://platform.openai.com/docs/api-reference/images/object
+ """
+
+ b64_json: Optional[str] = None
+ url: Optional[str] = None
+ revised_prompt: Optional[str] = None
+
+ def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs):
+ super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) # type: ignore
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
+
+from openai.types.images_response import ImagesResponse as OpenAIImageResponse
+
+
+class ImageResponse(OpenAIImageResponse):
+ _hidden_params: dict = {}
+ usage: Usage
+
+ def __init__(
+ self,
+ created: Optional[int] = None,
+ data: Optional[List[ImageObject]] = None,
+ response_ms=None,
+ usage: Optional[Usage] = None,
+ hidden_params: Optional[dict] = None,
+ ):
+ if response_ms:
+ _response_ms = response_ms
+ else:
+ _response_ms = None
+ if not data:
+ data = []
+
+ if not created:
+ created = int(time.time())
+
+ _data: List[OpenAIImage] = []
+ for d in data:
+ if isinstance(d, dict):
+ _data.append(ImageObject(**d))
+ elif isinstance(d, BaseModel):
+ _data.append(ImageObject(**d.model_dump()))
+ _usage = usage or Usage(
+ prompt_tokens=0,
+ completion_tokens=0,
+ total_tokens=0,
+ )
+ super().__init__(created=created, data=_data, usage=_usage) # type: ignore
+ self._hidden_params = hidden_params or {}
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
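+ # Illustrative sketch (hypothetical values): dict entries in `data` are coerced into
+ # ImageObject instances and a zeroed Usage is attached when none is supplied.
+ #
+ #   resp = ImageResponse(data=[{"url": "https://example.com/img.png"}])
+ #   assert isinstance(resp.data[0], ImageObject) and resp.usage.total_tokens == 0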
+
+class TranscriptionResponse(OpenAIObject):
+ text: Optional[str] = None
+
+ _hidden_params: dict = {}
+ _response_headers: Optional[dict] = None
+
+ def __init__(self, text=None):
+ super().__init__(text=text) # type: ignore
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
+
+class GenericImageParsingChunk(TypedDict):
+ type: str
+ media_type: str
+ data: str
+
+
+class ResponseFormatChunk(TypedDict, total=False):
+ type: Required[Literal["json_object", "text"]]
+ response_schema: dict
+
+
+class LoggedLiteLLMParams(TypedDict, total=False):
+ force_timeout: Optional[float]
+ custom_llm_provider: Optional[str]
+ api_base: Optional[str]
+ litellm_call_id: Optional[str]
+ model_alias_map: Optional[dict]
+ metadata: Optional[dict]
+ model_info: Optional[dict]
+ proxy_server_request: Optional[dict]
+ acompletion: Optional[bool]
+ preset_cache_key: Optional[str]
+ no_log: Optional[bool]
+ input_cost_per_second: Optional[float]
+ input_cost_per_token: Optional[float]
+ output_cost_per_token: Optional[float]
+ output_cost_per_second: Optional[float]
+ cooldown_time: Optional[float]
+
+
+class AdapterCompletionStreamWrapper:
+ def __init__(self, completion_stream):
+ self.completion_stream = completion_stream
+
+ def __iter__(self):
+ return self
+
+ def __aiter__(self):
+ return self
+
+ def __next__(self):
+ try:
+ for chunk in self.completion_stream:
+ if chunk == "None" or chunk is None:
+ raise Exception
+ return chunk
+ raise StopIteration
+ except StopIteration:
+ raise StopIteration
+ except Exception as e:
+ print(f"AdapterCompletionStreamWrapper - {e}") # noqa
+
+ async def __anext__(self):
+ try:
+ async for chunk in self.completion_stream:
+ if chunk == "None" or chunk is None:
+ raise Exception
+ return chunk
+ raise StopIteration
+ except StopIteration:
+ raise StopAsyncIteration
+
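+ # Illustrative sketch (hypothetical generator): the wrapper lets a plain iterator of
+ # chunks be consumed with either `for` or `async for`.
+ #
+ #   def fake_stream():
+ #       yield {"text": "a"}
+ #       yield {"text": "b"}
+ #
+ #   chunks = list(AdapterCompletionStreamWrapper(fake_stream()))
+ #   assert len(chunks) == 2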
+
+class StandardLoggingUserAPIKeyMetadata(TypedDict):
+ user_api_key_hash: Optional[str] # hash of the litellm virtual key used
+ user_api_key_alias: Optional[str]
+ user_api_key_org_id: Optional[str]
+ user_api_key_team_id: Optional[str]
+ user_api_key_user_id: Optional[str]
+ user_api_key_user_email: Optional[str]
+ user_api_key_team_alias: Optional[str]
+ user_api_key_end_user_id: Optional[str]
+
+
+class StandardLoggingPromptManagementMetadata(TypedDict):
+ prompt_id: str
+ prompt_variables: Optional[dict]
+ prompt_integration: str
+
+
+class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata):
+ """
+ Specific metadata k,v pairs logged to integration for easier cost tracking and prompt management
+ """
+
+ spend_logs_metadata: Optional[
+ dict
+ ] # special param to log k,v pairs to spendlogs for a call
+ requester_ip_address: Optional[str]
+ requester_metadata: Optional[dict]
+ prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata]
+ applied_guardrails: Optional[List[str]]
+
+
+class StandardLoggingAdditionalHeaders(TypedDict, total=False):
+ x_ratelimit_limit_requests: int
+ x_ratelimit_limit_tokens: int
+ x_ratelimit_remaining_requests: int
+ x_ratelimit_remaining_tokens: int
+
+
+class StandardLoggingHiddenParams(TypedDict):
+ model_id: Optional[
+ str
+ ] # id of the model in the router, separates multiple models with the same name but different credentials
+ cache_key: Optional[str]
+ api_base: Optional[str]
+ response_cost: Optional[str]
+ litellm_overhead_time_ms: Optional[float]
+ additional_headers: Optional[StandardLoggingAdditionalHeaders]
+ batch_models: Optional[List[str]]
+ litellm_model_name: Optional[str] # the model name sent to the provider by litellm
+
+
+class StandardLoggingModelInformation(TypedDict):
+ model_map_key: str
+ model_map_value: Optional[ModelInfo]
+
+
+class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False):
+ """
+ Debug information, if cost tracking fails.
+
+ Avoid logging sensitive information like response or optional params
+ """
+
+ error_str: Required[str]
+ traceback_str: Required[str]
+ model: str
+ cache_hit: Optional[bool]
+ custom_llm_provider: Optional[str]
+ base_model: Optional[str]
+ call_type: str
+ custom_pricing: Optional[bool]
+
+
+class StandardLoggingPayloadErrorInformation(TypedDict, total=False):
+ error_code: Optional[str]
+ error_class: Optional[str]
+ llm_provider: Optional[str]
+ traceback: Optional[str]
+ error_message: Optional[str]
+
+
+class StandardLoggingGuardrailInformation(TypedDict, total=False):
+ guardrail_name: Optional[str]
+ guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]]
+ guardrail_response: Optional[Union[dict, str]]
+ guardrail_status: Literal["success", "failure"]
+
+
+StandardLoggingPayloadStatus = Literal["success", "failure"]
+
+
+class StandardLoggingPayload(TypedDict):
+ id: str
+ trace_id: str # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
+ call_type: str
+ stream: Optional[bool]
+ response_cost: float
+ response_cost_failure_debug_info: Optional[
+ StandardLoggingModelCostFailureDebugInformation
+ ]
+ status: StandardLoggingPayloadStatus
+ custom_llm_provider: Optional[str]
+ total_tokens: int
+ prompt_tokens: int
+ completion_tokens: int
+ startTime: float # Note: making this camelCase was a mistake, everything should be snake case
+ endTime: float
+ completionStartTime: float
+ response_time: float
+ model_map_information: StandardLoggingModelInformation
+ model: str
+ model_id: Optional[str]
+ model_group: Optional[str]
+ api_base: str
+ metadata: StandardLoggingMetadata
+ cache_hit: Optional[bool]
+ cache_key: Optional[str]
+ saved_cache_cost: float
+ request_tags: list
+ end_user: Optional[str]
+ requester_ip_address: Optional[str]
+ messages: Optional[Union[str, list, dict]]
+ response: Optional[Union[str, list, dict]]
+ error_str: Optional[str]
+ error_information: Optional[StandardLoggingPayloadErrorInformation]
+ model_parameters: dict
+ hidden_params: StandardLoggingHiddenParams
+ guardrail_information: Optional[StandardLoggingGuardrailInformation]
+
+
+from typing import AsyncIterator, Iterator
+
+
+class CustomStreamingDecoder:
+ async def aiter_bytes(
+ self, iterator: AsyncIterator[bytes]
+ ) -> AsyncIterator[
+ Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]
+ ]:
+ raise NotImplementedError
+
+ def iter_bytes(
+ self, iterator: Iterator[bytes]
+ ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]:
+ raise NotImplementedError
+
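+ # Illustrative sketch (hypothetical subclass): a concrete decoder overrides iter_bytes /
+ # aiter_bytes to turn raw bytes into chunks.
+ #
+ #   class MyDecoder(CustomStreamingDecoder):
+ #       def iter_bytes(self, iterator):
+ #           for raw in iterator:
+ #               yield None  # parse `raw` into a GenericStreamingChunk here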
+
+class StandardPassThroughResponseObject(TypedDict):
+ response: str
+
+
+OPENAI_RESPONSE_HEADERS = [
+ "x-ratelimit-remaining-requests",
+ "x-ratelimit-remaining-tokens",
+ "x-ratelimit-limit-requests",
+ "x-ratelimit-limit-tokens",
+ "x-ratelimit-reset-requests",
+ "x-ratelimit-reset-tokens",
+]
+
+
+class StandardCallbackDynamicParams(TypedDict, total=False):
+ # Langfuse dynamic params
+ langfuse_public_key: Optional[str]
+ langfuse_secret: Optional[str]
+ langfuse_secret_key: Optional[str]
+ langfuse_host: Optional[str]
+
+ # GCS dynamic params
+ gcs_bucket_name: Optional[str]
+ gcs_path_service_account: Optional[str]
+
+ # Langsmith dynamic params
+ langsmith_api_key: Optional[str]
+ langsmith_project: Optional[str]
+ langsmith_base_url: Optional[str]
+
+ # Humanloop dynamic params
+ humanloop_api_key: Optional[str]
+
+ # Arize dynamic params
+ arize_api_key: Optional[str]
+ arize_space_key: Optional[str]
+
+ # Logging settings
+ turn_off_message_logging: Optional[bool] # when true will not log messages
+
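+ # Illustrative sketch (hypothetical values): per-request callback credentials expressed
+ # as a StandardCallbackDynamicParams dict; these keys are also merged into
+ # all_litellm_params below.
+ #
+ #   dynamic_params: StandardCallbackDynamicParams = {
+ #       "langfuse_public_key": "pk-example",
+ #       "langfuse_secret_key": "sk-example",
+ #       "turn_off_message_logging": True,
+ #   }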
+
+all_litellm_params = [
+ "metadata",
+ "litellm_metadata",
+ "litellm_trace_id",
+ "tags",
+ "acompletion",
+ "aimg_generation",
+ "atext_completion",
+ "text_completion",
+ "caching",
+ "mock_response",
+ "mock_timeout",
+ "disable_add_transform_inline_image_block",
+ "api_key",
+ "api_version",
+ "prompt_id",
+ "provider_specific_header",
+ "prompt_variables",
+ "api_base",
+ "force_timeout",
+ "logger_fn",
+ "verbose",
+ "custom_llm_provider",
+ "litellm_logging_obj",
+ "litellm_call_id",
+ "use_client",
+ "id",
+ "fallbacks",
+ "azure",
+ "headers",
+ "model_list",
+ "num_retries",
+ "context_window_fallback_dict",
+ "retry_policy",
+ "retry_strategy",
+ "roles",
+ "final_prompt_value",
+ "bos_token",
+ "eos_token",
+ "request_timeout",
+ "complete_response",
+ "self",
+ "client",
+ "rpm",
+ "tpm",
+ "max_parallel_requests",
+ "input_cost_per_token",
+ "output_cost_per_token",
+ "input_cost_per_second",
+ "output_cost_per_second",
+ "hf_model_name",
+ "model_info",
+ "proxy_server_request",
+ "preset_cache_key",
+ "caching_groups",
+ "ttl",
+ "cache",
+ "no-log",
+ "base_model",
+ "stream_timeout",
+ "supports_system_message",
+ "region_name",
+ "allowed_model_region",
+ "model_config",
+ "fastest_response",
+ "cooldown_time",
+ "cache_key",
+ "max_retries",
+ "azure_ad_token_provider",
+ "tenant_id",
+ "client_id",
+ "azure_username",
+ "azure_password",
+ "client_secret",
+ "user_continue_message",
+ "configurable_clientside_auth_params",
+ "weight",
+ "ensure_alternating_roles",
+ "assistant_continue_message",
+ "user_continue_message",
+ "fallback_depth",
+ "max_fallbacks",
+ "max_budget",
+ "budget_duration",
+ "use_in_pass_through",
+ "merge_reasoning_content_in_choices",
+ "litellm_credential_name",
+] + list(StandardCallbackDynamicParams.__annotations__.keys())
+
+
+class KeyGenerationConfig(TypedDict, total=False):
+ required_params: List[
+ str
+ ] # specify params that must be present in the key generation request
+
+
+class TeamUIKeyGenerationConfig(KeyGenerationConfig):
+ allowed_team_member_roles: List[str]
+
+
+class PersonalUIKeyGenerationConfig(KeyGenerationConfig):
+ allowed_user_roles: List[str]
+
+
+class StandardKeyGenerationConfig(TypedDict, total=False):
+ team_key_generation: TeamUIKeyGenerationConfig
+ personal_key_generation: PersonalUIKeyGenerationConfig
+
+
+class BudgetConfig(BaseModel):
+ max_budget: Optional[float] = None
+ budget_duration: Optional[str] = None
+ tpm_limit: Optional[int] = None
+ rpm_limit: Optional[int] = None
+
+ def __init__(self, **data: Any) -> None:
+ # Map time_period to budget_duration if present
+ if "time_period" in data:
+ data["budget_duration"] = data.pop("time_period")
+
+ # Map budget_limit to max_budget if present
+ if "budget_limit" in data:
+ data["max_budget"] = data.pop("budget_limit")
+
+ super().__init__(**data)
+
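+ # Illustrative sketch (hypothetical values): legacy field names are remapped in
+ # BudgetConfig.__init__, so older configs keep working.
+ #
+ #   cfg = BudgetConfig(time_period="30d", budget_limit=10.0, rpm_limit=100)
+ #   assert cfg.budget_duration == "30d" and cfg.max_budget == 10.0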
+
+GenericBudgetConfigType = Dict[str, BudgetConfig]
+
+
+class LlmProviders(str, Enum):
+ OPENAI = "openai"
+ OPENAI_LIKE = "openai_like" # embedding only
+ JINA_AI = "jina_ai"
+ XAI = "xai"
+ CUSTOM_OPENAI = "custom_openai"
+ TEXT_COMPLETION_OPENAI = "text-completion-openai"
+ COHERE = "cohere"
+ COHERE_CHAT = "cohere_chat"
+ CLARIFAI = "clarifai"
+ ANTHROPIC = "anthropic"
+ ANTHROPIC_TEXT = "anthropic_text"
+ REPLICATE = "replicate"
+ HUGGINGFACE = "huggingface"
+ TOGETHER_AI = "together_ai"
+ OPENROUTER = "openrouter"
+ VERTEX_AI = "vertex_ai"
+ VERTEX_AI_BETA = "vertex_ai_beta"
+ GEMINI = "gemini"
+ AI21 = "ai21"
+ BASETEN = "baseten"
+ AZURE = "azure"
+ AZURE_TEXT = "azure_text"
+ AZURE_AI = "azure_ai"
+ SAGEMAKER = "sagemaker"
+ SAGEMAKER_CHAT = "sagemaker_chat"
+ BEDROCK = "bedrock"
+ VLLM = "vllm"
+ NLP_CLOUD = "nlp_cloud"
+ PETALS = "petals"
+ OOBABOOGA = "oobabooga"
+ OLLAMA = "ollama"
+ OLLAMA_CHAT = "ollama_chat"
+ DEEPINFRA = "deepinfra"
+ PERPLEXITY = "perplexity"
+ MISTRAL = "mistral"
+ GROQ = "groq"
+ NVIDIA_NIM = "nvidia_nim"
+ CEREBRAS = "cerebras"
+ AI21_CHAT = "ai21_chat"
+ VOLCENGINE = "volcengine"
+ CODESTRAL = "codestral"
+ TEXT_COMPLETION_CODESTRAL = "text-completion-codestral"
+ DEEPSEEK = "deepseek"
+ SAMBANOVA = "sambanova"
+ MARITALK = "maritalk"
+ VOYAGE = "voyage"
+ CLOUDFLARE = "cloudflare"
+ XINFERENCE = "xinference"
+ FIREWORKS_AI = "fireworks_ai"
+ FRIENDLIAI = "friendliai"
+ WATSONX = "watsonx"
+ WATSONX_TEXT = "watsonx_text"
+ TRITON = "triton"
+ PREDIBASE = "predibase"
+ DATABRICKS = "databricks"
+ EMPOWER = "empower"
+ GITHUB = "github"
+ CUSTOM = "custom"
+ LITELLM_PROXY = "litellm_proxy"
+ HOSTED_VLLM = "hosted_vllm"
+ LM_STUDIO = "lm_studio"
+ GALADRIEL = "galadriel"
+ INFINITY = "infinity"
+ DEEPGRAM = "deepgram"
+ AIOHTTP_OPENAI = "aiohttp_openai"
+ LANGFUSE = "langfuse"
+ HUMANLOOP = "humanloop"
+ TOPAZ = "topaz"
+ ASSEMBLYAI = "assemblyai"
+ SNOWFLAKE = "snowflake"
+
+
+# Create a set of all provider values for quick lookup
+LlmProvidersSet = {provider.value for provider in LlmProviders}
+
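+ # Illustrative sketch: membership checks use the precomputed set of provider values
+ # rather than iterating the enum.
+ #
+ #   assert LlmProviders.ANTHROPIC.value in LlmProvidersSet
+ #   assert "not_a_provider" not in LlmProvidersSet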
+
+class LiteLLMLoggingBaseClass:
+ """
+ Base class for logging pre and post call
+
+ Meant to simplify type checking for logging obj.
+ """
+
+ def pre_call(self, input, api_key, model=None, additional_args={}):
+ pass
+
+ def post_call(
+ self, original_response, input=None, api_key=None, additional_args={}
+ ):
+ pass
+
+
+class CustomHuggingfaceTokenizer(TypedDict):
+ identifier: str
+ revision: str # usually 'main'
+ auth_token: Optional[str]
+
+
+class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum):
+ """
+ Try using an enum for endpoints. This should make it easier to track what provider is supported for what endpoint.
+ """
+
+ OPENAI = LlmProviders.OPENAI.value
+ TOPAZ = LlmProviders.TOPAZ.value
+
+
+class HttpHandlerRequestFields(TypedDict, total=False):
+ data: dict # request body
+ params: dict # query params
+ files: dict # file uploads
+ content: Any # raw content
+
+
+class ProviderSpecificHeader(TypedDict):
+ custom_llm_provider: str
+ extra_headers: dict
+
+
+class SelectTokenizerResponse(TypedDict):
+ type: Literal["openai_tokenizer", "huggingface_tokenizer"]
+ tokenizer: Any
+
+
+class LiteLLMBatch(Batch):
+ _hidden_params: dict = {}
+ usage: Optional[Usage] = None
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def json(self, **kwargs): # type: ignore
+ try:
+ return self.model_dump() # noqa
+ except Exception:
+ # if using pydantic v1
+ return self.dict()
+
+
+class RawRequestTypedDict(TypedDict, total=False):
+ raw_request_api_base: Optional[str]
+ raw_request_body: Optional[dict]
+ raw_request_headers: Optional[dict]
+ error: Optional[str]
+
+
+class CredentialBase(BaseModel):
+ credential_name: str
+ credential_info: dict
+
+
+class CredentialItem(CredentialBase):
+ credential_values: dict
+
+
+class CreateCredentialItem(CredentialBase):
+ credential_values: Optional[dict] = None
+ model_id: Optional[str] = None
+
+ @model_validator(mode="before")
+ @classmethod
+ def check_credential_params(cls, values):
+ if not values.get("credential_values") and not values.get("model_id"):
+ raise ValueError("Either credential_values or model_id must be set")
+ return values
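+
+
+ # Illustrative sketch (hypothetical values): the model_validator above rejects requests
+ # that carry neither credential_values nor model_id.
+ #
+ #   CreateCredentialItem(
+ #       credential_name="my-azure-creds",
+ #       credential_info={},
+ #       model_id="model-123",
+ #   )  # passes validation
+ #   # CreateCredentialItem(credential_name="x", credential_info={})  # raises ValueError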