| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/types | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
| download | gn-ai-master.tar.gz | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/types')
37 files changed, 7061 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/types/adapter.py b/.venv/lib/python3.12/site-packages/litellm/types/adapter.py new file mode 100644 index 00000000..2995cfbc --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/adapter.py @@ -0,0 +1,10 @@ +from typing import List + +from typing_extensions import Dict, Required, TypedDict, override + +from litellm.integrations.custom_logger import CustomLogger + + +class AdapterItem(TypedDict): + id: str + adapter: CustomLogger diff --git a/.venv/lib/python3.12/site-packages/litellm/types/caching.py b/.venv/lib/python3.12/site-packages/litellm/types/caching.py new file mode 100644 index 00000000..c15d4d15 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/caching.py @@ -0,0 +1,78 @@ +from enum import Enum +from typing import Any, Dict, Literal, Optional, TypedDict, Union + +from pydantic import BaseModel + + +class LiteLLMCacheType(str, Enum): + LOCAL = "local" + REDIS = "redis" + REDIS_SEMANTIC = "redis-semantic" + S3 = "s3" + DISK = "disk" + QDRANT_SEMANTIC = "qdrant-semantic" + + +CachingSupportedCallTypes = Literal[ + "completion", + "acompletion", + "embedding", + "aembedding", + "atranscription", + "transcription", + "atext_completion", + "text_completion", + "arerank", + "rerank", +] + + +class RedisPipelineIncrementOperation(TypedDict): + """ + TypeDict for 1 Redis Pipeline Increment Operation + """ + + key: str + increment_value: float + ttl: Optional[int] + + +DynamicCacheControl = TypedDict( + "DynamicCacheControl", + { + # Will cache the response for the user-defined amount of time (in seconds). + "ttl": Optional[int], + # Namespace to use for caching + "namespace": Optional[str], + # Max Age to use for caching + "s-maxage": Optional[int], + "s-max-age": Optional[int], + # Will not return a cached response, but instead call the actual endpoint. + "no-cache": Optional[bool], + # Will not store the response in the cache. + "no-store": Optional[bool], + }, +) + + +class CachePingResponse(BaseModel): + status: str + cache_type: str + ping_response: Optional[bool] = None + set_cache_response: Optional[str] = None + litellm_cache_params: Optional[str] = None + + # intentionally a dict, since we run masker.mask_dict() on HealthCheckCacheParams + health_check_cache_params: Optional[dict] = None + + +class HealthCheckCacheParams(BaseModel): + """ + Cache Params returned on /cache/ping call + """ + + host: Optional[str] = None + port: Optional[Union[str, int]] = None + redis_kwargs: Optional[Dict[str, Any]] = None + namespace: Optional[str] = None + redis_version: Optional[Union[str, int, float]] = None diff --git a/.venv/lib/python3.12/site-packages/litellm/types/completion.py b/.venv/lib/python3.12/site-packages/litellm/types/completion.py new file mode 100644 index 00000000..b06bb733 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/completion.py @@ -0,0 +1,193 @@ +from typing import Iterable, List, Optional, Union + +from pydantic import BaseModel, ConfigDict +from typing_extensions import Literal, Required, TypedDict + + +class ChatCompletionSystemMessageParam(TypedDict, total=False): + content: Required[str] + """The contents of the system message.""" + + role: Required[Literal["system"]] + """The role of the messages author, in this case `system`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. 
+ """ + + +class ChatCompletionContentPartTextParam(TypedDict, total=False): + text: Required[str] + """The text content.""" + + type: Required[Literal["text"]] + """The type of the content part.""" + + +class ImageURL(TypedDict, total=False): + url: Required[str] + """Either a URL of the image or the base64 encoded image data.""" + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image. + + Learn more in the + [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding). + """ + + +class ChatCompletionContentPartImageParam(TypedDict, total=False): + image_url: Required[ImageURL] + + type: Required[Literal["image_url"]] + """The type of the content part.""" + + +ChatCompletionContentPartParam = Union[ + ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam +] + + +class ChatCompletionUserMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] + """The contents of the user message.""" + + role: Required[Literal["user"]] + """The role of the messages author, in this case `user`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ + + +class FunctionCall(TypedDict, total=False): + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Required[str] + """The name of the function to call.""" + + +class Function(TypedDict, total=False): + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionToolMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] + """The contents of the tool message.""" + + role: Required[Literal["tool"]] + """The role of the messages author, in this case `tool`.""" + + tool_call_id: Required[str] + """Tool call that this message is responding to.""" + + +class ChatCompletionFunctionMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] + """The contents of the function message.""" + + name: Required[str] + """The name of the function to call.""" + + role: Required[Literal["function"]] + """The role of the messages author, in this case `function`.""" + + +class ChatCompletionMessageToolCallParam(TypedDict, total=False): + id: Required[str] + """The ID of the tool call.""" + + function: Required[Function] + """The function that the model called.""" + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" + + +class ChatCompletionAssistantMessageParam(TypedDict, total=False): + role: Required[Literal["assistant"]] + """The role of the messages author, in this case `assistant`.""" + + content: Optional[str] + """The contents of the assistant message. + + Required unless `tool_calls` or `function_call` is specified. 
+ """ + + function_call: FunctionCall + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the + model. + """ + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ + + tool_calls: Iterable[ChatCompletionMessageToolCallParam] + """The tool calls generated by the model, such as function calls.""" + + +ChatCompletionMessageParam = Union[ + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, + ChatCompletionAssistantMessageParam, + ChatCompletionFunctionMessageParam, + ChatCompletionToolMessageParam, +] + + +class CompletionRequest(BaseModel): + model: str + messages: List[str] = [] + timeout: Optional[Union[float, int]] = None + temperature: Optional[float] = None + top_p: Optional[float] = None + n: Optional[int] = None + stream: Optional[bool] = None + stop: Optional[dict] = None + max_tokens: Optional[int] = None + presence_penalty: Optional[float] = None + frequency_penalty: Optional[float] = None + logit_bias: Optional[dict] = None + user: Optional[str] = None + response_format: Optional[dict] = None + seed: Optional[int] = None + tools: Optional[List[str]] = None + tool_choice: Optional[str] = None + logprobs: Optional[bool] = None + top_logprobs: Optional[int] = None + deployment_id: Optional[str] = None + functions: Optional[List[str]] = None + function_call: Optional[str] = None + base_url: Optional[str] = None + api_version: Optional[str] = None + api_key: Optional[str] = None + model_list: Optional[List[str]] = None + + model_config = ConfigDict(protected_namespaces=(), extra="allow") diff --git a/.venv/lib/python3.12/site-packages/litellm/types/embedding.py b/.venv/lib/python3.12/site-packages/litellm/types/embedding.py new file mode 100644 index 00000000..f8fdebc5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/embedding.py @@ -0,0 +1,21 @@ +from typing import List, Optional, Union + +from pydantic import BaseModel, ConfigDict + + +class EmbeddingRequest(BaseModel): + model: str + input: List[str] = [] + timeout: int = 600 + api_base: Optional[str] = None + api_version: Optional[str] = None + api_key: Optional[str] = None + api_type: Optional[str] = None + caching: bool = False + user: Optional[str] = None + custom_llm_provider: Optional[Union[str, dict]] = None + litellm_call_id: Optional[str] = None + litellm_logging_obj: Optional[dict] = None + logger_fn: Optional[str] = None + + model_config = ConfigDict(extra="allow") diff --git a/.venv/lib/python3.12/site-packages/litellm/types/files.py b/.venv/lib/python3.12/site-packages/litellm/types/files.py new file mode 100644 index 00000000..600ad806 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/files.py @@ -0,0 +1,283 @@ +from enum import Enum +from types import MappingProxyType +from typing import List, Set, Mapping + +""" +Base Enums/Consts +""" + + +class FileType(Enum): + AAC = "AAC" + CSV = "CSV" + DOC = "DOC" + DOCX = "DOCX" + FLAC = "FLAC" + FLV = "FLV" + GIF = "GIF" + GOOGLE_DOC = "GOOGLE_DOC" + GOOGLE_DRAWINGS = "GOOGLE_DRAWINGS" + GOOGLE_SHEETS = "GOOGLE_SHEETS" + GOOGLE_SLIDES = "GOOGLE_SLIDES" + HEIC = "HEIC" + HEIF = "HEIF" + HTML = "HTML" + JPEG = "JPEG" + JSON = "JSON" + M4A = "M4A" + M4V = "M4V" + MOV = "MOV" + MP3 = "MP3" + MP4 = "MP4" + MPEG = "MPEG" + MPEGPS = "MPEGPS" + MPG = "MPG" + MPA = "MPA" + MPGA = "MPGA" + OGG = "OGG" + OPUS = "OPUS" + PDF = "PDF" + PCM = "PCM" + PNG = "PNG" 
+ PPT = "PPT" + PPTX = "PPTX" + RTF = "RTF" + THREE_GPP = "3GPP" + TXT = "TXT" + WAV = "WAV" + WEBM = "WEBM" + WEBP = "WEBP" + WMV = "WMV" + XLS = "XLS" + XLSX = "XLSX" + + +FILE_EXTENSIONS: Mapping[FileType, List[str]] = MappingProxyType( + { + FileType.AAC: ["aac"], + FileType.CSV: ["csv"], + FileType.DOC: ["doc"], + FileType.DOCX: ["docx"], + FileType.FLAC: ["flac"], + FileType.FLV: ["flv"], + FileType.GIF: ["gif"], + FileType.GOOGLE_DOC: ["gdoc"], + FileType.GOOGLE_DRAWINGS: ["gdraw"], + FileType.GOOGLE_SHEETS: ["gsheet"], + FileType.GOOGLE_SLIDES: ["gslides"], + FileType.HEIC: ["heic"], + FileType.HEIF: ["heif"], + FileType.HTML: ["html", "htm"], + FileType.JPEG: ["jpeg", "jpg"], + FileType.JSON: ["json"], + FileType.M4A: ["m4a"], + FileType.M4V: ["m4v"], + FileType.MOV: ["mov"], + FileType.MP3: ["mp3"], + FileType.MP4: ["mp4"], + FileType.MPEG: ["mpeg"], + FileType.MPEGPS: ["mpegps"], + FileType.MPG: ["mpg"], + FileType.MPA: ["mpa"], + FileType.MPGA: ["mpga"], + FileType.OGG: ["ogg"], + FileType.OPUS: ["opus"], + FileType.PDF: ["pdf"], + FileType.PCM: ["pcm"], + FileType.PNG: ["png"], + FileType.PPT: ["ppt"], + FileType.PPTX: ["pptx"], + FileType.RTF: ["rtf"], + FileType.THREE_GPP: ["3gpp"], + FileType.TXT: ["txt"], + FileType.WAV: ["wav"], + FileType.WEBM: ["webm"], + FileType.WEBP: ["webp"], + FileType.WMV: ["wmv"], + FileType.XLS: ["xls"], + FileType.XLSX: ["xlsx"], + } +) + +FILE_MIME_TYPES: Mapping[FileType, str] = MappingProxyType( + { + FileType.AAC: "audio/aac", + FileType.CSV: "text/csv", + FileType.DOC: "application/msword", + FileType.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + FileType.FLAC: "audio/flac", + FileType.FLV: "video/x-flv", + FileType.GIF: "image/gif", + FileType.GOOGLE_DOC: "application/vnd.google-apps.document", + FileType.GOOGLE_DRAWINGS: "application/vnd.google-apps.drawing", + FileType.GOOGLE_SHEETS: "application/vnd.google-apps.spreadsheet", + FileType.GOOGLE_SLIDES: "application/vnd.google-apps.presentation", + FileType.HEIC: "image/heic", + FileType.HEIF: "image/heif", + FileType.HTML: "text/html", + FileType.JPEG: "image/jpeg", + FileType.JSON: "application/json", + FileType.M4A: "audio/x-m4a", + FileType.M4V: "video/x-m4v", + FileType.MOV: "video/quicktime", + FileType.MP3: "audio/mpeg", + FileType.MP4: "video/mp4", + FileType.MPEG: "video/mpeg", + FileType.MPEGPS: "video/mpegps", + FileType.MPG: "video/mpg", + FileType.MPA: "audio/m4a", + FileType.MPGA: "audio/mpga", + FileType.OGG: "audio/ogg", + FileType.OPUS: "audio/opus", + FileType.PDF: "application/pdf", + FileType.PCM: "audio/pcm", + FileType.PNG: "image/png", + FileType.PPT: "application/vnd.ms-powerpoint", + FileType.PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation", + FileType.RTF: "application/rtf", + FileType.THREE_GPP: "video/3gpp", + FileType.TXT: "text/plain", + FileType.WAV: "audio/wav", + FileType.WEBM: "video/webm", + FileType.WEBP: "image/webp", + FileType.WMV: "video/wmv", + FileType.XLS: "application/vnd.ms-excel", + FileType.XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + } +) + +""" +Util Functions +""" + + +def get_file_extension_from_mime_type(mime_type: str) -> str: + for file_type, mime in FILE_MIME_TYPES.items(): + if mime.lower() == mime_type.lower(): + return FILE_EXTENSIONS[file_type][0] + raise ValueError(f"Unknown extension for mime type: {mime_type}") + + +def get_file_type_from_extension(extension: str) -> FileType: + for file_type, extensions in 
FILE_EXTENSIONS.items(): + if extension.lower() in extensions: + return file_type + + raise ValueError(f"Unknown file type for extension: {extension}") + + +def get_file_extension_for_file_type(file_type: FileType) -> str: + return FILE_EXTENSIONS[file_type][0] + + +def get_file_mime_type_for_file_type(file_type: FileType) -> str: + return FILE_MIME_TYPES[file_type] + + +def get_file_mime_type_from_extension(extension: str) -> str: + file_type = get_file_type_from_extension(extension) + return get_file_mime_type_for_file_type(file_type) + + +""" +FileType Type Groupings (Videos, Images, etc) +""" + +# Images +IMAGE_FILE_TYPES = { + FileType.PNG, + FileType.JPEG, + FileType.GIF, + FileType.WEBP, + FileType.HEIC, + FileType.HEIF, +} + + +def is_image_file_type(file_type): + return file_type in IMAGE_FILE_TYPES + + +# Videos +VIDEO_FILE_TYPES = { + FileType.MOV, + FileType.MP4, + FileType.MPEG, + FileType.M4V, + FileType.FLV, + FileType.MPEGPS, + FileType.MPG, + FileType.WEBM, + FileType.WMV, + FileType.THREE_GPP, +} + + +def is_video_file_type(file_type): + return file_type in VIDEO_FILE_TYPES + + +# Audio +AUDIO_FILE_TYPES = { + FileType.AAC, + FileType.FLAC, + FileType.MP3, + FileType.MPA, + FileType.MPGA, + FileType.OPUS, + FileType.PCM, + FileType.WAV, +} + + +def is_audio_file_type(file_type): + return file_type in AUDIO_FILE_TYPES + + +# Text +TEXT_FILE_TYPES = {FileType.CSV, FileType.HTML, FileType.RTF, FileType.TXT} + + +def is_text_file_type(file_type): + return file_type in TEXT_FILE_TYPES + + +""" +Other FileType Groupings +""" +# Accepted file types for GEMINI 1.5 through Vertex AI +# https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-images-nodejs +GEMINI_1_5_ACCEPTED_FILE_TYPES: Set[FileType] = { + # Image + FileType.PNG, + FileType.JPEG, + FileType.WEBP, + # Audio + FileType.AAC, + FileType.FLAC, + FileType.MP3, + FileType.MPA, + FileType.MPEG, + FileType.MPGA, + FileType.OPUS, + FileType.PCM, + FileType.WAV, + FileType.WEBM, + # Video + FileType.FLV, + FileType.MOV, + FileType.MPEG, + FileType.MPEGPS, + FileType.MPG, + FileType.MP4, + FileType.WEBM, + FileType.WMV, + FileType.THREE_GPP, + # PDF + FileType.PDF, + FileType.TXT, +} + + +def is_gemini_1_5_accepted_file_type(file_type: FileType) -> bool: + return file_type in GEMINI_1_5_ACCEPTED_FILE_TYPES diff --git a/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py b/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py new file mode 100644 index 00000000..af99d88c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/fine_tuning.py @@ -0,0 +1,5 @@ +from openai.types.fine_tuning.fine_tuning_job import Hyperparameters + + +class OpenAIFineTuningHyperparameters(Hyperparameters): + model_config = {"extra": "allow"} diff --git a/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py b/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py new file mode 100644 index 00000000..b7018fe2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/guardrails.py @@ -0,0 +1,168 @@ +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union + +from pydantic import BaseModel, ConfigDict, Field, SecretStr +from typing_extensions import Required, TypedDict + +""" +Pydantic object defining how to set guardrails on litellm proxy + +guardrails: + - guardrail_name: "bedrock-pre-guard" + litellm_params: + guardrail: bedrock # supported values: "aporia", "bedrock", "lakera" + mode: 
"during_call" + guardrailIdentifier: ff6ujrregl1q + guardrailVersion: "DRAFT" + default_on: true +""" + + +class SupportedGuardrailIntegrations(Enum): + APORIA = "aporia" + BEDROCK = "bedrock" + GURDRAILS_AI = "guardrails_ai" + LAKERA = "lakera" + PRESIDIO = "presidio" + HIDE_SECRETS = "hide-secrets" + AIM = "aim" + + +class Role(Enum): + SYSTEM = "system" + ASSISTANT = "assistant" + USER = "user" + + +default_roles = [Role.SYSTEM, Role.ASSISTANT, Role.USER] + + +class GuardrailItemSpec(TypedDict, total=False): + callbacks: Required[List[str]] + default_on: bool + logging_only: Optional[bool] + enabled_roles: Optional[List[Role]] + callback_args: Dict[str, Dict] + + +class GuardrailItem(BaseModel): + callbacks: List[str] + default_on: bool + logging_only: Optional[bool] + guardrail_name: str + callback_args: Dict[str, Dict] + enabled_roles: Optional[List[Role]] + + model_config = ConfigDict(use_enum_values=True) + + def __init__( + self, + callbacks: List[str], + guardrail_name: str, + default_on: bool = False, + logging_only: Optional[bool] = None, + enabled_roles: Optional[List[Role]] = default_roles, + callback_args: Dict[str, Dict] = {}, + ): + super().__init__( + callbacks=callbacks, + default_on=default_on, + logging_only=logging_only, + guardrail_name=guardrail_name, + enabled_roles=enabled_roles, + callback_args=callback_args, + ) + + +# Define the TypedDicts +class LakeraCategoryThresholds(TypedDict, total=False): + prompt_injection: float + jailbreak: float + + +class LitellmParams(TypedDict): + guardrail: str + mode: str + api_key: Optional[str] + api_base: Optional[str] + + # Lakera specific params + category_thresholds: Optional[LakeraCategoryThresholds] + + # Bedrock specific params + guardrailIdentifier: Optional[str] + guardrailVersion: Optional[str] + + # Presidio params + output_parse_pii: Optional[bool] + presidio_ad_hoc_recognizers: Optional[str] + mock_redacted_text: Optional[dict] + + # hide secrets params + detect_secrets_config: Optional[dict] + + # guardrails ai params + guard_name: Optional[str] + default_on: Optional[bool] + + +class Guardrail(TypedDict, total=False): + guardrail_name: str + litellm_params: LitellmParams + guardrail_info: Optional[Dict] + + +class guardrailConfig(TypedDict): + guardrails: List[Guardrail] + + +class GuardrailEventHooks(str, Enum): + pre_call = "pre_call" + post_call = "post_call" + during_call = "during_call" + logging_only = "logging_only" + + +class BedrockTextContent(TypedDict, total=False): + text: str + + +class BedrockContentItem(TypedDict, total=False): + text: BedrockTextContent + + +class BedrockRequest(TypedDict, total=False): + source: Literal["INPUT", "OUTPUT"] + content: List[BedrockContentItem] + + +class DynamicGuardrailParams(TypedDict): + extra_body: Dict[str, Any] + + +class GuardrailLiteLLMParamsResponse(BaseModel): + """The returned LiteLLM Params object for /guardrails/list""" + + guardrail: str + mode: Union[str, List[str]] + default_on: bool = Field(default=False) + + def __init__(self, **kwargs): + default_on = kwargs.get("default_on") + if default_on is None: + default_on = False + + super().__init__(**kwargs) + + +class GuardrailInfoResponse(BaseModel): + guardrail_name: str + litellm_params: GuardrailLiteLLMParamsResponse + guardrail_info: Optional[Dict] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + +class ListGuardrailsResponse(BaseModel): + guardrails: List[GuardrailInfoResponse] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py 
b/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py new file mode 100644 index 00000000..6c0de762 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/argilla.py @@ -0,0 +1,21 @@ +import os +from datetime import datetime as dt +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Set, TypedDict + + +class ArgillaItem(TypedDict): + fields: Dict[str, Any] + + +class ArgillaPayload(TypedDict): + items: List[ArgillaItem] + + +class ArgillaCredentialsObject(TypedDict): + ARGILLA_API_KEY: str + ARGILLA_DATASET_NAME: str + ARGILLA_BASE_URL: str + + +SUPPORTED_PAYLOAD_FIELDS = ["messages", "response"] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py new file mode 100644 index 00000000..e1ec1755 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize.py @@ -0,0 +1,15 @@ +from typing import TYPE_CHECKING, Any, Literal, Optional + +from pydantic import BaseModel + +if TYPE_CHECKING: + Protocol = Literal["otlp_grpc", "otlp_http"] +else: + Protocol = Any + + +class ArizeConfig(BaseModel): + space_key: Optional[str] = None + api_key: Optional[str] = None + protocol: Protocol + endpoint: str diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py new file mode 100644 index 00000000..4566022d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/arize_phoenix.py @@ -0,0 +1,9 @@ +from typing import TYPE_CHECKING, Literal, Optional + +from pydantic import BaseModel +from .arize import Protocol + +class ArizePhoenixConfig(BaseModel): + otlp_auth_headers: Optional[str] = None + protocol: Protocol + endpoint: str diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py new file mode 100644 index 00000000..b69529d1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/base_health_check.py @@ -0,0 +1,6 @@ +from typing import Literal, Optional, TypedDict + + +class IntegrationHealthCheckStatus(TypedDict): + status: Literal["healthy", "unhealthy"] + error_message: Optional[str] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py new file mode 100644 index 00000000..79d4eded --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog.py @@ -0,0 +1,29 @@ +from enum import Enum +from typing import Optional, TypedDict + + +class DataDogStatus(str, Enum): + INFO = "info" + WARN = "warning" + ERROR = "error" + + +class DatadogPayload(TypedDict, total=False): + ddsource: str + ddtags: str + hostname: str + message: str + service: str + status: str + + +class DD_ERRORS(Enum): + DATADOG_413_ERROR = "Datadog API Error - Payload too large (batch is above 5MB uncompressed). 
If you want this logged either disable request/response logging or set `DD_BATCH_SIZE=50`" + + +class DatadogProxyFailureHookJsonMessage(TypedDict, total=False): + exception: str + error_class: str + status_code: Optional[int] + traceback: str + user_api_key_dict: dict diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py new file mode 100644 index 00000000..9298b157 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/datadog_llm_obs.py @@ -0,0 +1,54 @@ +""" +Payloads for Datadog LLM Observability Service (LLMObs) + +API Reference: https://docs.datadoghq.com/llm_observability/setup/api/?tab=example#api-standards +""" + +from typing import Any, Dict, List, Literal, Optional, TypedDict + + +class InputMeta(TypedDict): + messages: List[Any] + + +class OutputMeta(TypedDict): + messages: List[Any] + + +class Meta(TypedDict): + # The span kind: "agent", "workflow", "llm", "tool", "task", "embedding", or "retrieval". + kind: Literal["llm", "tool", "task", "embedding", "retrieval"] + input: InputMeta # The span’s input information. + output: OutputMeta # The span’s output information. + metadata: Dict[str, Any] + + +class LLMMetrics(TypedDict, total=False): + input_tokens: float + output_tokens: float + total_tokens: float + time_to_first_token: float + time_per_output_token: float + + +class LLMObsPayload(TypedDict): + parent_id: str + trace_id: str + span_id: str + name: str + meta: Meta + start_ns: int + duration: int + metrics: LLMMetrics + tags: List + + +class DDSpanAttributes(TypedDict): + ml_app: str + tags: List[str] + spans: List[LLMObsPayload] + + +class DDIntakePayload(TypedDict): + type: str + attributes: DDSpanAttributes diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py new file mode 100644 index 00000000..a4fd8a6a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/gcs_bucket.py @@ -0,0 +1,28 @@ +from typing import TYPE_CHECKING, Any, Dict, Optional, TypedDict + +from litellm.types.utils import StandardLoggingPayload + +if TYPE_CHECKING: + from litellm.llms.vertex_ai.vertex_llm_base import VertexBase +else: + VertexBase = Any + + +class GCSLoggingConfig(TypedDict): + """ + Internal LiteLLM Config for GCS Bucket logging + """ + + bucket_name: str + vertex_instance: VertexBase + path_service_account: Optional[str] + + +class GCSLogQueueItem(TypedDict): + """ + Internal Type, used for queueing logs to be sent to GCS Bucket + """ + + payload: StandardLoggingPayload + kwargs: Dict[str, Any] + response_obj: Optional[Any] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py new file mode 100644 index 00000000..ecf42d8c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langfuse.py @@ -0,0 +1,7 @@ +from typing import Optional, TypedDict + + +class LangfuseLoggingConfig(TypedDict): + langfuse_secret: Optional[str] + langfuse_public_key: Optional[str] + langfuse_host: Optional[str] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py new file mode 100644 index 00000000..48c8e2e0 --- /dev/null +++ 
b/.venv/lib/python3.12/site-packages/litellm/types/integrations/langsmith.py @@ -0,0 +1,61 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, NamedTuple, Optional, TypedDict + +from pydantic import BaseModel + + +class LangsmithInputs(BaseModel): + model: Optional[str] = None + messages: Optional[List[Any]] = None + stream: Optional[bool] = None + call_type: Optional[str] = None + litellm_call_id: Optional[str] = None + completion_start_time: Optional[datetime] = None + temperature: Optional[float] = None + max_tokens: Optional[int] = None + custom_llm_provider: Optional[str] = None + input: Optional[List[Any]] = None + log_event_type: Optional[str] = None + original_response: Optional[Any] = None + response_cost: Optional[float] = None + + # LiteLLM Virtual Key specific fields + user_api_key: Optional[str] = None + user_api_key_user_id: Optional[str] = None + user_api_key_team_alias: Optional[str] = None + + +class LangsmithCredentialsObject(TypedDict): + LANGSMITH_API_KEY: str + LANGSMITH_PROJECT: str + LANGSMITH_BASE_URL: str + + +class LangsmithQueueObject(TypedDict): + """ + Langsmith Queue Object - this is what gets stored in the internal system queue before flushing to Langsmith + + We need to store: + - data[Dict] - data that should get logged on langsmith + - credentials[LangsmithCredentialsObject] - credentials to use for logging to langsmith + """ + + data: Dict + credentials: LangsmithCredentialsObject + + +class CredentialsKey(NamedTuple): + """Immutable key for grouping credentials""" + + api_key: str + project: str + base_url: str + + +@dataclass +class BatchGroup: + """Groups credentials with their associated queue objects""" + + credentials: LangsmithCredentialsObject + queue_objects: List[LangsmithQueueObject] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py new file mode 100644 index 00000000..22fd1665 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/pagerduty.py @@ -0,0 +1,62 @@ +from datetime import datetime +from typing import List, Literal, Optional, TypedDict, Union + +from litellm.types.utils import StandardLoggingUserAPIKeyMetadata + + +class LinkDict(TypedDict, total=False): + href: str + text: Optional[str] + + +class ImageDict(TypedDict, total=False): + src: str + href: Optional[str] + alt: Optional[str] + + +class PagerDutyPayload(TypedDict, total=False): + summary: str + timestamp: Optional[str] # ISO 8601 date-time format + severity: Literal["critical", "warning", "error", "info"] + source: str + component: Optional[str] + group: Optional[str] + class_: Optional[str] # Using class_ since 'class' is a reserved keyword + custom_details: Optional[dict] + + +class PagerDutyRequestBody(TypedDict, total=False): + payload: PagerDutyPayload + routing_key: str + event_action: Literal["trigger", "acknowledge", "resolve"] + dedup_key: Optional[str] + client: Optional[str] + client_url: Optional[str] + links: Optional[List[LinkDict]] + images: Optional[List[ImageDict]] + + +class AlertingConfig(TypedDict, total=False): + """ + Config for alerting thresholds + """ + + # Requests failing threshold + failure_threshold: int # Number of requests failing in a window + failure_threshold_window_seconds: int # Window in seconds + + # Requests hanging threshold + hanging_threshold_seconds: float # Number of seconds of waiting for a response before a request is considered hanging 
+ hanging_threshold_fails: int # Number of requests hanging in a window + hanging_threshold_window_seconds: int # Window in seconds + + +class PagerDutyInternalEvent(StandardLoggingUserAPIKeyMetadata, total=False): + """Simple structure to hold timestamp and error info.""" + + failure_event_type: Literal["failed_response", "hanging_response"] + timestamp: datetime + error_class: Optional[str] + error_code: Optional[str] + error_llm_provider: Optional[str] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py new file mode 100644 index 00000000..8fdcce4c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/prometheus.py @@ -0,0 +1,294 @@ +from enum import Enum +from typing import Dict, List, Literal, Optional, Union + +from pydantic import BaseModel, Field +from typing_extensions import Annotated + +import litellm + +REQUESTED_MODEL = "requested_model" +EXCEPTION_STATUS = "exception_status" +EXCEPTION_CLASS = "exception_class" +STATUS_CODE = "status_code" +EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS] +LATENCY_BUCKETS = ( + 0.005, + 0.00625, + 0.0125, + 0.025, + 0.05, + 0.1, + 0.5, + 1.0, + 1.5, + 2.0, + 2.5, + 3.0, + 3.5, + 4.0, + 4.5, + 5.0, + 5.5, + 6.0, + 6.5, + 7.0, + 7.5, + 8.0, + 8.5, + 9.0, + 9.5, + 10.0, + 15.0, + 20.0, + 25.0, + 30.0, + 60.0, + 120.0, + 180.0, + 240.0, + 300.0, + float("inf"), +) + + +class UserAPIKeyLabelNames(Enum): + END_USER = "end_user" + USER = "user" + USER_EMAIL = "user_email" + API_KEY_HASH = "hashed_api_key" + API_KEY_ALIAS = "api_key_alias" + TEAM = "team" + TEAM_ALIAS = "team_alias" + REQUESTED_MODEL = REQUESTED_MODEL + v1_LITELLM_MODEL_NAME = "model" + v2_LITELLM_MODEL_NAME = "litellm_model_name" + TAG = "tag" + MODEL_ID = "model_id" + API_BASE = "api_base" + API_PROVIDER = "api_provider" + EXCEPTION_STATUS = EXCEPTION_STATUS + EXCEPTION_CLASS = EXCEPTION_CLASS + STATUS_CODE = "status_code" + FALLBACK_MODEL = "fallback_model" + + +DEFINED_PROMETHEUS_METRICS = Literal[ + "litellm_llm_api_latency_metric", + "litellm_request_total_latency_metric", + "litellm_proxy_total_requests_metric", + "litellm_proxy_failed_requests_metric", + "litellm_deployment_latency_per_output_token", + "litellm_requests_metric", + "litellm_input_tokens_metric", + "litellm_output_tokens_metric", + "litellm_deployment_successful_fallbacks", + "litellm_deployment_failed_fallbacks", + "litellm_remaining_team_budget_metric", + "litellm_team_max_budget_metric", + "litellm_team_budget_remaining_hours_metric", + "litellm_remaining_api_key_budget_metric", + "litellm_api_key_max_budget_metric", + "litellm_api_key_budget_remaining_hours_metric", +] + + +class PrometheusMetricLabels: + litellm_llm_api_latency_metric = [ + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.USER.value, + ] + + litellm_request_total_latency_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + ] + + 
litellm_proxy_total_requests_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.STATUS_CODE.value, + UserAPIKeyLabelNames.USER_EMAIL.value, + ] + + litellm_proxy_failed_requests_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.EXCEPTION_STATUS.value, + UserAPIKeyLabelNames.EXCEPTION_CLASS.value, + ] + + litellm_deployment_latency_per_output_token = [ + UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value, + UserAPIKeyLabelNames.MODEL_ID.value, + UserAPIKeyLabelNames.API_BASE.value, + UserAPIKeyLabelNames.API_PROVIDER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + ] + + litellm_requests_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.USER_EMAIL.value, + ] + + litellm_input_tokens_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + ] + + litellm_output_tokens_metric = [ + UserAPIKeyLabelNames.END_USER.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.USER.value, + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + ] + + litellm_deployment_successful_fallbacks = [ + UserAPIKeyLabelNames.REQUESTED_MODEL.value, + UserAPIKeyLabelNames.FALLBACK_MODEL.value, + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + UserAPIKeyLabelNames.EXCEPTION_STATUS.value, + UserAPIKeyLabelNames.EXCEPTION_CLASS.value, + ] + + litellm_deployment_failed_fallbacks = litellm_deployment_successful_fallbacks + + litellm_remaining_team_budget_metric = [ + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + ] + + litellm_team_max_budget_metric = [ + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + ] + + litellm_team_budget_remaining_hours_metric = [ + UserAPIKeyLabelNames.TEAM.value, + UserAPIKeyLabelNames.TEAM_ALIAS.value, + ] + + litellm_remaining_api_key_budget_metric = [ + UserAPIKeyLabelNames.API_KEY_HASH.value, + UserAPIKeyLabelNames.API_KEY_ALIAS.value, + ] + + litellm_api_key_max_budget_metric = litellm_remaining_api_key_budget_metric + + litellm_api_key_budget_remaining_hours_metric = ( + litellm_remaining_api_key_budget_metric + ) + + @staticmethod + def 
get_labels(label_name: DEFINED_PROMETHEUS_METRICS) -> List[str]: + default_labels = getattr(PrometheusMetricLabels, label_name) + return default_labels + [ + metric.replace(".", "_") + for metric in litellm.custom_prometheus_metadata_labels + ] + + +from typing import List, Optional + +from pydantic import BaseModel, Field + + +class UserAPIKeyLabelValues(BaseModel): + end_user: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.END_USER.value) + ] = None + user: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.USER.value) + ] = None + user_email: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.USER_EMAIL.value) + ] = None + hashed_api_key: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_KEY_HASH.value) + ] = None + api_key_alias: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_KEY_ALIAS.value) + ] = None + team: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.TEAM.value) + ] = None + team_alias: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.TEAM_ALIAS.value) + ] = None + requested_model: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.REQUESTED_MODEL.value) + ] = None + model: Annotated[ + Optional[str], + Field(..., alias=UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value), + ] = None + litellm_model_name: Annotated[ + Optional[str], + Field(..., alias=UserAPIKeyLabelNames.v2_LITELLM_MODEL_NAME.value), + ] = None + tags: List[str] = [] + custom_metadata_labels: Dict[str, str] = {} + model_id: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.MODEL_ID.value) + ] = None + api_base: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_BASE.value) + ] = None + api_provider: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.API_PROVIDER.value) + ] = None + exception_status: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.EXCEPTION_STATUS.value) + ] = None + exception_class: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.EXCEPTION_CLASS.value) + ] = None + status_code: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.STATUS_CODE.value) + ] = None + fallback_model: Annotated[ + Optional[str], Field(..., alias=UserAPIKeyLabelNames.FALLBACK_MODEL.value) + ] = None diff --git a/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py b/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py new file mode 100644 index 00000000..9019b098 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/integrations/slack_alerting.py @@ -0,0 +1,186 @@ +import os +from datetime import datetime as dt +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Set, TypedDict + +from pydantic import BaseModel, Field + +from litellm.types.utils import LiteLLMPydanticObjectBase + + +class BaseOutageModel(TypedDict): + alerts: List[int] + minor_alert_sent: bool + major_alert_sent: bool + last_updated_at: float + + +class OutageModel(BaseOutageModel): + model_id: str + + +class ProviderRegionOutageModel(BaseOutageModel): + provider_region_id: str + deployment_ids: Set[str] + + +# we use this for the email header, please send a test email if you change this. 
verify it looks good on email +LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png" +LITELLM_SUPPORT_CONTACT = "support@berri.ai" + + +class SlackAlertingArgsEnum(Enum): + daily_report_frequency = 12 * 60 * 60 + report_check_interval = 5 * 60 + budget_alert_ttl = 24 * 60 * 60 + outage_alert_ttl = 1 * 60 + region_outage_alert_ttl = 1 * 60 + minor_outage_alert_threshold = 1 * 5 + major_outage_alert_threshold = 1 * 10 + max_outage_alert_list_size = 1 * 10 + + +class SlackAlertingArgs(LiteLLMPydanticObjectBase): + daily_report_frequency: int = Field( + default=int( + os.getenv( + "SLACK_DAILY_REPORT_FREQUENCY", + int(SlackAlertingArgsEnum.daily_report_frequency.value), + ) + ), + description="Frequency of receiving deployment latency/failure reports. Default is 12hours. Value is in seconds.", + ) + report_check_interval: int = Field( + default=SlackAlertingArgsEnum.report_check_interval.value, + description="Frequency of checking cache if report should be sent. Background process. Default is once per hour. Value is in seconds.", + ) # 5 minutes + budget_alert_ttl: int = Field( + default=SlackAlertingArgsEnum.budget_alert_ttl.value, + description="Cache ttl for budgets alerts. Prevents spamming same alert, each time budget is crossed. Value is in seconds.", + ) # 24 hours + outage_alert_ttl: int = Field( + default=SlackAlertingArgsEnum.outage_alert_ttl.value, + description="Cache ttl for model outage alerts. Sets time-window for errors. Default is 1 minute. Value is in seconds.", + ) # 1 minute ttl + region_outage_alert_ttl: int = Field( + default=SlackAlertingArgsEnum.region_outage_alert_ttl.value, + description="Cache ttl for provider-region based outage alerts. Alert sent if 2+ models in same region report errors. Sets time-window for errors. Default is 1 minute. Value is in seconds.", + ) # 1 minute ttl + minor_outage_alert_threshold: int = Field( + default=SlackAlertingArgsEnum.minor_outage_alert_threshold.value, + description="The number of errors that count as a model/region minor outage. ('400' error code is not counted).", + ) + major_outage_alert_threshold: int = Field( + default=SlackAlertingArgsEnum.major_outage_alert_threshold.value, + description="The number of errors that countas a model/region major outage. ('400' error code is not counted).", + ) + max_outage_alert_list_size: int = Field( + default=SlackAlertingArgsEnum.max_outage_alert_list_size.value, + description="Maximum number of errors to store in cache. For a given model/region. 
Prevents memory leaks.", + ) # prevent memory leak + log_to_console: bool = Field( + default=False, + description="If true, the alerting payload will be printed to the console.", + ) + + +class DeploymentMetrics(LiteLLMPydanticObjectBase): + """ + Metrics per deployment, stored in cache + + Used for daily reporting + """ + + id: str + """id of deployment in router model list""" + + failed_request: bool + """did it fail the request?""" + + latency_per_output_token: Optional[float] + """latency/output token of deployment""" + + updated_at: dt + """Current time of deployment being updated""" + + +class SlackAlertingCacheKeys(Enum): + """ + Enum for deployment daily metrics keys - {deployment_id}:{enum} + """ + + failed_requests_key = "failed_requests_daily_metrics" + latency_key = "latency_daily_metrics" + report_sent_key = "daily_metrics_report_sent" + + +class AlertType(str, Enum): + """ + Enum for alert types and management event types + """ + + # LLM-related alerts + llm_exceptions = "llm_exceptions" + llm_too_slow = "llm_too_slow" + llm_requests_hanging = "llm_requests_hanging" + + # Budget and spend alerts + budget_alerts = "budget_alerts" + spend_reports = "spend_reports" + failed_tracking_spend = "failed_tracking_spend" + + # Database alerts + db_exceptions = "db_exceptions" + + # Report alerts + daily_reports = "daily_reports" + + # Deployment alerts + cooldown_deployment = "cooldown_deployment" + new_model_added = "new_model_added" + + # Outage alerts + outage_alerts = "outage_alerts" + region_outage_alerts = "region_outage_alerts" + + # Fallback alerts + fallback_reports = "fallback_reports" + + # Virtual Key Events + new_virtual_key_created = "new_virtual_key_created" + virtual_key_updated = "virtual_key_updated" + virtual_key_deleted = "virtual_key_deleted" + + # Team Events + new_team_created = "new_team_created" + team_updated = "team_updated" + team_deleted = "team_deleted" + + # Internal User Events + new_internal_user_created = "new_internal_user_created" + internal_user_updated = "internal_user_updated" + internal_user_deleted = "internal_user_deleted" + + +DEFAULT_ALERT_TYPES: List[AlertType] = [ + # LLM related alerts + AlertType.llm_exceptions, + AlertType.llm_too_slow, + AlertType.llm_requests_hanging, + # Budget and spend alerts + AlertType.budget_alerts, + AlertType.spend_reports, + AlertType.failed_tracking_spend, + # Database alerts + AlertType.db_exceptions, + # Report alerts + AlertType.daily_reports, + # Deployment alerts + AlertType.cooldown_deployment, + AlertType.new_model_added, + # Outage alerts + AlertType.outage_alerts, + AlertType.region_outage_alerts, + # Fallback alerts + AlertType.fallback_reports, +] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py new file mode 100644 index 00000000..367b2421 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/anthropic.py @@ -0,0 +1,366 @@ +from typing import Any, Dict, Iterable, List, Optional, Union + +from pydantic import BaseModel, validator +from typing_extensions import Literal, Required, TypedDict + +from .openai import ChatCompletionCachedContent, ChatCompletionThinkingBlock + + +class AnthropicMessagesToolChoice(TypedDict, total=False): + type: Required[Literal["auto", "any", "tool"]] + name: str + disable_parallel_tool_use: bool # default is false + + +class AnthropicInputSchema(TypedDict, total=False): + type: Optional[str] + properties: Optional[dict] + additionalProperties: 
Optional[bool] + + +class AnthropicMessagesTool(TypedDict, total=False): + name: Required[str] + description: str + input_schema: Optional[AnthropicInputSchema] + type: Literal["custom"] + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +class AnthropicComputerTool(TypedDict, total=False): + display_width_px: Required[int] + display_height_px: Required[int] + display_number: int + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + type: Required[str] + name: Required[str] + + +class AnthropicHostedTools(TypedDict, total=False): # for bash_tool and text_editor + type: Required[str] + name: Required[str] + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +AllAnthropicToolsValues = Union[ + AnthropicComputerTool, AnthropicHostedTools, AnthropicMessagesTool +] + + +class AnthropicMessagesTextParam(TypedDict, total=False): + type: Required[Literal["text"]] + text: Required[str] + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +class AnthropicMessagesToolUseParam(TypedDict): + type: Required[Literal["tool_use"]] + id: str + name: str + input: dict + + +AnthropicMessagesAssistantMessageValues = Union[ + AnthropicMessagesTextParam, + AnthropicMessagesToolUseParam, + ChatCompletionThinkingBlock, +] + + +class AnthopicMessagesAssistantMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[AnthropicMessagesAssistantMessageValues]]] + """The contents of the system message.""" + + role: Required[Literal["assistant"]] + """The role of the messages author, in this case `author`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ + + +class AnthropicContentParamSource(TypedDict): + type: Literal["base64"] + media_type: str + data: str + + +class AnthropicMessagesImageParam(TypedDict, total=False): + type: Required[Literal["image"]] + source: Required[AnthropicContentParamSource] + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +class CitationsObject(TypedDict): + enabled: bool + + +class AnthropicMessagesDocumentParam(TypedDict, total=False): + type: Required[Literal["document"]] + source: Required[AnthropicContentParamSource] + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + title: str + context: str + citations: Optional[CitationsObject] + + +class AnthropicMessagesToolResultContent(TypedDict): + type: Literal["text"] + text: str + + +class AnthropicMessagesToolResultParam(TypedDict, total=False): + type: Required[Literal["tool_result"]] + tool_use_id: Required[str] + is_error: bool + content: Union[ + str, + Iterable[ + Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam] + ], + ] + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +AnthropicMessagesUserMessageValues = Union[ + AnthropicMessagesTextParam, + AnthropicMessagesImageParam, + AnthropicMessagesToolResultParam, + AnthropicMessagesDocumentParam, +] + + +class AnthropicMessagesUserMessageParam(TypedDict, total=False): + role: Required[Literal["user"]] + content: Required[Union[str, Iterable[AnthropicMessagesUserMessageValues]]] + + +class AnthropicMetadata(TypedDict, total=False): + user_id: str + + +class AnthropicSystemMessageContent(TypedDict, total=False): + type: str + text: str + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +AllAnthropicMessageValues = Union[ + AnthropicMessagesUserMessageParam, 
AnthopicMessagesAssistantMessageParam +] + + +class AnthropicMessageRequestBase(TypedDict, total=False): + messages: Required[List[AllAnthropicMessageValues]] + max_tokens: Required[int] + metadata: AnthropicMetadata + stop_sequences: List[str] + stream: bool + system: Union[str, List] + temperature: float + tool_choice: AnthropicMessagesToolChoice + tools: List[AllAnthropicToolsValues] + top_k: int + top_p: float + + +class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False): + model: Required[str] + # litellm param - used for tracking litellm proxy metadata in the request + litellm_metadata: dict + + +class ContentTextBlockDelta(TypedDict): + """ + 'delta': {'type': 'text_delta', 'text': 'Hello'} + """ + + type: str + text: str + + +class ContentCitationsBlockDelta(TypedDict): + type: Literal["citations"] + citation: dict + + +class ContentJsonBlockDelta(TypedDict): + """ + "delta": {"type": "input_json_delta","partial_json": "{\"location\": \"San Fra"}} + """ + + type: str + partial_json: str + + +class ContentBlockDelta(TypedDict): + type: Literal["content_block_delta"] + index: int + delta: Union[ + ContentTextBlockDelta, ContentJsonBlockDelta, ContentCitationsBlockDelta + ] + + +class ContentBlockStop(TypedDict): + type: Literal["content_block_stop"] + index: int + + +class ToolUseBlock(TypedDict): + """ + "content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}} + """ + + id: str + + input: dict + + name: str + + type: Literal["tool_use"] + + +class TextBlock(TypedDict): + text: str + + type: Literal["text"] + + +class ContentBlockStart(TypedDict): + """ + event: content_block_start + data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}} + """ + + type: str + index: int + content_block: Union[ToolUseBlock, TextBlock] + + +class MessageDelta(TypedDict, total=False): + stop_reason: Optional[str] + + +class UsageDelta(TypedDict, total=False): + input_tokens: int + output_tokens: int + + +class MessageBlockDelta(TypedDict): + """ + Anthropic + chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}} + """ + + type: Literal["message_delta"] + delta: MessageDelta + usage: UsageDelta + + +class MessageChunk(TypedDict, total=False): + id: str + type: str + role: str + model: str + content: List + stop_reason: Optional[str] + stop_sequence: Optional[str] + usage: UsageDelta + + +class MessageStartBlock(TypedDict): + """ + Anthropic + chunk = { + "type": "message_start", + "message": { + "id": "msg_vrtx_011PqREFEMzd3REdCoUFAmdG", + "type": "message", + "role": "assistant", + "model": "claude-3-sonnet-20240229", + "content": [], + "stop_reason": null, + "stop_sequence": null, + "usage": { + "input_tokens": 270, + "output_tokens": 1 + } + } + } + """ + + type: Literal["message_start"] + message: MessageChunk + + +class AnthropicResponseContentBlockText(BaseModel): + type: Literal["text"] + text: str + + +class AnthropicResponseContentBlockToolUse(BaseModel): + type: Literal["tool_use"] + id: str + name: str + input: dict + + +class AnthropicResponseUsageBlock(BaseModel): + input_tokens: int + output_tokens: int + + +AnthropicFinishReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"] + + +class AnthropicResponse(BaseModel): + id: str + """Unique object identifier.""" + + type: Literal["message"] + """For Messages, this is always "message".""" + + 
role: Literal["assistant"] + """Conversational role of the generated message. This will always be "assistant".""" + + content: List[ + Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse] + ] + """Content generated by the model.""" + + model: str + """The model that handled the request.""" + + stop_reason: Optional[AnthropicFinishReason] + """The reason that we stopped.""" + + stop_sequence: Optional[str] + """Which custom stop sequence was generated, if any.""" + + usage: AnthropicResponseUsageBlock + """Billing and rate-limit usage.""" + + +from .openai import ChatCompletionUsageBlock + + +class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False): + cache_creation_input_tokens: int + cache_read_input_tokens: int + + +ANTHROPIC_API_HEADERS = { + "anthropic-version", + "anthropic-beta", +} + +ANTHROPIC_API_ONLY_HEADERS = { # fails if calling anthropic on vertex ai / bedrock + "anthropic-beta", +} + + +class AnthropicThinkingParam(TypedDict, total=False): + type: Literal["enabled"] + budget_tokens: int diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py new file mode 100644 index 00000000..2d597aef --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/azure_ai.py @@ -0,0 +1,17 @@ +from typing import Any, Dict, Iterable, List, Literal, Optional, Union + +from typing_extensions import Required, TypedDict + + +class ImageEmbeddingInput(TypedDict, total=False): + image: Required[str] + text: str + + +EncodingFormat = Literal["base64", "binary", "float", "int8", "ubinary", "uint8"] + + +class ImageEmbeddingRequest(TypedDict, total=False): + input: Required[List[ImageEmbeddingInput]] + dimensions: int + encoding_format: EncodingFormat diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py new file mode 100644 index 00000000..57fb04c8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/bedrock.py @@ -0,0 +1,503 @@ +import json +from typing import Any, List, Literal, Optional, TypedDict, Union + +from typing_extensions import ( + TYPE_CHECKING, + Protocol, + Required, + Self, + TypeGuard, + get_origin, + override, + runtime_checkable, +) + +from .openai import ChatCompletionToolCallChunk + + +class CachePointBlock(TypedDict, total=False): + type: Literal["default"] + + +class SystemContentBlock(TypedDict, total=False): + text: str + cachePoint: CachePointBlock + + +class SourceBlock(TypedDict): + bytes: Optional[str] # base 64 encoded string + + +BedrockImageTypes = Literal["png", "jpeg", "gif", "webp"] + + +class ImageBlock(TypedDict): + format: Union[BedrockImageTypes, str] + source: SourceBlock + + +BedrockDocumentTypes = Literal[ + "pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md" +] + + +class DocumentBlock(TypedDict): + format: Union[BedrockDocumentTypes, str] + source: SourceBlock + name: str + + +class ToolResultContentBlock(TypedDict, total=False): + image: ImageBlock + document: DocumentBlock + json: dict + text: str + + +class ToolResultBlock(TypedDict, total=False): + content: Required[List[ToolResultContentBlock]] + toolUseId: Required[str] + status: Literal["success", "error"] + + +class ToolUseBlock(TypedDict): + input: dict + name: str + toolUseId: str + + +class BedrockConverseReasoningTextBlock(TypedDict, total=False): + text: Required[str] + signature: str + + +class 
BedrockConverseReasoningContentBlock(TypedDict, total=False): + reasoningText: BedrockConverseReasoningTextBlock + redactedContent: str + + +class BedrockConverseReasoningContentBlockDelta(TypedDict, total=False): + signature: str + redactedContent: str + text: str + + +class ContentBlock(TypedDict, total=False): + text: str + image: ImageBlock + document: DocumentBlock + toolResult: ToolResultBlock + toolUse: ToolUseBlock + cachePoint: CachePointBlock + reasoningContent: BedrockConverseReasoningContentBlock + + +class MessageBlock(TypedDict): + content: List[ContentBlock] + role: Literal["user", "assistant"] + + +class ConverseMetricsBlock(TypedDict): + latencyMs: float # time in ms + + +class ConverseResponseOutputBlock(TypedDict): + message: Optional[MessageBlock] + + +class ConverseTokenUsageBlock(TypedDict): + inputTokens: int + outputTokens: int + totalTokens: int + cacheReadInputTokenCount: int + cacheReadInputTokens: int + cacheWriteInputTokenCount: int + cacheWriteInputTokens: int + + +class ConverseResponseBlock(TypedDict): + additionalModelResponseFields: dict + metrics: ConverseMetricsBlock + output: ConverseResponseOutputBlock + stopReason: ( + str # end_turn | tool_use | max_tokens | stop_sequence | content_filtered + ) + usage: ConverseTokenUsageBlock + + +class ToolInputSchemaBlock(TypedDict): + json: Optional[dict] + + +class ToolSpecBlock(TypedDict, total=False): + inputSchema: Required[ToolInputSchemaBlock] + name: Required[str] + description: str + + +class ToolBlock(TypedDict): + toolSpec: Optional[ToolSpecBlock] + + +class SpecificToolChoiceBlock(TypedDict): + name: str + + +class ToolChoiceValuesBlock(TypedDict, total=False): + any: dict + auto: dict + tool: SpecificToolChoiceBlock + + +class ToolConfigBlock(TypedDict, total=False): + tools: Required[List[ToolBlock]] + toolChoice: Union[str, ToolChoiceValuesBlock] + + +class GuardrailConfigBlock(TypedDict, total=False): + guardrailIdentifier: str + guardrailVersion: str + trace: Literal["enabled", "disabled"] + + +class InferenceConfig(TypedDict, total=False): + maxTokens: int + stopSequences: List[str] + temperature: float + topP: float + topK: int + + +class ToolBlockDeltaEvent(TypedDict): + input: str + + +class ToolUseBlockStartEvent(TypedDict): + name: str + toolUseId: str + + +class ContentBlockStartEvent(TypedDict, total=False): + toolUse: Optional[ToolUseBlockStartEvent] + + +class ContentBlockDeltaEvent(TypedDict, total=False): + """ + Either 'text' or 'toolUse' will be specified for Converse API streaming response. 
+ """ + + text: str + toolUse: ToolBlockDeltaEvent + reasoningContent: BedrockConverseReasoningContentBlockDelta + + +class CommonRequestObject( + TypedDict, total=False +): # common request object across sync + async flows + additionalModelRequestFields: dict + additionalModelResponseFieldPaths: List[str] + inferenceConfig: InferenceConfig + system: List[SystemContentBlock] + toolConfig: ToolConfigBlock + guardrailConfig: Optional[GuardrailConfigBlock] + + +class RequestObject(CommonRequestObject, total=False): + messages: Required[List[MessageBlock]] + + +class BedrockInvokeNovaRequest(TypedDict, total=False): + """ + Request object for sending `nova` requests to `/bedrock/invoke/` + """ + + messages: List[MessageBlock] + inferenceConfig: InferenceConfig + system: List[SystemContentBlock] + toolConfig: ToolConfigBlock + guardrailConfig: Optional[GuardrailConfigBlock] + + +class GenericStreamingChunk(TypedDict): + text: Required[str] + tool_use: Optional[ChatCompletionToolCallChunk] + is_finished: Required[bool] + finish_reason: Required[str] + usage: Optional[ConverseTokenUsageBlock] + index: int + + +class Document(TypedDict): + title: str + snippet: str + + +class ServerSentEvent: + def __init__( + self, + *, + event: Optional[str] = None, + data: Optional[str] = None, + id: Optional[str] = None, + retry: Optional[int] = None, + ) -> None: + if data is None: + data = "" + + self._id = id + self._data = data + self._event = event or None + self._retry = retry + + @property + def event(self) -> Optional[str]: + return self._event + + @property + def id(self) -> Optional[str]: + return self._id + + @property + def retry(self) -> Optional[int]: + return self._retry + + @property + def data(self) -> str: + return self._data + + def json(self) -> Any: + return json.loads(self.data) + + @override + def __repr__(self) -> str: + return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})" + + +COHERE_EMBEDDING_INPUT_TYPES = Literal[ + "search_document", "search_query", "classification", "clustering", "image" +] + + +class CohereEmbeddingRequest(TypedDict, total=False): + texts: List[str] + images: List[str] + input_type: Required[COHERE_EMBEDDING_INPUT_TYPES] + truncate: Literal["NONE", "START", "END"] + embedding_types: Literal["float", "int8", "uint8", "binary", "ubinary"] + + +class CohereEmbeddingRequestWithModel(CohereEmbeddingRequest): + model: Required[str] + + +class CohereEmbeddingResponse(TypedDict): + embeddings: List[List[float]] + id: str + response_type: Literal["embedding_floats"] + texts: List[str] + + +class AmazonTitanV2EmbeddingRequest(TypedDict): + inputText: str + dimensions: int + normalize: bool + + +class AmazonTitanV2EmbeddingResponse(TypedDict): + embedding: List[float] + inputTextTokenCount: int + + +class AmazonTitanG1EmbeddingRequest(TypedDict): + inputText: str + + +class AmazonTitanG1EmbeddingResponse(TypedDict): + embedding: List[float] + inputTextTokenCount: int + + +class AmazonTitanMultimodalEmbeddingConfig(TypedDict): + outputEmbeddingLength: Literal[256, 384, 1024] + + +class AmazonTitanMultimodalEmbeddingRequest(TypedDict, total=False): + inputText: str + inputImage: str + embeddingConfig: AmazonTitanMultimodalEmbeddingConfig + + +class AmazonTitanMultimodalEmbeddingResponse(TypedDict): + embedding: List[float] + inputTextTokenCount: int + message: str # Specifies any errors that occur during generation. 
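A quick illustration of how the Titan multimodal embedding TypedDicts above fit together. This sketch is an editorial aside, not part of the packaged module: the helper name and sample inputs are invented, and it only assumes the `litellm` package shown in this diff is importable.

```python
from litellm.types.llms.bedrock import (
    AmazonTitanMultimodalEmbeddingConfig,
    AmazonTitanMultimodalEmbeddingRequest,
)


def build_titan_multimodal_request(
    text: str, image_b64: str
) -> AmazonTitanMultimodalEmbeddingRequest:
    """Hypothetical helper: packs caller inputs into the request shape defined above."""
    config: AmazonTitanMultimodalEmbeddingConfig = {"outputEmbeddingLength": 1024}
    return {
        "inputText": text,
        "inputImage": image_b64,  # base64-encoded image bytes
        "embeddingConfig": config,
    }


request = build_titan_multimodal_request("a red bicycle", "aGVsbG8=")
print(request["embeddingConfig"]["outputEmbeddingLength"])  # -> 1024
```

Because `AmazonTitanMultimodalEmbeddingRequest` is declared with `total=False`, either `inputText` or `inputImage` may be omitted when only one modality is being embedded.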
+ + +AmazonEmbeddingRequest = Union[ + AmazonTitanMultimodalEmbeddingRequest, + AmazonTitanV2EmbeddingRequest, + AmazonTitanG1EmbeddingRequest, +] + + +class AmazonStability3TextToImageRequest(TypedDict, total=False): + """ + Request for Amazon Stability 3 Text to Image API + + Ref here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-diffusion-3-text-image.html + """ + + prompt: str + aspect_ratio: Literal[ + "16:9", "1:1", "21:9", "2:3", "3:2", "4:5", "5:4", "9:16", "9:21" + ] + mode: Literal["image-to-image", "text-to-image"] + output_format: Literal["JPEG", "PNG"] + seed: int + negative_prompt: str + + +class AmazonStability3TextToImageResponse(TypedDict, total=False): + """ + Response for Amazon Stability 3 Text to Image API + + Ref: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-diffusion-3-text-image.html + """ + + images: List[str] + seeds: List[str] + finish_reasons: List[str] + + +class AmazonNovaCanvasRequestBase(TypedDict, total=False): + """ + Base class for Amazon Nova Canvas API requests + """ + + pass + + +class AmazonNovaCanvasImageGenerationConfig(TypedDict, total=False): + """ + Config for Amazon Nova Canvas Text to Image API + + Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html + """ + + cfgScale: int + seed: int + quality: Literal["standard", "premium"] + width: int + height: int + numberOfImages: int + + +class AmazonNovaCanvasTextToImageParams(TypedDict, total=False): + """ + Params for Amazon Nova Canvas Text to Image API + """ + + text: str + negativeText: str + controlStrength: float + controlMode: Literal["CANNY_EDIT", "SEGMENTATION"] + conditionImage: str + + +class AmazonNovaCanvasTextToImageRequest( + AmazonNovaCanvasRequestBase, TypedDict, total=False +): + """ + Request for Amazon Nova Canvas Text to Image API + + Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html + """ + + textToImageParams: AmazonNovaCanvasTextToImageParams + taskType: Literal["TEXT_IMAGE"] + imageGenerationConfig: AmazonNovaCanvasImageGenerationConfig + + +class AmazonNovaCanvasTextToImageResponse(TypedDict, total=False): + """ + Response for Amazon Nova Canvas Text to Image API + + Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html + """ + + images: List[str] + + +if TYPE_CHECKING: + from botocore.awsrequest import AWSPreparedRequest +else: + AWSPreparedRequest = Any + +from pydantic import BaseModel + + +class BedrockPreparedRequest(TypedDict): + """ + Internal/Helper class for preparing the request for bedrock image generation + """ + + endpoint_url: str + prepped: AWSPreparedRequest + body: bytes + data: dict + + +class BedrockRerankTextQuery(TypedDict): + text: str + + +class BedrockRerankQuery(TypedDict): + textQuery: BedrockRerankTextQuery + type: Literal["TEXT"] + + +class BedrockRerankModelConfiguration(TypedDict, total=False): + modelArn: Required[str] + modelConfiguration: dict + + +class BedrockRerankBedrockRerankingConfiguration(TypedDict): + modelConfiguration: BedrockRerankModelConfiguration + numberOfResults: int + + +class BedrockRerankConfiguration(TypedDict): + bedrockRerankingConfiguration: BedrockRerankBedrockRerankingConfiguration + type: Literal["BEDROCK_RERANKING_MODEL"] + + +class BedrockRerankTextDocument(TypedDict, total=False): + text: str + + +class BedrockRerankInlineDocumentSource(TypedDict, total=False): + jsonDocument: dict + textDocument: BedrockRerankTextDocument + type: Literal["TEXT", "JSON"] + 
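Before the remaining rerank types, a brief illustrative sketch of how the Bedrock rerank pieces defined so far compose. All values are placeholders (including the model ARN), nothing is sent to Bedrock, and the resulting objects would then slot into the `BedrockRerankRequest` defined just below.

```python
from litellm.types.llms.bedrock import (
    BedrockRerankConfiguration,
    BedrockRerankInlineDocumentSource,
    BedrockRerankQuery,
)

# Placeholder values throughout; this only shows the intended TypedDict shapes.
query: BedrockRerankQuery = {
    "type": "TEXT",
    "textQuery": {"text": "how do I rotate my API keys?"},
}
source: BedrockRerankInlineDocumentSource = {
    "type": "TEXT",
    "textDocument": {"text": "Rotate keys from the security settings page."},
}
config: BedrockRerankConfiguration = {
    "type": "BEDROCK_RERANKING_MODEL",
    "bedrockRerankingConfiguration": {
        "modelConfiguration": {"modelArn": "arn:aws:bedrock:placeholder"},  # placeholder ARN
        "numberOfResults": 3,
    },
}
```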
+ +class BedrockRerankSource(TypedDict): + inlineDocumentSource: BedrockRerankInlineDocumentSource + type: Literal["INLINE"] + + +class BedrockRerankRequest(TypedDict): + """ + Request for Bedrock Rerank API + """ + + queries: List[BedrockRerankQuery] + rerankingConfiguration: BedrockRerankConfiguration + sources: List[BedrockRerankSource] + + +class AmazonDeepSeekR1StreamingResponse(TypedDict): + generation: str + generation_token_count: int + stop_reason: Optional[str] + prompt_token_count: int diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py new file mode 100644 index 00000000..7112a242 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/cohere.py @@ -0,0 +1,46 @@ +from typing import Iterable, List, Optional, Union + +from typing_extensions import Literal, Required, TypedDict + + +class CallObject(TypedDict): + name: str + parameters: dict + + +class ToolResultObject(TypedDict): + call: CallObject + outputs: List[dict] + + +class ChatHistoryToolResult(TypedDict, total=False): + role: Required[Literal["TOOL"]] + tool_results: List[ToolResultObject] + + +class ToolCallObject(TypedDict): + name: str + parameters: dict + + +class ChatHistoryUser(TypedDict, total=False): + role: Required[Literal["USER"]] + message: str + tool_calls: List[ToolCallObject] + + +class ChatHistorySystem(TypedDict, total=False): + role: Required[Literal["SYSTEM"]] + message: str + tool_calls: List[ToolCallObject] + + +class ChatHistoryChatBot(TypedDict, total=False): + role: Required[Literal["CHATBOT"]] + message: str + tool_calls: List[ToolCallObject] + + +ChatHistory = List[ + Union[ChatHistorySystem, ChatHistoryChatBot, ChatHistoryUser, ChatHistoryToolResult] +] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py new file mode 100644 index 00000000..5eec187d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_http.py @@ -0,0 +1,24 @@ +import ssl +from enum import Enum +from typing import Union + + +class httpxSpecialProvider(str, Enum): + """ + Httpx Clients can be created for these litellm internal providers + + Example: + - langsmith logging would need a custom async httpx client + - pass through endpoint would need a custom async httpx client + """ + + LoggingCallback = "logging_callback" + GuardrailCallback = "guardrail_callback" + Caching = "caching" + Oauth2Check = "oauth2_check" + SecretManager = "secret_manager" + PassThroughEndpoint = "pass_through_endpoint" + PromptFactory = "prompt_factory" + + +VerifyTypes = Union[str, bool, ssl.SSLContext] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py new file mode 100644 index 00000000..d5499a41 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/custom_llm.py @@ -0,0 +1,10 @@ +from typing import List + +from typing_extensions import Dict, Required, TypedDict, override + +from litellm.llms.custom_llm import CustomLLM + + +class CustomLLMItem(TypedDict): + provider: str + custom_handler: CustomLLM diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py new file mode 100644 index 00000000..770e05fe --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/databricks.py @@ -0,0 +1,21 @@ +from 
typing import TypedDict, Any, Union, Optional +import json +from typing_extensions import ( + Self, + Protocol, + TypeGuard, + override, + get_origin, + runtime_checkable, + Required, +) +from pydantic import BaseModel + + +class GenericStreamingChunk(TypedDict, total=False): + text: Required[str] + is_finished: Required[bool] + finish_reason: Required[Optional[str]] + logprobs: Optional[BaseModel] + original_chunk: Optional[BaseModel] + usage: Optional[BaseModel] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py new file mode 100644 index 00000000..e9563a9a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/mistral.py @@ -0,0 +1,12 @@ +from typing import List, Literal, Optional, TypedDict, Union + + +class FunctionCall(TypedDict): + name: Optional[str] + arguments: Optional[Union[str, dict]] + + +class MistralToolCallMessage(TypedDict): + id: Optional[str] + type: Literal["function"] + function: Optional[FunctionCall] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py new file mode 100644 index 00000000..9d71904c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/ollama.py @@ -0,0 +1,29 @@ +import json +from typing import Any, List, Optional, TypedDict, Union + +from pydantic import BaseModel +from typing_extensions import ( + Protocol, + Required, + Self, + TypeGuard, + get_origin, + override, + runtime_checkable, +) + + +class OllamaToolCallFunction( + TypedDict +): # follows - https://github.com/ollama/ollama/blob/6bd8a4b0a1ac15d5718f52bbe1cd56f827beb694/api/types.go#L148 + name: str + arguments: dict + + +class OllamaToolCall(TypedDict): + function: OllamaToolCallFunction + + +class OllamaVisionModelObject(TypedDict): + prompt: str + images: List[str] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py new file mode 100644 index 00000000..4b0be9d5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/openai.py @@ -0,0 +1,1040 @@ +from enum import Enum +from os import PathLike +from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union + +import httpx +from openai._legacy_response import ( + HttpxBinaryResponseContent as _HttpxBinaryResponseContent, +) +from openai.lib.streaming._assistants import ( + AssistantEventHandler, + AssistantStreamManager, + AsyncAssistantEventHandler, + AsyncAssistantStreamManager, +) +from openai.pagination import AsyncCursorPage, SyncCursorPage +from openai.types import Batch, EmbeddingCreateParams, FileObject +from openai.types.beta.assistant import Assistant +from openai.types.beta.assistant_tool_param import AssistantToolParam +from openai.types.beta.thread_create_params import ( + Message as OpenAICreateThreadParamsMessage, +) +from openai.types.beta.threads.message import Message as OpenAIMessage +from openai.types.beta.threads.message_content import MessageContent +from openai.types.beta.threads.run import Run +from openai.types.chat import ChatCompletionChunk +from openai.types.chat.chat_completion_audio_param import ChatCompletionAudioParam +from openai.types.chat.chat_completion_content_part_input_audio_param import ( + ChatCompletionContentPartInputAudioParam, +) +from openai.types.chat.chat_completion_modality import ChatCompletionModality +from 
openai.types.chat.chat_completion_prediction_content_param import ( + ChatCompletionPredictionContentParam, +) +from openai.types.embedding import Embedding as OpenAIEmbedding +from openai.types.fine_tuning.fine_tuning_job import FineTuningJob +from openai.types.responses.response import ( + IncompleteDetails, + Response, + ResponseOutputItem, + ResponseTextConfig, + Tool, + ToolChoice, +) +from openai.types.responses.response_create_params import ( + Reasoning, + ResponseIncludable, + ResponseInputParam, + ResponseTextConfigParam, + ToolChoice, + ToolParam, +) +from pydantic import BaseModel, Discriminator, Field, PrivateAttr +from typing_extensions import Annotated, Dict, Required, TypedDict, override + +FileContent = Union[IO[bytes], bytes, PathLike] + +FileTypes = Union[ + # file (or bytes) + FileContent, + # (filename, file (or bytes)) + Tuple[Optional[str], FileContent], + # (filename, file (or bytes), content_type) + Tuple[Optional[str], FileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]], +] + + +EmbeddingInput = Union[str, List[str]] + + +class HttpxBinaryResponseContent(_HttpxBinaryResponseContent): + _hidden_params: dict = {} + pass + + +class NotGiven: + """ + A sentinel singleton class used to distinguish omitted keyword arguments + from those passed in with the value None (which may have different behavior). + + For example: + + ```py + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: + ... + + + get(timeout=1) # 1s timeout + get(timeout=None) # No timeout + get() # Default timeout behavior, which may not be statically known at the method definition. + ``` + """ + + def __bool__(self) -> Literal[False]: + return False + + @override + def __repr__(self) -> str: + return "NOT_GIVEN" + + +NOT_GIVEN = NotGiven() + + +class ToolResourcesCodeInterpreter(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made + available to the `code_interpreter` tool. There can be a maximum of 20 files + associated with the tool. + """ + + +class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + file_ids: List[str] + """ + A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to + add to the vector store. There can be a maximum of 10000 files in a vector + store. + """ + + metadata: object + """Set of 16 key-value pairs that can be attached to a vector store. + + This can be useful for storing additional information about the vector store in + a structured format. Keys can be a maximum of 64 characters long and values can + be a maximum of 512 characters long. + """ + + +class ToolResourcesFileSearch(TypedDict, total=False): + vector_store_ids: List[str] + """ + The + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + attached to this thread. There can be a maximum of 1 vector store attached to + the thread. + """ + + vector_stores: Iterable[ToolResourcesFileSearchVectorStore] + """ + A helper to create a + [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) + with file_ids and attach it to this thread. There can be a maximum of 1 vector + store attached to the thread. 
+ """ + + +class OpenAICreateThreadParamsToolResources(TypedDict, total=False): + code_interpreter: ToolResourcesCodeInterpreter + + file_search: ToolResourcesFileSearch + + +class FileSearchToolParam(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +class CodeInterpreterToolParam(TypedDict, total=False): + type: Required[Literal["code_interpreter"]] + """The type of tool being defined: `code_interpreter`""" + + +AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] + + +class Attachment(TypedDict, total=False): + file_id: str + """The ID of the file to attach to the message.""" + + tools: Iterable[AttachmentTool] + """The tools to add this file to.""" + + +class ImageFileObject(TypedDict): + file_id: Required[str] + detail: Optional[str] + + +class ImageURLObject(TypedDict): + url: Required[str] + detail: Optional[str] + + +class MessageContentTextObject(TypedDict): + type: Required[Literal["text"]] + text: str + + +class MessageContentImageFileObject(TypedDict): + type: Literal["image_file"] + image_file: ImageFileObject + + +class MessageContentImageURLObject(TypedDict): + type: Required[str] + image_url: ImageURLObject + + +class MessageData(TypedDict): + role: Literal["user", "assistant"] + content: Union[ + str, + List[ + Union[ + MessageContentTextObject, + MessageContentImageFileObject, + MessageContentImageURLObject, + ] + ], + ] + attachments: Optional[List[Attachment]] + metadata: Optional[dict] + + +class Thread(BaseModel): + id: str + """The identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the thread was created.""" + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maximum of 512 characters long. 
+ """ + + object: Literal["thread"] + """The object type, which is always `thread`.""" + + +# OpenAI Files Types +class CreateFileRequest(TypedDict, total=False): + """ + CreateFileRequest + Used by Assistants API, Batches API, and Fine-Tunes API + + Required Params: + file: FileTypes + purpose: Literal['assistants', 'batch', 'fine-tune'] + + Optional Params: + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] = None + timeout: Optional[float] = None + """ + + file: FileTypes + purpose: Literal["assistants", "batch", "fine-tune"] + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] + timeout: Optional[float] + + +class FileContentRequest(TypedDict, total=False): + """ + FileContentRequest + Used by Assistants API, Batches API, and Fine-Tunes API + + Required Params: + file_id: str + + Optional Params: + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] = None + timeout: Optional[float] = None + """ + + file_id: str + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] + timeout: Optional[float] + + +# OpenAI Batches Types +class CreateBatchRequest(TypedDict, total=False): + """ + CreateBatchRequest + """ + + completion_window: Literal["24h"] + endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"] + input_file_id: str + metadata: Optional[Dict[str, str]] + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] + timeout: Optional[float] + + +class RetrieveBatchRequest(TypedDict, total=False): + """ + RetrieveBatchRequest + """ + + batch_id: str + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] + timeout: Optional[float] + + +class CancelBatchRequest(TypedDict, total=False): + """ + CancelBatchRequest + """ + + batch_id: str + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] + timeout: Optional[float] + + +class ListBatchRequest(TypedDict, total=False): + """ + ListBatchRequest - List your organization's batches + Calls https://api.openai.com/v1/batches + """ + + after: Union[str, NotGiven] + limit: Union[int, NotGiven] + extra_headers: Optional[Dict[str, str]] + extra_body: Optional[Dict[str, str]] + timeout: Optional[float] + + +BatchJobStatus = Literal[ + "validating", + "failed", + "in_progress", + "finalizing", + "completed", + "expired", + "cancelling", + "cancelled", +] + + +class ChatCompletionAudioDelta(TypedDict, total=False): + data: str + transcript: str + expires_at: int + id: str + + +class ChatCompletionToolCallFunctionChunk(TypedDict, total=False): + name: Optional[str] + arguments: str + + +class ChatCompletionAssistantToolCall(TypedDict): + id: Optional[str] + type: Literal["function"] + function: ChatCompletionToolCallFunctionChunk + + +class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call + id: Optional[str] + type: Literal["function"] + function: ChatCompletionToolCallFunctionChunk + index: int + + +class ChatCompletionDeltaToolCallChunk(TypedDict, total=False): + id: str + type: Literal["function"] + function: ChatCompletionToolCallFunctionChunk + index: int + + +class ChatCompletionCachedContent(TypedDict): + type: Literal["ephemeral"] + + +class ChatCompletionThinkingBlock(TypedDict, total=False): + type: Required[Literal["thinking"]] + thinking: str + signature: str + cache_control: Optional[Union[dict, ChatCompletionCachedContent]] + + +class OpenAIChatCompletionTextObject(TypedDict): + type: Literal["text"] + text: str 
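The file and batch request TypedDicts above are easier to read with a concrete payload in hand. The following sketch is illustrative only: the JSONL bytes, file id, and metadata are invented placeholders, and because both classes are declared with `total=False` the optional keys can simply be omitted.

```python
from litellm.types.llms.openai import CreateBatchRequest, CreateFileRequest

# FileTypes accepts raw bytes, so a small in-memory JSONL payload works here.
file_request: CreateFileRequest = {
    "file": b'{"custom_id": "1", "method": "POST", "url": "/v1/chat/completions"}',
    "purpose": "batch",
    "timeout": 600.0,
}

batch_request: CreateBatchRequest = {
    "completion_window": "24h",
    "endpoint": "/v1/chat/completions",
    "input_file_id": "file-abc123",  # placeholder id of a previously uploaded file
    "metadata": {"team": "search"},
}
```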
+ + +class ChatCompletionTextObject( + OpenAIChatCompletionTextObject, total=False +): # litellm wrapper on top of openai object for handling cached content + cache_control: ChatCompletionCachedContent + + +class ChatCompletionImageUrlObject(TypedDict, total=False): + url: Required[str] + detail: str + format: str + + +class ChatCompletionImageObject(TypedDict): + type: Literal["image_url"] + image_url: Union[str, ChatCompletionImageUrlObject] + + +class ChatCompletionVideoUrlObject(TypedDict, total=False): + url: Required[str] + detail: str + + +class ChatCompletionVideoObject(TypedDict): + type: Literal["video_url"] + video_url: Union[str, ChatCompletionVideoUrlObject] + + +class ChatCompletionAudioObject(ChatCompletionContentPartInputAudioParam): + pass + + +class DocumentObject(TypedDict): + type: Literal["text"] + media_type: str + data: str + + +class CitationsObject(TypedDict): + enabled: bool + + +class ChatCompletionDocumentObject(TypedDict): + type: Literal["document"] + source: DocumentObject + title: str + context: str + citations: Optional[CitationsObject] + + +class ChatCompletionFileObjectFile(TypedDict): + file_data: Optional[str] + file_id: Optional[str] + filename: Optional[str] + + +class ChatCompletionFileObject(TypedDict): + type: Literal["file"] + file: ChatCompletionFileObjectFile + + +OpenAIMessageContentListBlock = Union[ + ChatCompletionTextObject, + ChatCompletionImageObject, + ChatCompletionAudioObject, + ChatCompletionDocumentObject, + ChatCompletionVideoObject, + ChatCompletionFileObject, +] + +OpenAIMessageContent = Union[ + str, + Iterable[OpenAIMessageContentListBlock], +] + +# The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. +AllPromptValues = Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None] + + +class OpenAIChatCompletionUserMessage(TypedDict): + role: Literal["user"] + content: OpenAIMessageContent + + +class OpenAITextCompletionUserMessage(TypedDict): + role: Literal["user"] + content: AllPromptValues + + +class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False): + cache_control: ChatCompletionCachedContent + + +class OpenAIChatCompletionAssistantMessage(TypedDict, total=False): + role: Required[Literal["assistant"]] + content: Optional[ + Union[ + str, Iterable[Union[ChatCompletionTextObject, ChatCompletionThinkingBlock]] + ] + ] + name: Optional[str] + tool_calls: Optional[List[ChatCompletionAssistantToolCall]] + function_call: Optional[ChatCompletionToolCallFunctionChunk] + + +class ChatCompletionAssistantMessage(OpenAIChatCompletionAssistantMessage, total=False): + cache_control: ChatCompletionCachedContent + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] + + +class ChatCompletionToolMessage(TypedDict): + role: Literal["tool"] + content: Union[str, Iterable[ChatCompletionTextObject]] + tool_call_id: str + + +class ChatCompletionFunctionMessage(TypedDict): + role: Literal["function"] + content: Optional[Union[str, Iterable[ChatCompletionTextObject]]] + name: str + tool_call_id: Optional[str] + + +class OpenAIChatCompletionSystemMessage(TypedDict, total=False): + role: Required[Literal["system"]] + content: Required[Union[str, List]] + name: str + + +class OpenAIChatCompletionDeveloperMessage(TypedDict, total=False): + role: Required[Literal["developer"]] + content: Required[Union[str, List]] + name: str + + +class ChatCompletionSystemMessage(OpenAIChatCompletionSystemMessage, total=False): + cache_control: 
ChatCompletionCachedContent + + +class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total=False): + cache_control: ChatCompletionCachedContent + + +ValidUserMessageContentTypes = [ + "text", + "image_url", + "input_audio", + "document", + "video_url", + "file", +] # used for validating user messages. Prevent users from accidentally sending anthropic messages. + +AllMessageValues = Union[ + ChatCompletionUserMessage, + ChatCompletionAssistantMessage, + ChatCompletionToolMessage, + ChatCompletionSystemMessage, + ChatCompletionFunctionMessage, + ChatCompletionDeveloperMessage, +] + + +class ChatCompletionToolChoiceFunctionParam(TypedDict): + name: str + + +class ChatCompletionToolChoiceObjectParam(TypedDict): + type: Literal["function"] + function: ChatCompletionToolChoiceFunctionParam + + +ChatCompletionToolChoiceStringValues = Literal["none", "auto", "required"] + +ChatCompletionToolChoiceValues = Union[ + ChatCompletionToolChoiceStringValues, ChatCompletionToolChoiceObjectParam +] + + +class ChatCompletionToolParamFunctionChunk(TypedDict, total=False): + name: Required[str] + description: str + parameters: dict + + +class OpenAIChatCompletionToolParam(TypedDict): + type: Union[Literal["function"], str] + function: ChatCompletionToolParamFunctionChunk + + +class ChatCompletionToolParam(OpenAIChatCompletionToolParam, total=False): + cache_control: ChatCompletionCachedContent + + +class Function(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionNamedToolChoiceParam(TypedDict, total=False): + function: Required[Function] + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" + + +class ChatCompletionRequest(TypedDict, total=False): + model: Required[str] + messages: Required[List[AllMessageValues]] + frequency_penalty: float + logit_bias: dict + logprobs: bool + top_logprobs: int + max_tokens: int + n: int + presence_penalty: float + response_format: dict + seed: int + service_tier: str + stop: Union[str, List[str]] + stream_options: dict + temperature: float + top_p: float + tools: List[ChatCompletionToolParam] + tool_choice: ChatCompletionToolChoiceValues + parallel_tool_calls: bool + function_call: Union[str, dict] + functions: List + user: str + metadata: dict # litellm specific param + + +class ChatCompletionDeltaChunk(TypedDict, total=False): + content: Optional[str] + tool_calls: List[ChatCompletionDeltaToolCallChunk] + role: str + + +ChatCompletionAssistantContentValue = ( + str # keep as var, used in stream_chunk_builder as well +) + + +class ChatCompletionResponseMessage(TypedDict, total=False): + content: Optional[ChatCompletionAssistantContentValue] + tool_calls: Optional[List[ChatCompletionToolCallChunk]] + role: Literal["assistant"] + function_call: Optional[ChatCompletionToolCallFunctionChunk] + provider_specific_fields: Optional[dict] + reasoning_content: Optional[str] + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] + + +class ChatCompletionUsageBlock(TypedDict): + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +class OpenAIChatCompletionChunk(ChatCompletionChunk): + def __init__(self, **kwargs): + # Set the 'object' kwarg to 'chat.completion.chunk' + kwargs["object"] = "chat.completion.chunk" + super().__init__(**kwargs) + + +class Hyperparameters(BaseModel): + batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch." 
+ learning_rate_multiplier: Optional[Union[str, float]] = ( + None # Scaling factor for the learning rate + ) + n_epochs: Optional[Union[str, int]] = ( + None # "The number of epochs to train the model for" + ) + + +class FineTuningJobCreate(BaseModel): + """ + FineTuningJobCreate - Create a fine-tuning job + + Example Request + ``` + { + "model": "gpt-3.5-turbo", + "training_file": "file-abc123", + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": 0.1, + "n_epochs": 3 + }, + "suffix": "custom-model-name", + "validation_file": "file-xyz789", + "integrations": ["slack"], + "seed": 42 + } + ``` + """ + + model: str # "The name of the model to fine-tune." + training_file: str # "The ID of an uploaded file that contains training data." + hyperparameters: Optional[Hyperparameters] = ( + None # "The hyperparameters used for the fine-tuning job." + ) + suffix: Optional[str] = ( + None # "A string of up to 18 characters that will be added to your fine-tuned model name." + ) + validation_file: Optional[str] = ( + None # "The ID of an uploaded file that contains validation data." + ) + integrations: Optional[List[str]] = ( + None # "A list of integrations to enable for your fine-tuning job." + ) + seed: Optional[int] = None # "The seed controls the reproducibility of the job." + + +class LiteLLMFineTuningJobCreate(FineTuningJobCreate): + custom_llm_provider: Literal["openai", "azure", "vertex_ai"] + + class Config: + extra = "allow" # This allows the model to accept additional fields + + +AllEmbeddingInputValues = Union[str, List[str], List[int], List[List[int]]] + +OpenAIAudioTranscriptionOptionalParams = Literal[ + "language", "prompt", "temperature", "response_format", "timestamp_granularities" +] + + +OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"] + + +class ResponsesAPIOptionalRequestParams(TypedDict, total=False): + """TypedDict for Optional parameters supported by the responses API.""" + + include: Optional[List[ResponseIncludable]] + instructions: Optional[str] + max_output_tokens: Optional[int] + metadata: Optional[Dict[str, Any]] + parallel_tool_calls: Optional[bool] + previous_response_id: Optional[str] + reasoning: Optional[Reasoning] + store: Optional[bool] + stream: Optional[bool] + temperature: Optional[float] + text: Optional[ResponseTextConfigParam] + tool_choice: Optional[ToolChoice] + tools: Optional[Iterable[ToolParam]] + top_p: Optional[float] + truncation: Optional[Literal["auto", "disabled"]] + user: Optional[str] + + +class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False): + """TypedDict for request parameters supported by the responses API.""" + + input: Union[str, ResponseInputParam] + model: str + + +class BaseLiteLLMOpenAIResponseObject(BaseModel): + def __getitem__(self, key): + return self.__dict__[key] + + def get(self, key, default=None): + return self.__dict__.get(key, default) + + def __contains__(self, key): + return key in self.__dict__ + + def items(self): + return self.__dict__.items() + + +class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject): + reasoning_tokens: int + + model_config = {"extra": "allow"} + + +class ResponseAPIUsage(BaseLiteLLMOpenAIResponseObject): + input_tokens: int + """The number of input tokens.""" + + output_tokens: int + """The number of output tokens.""" + + output_tokens_details: Optional[OutputTokensDetails] + """A detailed breakdown of the output tokens.""" + + total_tokens: int + """The total number of tokens used.""" + + model_config = 
{"extra": "allow"} + + +class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject): + id: str + created_at: float + error: Optional[dict] + incomplete_details: Optional[IncompleteDetails] + instructions: Optional[str] + metadata: Optional[Dict] + model: Optional[str] + object: Optional[str] + output: List[ResponseOutputItem] + parallel_tool_calls: bool + temperature: Optional[float] + tool_choice: ToolChoice + tools: List[Tool] + top_p: Optional[float] + max_output_tokens: Optional[int] + previous_response_id: Optional[str] + reasoning: Optional[Reasoning] + status: Optional[str] + text: Optional[ResponseTextConfig] + truncation: Optional[Literal["auto", "disabled"]] + usage: Optional[ResponseAPIUsage] + user: Optional[str] + # Define private attributes using PrivateAttr + _hidden_params: dict = PrivateAttr(default_factory=dict) + + +class ResponsesAPIStreamEvents(str, Enum): + """ + Enum representing all supported OpenAI stream event types for the Responses API. + + Inherits from str to allow direct string comparison and usage as dictionary keys. + """ + + # Response lifecycle events + RESPONSE_CREATED = "response.created" + RESPONSE_IN_PROGRESS = "response.in_progress" + RESPONSE_COMPLETED = "response.completed" + RESPONSE_FAILED = "response.failed" + RESPONSE_INCOMPLETE = "response.incomplete" + + # Output item events + OUTPUT_ITEM_ADDED = "response.output_item.added" + OUTPUT_ITEM_DONE = "response.output_item.done" + + # Content part events + CONTENT_PART_ADDED = "response.content_part.added" + CONTENT_PART_DONE = "response.content_part.done" + + # Output text events + OUTPUT_TEXT_DELTA = "response.output_text.delta" + OUTPUT_TEXT_ANNOTATION_ADDED = "response.output_text.annotation.added" + OUTPUT_TEXT_DONE = "response.output_text.done" + + # Refusal events + REFUSAL_DELTA = "response.refusal.delta" + REFUSAL_DONE = "response.refusal.done" + + # Function call events + FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta" + FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done" + + # File search events + FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress" + FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching" + FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed" + + # Web search events + WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress" + WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching" + WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed" + + # Error event + ERROR = "error" + + +class ResponseCreatedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED] + response: ResponsesAPIResponse + + +class ResponseInProgressEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS] + response: ResponsesAPIResponse + + +class ResponseCompletedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED] + response: ResponsesAPIResponse + _hidden_params: dict = PrivateAttr(default_factory=dict) + + +class ResponseFailedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED] + response: ResponsesAPIResponse + + +class ResponseIncompleteEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE] + response: ResponsesAPIResponse + + +class OutputItemAddedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED] + 
output_index: int + item: dict + + +class OutputItemDoneEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE] + output_index: int + item: dict + + +class ContentPartAddedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED] + item_id: str + output_index: int + content_index: int + part: dict + + +class ContentPartDoneEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE] + item_id: str + output_index: int + content_index: int + part: dict + + +class OutputTextDeltaEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA] + item_id: str + output_index: int + content_index: int + delta: str + + +class OutputTextAnnotationAddedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED] + item_id: str + output_index: int + content_index: int + annotation_index: int + annotation: dict + + +class OutputTextDoneEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE] + item_id: str + output_index: int + content_index: int + text: str + + +class RefusalDeltaEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA] + item_id: str + output_index: int + content_index: int + delta: str + + +class RefusalDoneEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE] + item_id: str + output_index: int + content_index: int + refusal: str + + +class FunctionCallArgumentsDeltaEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA] + item_id: str + output_index: int + delta: str + + +class FunctionCallArgumentsDoneEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE] + item_id: str + output_index: int + arguments: str + + +class FileSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS] + output_index: int + item_id: str + + +class FileSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING] + output_index: int + item_id: str + + +class FileSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED] + output_index: int + item_id: str + + +class WebSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS] + output_index: int + item_id: str + + +class WebSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING] + output_index: int + item_id: str + + +class WebSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED] + output_index: int + item_id: str + + +class ErrorEvent(BaseLiteLLMOpenAIResponseObject): + type: Literal[ResponsesAPIStreamEvents.ERROR] + code: Optional[str] + message: str + param: Optional[str] + + +# Union type for all possible streaming responses +ResponsesAPIStreamingResponse = Annotated[ + Union[ + ResponseCreatedEvent, + ResponseInProgressEvent, + ResponseCompletedEvent, + ResponseFailedEvent, + ResponseIncompleteEvent, + OutputItemAddedEvent, + OutputItemDoneEvent, + ContentPartAddedEvent, + ContentPartDoneEvent, + 
OutputTextDeltaEvent, + OutputTextAnnotationAddedEvent, + OutputTextDoneEvent, + RefusalDeltaEvent, + RefusalDoneEvent, + FunctionCallArgumentsDeltaEvent, + FunctionCallArgumentsDoneEvent, + FileSearchCallInProgressEvent, + FileSearchCallSearchingEvent, + FileSearchCallCompletedEvent, + WebSearchCallInProgressEvent, + WebSearchCallSearchingEvent, + WebSearchCallCompletedEvent, + ErrorEvent, + ], + Discriminator("type"), +] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py new file mode 100644 index 00000000..f781af88 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/rerank.py @@ -0,0 +1,19 @@ +import json +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union + +from typing_extensions import ( + Protocol, + Required, + Self, + TypeGuard, + get_origin, + override, + runtime_checkable, +) + + +class InfinityRerankResult(TypedDict): + index: int + relevance_score: float + document: Optional[str] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py new file mode 100644 index 00000000..7024909a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/vertex_ai.py @@ -0,0 +1,486 @@ +import json +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union + +from typing_extensions import ( + Protocol, + Required, + Self, + TypeGuard, + get_origin, + override, + runtime_checkable, +) + + +class FunctionResponse(TypedDict): + name: str + response: Optional[dict] + + +class FunctionCall(TypedDict): + name: str + args: Optional[dict] + + +class FileDataType(TypedDict): + mime_type: str + file_uri: str # the cloud storage uri of storing this file + + +class BlobType(TypedDict): + mime_type: Required[str] + data: Required[str] + + +class PartType(TypedDict, total=False): + text: str + inline_data: BlobType + file_data: FileDataType + function_call: FunctionCall + function_response: FunctionResponse + + +class HttpxFunctionCall(TypedDict): + name: str + args: dict + + +class HttpxExecutableCode(TypedDict): + code: str + language: str + + +class HttpxCodeExecutionResult(TypedDict): + outcome: str + output: str + + +class HttpxPartType(TypedDict, total=False): + text: str + inline_data: BlobType + file_data: FileDataType + functionCall: HttpxFunctionCall + function_response: FunctionResponse + executableCode: HttpxExecutableCode + codeExecutionResult: HttpxCodeExecutionResult + + +class HttpxContentType(TypedDict, total=False): + role: Literal["user", "model"] + parts: List[HttpxPartType] + + +class ContentType(TypedDict, total=False): + role: Literal["user", "model"] + parts: Required[List[PartType]] + + +class SystemInstructions(TypedDict): + parts: Required[List[PartType]] + + +class Schema(TypedDict, total=False): + type: Literal["STRING", "INTEGER", "BOOLEAN", "NUMBER", "ARRAY", "OBJECT"] + description: str + enum: List[str] + items: List["Schema"] + properties: "Schema" + required: List[str] + nullable: bool + + +class FunctionDeclaration(TypedDict, total=False): + name: Required[str] + description: str + parameters: Union[Schema, dict] + response: Schema + + +class VertexAISearch(TypedDict, total=False): + datastore: Required[str] + + +class Retrieval(TypedDict): + source: VertexAISearch + + +class FunctionCallingConfig(TypedDict, total=False): + mode: Literal["ANY", "AUTO", "NONE"] + 
allowed_function_names: List[str] + + +HarmCategory = Literal[ + "HARM_CATEGORY_UNSPECIFIED", + "HARM_CATEGORY_HATE_SPEECH", + "HARM_CATEGORY_DANGEROUS_CONTENT", + "HARM_CATEGORY_HARASSMENT", + "HARM_CATEGORY_SEXUALLY_EXPLICIT", +] +HarmBlockThreshold = Literal[ + "HARM_BLOCK_THRESHOLD_UNSPECIFIED", + "BLOCK_LOW_AND_ABOVE", + "BLOCK_MEDIUM_AND_ABOVE", + "BLOCK_ONLY_HIGH", + "BLOCK_NONE", +] +HarmBlockMethod = Literal["HARM_BLOCK_METHOD_UNSPECIFIED", "SEVERITY", "PROBABILITY"] + +HarmProbability = Literal[ + "HARM_PROBABILITY_UNSPECIFIED", "NEGLIGIBLE", "LOW", "MEDIUM", "HIGH" +] + +HarmSeverity = Literal[ + "HARM_SEVERITY_UNSPECIFIED", + "HARM_SEVERITY_NEGLIGIBLE", + "HARM_SEVERITY_LOW", + "HARM_SEVERITY_MEDIUM", + "HARM_SEVERITY_HIGH", +] + + +class SafetSettingsConfig(TypedDict, total=False): + category: HarmCategory + threshold: HarmBlockThreshold + max_influential_terms: int + method: HarmBlockMethod + + +class GenerationConfig(TypedDict, total=False): + temperature: float + top_p: float + top_k: float + candidate_count: int + max_output_tokens: int + stop_sequences: List[str] + presence_penalty: float + frequency_penalty: float + response_mime_type: Literal["text/plain", "application/json"] + response_schema: dict + seed: int + responseLogprobs: bool + logprobs: int + + +class Tools(TypedDict, total=False): + function_declarations: List[FunctionDeclaration] + googleSearch: dict + googleSearchRetrieval: dict + code_execution: dict + retrieval: Retrieval + + +class ToolConfig(TypedDict): + functionCallingConfig: FunctionCallingConfig + + +class TTL(TypedDict, total=False): + seconds: Required[float] + nano: float + + +class UsageMetadata(TypedDict, total=False): + promptTokenCount: int + totalTokenCount: int + candidatesTokenCount: int + cachedContentTokenCount: int + + +class CachedContent(TypedDict, total=False): + ttl: TTL + expire_time: str + contents: List[ContentType] + tools: List[Tools] + createTime: str # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z" + updateTime: str # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z" + usageMetadata: UsageMetadata + expireTime: str # "2014-10-02T15:01:23Z" and "2014-10-02T15:01:23.045123456Z" + name: str + displayName: str + model: str + systemInstruction: ContentType + toolConfig: ToolConfig + + +class RequestBody(TypedDict, total=False): + contents: Required[List[ContentType]] + system_instruction: SystemInstructions + tools: Tools + toolConfig: ToolConfig + safetySettings: List[SafetSettingsConfig] + generationConfig: GenerationConfig + cachedContent: str + + +class CachedContentRequestBody(TypedDict, total=False): + contents: Required[List[ContentType]] + system_instruction: SystemInstructions + tools: Tools + toolConfig: ToolConfig + model: Required[str] # Format: models/{model} + ttl: str # ending in 's' - Example: "3.5s". 
+ displayName: str + + +class CachedContentListAllResponseBody(TypedDict, total=False): + cachedContents: List[CachedContent] + nextPageToken: str + + +class SafetyRatings(TypedDict): + category: HarmCategory + probability: HarmProbability + probabilityScore: int + severity: HarmSeverity + blocked: bool + + +class Date(TypedDict): + year: int + month: int + date: int + + +class Citation(TypedDict): + startIndex: int + endIndex: int + uri: str + title: str + license: str + publicationDate: Date + + +class CitationMetadata(TypedDict): + citations: List[Citation] + + +class SearchEntryPoint(TypedDict, total=False): + renderedContent: str + sdkBlob: str + + +class GroundingMetadata(TypedDict, total=False): + webSearchQueries: List[str] + searchEntryPoint: SearchEntryPoint + groundingAttributions: List[dict] + + +class LogprobsCandidate(TypedDict): + token: str + tokenId: int + logProbability: float + + +class LogprobsTopCandidate(TypedDict): + candidates: List[LogprobsCandidate] + + +class LogprobsResult(TypedDict, total=False): + topCandidates: List[LogprobsTopCandidate] + chosenCandidates: List[LogprobsCandidate] + + +class Candidates(TypedDict, total=False): + index: int + content: HttpxContentType + finishReason: Literal[ + "FINISH_REASON_UNSPECIFIED", + "STOP", + "MAX_TOKENS", + "SAFETY", + "RECITATION", + "OTHER", + "BLOCKLIST", + "PROHIBITED_CONTENT", + "SPII", + ] + safetyRatings: List[SafetyRatings] + citationMetadata: CitationMetadata + groundingMetadata: GroundingMetadata + finishMessage: str + logprobsResult: LogprobsResult + + +class PromptFeedback(TypedDict): + blockReason: str + safetyRatings: List[SafetyRatings] + blockReasonMessage: str + + +class GenerateContentResponseBody(TypedDict, total=False): + candidates: List[Candidates] + promptFeedback: PromptFeedback + usageMetadata: Required[UsageMetadata] + + +class FineTuneHyperparameters(TypedDict, total=False): + epoch_count: Optional[int] + learning_rate_multiplier: Optional[float] + adapter_size: Optional[ + Literal[ + "ADAPTER_SIZE_UNSPECIFIED", + "ADAPTER_SIZE_ONE", + "ADAPTER_SIZE_FOUR", + "ADAPTER_SIZE_EIGHT", + "ADAPTER_SIZE_SIXTEEN", + ] + ] + + +class FineTunesupervisedTuningSpec(TypedDict, total=False): + training_dataset_uri: str + validation_dataset: Optional[str] + tuned_model_display_name: Optional[str] + hyperParameters: Optional[FineTuneHyperparameters] + + +class FineTuneJobCreate(TypedDict, total=False): + baseModel: str + supervisedTuningSpec: FineTunesupervisedTuningSpec + tunedModelDisplayName: Optional[str] + + +class ResponseSupervisedTuningSpec(TypedDict, total=False): + trainingDatasetUri: Optional[str] + hyperParameters: Optional[FineTuneHyperparameters] + + +class ResponseTuningJob(TypedDict): + name: Optional[str] + tunedModelDisplayName: Optional[str] + baseModel: Optional[str] + supervisedTuningSpec: Optional[ResponseSupervisedTuningSpec] + state: Optional[ + Literal[ + "JOB_STATE_PENDING", + "JOB_STATE_RUNNING", + "JOB_STATE_SUCCEEDED", + "JOB_STATE_FAILED", + "JOB_STATE_CANCELLED", + ] + ] + createTime: Optional[str] + updateTime: Optional[str] + + +class InstanceVideo(TypedDict, total=False): + gcsUri: str + videoSegmentConfig: Tuple[float, float, float] + + +class InstanceImage(TypedDict, total=False): + gcsUri: Optional[str] + bytesBase64Encoded: Optional[str] + mimeType: Optional[str] + + +class Instance(TypedDict, total=False): + text: str + image: InstanceImage + video: InstanceVideo + + +class VertexMultimodalEmbeddingRequest(TypedDict, total=False): + instances: List[Instance] + + +class 
VideoEmbedding(TypedDict): + startOffsetSec: int + endOffsetSec: int + embedding: List[float] + + +class MultimodalPrediction(TypedDict, total=False): + textEmbedding: List[float] + imageEmbedding: List[float] + videoEmbeddings: List[VideoEmbedding] + + +class MultimodalPredictions(TypedDict, total=False): + predictions: List[MultimodalPrediction] + + +class VertexAICachedContentResponseObject(TypedDict): + name: str + model: str + + +class TaskTypeEnum(Enum): + TASK_TYPE_UNSPECIFIED = "TASK_TYPE_UNSPECIFIED" + RETRIEVAL_QUERY = "RETRIEVAL_QUERY" + RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT" + SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY" + CLASSIFICATION = "CLASSIFICATION" + CLUSTERING = "CLUSTERING" + QUESTION_ANSWERING = "QUESTION_ANSWERING" + FACT_VERIFICATION = "FACT_VERIFICATION" + + +class VertexAITextEmbeddingsRequestBody(TypedDict, total=False): + content: Required[ContentType] + taskType: TaskTypeEnum + title: str + outputDimensionality: int + + +class ContentEmbeddings(TypedDict): + values: List[int] + + +class VertexAITextEmbeddingsResponseObject(TypedDict): + embedding: ContentEmbeddings + + +class EmbedContentRequest(VertexAITextEmbeddingsRequestBody): + model: Required[str] + + +class VertexAIBatchEmbeddingsRequestBody(TypedDict, total=False): + requests: List[EmbedContentRequest] + + +class VertexAIBatchEmbeddingsResponseObject(TypedDict): + embeddings: List[ContentEmbeddings] + + +# Vertex AI Batch Prediction + + +class GcsSource(TypedDict): + uris: str + + +class InputConfig(TypedDict): + instancesFormat: str + gcsSource: GcsSource + + +class GcsDestination(TypedDict): + outputUriPrefix: str + + +class OutputConfig(TypedDict, total=False): + predictionsFormat: str + gcsDestination: GcsDestination + + +class VertexAIBatchPredictionJob(TypedDict): + displayName: str + model: str + inputConfig: InputConfig + outputConfig: OutputConfig + + +class VertexBatchPredictionResponse(TypedDict, total=False): + name: str + displayName: str + model: str + inputConfig: InputConfig + outputConfig: OutputConfig + state: str + createTime: str + updateTime: str + modelVersionId: str + + +VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py b/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py new file mode 100644 index 00000000..7dee2836 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/llms/watsonx.py @@ -0,0 +1,33 @@ +import json +from enum import Enum +from typing import Any, List, Optional, TypedDict, Union + +from pydantic import BaseModel + + +class WatsonXAPIParams(TypedDict): + project_id: str + space_id: Optional[str] + region_name: Optional[str] + + +class WatsonXCredentials(TypedDict): + api_key: str + api_base: str + token: Optional[str] + + +class WatsonXAIEndpoint(str, Enum): + TEXT_GENERATION = "/ml/v1/text/generation" + TEXT_GENERATION_STREAM = "/ml/v1/text/generation_stream" + CHAT = "/ml/v1/text/chat" + CHAT_STREAM = "/ml/v1/text/chat_stream" + DEPLOYMENT_TEXT_GENERATION = "/ml/v1/deployments/{deployment_id}/text/generation" + DEPLOYMENT_TEXT_GENERATION_STREAM = ( + "/ml/v1/deployments/{deployment_id}/text/generation_stream" + ) + DEPLOYMENT_CHAT = "/ml/v1/deployments/{deployment_id}/text/chat" + DEPLOYMENT_CHAT_STREAM = "/ml/v1/deployments/{deployment_id}/text/chat_stream" + EMBEDDINGS = "/ml/v1/text/embeddings" + PROMPTS = "/ml/v1/prompts" + AVAILABLE_MODELS = "/ml/v1/foundation_model_specs" diff --git 
a/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py b/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py new file mode 100644 index 00000000..90871198 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/passthrough_endpoints/vertex_ai.py @@ -0,0 +1,20 @@ +""" +Used for /vertex_ai/ pass through endpoints +""" + +from typing import Optional + +from pydantic import BaseModel + +from ..llms.vertex_ai import VERTEX_CREDENTIALS_TYPES + + +class VertexPassThroughCredentials(BaseModel): + # Example: vertex_project = "my-project-123" + vertex_project: Optional[str] = None + + # Example: vertex_location = "us-central1" + vertex_location: Optional[str] = None + + # Example: vertex_credentials = "/path/to/credentials.json" or "os.environ/GOOGLE_CREDS" + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None diff --git a/.venv/lib/python3.12/site-packages/litellm/types/rerank.py b/.venv/lib/python3.12/site-packages/litellm/types/rerank.py new file mode 100644 index 00000000..8e2a8cc3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/rerank.py @@ -0,0 +1,78 @@ +""" +LiteLLM Follows the cohere API format for the re rank API +https://docs.cohere.com/reference/rerank + +""" + +from typing import List, Optional, Union + +from pydantic import BaseModel, PrivateAttr +from typing_extensions import Required, TypedDict + + +class RerankRequest(BaseModel): + model: str + query: str + top_n: Optional[int] = None + documents: List[Union[str, dict]] + rank_fields: Optional[List[str]] = None + return_documents: Optional[bool] = None + max_chunks_per_doc: Optional[int] = None + max_tokens_per_doc: Optional[int] = None + + + +class OptionalRerankParams(TypedDict, total=False): + query: str + top_n: Optional[int] + documents: List[Union[str, dict]] + rank_fields: Optional[List[str]] + return_documents: Optional[bool] + max_chunks_per_doc: Optional[int] + max_tokens_per_doc: Optional[int] + + +class RerankBilledUnits(TypedDict, total=False): + search_units: Optional[int] + total_tokens: Optional[int] + + +class RerankTokens(TypedDict, total=False): + input_tokens: Optional[int] + output_tokens: Optional[int] + + +class RerankResponseMeta(TypedDict, total=False): + api_version: Optional[dict] + billed_units: Optional[RerankBilledUnits] + tokens: Optional[RerankTokens] + + +class RerankResponseDocument(TypedDict): + text: str + + +class RerankResponseResult(TypedDict, total=False): + index: Required[int] + relevance_score: Required[float] + document: RerankResponseDocument + + +class RerankResponse(BaseModel): + id: Optional[str] = None + results: Optional[List[RerankResponseResult]] = ( + None # Contains index and relevance_score + ) + meta: Optional[RerankResponseMeta] = None # Contains api_version and billed_units + + # Define private attributes using PrivateAttr + _hidden_params: dict = PrivateAttr(default_factory=dict) + + def __getitem__(self, key): + return self.__dict__[key] + + def get(self, key, default=None): + return self.__dict__.get(key, default) + + def __contains__(self, key): + return key in self.__dict__ diff --git a/.venv/lib/python3.12/site-packages/litellm/types/router.py b/.venv/lib/python3.12/site-packages/litellm/types/router.py new file mode 100644 index 00000000..e34366aa --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/router.py @@ -0,0 +1,707 @@ +""" +litellm.Router Types - includes RouterConfig, UpdateRouterConfig, ModelInfo etc +""" + +import datetime +import enum 
+import uuid +from typing import Any, Dict, List, Literal, Optional, Tuple, Union, get_type_hints + +import httpx +from httpx import AsyncClient, Client +from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI +from pydantic import BaseModel, ConfigDict, Field +from typing_extensions import Required, TypedDict + +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler + +from ..exceptions import RateLimitError +from .completion import CompletionRequest +from .embedding import EmbeddingRequest +from .llms.vertex_ai import VERTEX_CREDENTIALS_TYPES +from .utils import ModelResponse, ProviderSpecificModelInfo + + +class ConfigurableClientsideParamsCustomAuth(TypedDict): + api_base: str + + +CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = Optional[ + List[Union[str, ConfigurableClientsideParamsCustomAuth]] +] + + +class ModelConfig(BaseModel): + model_name: str + litellm_params: Union[CompletionRequest, EmbeddingRequest] + tpm: int + rpm: int + + model_config = ConfigDict(protected_namespaces=()) + + +class RouterConfig(BaseModel): + model_list: List[ModelConfig] + + redis_url: Optional[str] = None + redis_host: Optional[str] = None + redis_port: Optional[int] = None + redis_password: Optional[str] = None + + cache_responses: Optional[bool] = False + cache_kwargs: Optional[Dict] = {} + caching_groups: Optional[List[Tuple[str, List[str]]]] = None + client_ttl: Optional[int] = 3600 + num_retries: Optional[int] = 0 + timeout: Optional[float] = None + default_litellm_params: Optional[Dict[str, str]] = {} + set_verbose: Optional[bool] = False + fallbacks: Optional[List] = [] + allowed_fails: Optional[int] = None + context_window_fallbacks: Optional[List] = [] + model_group_alias: Optional[Dict[str, List[str]]] = {} + retry_after: Optional[int] = 0 + routing_strategy: Literal[ + "simple-shuffle", + "least-busy", + "usage-based-routing", + "latency-based-routing", + ] = "simple-shuffle" + + model_config = ConfigDict(protected_namespaces=()) + + +class UpdateRouterConfig(BaseModel): + """ + Set of params that you can modify via `router.update_settings()`. + """ + + routing_strategy_args: Optional[dict] = None + routing_strategy: Optional[str] = None + model_group_retry_policy: Optional[dict] = None + allowed_fails: Optional[int] = None + cooldown_time: Optional[float] = None + num_retries: Optional[int] = None + timeout: Optional[float] = None + max_retries: Optional[int] = None + retry_after: Optional[float] = None + fallbacks: Optional[List[dict]] = None + context_window_fallbacks: Optional[List[dict]] = None + + model_config = ConfigDict(protected_namespaces=()) + + +class ModelInfo(BaseModel): + id: Optional[ + str + ] # Allow id to be optional on input, but it will always be present as a str in the model instance + db_model: bool = ( + False # used for proxy - to separate models which are stored in the db vs. config. 
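# --- Editor's illustrative note (not part of the committed diff) ---
# UpdateRouterConfig (defined just above) is, per its docstring, the set of
# params that can be changed after construction via router.update_settings().
# A minimal sketch with hypothetical values:
#
#   update = UpdateRouterConfig(
#       num_retries=3,
#       timeout=30.0,
#       routing_strategy="usage-based-routing",
#   )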
+ ) + updated_at: Optional[datetime.datetime] = None + updated_by: Optional[str] = None + + created_at: Optional[datetime.datetime] = None + created_by: Optional[str] = None + + base_model: Optional[str] = ( + None # specify if the base model is azure/gpt-3.5-turbo etc for accurate cost tracking + ) + tier: Optional[Literal["free", "paid"]] = None + + """ + Team Model Specific Fields + """ + # the team id that this model belongs to + team_id: Optional[str] = None + + # the model_name that can be used by the team when making LLM calls + team_public_model_name: Optional[str] = None + + def __init__(self, id: Optional[Union[str, int]] = None, **params): + if id is None: + id = str(uuid.uuid4()) # Generate a UUID if id is None or not provided + elif isinstance(id, int): + id = str(id) + super().__init__(id=id, **params) + + model_config = ConfigDict(extra="allow") + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class CredentialLiteLLMParams(BaseModel): + api_key: Optional[str] = None + api_base: Optional[str] = None + api_version: Optional[str] = None + ## VERTEX AI ## + vertex_project: Optional[str] = None + vertex_location: Optional[str] = None + vertex_credentials: Optional[Union[str, dict]] = None + ## UNIFIED PROJECT/REGION ## + region_name: Optional[str] = None + + ## AWS BEDROCK / SAGEMAKER ## + aws_access_key_id: Optional[str] = None + aws_secret_access_key: Optional[str] = None + aws_region_name: Optional[str] = None + ## IBM WATSONX ## + watsonx_region_name: Optional[str] = None + + +class GenericLiteLLMParams(CredentialLiteLLMParams): + """ + LiteLLM Params without 'model' arg (used across completion / assistants api) + """ + + custom_llm_provider: Optional[str] = None + tpm: Optional[int] = None + rpm: Optional[int] = None + timeout: Optional[Union[float, str, httpx.Timeout]] = ( + None # if str, pass in as os.environ/ + ) + stream_timeout: Optional[Union[float, str]] = ( + None # timeout when making stream=True calls, if str, pass in as os.environ/ + ) + max_retries: Optional[int] = None + organization: Optional[str] = None # for openai orgs + configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None + + ## LOGGING PARAMS ## + litellm_trace_id: Optional[str] = None + + ## CUSTOM PRICING ## + input_cost_per_token: Optional[float] = None + output_cost_per_token: Optional[float] = None + input_cost_per_second: Optional[float] = None + output_cost_per_second: Optional[float] = None + + max_file_size_mb: Optional[float] = None + + # Deployment budgets + max_budget: Optional[float] = None + budget_duration: Optional[str] = None + use_in_pass_through: Optional[bool] = False + model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) + merge_reasoning_content_in_choices: Optional[bool] = False + model_info: Optional[Dict] = None + + def __init__( + self, + custom_llm_provider: Optional[str] = None, + max_retries: Optional[Union[int, str]] = None, + tpm: Optional[int] = None, + rpm: Optional[int] = None, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + api_version: 
Optional[str] = None, + timeout: Optional[Union[float, str]] = None, # if str, pass in as os.environ/ + stream_timeout: Optional[Union[float, str]] = ( + None # timeout when making stream=True calls, if str, pass in as os.environ/ + ), + organization: Optional[str] = None, # for openai orgs + ## LOGGING PARAMS ## + litellm_trace_id: Optional[str] = None, + ## UNIFIED PROJECT/REGION ## + region_name: Optional[str] = None, + ## VERTEX AI ## + vertex_project: Optional[str] = None, + vertex_location: Optional[str] = None, + vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None, + ## AWS BEDROCK / SAGEMAKER ## + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + aws_region_name: Optional[str] = None, + ## IBM WATSONX ## + watsonx_region_name: Optional[str] = None, + input_cost_per_token: Optional[float] = None, + output_cost_per_token: Optional[float] = None, + input_cost_per_second: Optional[float] = None, + output_cost_per_second: Optional[float] = None, + max_file_size_mb: Optional[float] = None, + # Deployment budgets + max_budget: Optional[float] = None, + budget_duration: Optional[str] = None, + # Pass through params + use_in_pass_through: Optional[bool] = False, + # This will merge the reasoning content in the choices + merge_reasoning_content_in_choices: Optional[bool] = False, + model_info: Optional[Dict] = None, + **params, + ): + args = locals() + args.pop("max_retries", None) + args.pop("self", None) + args.pop("params", None) + args.pop("__class__", None) + if max_retries is not None and isinstance(max_retries, str): + max_retries = int(max_retries) # cast to int + # We need to keep max_retries in args since it's a parameter of GenericLiteLLMParams + args["max_retries"] = ( + max_retries # Put max_retries back in args after popping it + ) + super().__init__(**args, **params) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class LiteLLM_Params(GenericLiteLLMParams): + """ + LiteLLM Params with 'model' requirement - used for completions + """ + + model: str + model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) + + def __init__( + self, + model: str, + custom_llm_provider: Optional[str] = None, + max_retries: Optional[Union[int, str]] = None, + tpm: Optional[int] = None, + rpm: Optional[int] = None, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + api_version: Optional[str] = None, + timeout: Optional[Union[float, str]] = None, # if str, pass in as os.environ/ + stream_timeout: Optional[Union[float, str]] = ( + None # timeout when making stream=True calls, if str, pass in as os.environ/ + ), + organization: Optional[str] = None, # for openai orgs + ## VERTEX AI ## + vertex_project: Optional[str] = None, + vertex_location: Optional[str] = None, + ## AWS BEDROCK / SAGEMAKER ## + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, + aws_region_name: Optional[str] = None, + # OpenAI / Azure Whisper + # set a max-size of file that can be passed to litellm proxy + max_file_size_mb: Optional[float] = None, + # 
will use deployment on pass-through endpoints if True + use_in_pass_through: Optional[bool] = False, + **params, + ): + args = locals() + args.pop("max_retries", None) + args.pop("self", None) + args.pop("params", None) + args.pop("__class__", None) + if max_retries is not None and isinstance(max_retries, str): + max_retries = int(max_retries) # cast to int + super().__init__(max_retries=max_retries, **args, **params) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class updateLiteLLMParams(GenericLiteLLMParams): + # This class is used to update the LiteLLM_Params + # only differece is model is optional + model: Optional[str] = None + + +class updateDeployment(BaseModel): + model_name: Optional[str] = None + litellm_params: Optional[updateLiteLLMParams] = None + model_info: Optional[ModelInfo] = None + + model_config = ConfigDict(protected_namespaces=()) + + +class LiteLLMParamsTypedDict(TypedDict, total=False): + model: str + custom_llm_provider: Optional[str] + tpm: Optional[int] + rpm: Optional[int] + order: Optional[int] + weight: Optional[int] + max_parallel_requests: Optional[int] + api_key: Optional[str] + api_base: Optional[str] + api_version: Optional[str] + timeout: Optional[Union[float, str, httpx.Timeout]] + stream_timeout: Optional[Union[float, str]] + max_retries: Optional[int] + organization: Optional[Union[List, str]] # for openai orgs + configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS # for allowing api base switching on finetuned models + ## DROP PARAMS ## + drop_params: Optional[bool] + ## UNIFIED PROJECT/REGION ## + region_name: Optional[str] + ## VERTEX AI ## + vertex_project: Optional[str] + vertex_location: Optional[str] + ## AWS BEDROCK / SAGEMAKER ## + aws_access_key_id: Optional[str] + aws_secret_access_key: Optional[str] + aws_region_name: Optional[str] + ## IBM WATSONX ## + watsonx_region_name: Optional[str] + ## CUSTOM PRICING ## + input_cost_per_token: Optional[float] + output_cost_per_token: Optional[float] + input_cost_per_second: Optional[float] + output_cost_per_second: Optional[float] + num_retries: Optional[int] + ## MOCK RESPONSES ## + mock_response: Optional[Union[str, ModelResponse, Exception]] + + # routing params + # use this for tag-based routing + tags: Optional[List[str]] + + # deployment budgets + max_budget: Optional[float] + budget_duration: Optional[str] + + +class DeploymentTypedDict(TypedDict, total=False): + model_name: Required[str] + litellm_params: Required[LiteLLMParamsTypedDict] + model_info: dict + + +SPECIAL_MODEL_INFO_PARAMS = [ + "input_cost_per_token", + "output_cost_per_token", + "input_cost_per_character", + "output_cost_per_character", +] + + +class Deployment(BaseModel): + model_name: str + litellm_params: LiteLLM_Params + model_info: ModelInfo + + model_config = ConfigDict(extra="allow", protected_namespaces=()) + + def __init__( + self, + model_name: str, + litellm_params: LiteLLM_Params, + model_info: Optional[Union[ModelInfo, dict]] = None, + **params, + ): + if model_info is None: + model_info = ModelInfo() + elif isinstance(model_info, 
dict): + model_info = ModelInfo(**model_info) + + for ( + key + ) in ( + SPECIAL_MODEL_INFO_PARAMS + ): # ensures custom pricing info is consistently in 'model_info' + field = getattr(litellm_params, key, None) + if field is not None: + setattr(model_info, key, field) + + super().__init__( + model_info=model_info, + model_name=model_name, + litellm_params=litellm_params, + **params, + ) + + def to_json(self, **kwargs): + try: + return self.model_dump(**kwargs) # noqa + except Exception as e: + # if using pydantic v1 + return self.dict(**kwargs) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class RouterErrors(enum.Enum): + """ + Enum for router specific errors with common codes + """ + + user_defined_ratelimit_error = "Deployment over user-defined ratelimit." + no_deployments_available = "No deployments available for selected model" + no_deployments_with_tag_routing = ( + "Not allowed to access model due to tags configuration" + ) + no_deployments_with_provider_budget_routing = ( + "No deployments available - crossed budget" + ) + + +class AllowedFailsPolicy(BaseModel): + """ + Use this to set a custom number of allowed fails/minute before cooling down a deployment + If `AuthenticationErrorAllowedFails = 1000`, then 1000 AuthenticationError will be allowed before cooling down a deployment + + Mapping of Exception type to allowed_fails for each exception + https://docs.litellm.ai/docs/exception_mapping + """ + + BadRequestErrorAllowedFails: Optional[int] = None + AuthenticationErrorAllowedFails: Optional[int] = None + TimeoutErrorAllowedFails: Optional[int] = None + RateLimitErrorAllowedFails: Optional[int] = None + ContentPolicyViolationErrorAllowedFails: Optional[int] = None + InternalServerErrorAllowedFails: Optional[int] = None + + +class RetryPolicy(BaseModel): + """ + Use this to set a custom number of retries per exception type + If RateLimitErrorRetries = 3, then 3 retries will be made for RateLimitError + Mapping of Exception type to number of retries + https://docs.litellm.ai/docs/exception_mapping + """ + + BadRequestErrorRetries: Optional[int] = None + AuthenticationErrorRetries: Optional[int] = None + TimeoutErrorRetries: Optional[int] = None + RateLimitErrorRetries: Optional[int] = None + ContentPolicyViolationErrorRetries: Optional[int] = None + InternalServerErrorRetries: Optional[int] = None + + +class AlertingConfig(BaseModel): + """ + Use this configure alerting for the router. 
Receive alerts on the following events + - LLM API Exceptions + - LLM Responses Too Slow + - LLM Requests Hanging + + Args: + webhook_url: str - webhook url for alerting, slack provides a webhook url to send alerts to + alerting_threshold: Optional[float] = None - threshold for slow / hanging llm responses (in seconds) + """ + + webhook_url: str + alerting_threshold: Optional[float] = 300 + + +class ModelGroupInfo(BaseModel): + model_group: str + providers: List[str] + max_input_tokens: Optional[float] = None + max_output_tokens: Optional[float] = None + input_cost_per_token: Optional[float] = None + output_cost_per_token: Optional[float] = None + mode: Optional[ + Union[ + str, + Literal[ + "chat", + "embedding", + "completion", + "image_generation", + "audio_transcription", + "rerank", + "moderations", + ], + ] + ] = Field(default="chat") + tpm: Optional[int] = None + rpm: Optional[int] = None + supports_parallel_function_calling: bool = Field(default=False) + supports_vision: bool = Field(default=False) + supports_function_calling: bool = Field(default=False) + supported_openai_params: Optional[List[str]] = Field(default=[]) + configurable_clientside_auth_params: CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS = None + + def __init__(self, **data): + for field_name, field_type in get_type_hints(self.__class__).items(): + if field_type == bool and data.get(field_name) is None: + data[field_name] = False + super().__init__(**data) + + +class AssistantsTypedDict(TypedDict): + custom_llm_provider: Literal["azure", "openai"] + litellm_params: LiteLLMParamsTypedDict + + +class FineTuningConfig(BaseModel): + + custom_llm_provider: Literal["azure", "openai"] + + +class CustomRoutingStrategyBase: + async def async_get_available_deployment( + self, + model: str, + messages: Optional[List[Dict[str, str]]] = None, + input: Optional[Union[str, List]] = None, + specific_deployment: Optional[bool] = False, + request_kwargs: Optional[Dict] = None, + ): + """ + Asynchronously retrieves the available deployment based on the given parameters. + + Args: + model (str): The name of the model. + messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None. + input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None. + specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False. + request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None. + + Returns: + Returns an element from litellm.router.model_list + + """ + pass + + def get_available_deployment( + self, + model: str, + messages: Optional[List[Dict[str, str]]] = None, + input: Optional[Union[str, List]] = None, + specific_deployment: Optional[bool] = False, + request_kwargs: Optional[Dict] = None, + ): + """ + Synchronously retrieves the available deployment based on the given parameters. + + Args: + model (str): The name of the model. + messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None. + input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None. + specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False. + request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None. 
+ + Returns: + Returns an element from litellm.router.model_list + + """ + pass + + +class RouterGeneralSettings(BaseModel): + async_only_mode: bool = Field( + default=False + ) # this will only initialize async clients. Good for memory utils + pass_through_all_models: bool = Field( + default=False + ) # if passed a model not llm_router model list, pass through the request to litellm.acompletion/embedding + + +class RouterRateLimitErrorBasic(ValueError): + """ + Raise a basic error inside helper functions. + """ + + def __init__( + self, + model: str, + ): + self.model = model + _message = f"{RouterErrors.no_deployments_available.value}." + super().__init__(_message) + + +class RouterRateLimitError(ValueError): + def __init__( + self, + model: str, + cooldown_time: float, + enable_pre_call_checks: bool, + cooldown_list: List, + ): + self.model = model + self.cooldown_time = cooldown_time + self.enable_pre_call_checks = enable_pre_call_checks + self.cooldown_list = cooldown_list + _message = f"{RouterErrors.no_deployments_available.value}, Try again in {cooldown_time} seconds. Passed model={model}. pre-call-checks={enable_pre_call_checks}, cooldown_list={cooldown_list}" + super().__init__(_message) + + +class RouterModelGroupAliasItem(TypedDict): + model: str + hidden: bool # if 'True', don't return on `.get_model_list` + + +VALID_LITELLM_ENVIRONMENTS = [ + "development", + "staging", + "production", +] + + +class RoutingStrategy(enum.Enum): + LEAST_BUSY = "least-busy" + LATENCY_BASED = "latency-based-routing" + COST_BASED = "cost-based-routing" + USAGE_BASED_ROUTING_V2 = "usage-based-routing-v2" + USAGE_BASED_ROUTING = "usage-based-routing" + PROVIDER_BUDGET_LIMITING = "provider-budget-routing" + + +class RouterCacheEnum(enum.Enum): + TPM = "global_router:{id}:{model}:tpm:{current_minute}" + RPM = "global_router:{id}:{model}:rpm:{current_minute}" + + +class GenericBudgetWindowDetails(BaseModel): + """Details about a provider's budget window""" + + budget_start: float + spend_key: str + start_time_key: str + ttl_seconds: int + + +OptionalPreCallChecks = List[Literal["prompt_caching", "router_budget_limiting"]] diff --git a/.venv/lib/python3.12/site-packages/litellm/types/services.py b/.venv/lib/python3.12/site-packages/litellm/types/services.py new file mode 100644 index 00000000..3eb283db --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/services.py @@ -0,0 +1,39 @@ +import enum +import uuid +from typing import Optional + +from pydantic import BaseModel, Field + + +class ServiceTypes(str, enum.Enum): + """ + Enum for litellm + litellm-adjacent services (redis/postgres/etc.) + """ + + REDIS = "redis" + DB = "postgres" + BATCH_WRITE_TO_DB = "batch_write_to_db" + RESET_BUDGET_JOB = "reset_budget_job" + LITELLM = "self" + ROUTER = "router" + AUTH = "auth" + PROXY_PRE_CALL = "proxy_pre_call" + + +class ServiceLoggerPayload(BaseModel): + """ + The payload logged during service success/failure + """ + + is_error: bool = Field(description="did an error occur") + error: Optional[str] = Field(None, description="what was the error") + service: ServiceTypes = Field(description="who is this for? 
- postgres/redis") + duration: float = Field(description="How long did the request take?") + call_type: str = Field(description="The call of the service, being made") + + def to_json(self, **kwargs): + try: + return self.model_dump(**kwargs) # noqa + except Exception as e: + # if using pydantic v1 + return self.dict(**kwargs) diff --git a/.venv/lib/python3.12/site-packages/litellm/types/utils.py b/.venv/lib/python3.12/site-packages/litellm/types/utils.py new file mode 100644 index 00000000..a6654285 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/litellm/types/utils.py @@ -0,0 +1,2081 @@ +import json +import time +import uuid +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +from aiohttp import FormData +from openai._models import BaseModel as OpenAIObject +from openai.types.audio.transcription_create_params import FileTypes # type: ignore +from openai.types.completion_usage import ( + CompletionTokensDetails, + CompletionUsage, + PromptTokensDetails, +) +from openai.types.moderation import ( + Categories, + CategoryAppliedInputTypes, + CategoryScores, +) +from openai.types.moderation_create_response import Moderation, ModerationCreateResponse +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator +from typing_extensions import Callable, Dict, Required, TypedDict, override + +import litellm + +from ..litellm_core_utils.core_helpers import map_finish_reason +from .guardrails import GuardrailEventHooks +from .llms.openai import ( + Batch, + ChatCompletionThinkingBlock, + ChatCompletionToolCallChunk, + ChatCompletionUsageBlock, + OpenAIChatCompletionChunk, +) +from .rerank import RerankResponse + + +def _generate_id(): # private helper function + return "chatcmpl-" + str(uuid.uuid4()) + + +class LiteLLMPydanticObjectBase(BaseModel): + """ + Implements default functions, all pydantic objects should have. + """ + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump(**kwargs) # noqa + except Exception: + # if using pydantic v1 + return self.dict(**kwargs) + + def fields_set(self): + try: + return self.model_fields_set # noqa + except Exception: + # if using pydantic v1 + return self.__fields_set__ + + model_config = ConfigDict(protected_namespaces=()) + + +class LiteLLMCommonStrings(Enum): + redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'" + llm_provider_not_provided = "Unmapped LLM provider for this endpoint. You passed model={model}, custom_llm_provider={custom_llm_provider}. 
Check supported provider and route: https://docs.litellm.ai/docs/providers" + + +SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"] + + +class CostPerToken(TypedDict): + input_cost_per_token: float + output_cost_per_token: float + + +class ProviderField(TypedDict): + field_name: str + field_type: Literal["string"] + field_description: str + field_value: str + + +class ProviderSpecificModelInfo(TypedDict, total=False): + supports_system_messages: Optional[bool] + supports_response_schema: Optional[bool] + supports_vision: Optional[bool] + supports_function_calling: Optional[bool] + supports_tool_choice: Optional[bool] + supports_assistant_prefill: Optional[bool] + supports_prompt_caching: Optional[bool] + supports_audio_input: Optional[bool] + supports_embedding_image_input: Optional[bool] + supports_audio_output: Optional[bool] + supports_pdf_input: Optional[bool] + supports_native_streaming: Optional[bool] + supports_parallel_function_calling: Optional[bool] + + +class ModelInfoBase(ProviderSpecificModelInfo, total=False): + key: Required[str] # the key in litellm.model_cost which is returned + + max_tokens: Required[Optional[int]] + max_input_tokens: Required[Optional[int]] + max_output_tokens: Required[Optional[int]] + input_cost_per_token: Required[float] + cache_creation_input_token_cost: Optional[float] + cache_read_input_token_cost: Optional[float] + input_cost_per_character: Optional[float] # only for vertex ai models + input_cost_per_audio_token: Optional[float] + input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models + input_cost_per_character_above_128k_tokens: Optional[ + float + ] # only for vertex ai models + input_cost_per_query: Optional[float] # only for rerank models + input_cost_per_image: Optional[float] # only for vertex ai models + input_cost_per_audio_per_second: Optional[float] # only for vertex ai models + input_cost_per_video_per_second: Optional[float] # only for vertex ai models + input_cost_per_second: Optional[float] # for OpenAI Speech models + input_cost_per_token_batches: Optional[float] + output_cost_per_token_batches: Optional[float] + output_cost_per_token: Required[float] + output_cost_per_character: Optional[float] # only for vertex ai models + output_cost_per_audio_token: Optional[float] + output_cost_per_token_above_128k_tokens: Optional[ + float + ] # only for vertex ai models + output_cost_per_character_above_128k_tokens: Optional[ + float + ] # only for vertex ai models + output_cost_per_image: Optional[float] + output_vector_size: Optional[int] + output_cost_per_video_per_second: Optional[float] # only for vertex ai models + output_cost_per_audio_per_second: Optional[float] # only for vertex ai models + output_cost_per_second: Optional[float] # for OpenAI Speech models + + litellm_provider: Required[str] + mode: Required[ + Literal[ + "completion", "embedding", "image_generation", "chat", "audio_transcription" + ] + ] + tpm: Optional[int] + rpm: Optional[int] + + +class ModelInfo(ModelInfoBase, total=False): + """ + Model info for a given model, this is information found in litellm.model_prices_and_context_window.json + """ + + supported_openai_params: Required[Optional[List[str]]] + + +class GenericStreamingChunk(TypedDict, total=False): + text: Required[str] + tool_use: Optional[ChatCompletionToolCallChunk] + is_finished: Required[bool] + finish_reason: Required[str] + usage: Required[Optional[ChatCompletionUsageBlock]] + index: int + + # use this dict if you want to return any provider specific 
fields in the response + provider_specific_fields: Optional[Dict[str, Any]] + + +from enum import Enum + + +class CallTypes(Enum): + embedding = "embedding" + aembedding = "aembedding" + completion = "completion" + acompletion = "acompletion" + atext_completion = "atext_completion" + text_completion = "text_completion" + image_generation = "image_generation" + aimage_generation = "aimage_generation" + moderation = "moderation" + amoderation = "amoderation" + atranscription = "atranscription" + transcription = "transcription" + aspeech = "aspeech" + speech = "speech" + rerank = "rerank" + arerank = "arerank" + arealtime = "_arealtime" + create_batch = "create_batch" + acreate_batch = "acreate_batch" + aretrieve_batch = "aretrieve_batch" + retrieve_batch = "retrieve_batch" + pass_through = "pass_through_endpoint" + anthropic_messages = "anthropic_messages" + get_assistants = "get_assistants" + aget_assistants = "aget_assistants" + create_assistants = "create_assistants" + acreate_assistants = "acreate_assistants" + delete_assistant = "delete_assistant" + adelete_assistant = "adelete_assistant" + acreate_thread = "acreate_thread" + create_thread = "create_thread" + aget_thread = "aget_thread" + get_thread = "get_thread" + a_add_message = "a_add_message" + add_message = "add_message" + aget_messages = "aget_messages" + get_messages = "get_messages" + arun_thread = "arun_thread" + run_thread = "run_thread" + arun_thread_stream = "arun_thread_stream" + run_thread_stream = "run_thread_stream" + afile_retrieve = "afile_retrieve" + file_retrieve = "file_retrieve" + afile_delete = "afile_delete" + file_delete = "file_delete" + afile_list = "afile_list" + file_list = "file_list" + acreate_file = "acreate_file" + create_file = "create_file" + afile_content = "afile_content" + file_content = "file_content" + create_fine_tuning_job = "create_fine_tuning_job" + acreate_fine_tuning_job = "acreate_fine_tuning_job" + acancel_fine_tuning_job = "acancel_fine_tuning_job" + cancel_fine_tuning_job = "cancel_fine_tuning_job" + alist_fine_tuning_jobs = "alist_fine_tuning_jobs" + list_fine_tuning_jobs = "list_fine_tuning_jobs" + aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job" + retrieve_fine_tuning_job = "retrieve_fine_tuning_job" + responses = "responses" + aresponses = "aresponses" + + +CallTypesLiteral = Literal[ + "embedding", + "aembedding", + "completion", + "acompletion", + "atext_completion", + "text_completion", + "image_generation", + "aimage_generation", + "moderation", + "amoderation", + "atranscription", + "transcription", + "aspeech", + "speech", + "rerank", + "arerank", + "_arealtime", + "create_batch", + "acreate_batch", + "pass_through_endpoint", + "anthropic_messages", + "aretrieve_batch", + "retrieve_batch", +] + + +class PassthroughCallTypes(Enum): + passthrough_image_generation = "passthrough-image-generation" + + +class TopLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. 
+ """ + + +class ChatCompletionTokenLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. + """ + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + +class ChoiceLogprobs(OpenAIObject): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + +class FunctionCall(OpenAIObject): + arguments: str + name: Optional[str] = None + + +class Function(OpenAIObject): + arguments: str + name: Optional[ + str + ] # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None) + + def __init__( + self, + arguments: Optional[Union[Dict, str]], + name: Optional[str] = None, + **params, + ): + if arguments is None: + arguments = "" + elif isinstance(arguments, Dict): + arguments = json.dumps(arguments) + else: + arguments = arguments + + name = name + + # Build a dictionary with the structure your BaseModel expects + data = {"arguments": arguments, "name": name, **params} + + super(Function, self).__init__(**data) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ChatCompletionDeltaToolCall(OpenAIObject): + id: Optional[str] = None + function: Function + type: Optional[str] = None + index: int + + +class HiddenParams(OpenAIObject): + original_response: Optional[Union[str, Any]] = None + model_id: Optional[str] = None # used in Router for individual deployments + api_base: Optional[str] = None # returns api base used for making completion call + + model_config = ConfigDict(extra="allow", protected_namespaces=()) + + def get(self, key, 
default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class ChatCompletionMessageToolCall(OpenAIObject): + def __init__( + self, + function: Union[Dict, Function], + id: Optional[str] = None, + type: Optional[str] = None, + **params, + ): + super(ChatCompletionMessageToolCall, self).__init__(**params) + if isinstance(function, Dict): + self.function = Function(**function) + else: + self.function = function + + if id is not None: + self.id = id + else: + self.id = f"{uuid.uuid4()}" + + if type is not None: + self.type = type + else: + self.type = "function" + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +from openai.types.chat.chat_completion_audio import ChatCompletionAudio + + +class ChatCompletionAudioResponse(ChatCompletionAudio): + + def __init__( + self, + data: str, + expires_at: int, + transcript: str, + id: Optional[str] = None, + **params, + ): + if id is not None: + id = id + else: + id = f"{uuid.uuid4()}" + super(ChatCompletionAudioResponse, self).__init__( + data=data, expires_at=expires_at, transcript=transcript, id=id, **params + ) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +""" +Reference: +ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None)) +""" + + +def add_provider_specific_fields( + object: BaseModel, provider_specific_fields: Optional[Dict[str, Any]] +): + if not provider_specific_fields: # set if provider_specific_fields is not empty + return + setattr(object, "provider_specific_fields", provider_specific_fields) + + +class Message(OpenAIObject): + content: Optional[str] + role: Literal["assistant", "user", "system", "tool", "function"] + tool_calls: Optional[List[ChatCompletionMessageToolCall]] + function_call: Optional[FunctionCall] + audio: Optional[ChatCompletionAudioResponse] = None + reasoning_content: Optional[str] = None + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None + provider_specific_fields: Optional[Dict[str, Any]] = Field( + default=None, exclude=True + ) + + def __init__( + self, + content: Optional[str] = None, + role: Literal["assistant"] = "assistant", + function_call=None, + tool_calls: 
Optional[list] = None, + audio: Optional[ChatCompletionAudioResponse] = None, + provider_specific_fields: Optional[Dict[str, Any]] = None, + reasoning_content: Optional[str] = None, + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None, + **params, + ): + init_values: Dict[str, Any] = { + "content": content, + "role": role or "assistant", # handle null input + "function_call": ( + FunctionCall(**function_call) if function_call is not None else None + ), + "tool_calls": ( + [ + ( + ChatCompletionMessageToolCall(**tool_call) + if isinstance(tool_call, dict) + else tool_call + ) + for tool_call in tool_calls + ] + if tool_calls is not None and len(tool_calls) > 0 + else None + ), + } + + if audio is not None: + init_values["audio"] = audio + + if thinking_blocks is not None: + init_values["thinking_blocks"] = thinking_blocks + + if reasoning_content is not None: + init_values["reasoning_content"] = reasoning_content + + super(Message, self).__init__( + **init_values, # type: ignore + **params, + ) + + if audio is None: + # delete audio from self + # OpenAI compatible APIs like mistral API will raise an error if audio is passed in + del self.audio + + if reasoning_content is None: + # ensure default response matches OpenAI spec + del self.reasoning_content + + if thinking_blocks is None: + # ensure default response matches OpenAI spec + del self.thinking_blocks + + add_provider_specific_fields(self, provider_specific_fields) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class Delta(OpenAIObject): + reasoning_content: Optional[str] = None + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None + provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None) + + def __init__( + self, + content=None, + role=None, + function_call=None, + tool_calls=None, + audio: Optional[ChatCompletionAudioResponse] = None, + reasoning_content: Optional[str] = None, + thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None, + **params, + ): + super(Delta, self).__init__(**params) + add_provider_specific_fields(self, params.get("provider_specific_fields", {})) + self.content = content + self.role = role + # Set default values and correct types + self.function_call: Optional[Union[FunctionCall, Any]] = None + self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None + self.audio: Optional[ChatCompletionAudioResponse] = None + + if reasoning_content is not None: + self.reasoning_content = reasoning_content + else: + # ensure default response matches OpenAI spec + del self.reasoning_content + + if thinking_blocks is not None: + self.thinking_blocks = thinking_blocks + else: + # ensure default response matches OpenAI spec + del self.thinking_blocks + + if function_call is not None and isinstance(function_call, dict): + self.function_call = FunctionCall(**function_call) + else: + self.function_call = function_call + if tool_calls is not None and isinstance(tool_calls, list): + self.tool_calls = [] + for tool_call in tool_calls: + if 
isinstance(tool_call, dict): + if tool_call.get("index", None) is None: + tool_call["index"] = 0 + self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) + elif isinstance(tool_call, ChatCompletionDeltaToolCall): + self.tool_calls.append(tool_call) + else: + self.tool_calls = tool_calls + + self.audio = audio + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Choices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + message: Optional[Union[Message, dict]] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(Choices, self).__init__(**params) + if finish_reason is not None: + self.finish_reason = map_finish_reason( + finish_reason + ) # set finish_reason for all responses + else: + self.finish_reason = "stop" + self.index = index + if message is None: + self.message = Message() + else: + if isinstance(message, Message): + self.message = message + elif isinstance(message, dict): + self.message = Message(**message) + if logprobs is not None: + if isinstance(logprobs, dict): + self.logprobs = ChoiceLogprobs(**logprobs) + else: + self.logprobs = logprobs + if enhancements is not None: + self.enhancements = enhancements + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class CompletionTokensDetailsWrapper( + CompletionTokensDetails +): # wrapper for older openai versions + text_tokens: Optional[int] = None + """Text tokens generated by the model.""" + + +class PromptTokensDetailsWrapper( + PromptTokensDetails +): # wrapper for older openai versions + text_tokens: Optional[int] = None + """Text tokens sent to the model.""" + + image_tokens: Optional[int] = None + """Image tokens sent to the model.""" + + +class Usage(CompletionUsage): + _cache_creation_input_tokens: int = PrivateAttr( + 0 + ) # hidden param for prompt caching. Might change, once openai introduces their equivalent. + _cache_read_input_tokens: int = PrivateAttr( + 0 + ) # hidden param for prompt caching. Might change, once openai introduces their equivalent. 
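# --- Editor's illustrative note (not part of the committed diff) ---
# These two PrivateAttrs are populated in __init__ below from provider-specific
# usage params (Anthropic's "cache_creation_input_tokens" /
# "cache_read_input_tokens", DeepSeek's "prompt_cache_hit_tokens"). Being
# private attributes, they are not included in model_dump() output. A sketch
# with hypothetical values:
#
#   u = Usage(prompt_tokens=10, completion_tokens=2,
#             cache_read_input_tokens=8)   # Anthropic-style extra param
#   u._cache_read_input_tokens             # -> 8
#   u.prompt_tokens_details.cached_tokens  # -> 8 (mapped in __init__ below)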
+ + def __init__( + self, + prompt_tokens: Optional[int] = None, + completion_tokens: Optional[int] = None, + total_tokens: Optional[int] = None, + reasoning_tokens: Optional[int] = None, + prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None, + completion_tokens_details: Optional[ + Union[CompletionTokensDetailsWrapper, dict] + ] = None, + **params, + ): + # handle reasoning_tokens + _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None + if reasoning_tokens: + completion_tokens_details = CompletionTokensDetailsWrapper( + reasoning_tokens=reasoning_tokens + ) + + # Ensure completion_tokens_details is properly handled + if completion_tokens_details: + if isinstance(completion_tokens_details, dict): + _completion_tokens_details = CompletionTokensDetailsWrapper( + **completion_tokens_details + ) + elif isinstance(completion_tokens_details, CompletionTokensDetails): + _completion_tokens_details = completion_tokens_details + + ## DEEPSEEK MAPPING ## + if "prompt_cache_hit_tokens" in params and isinstance( + params["prompt_cache_hit_tokens"], int + ): + if prompt_tokens_details is None: + prompt_tokens_details = PromptTokensDetailsWrapper( + cached_tokens=params["prompt_cache_hit_tokens"] + ) + + ## ANTHROPIC MAPPING ## + if "cache_read_input_tokens" in params and isinstance( + params["cache_read_input_tokens"], int + ): + if prompt_tokens_details is None: + prompt_tokens_details = PromptTokensDetailsWrapper( + cached_tokens=params["cache_read_input_tokens"] + ) + + # handle prompt_tokens_details + _prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None + if prompt_tokens_details: + if isinstance(prompt_tokens_details, dict): + _prompt_tokens_details = PromptTokensDetailsWrapper( + **prompt_tokens_details + ) + elif isinstance(prompt_tokens_details, PromptTokensDetails): + _prompt_tokens_details = prompt_tokens_details + + super().__init__( + prompt_tokens=prompt_tokens or 0, + completion_tokens=completion_tokens or 0, + total_tokens=total_tokens or 0, + completion_tokens_details=_completion_tokens_details or None, + prompt_tokens_details=_prompt_tokens_details or None, + ) + + ## ANTHROPIC MAPPING ## + if "cache_creation_input_tokens" in params and isinstance( + params["cache_creation_input_tokens"], int + ): + self._cache_creation_input_tokens = params["cache_creation_input_tokens"] + + if "cache_read_input_tokens" in params and isinstance( + params["cache_read_input_tokens"], int + ): + self._cache_read_input_tokens = params["cache_read_input_tokens"] + + ## DEEPSEEK MAPPING ## + if "prompt_cache_hit_tokens" in params and isinstance( + params["prompt_cache_hit_tokens"], int + ): + self._cache_read_input_tokens = params["prompt_cache_hit_tokens"] + + for k, v in params.items(): + setattr(self, k, v) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class StreamingChoices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + delta: Optional[Delta] = None, + logprobs=None, + enhancements=None, + **params, + ): + # Fix Perplexity return both delta and message 
cause OpenWebUI repect text + # https://github.com/BerriAI/litellm/issues/8455 + params.pop("message", None) + super(StreamingChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = map_finish_reason(finish_reason) + else: + self.finish_reason = None + self.index = index + if delta is not None: + + if isinstance(delta, Delta): + self.delta = delta + elif isinstance(delta, dict): + self.delta = Delta(**delta) + else: + self.delta = Delta() + if enhancements is not None: + self.enhancements = enhancements + + if logprobs is not None and isinstance(logprobs, dict): + self.logprobs = ChoiceLogprobs(**logprobs) + else: + self.logprobs = logprobs # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class StreamingChatCompletionChunk(OpenAIChatCompletionChunk): + def __init__(self, **kwargs): + + new_choices = [] + for choice in kwargs["choices"]: + new_choice = StreamingChoices(**choice).model_dump() + new_choices.append(new_choice) + kwargs["choices"] = new_choices + + super().__init__(**kwargs) + + +from openai.types.chat import ChatCompletionChunk + + +class ModelResponseBase(OpenAIObject): + id: str + """A unique identifier for the completion.""" + + created: int + """The Unix timestamp (in seconds) of when the completion was created.""" + + model: Optional[str] = None + """The model used for completion.""" + + object: str + """The object type, which is always "text_completion" """ + + system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. 
+ """ + + _hidden_params: dict = {} + + _response_headers: Optional[dict] = None + + +class ModelResponseStream(ModelResponseBase): + choices: List[StreamingChoices] + provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None) + + def __init__( + self, + choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None, + id: Optional[str] = None, + created: Optional[int] = None, + provider_specific_fields: Optional[Dict[str, Any]] = None, + **kwargs, + ): + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + _new_choice = None + if isinstance(choice, StreamingChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = StreamingChoices(**choice) + elif isinstance(choice, BaseModel): + _new_choice = StreamingChoices(**choice.model_dump()) + new_choices.append(_new_choice) + kwargs["choices"] = new_choices + else: + kwargs["choices"] = [StreamingChoices()] + + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + + if ( + "usage" in kwargs + and kwargs["usage"] is not None + and isinstance(kwargs["usage"], dict) + ): + kwargs["usage"] = Usage(**kwargs["usage"]) + + kwargs["id"] = id + kwargs["created"] = created + kwargs["object"] = "chat.completion.chunk" + kwargs["provider_specific_fields"] = provider_specific_fields + + super().__init__(**kwargs) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class ModelResponse(ModelResponseBase): + choices: List[Union[Choices, StreamingChoices]] + """The list of completion choices the model generated for the input prompt.""" + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + object=None, + system_fingerprint=None, + usage=None, + stream=None, + stream_options=None, + response_ms=None, + hidden_params=None, + _response_headers=None, + **params, + ) -> None: + if stream is not None and stream is True: + object = "chat.completion.chunk" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + _new_choice = None + if isinstance(choice, StreamingChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = StreamingChoices(**choice) + elif isinstance(choice, BaseModel): + _new_choice = StreamingChoices(**choice.model_dump()) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [StreamingChoices()] + else: + object = "chat.completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, Choices): + _new_choice = choice # type: ignore + elif isinstance(choice, dict): + _new_choice = Choices(**choice) # type: ignore + else: + _new_choice = choice + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [Choices()] + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + model = model + if usage is not None: + 
if isinstance(usage, dict): + usage = Usage(**usage) + else: + usage = usage + elif stream is None or stream is False: + usage = Usage() + if hidden_params: + self._hidden_params = hidden_params + + if _response_headers: + self._response_headers = _response_headers + + init_values = { + "id": id, + "choices": choices, + "created": created, + "model": model, + "object": object, + "system_fingerprint": system_fingerprint, + } + + if usage is not None: + init_values["usage"] = usage + + super().__init__( + **init_values, + **params, + ) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class Embedding(OpenAIObject): + embedding: Union[list, str] = [] + index: int + object: Literal["embedding"] + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class EmbeddingResponse(OpenAIObject): + model: Optional[str] = None + """The model used for embedding.""" + + data: List + """The actual embedding value""" + + object: Literal["list"] + """The object type, which is always "list" """ + + usage: Optional[Usage] = None + """Usage statistics for the embedding request.""" + + _hidden_params: dict = {} + _response_headers: Optional[Dict] = None + _response_ms: Optional[float] = None + + def __init__( + self, + model: Optional[str] = None, + usage: Optional[Usage] = None, + response_ms=None, + data: Optional[Union[List, List[Embedding]]] = None, + hidden_params=None, + _response_headers=None, + **params, + ): + object = "list" + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = [] + + if usage: + usage = usage + else: + usage = Usage() + + if _response_headers: + self._response_headers = _response_headers + + model = model + super().__init__(model=model, object=object, data=data, usage=usage) # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class Logprobs(OpenAIObject): + text_offset: Optional[List[int]] + token_logprobs: Optional[List[Union[float, None]]] + tokens: Optional[List[str]] + top_logprobs: Optional[List[Union[Dict[str, float], None]]] + + +class TextChoices(OpenAIObject): + 
def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): + super(TextChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = map_finish_reason(finish_reason) + else: + self.finish_reason = None + self.index = index + if text is not None: + self.text = text + else: + self.text = None + if logprobs is None: + self.logprobs = None + else: + if isinstance(logprobs, dict): + self.logprobs = Logprobs(**logprobs) + else: + self.logprobs = logprobs + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class TextCompletionResponse(OpenAIObject): + """ + { + "id": response["id"], + "object": "text_completion", + "created": response["created"], + "model": response["model"], + "choices": [ + { + "text": response["choices"][0]["message"]["content"], + "index": response["choices"][0]["index"], + "logprobs": transformed_logprobs, + "finish_reason": response["choices"][0]["finish_reason"] + } + ], + "usage": response["usage"] + } + """ + + id: str + object: str + created: int + model: Optional[str] + choices: List[TextChoices] + usage: Optional[Usage] + _response_ms: Optional[int] = None + _hidden_params: HiddenParams + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + usage=None, + stream=False, + response_ms=None, + object=None, + **params, + ): + if stream: + object = "text_completion.chunk" + choices = [TextChoices()] + else: + object = "text_completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + _new_choice = None + if isinstance(choice, TextChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = TextChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [TextChoices()] + if object is not None: + object = object + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + + model = model + if usage: + usage = usage + else: + usage = Usage() + + super(TextCompletionResponse, self).__init__( + id=id, # type: ignore + object=object, # type: ignore + created=created, # type: ignore + model=model, # type: ignore + choices=choices, # type: ignore + usage=usage, # type: ignore + **params, + ) + + if response_ms: + self._response_ms = response_ms + else: + self._response_ms = None + self._hidden_params = HiddenParams() + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + 
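The response classes above all layer the same dict-style helpers (__contains__, get, __getitem__, and a pydantic-version-tolerant json) on top of their typed fields. A minimal sketch of how that access pattern behaves, assuming these classes are importable from litellm.types.utils (the module path is inferred from this diff and may differ between litellm versions), with purely illustrative values:

from litellm.types.utils import TextCompletionResponse  # assumed module path

# Build a completion response from a plain dict choice; __init__ coerces it
# into TextChoices and fills id/created/usage with defaults.
resp = TextCompletionResponse(
    model="example-model",  # illustrative value, not taken from the patch
    choices=[{"text": "Hello!", "index": 0, "finish_reason": "stop"}],
)

assert "choices" in resp            # custom __contains__ -> hasattr check
print(resp["choices"][0].text)      # dict-style read via __getitem__
print(resp.get("missing", None))    # .get falls back to the default instead of raising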
+from openai.types.images_response import Image as OpenAIImage + + +class ImageObject(OpenAIImage): + """ + Represents the url or the content of an image generated by the OpenAI API. + + Attributes: + b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. + url: The URL of the generated image, if response_format is url (default). + revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. + + https://platform.openai.com/docs/api-reference/images/object + """ + + b64_json: Optional[str] = None + url: Optional[str] = None + revised_prompt: Optional[str] = None + + def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs): + super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +from openai.types.images_response import ImagesResponse as OpenAIImageResponse + + +class ImageResponse(OpenAIImageResponse): + _hidden_params: dict = {} + usage: Usage + + def __init__( + self, + created: Optional[int] = None, + data: Optional[List[ImageObject]] = None, + response_ms=None, + usage: Optional[Usage] = None, + hidden_params: Optional[dict] = None, + ): + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = [] + + if created: + created = created + else: + created = int(time.time()) + + _data: List[OpenAIImage] = [] + for d in data: + if isinstance(d, dict): + _data.append(ImageObject(**d)) + elif isinstance(d, BaseModel): + _data.append(ImageObject(**d.model_dump())) + _usage = usage or Usage( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0, + ) + super().__init__(created=created, data=_data, usage=_usage) # type: ignore + self._hidden_params = hidden_params or {} + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class TranscriptionResponse(OpenAIObject): + text: Optional[str] = None + + _hidden_params: dict = {} + _response_headers: Optional[dict] = None + + def __init__(self, text=None): + super().__init__(text=text) # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default 
value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class GenericImageParsingChunk(TypedDict): + type: str + media_type: str + data: str + + +class ResponseFormatChunk(TypedDict, total=False): + type: Required[Literal["json_object", "text"]] + response_schema: dict + + +class LoggedLiteLLMParams(TypedDict, total=False): + force_timeout: Optional[float] + custom_llm_provider: Optional[str] + api_base: Optional[str] + litellm_call_id: Optional[str] + model_alias_map: Optional[dict] + metadata: Optional[dict] + model_info: Optional[dict] + proxy_server_request: Optional[dict] + acompletion: Optional[bool] + preset_cache_key: Optional[str] + no_log: Optional[bool] + input_cost_per_second: Optional[float] + input_cost_per_token: Optional[float] + output_cost_per_token: Optional[float] + output_cost_per_second: Optional[float] + cooldown_time: Optional[float] + + +class AdapterCompletionStreamWrapper: + def __init__(self, completion_stream): + self.completion_stream = completion_stream + + def __iter__(self): + return self + + def __aiter__(self): + return self + + def __next__(self): + try: + for chunk in self.completion_stream: + if chunk == "None" or chunk is None: + raise Exception + return chunk + raise StopIteration + except StopIteration: + raise StopIteration + except Exception as e: + print(f"AdapterCompletionStreamWrapper - {e}") # noqa + + async def __anext__(self): + try: + async for chunk in self.completion_stream: + if chunk == "None" or chunk is None: + raise Exception + return chunk + raise StopIteration + except StopIteration: + raise StopAsyncIteration + + +class StandardLoggingUserAPIKeyMetadata(TypedDict): + user_api_key_hash: Optional[str] # hash of the litellm virtual key used + user_api_key_alias: Optional[str] + user_api_key_org_id: Optional[str] + user_api_key_team_id: Optional[str] + user_api_key_user_id: Optional[str] + user_api_key_user_email: Optional[str] + user_api_key_team_alias: Optional[str] + user_api_key_end_user_id: Optional[str] + + +class StandardLoggingPromptManagementMetadata(TypedDict): + prompt_id: str + prompt_variables: Optional[dict] + prompt_integration: str + + +class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata): + """ + Specific metadata k,v pairs logged to integration for easier cost tracking and prompt management + """ + + spend_logs_metadata: Optional[ + dict + ] # special param to log k,v pairs to spendlogs for a call + requester_ip_address: Optional[str] + requester_metadata: Optional[dict] + prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata] + applied_guardrails: Optional[List[str]] + + +class StandardLoggingAdditionalHeaders(TypedDict, total=False): + x_ratelimit_limit_requests: int + x_ratelimit_limit_tokens: int + x_ratelimit_remaining_requests: int + x_ratelimit_remaining_tokens: int + + +class StandardLoggingHiddenParams(TypedDict): + model_id: Optional[ + str + ] # id of the model in the router, separates multiple models with the same name but different credentials + cache_key: Optional[str] + api_base: Optional[str] + response_cost: Optional[str] + litellm_overhead_time_ms: Optional[float] + 
additional_headers: Optional[StandardLoggingAdditionalHeaders] + batch_models: Optional[List[str]] + litellm_model_name: Optional[str] # the model name sent to the provider by litellm + + +class StandardLoggingModelInformation(TypedDict): + model_map_key: str + model_map_value: Optional[ModelInfo] + + +class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False): + """ + Debug information, if cost tracking fails. + + Avoid logging sensitive information like response or optional params + """ + + error_str: Required[str] + traceback_str: Required[str] + model: str + cache_hit: Optional[bool] + custom_llm_provider: Optional[str] + base_model: Optional[str] + call_type: str + custom_pricing: Optional[bool] + + +class StandardLoggingPayloadErrorInformation(TypedDict, total=False): + error_code: Optional[str] + error_class: Optional[str] + llm_provider: Optional[str] + traceback: Optional[str] + error_message: Optional[str] + + +class StandardLoggingGuardrailInformation(TypedDict, total=False): + guardrail_name: Optional[str] + guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]] + guardrail_response: Optional[Union[dict, str]] + guardrail_status: Literal["success", "failure"] + + +StandardLoggingPayloadStatus = Literal["success", "failure"] + + +class StandardLoggingPayload(TypedDict): + id: str + trace_id: str # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries) + call_type: str + stream: Optional[bool] + response_cost: float + response_cost_failure_debug_info: Optional[ + StandardLoggingModelCostFailureDebugInformation + ] + status: StandardLoggingPayloadStatus + custom_llm_provider: Optional[str] + total_tokens: int + prompt_tokens: int + completion_tokens: int + startTime: float # Note: making this camelCase was a mistake, everything should be snake case + endTime: float + completionStartTime: float + response_time: float + model_map_information: StandardLoggingModelInformation + model: str + model_id: Optional[str] + model_group: Optional[str] + api_base: str + metadata: StandardLoggingMetadata + cache_hit: Optional[bool] + cache_key: Optional[str] + saved_cache_cost: float + request_tags: list + end_user: Optional[str] + requester_ip_address: Optional[str] + messages: Optional[Union[str, list, dict]] + response: Optional[Union[str, list, dict]] + error_str: Optional[str] + error_information: Optional[StandardLoggingPayloadErrorInformation] + model_parameters: dict + hidden_params: StandardLoggingHiddenParams + guardrail_information: Optional[StandardLoggingGuardrailInformation] + + +from typing import AsyncIterator, Iterator + + +class CustomStreamingDecoder: + async def aiter_bytes( + self, iterator: AsyncIterator[bytes] + ) -> AsyncIterator[ + Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]] + ]: + raise NotImplementedError + + def iter_bytes( + self, iterator: Iterator[bytes] + ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]: + raise NotImplementedError + + +class StandardPassThroughResponseObject(TypedDict): + response: str + + +OPENAI_RESPONSE_HEADERS = [ + "x-ratelimit-remaining-requests", + "x-ratelimit-remaining-tokens", + "x-ratelimit-limit-requests", + "x-ratelimit-limit-tokens", + "x-ratelimit-reset-requests", + "x-ratelimit-reset-tokens", +] + + +class StandardCallbackDynamicParams(TypedDict, total=False): + # Langfuse dynamic params + langfuse_public_key: Optional[str] + langfuse_secret: Optional[str] + langfuse_secret_key: Optional[str] + 
langfuse_host: Optional[str] + + # GCS dynamic params + gcs_bucket_name: Optional[str] + gcs_path_service_account: Optional[str] + + # Langsmith dynamic params + langsmith_api_key: Optional[str] + langsmith_project: Optional[str] + langsmith_base_url: Optional[str] + + # Humanloop dynamic params + humanloop_api_key: Optional[str] + + # Arize dynamic params + arize_api_key: Optional[str] + arize_space_key: Optional[str] + + # Logging settings + turn_off_message_logging: Optional[bool] # when true will not log messages + + +all_litellm_params = [ + "metadata", + "litellm_metadata", + "litellm_trace_id", + "tags", + "acompletion", + "aimg_generation", + "atext_completion", + "text_completion", + "caching", + "mock_response", + "mock_timeout", + "disable_add_transform_inline_image_block", + "api_key", + "api_version", + "prompt_id", + "provider_specific_header", + "prompt_variables", + "api_base", + "force_timeout", + "logger_fn", + "verbose", + "custom_llm_provider", + "litellm_logging_obj", + "litellm_call_id", + "use_client", + "id", + "fallbacks", + "azure", + "headers", + "model_list", + "num_retries", + "context_window_fallback_dict", + "retry_policy", + "retry_strategy", + "roles", + "final_prompt_value", + "bos_token", + "eos_token", + "request_timeout", + "complete_response", + "self", + "client", + "rpm", + "tpm", + "max_parallel_requests", + "input_cost_per_token", + "output_cost_per_token", + "input_cost_per_second", + "output_cost_per_second", + "hf_model_name", + "model_info", + "proxy_server_request", + "preset_cache_key", + "caching_groups", + "ttl", + "cache", + "no-log", + "base_model", + "stream_timeout", + "supports_system_message", + "region_name", + "allowed_model_region", + "model_config", + "fastest_response", + "cooldown_time", + "cache_key", + "max_retries", + "azure_ad_token_provider", + "tenant_id", + "client_id", + "azure_username", + "azure_password", + "client_secret", + "user_continue_message", + "configurable_clientside_auth_params", + "weight", + "ensure_alternating_roles", + "assistant_continue_message", + "user_continue_message", + "fallback_depth", + "max_fallbacks", + "max_budget", + "budget_duration", + "use_in_pass_through", + "merge_reasoning_content_in_choices", + "litellm_credential_name", +] + list(StandardCallbackDynamicParams.__annotations__.keys()) + + +class KeyGenerationConfig(TypedDict, total=False): + required_params: List[ + str + ] # specify params that must be present in the key generation request + + +class TeamUIKeyGenerationConfig(KeyGenerationConfig): + allowed_team_member_roles: List[str] + + +class PersonalUIKeyGenerationConfig(KeyGenerationConfig): + allowed_user_roles: List[str] + + +class StandardKeyGenerationConfig(TypedDict, total=False): + team_key_generation: TeamUIKeyGenerationConfig + personal_key_generation: PersonalUIKeyGenerationConfig + + +class BudgetConfig(BaseModel): + max_budget: Optional[float] = None + budget_duration: Optional[str] = None + tpm_limit: Optional[int] = None + rpm_limit: Optional[int] = None + + def __init__(self, **data: Any) -> None: + # Map time_period to budget_duration if present + if "time_period" in data: + data["budget_duration"] = data.pop("time_period") + + # Map budget_limit to max_budget if present + if "budget_limit" in data: + data["max_budget"] = data.pop("budget_limit") + + super().__init__(**data) + + +GenericBudgetConfigType = Dict[str, BudgetConfig] + + +class LlmProviders(str, Enum): + OPENAI = "openai" + OPENAI_LIKE = "openai_like" # embedding only + JINA_AI = "jina_ai" + XAI = 
"xai" + CUSTOM_OPENAI = "custom_openai" + TEXT_COMPLETION_OPENAI = "text-completion-openai" + COHERE = "cohere" + COHERE_CHAT = "cohere_chat" + CLARIFAI = "clarifai" + ANTHROPIC = "anthropic" + ANTHROPIC_TEXT = "anthropic_text" + REPLICATE = "replicate" + HUGGINGFACE = "huggingface" + TOGETHER_AI = "together_ai" + OPENROUTER = "openrouter" + VERTEX_AI = "vertex_ai" + VERTEX_AI_BETA = "vertex_ai_beta" + GEMINI = "gemini" + AI21 = "ai21" + BASETEN = "baseten" + AZURE = "azure" + AZURE_TEXT = "azure_text" + AZURE_AI = "azure_ai" + SAGEMAKER = "sagemaker" + SAGEMAKER_CHAT = "sagemaker_chat" + BEDROCK = "bedrock" + VLLM = "vllm" + NLP_CLOUD = "nlp_cloud" + PETALS = "petals" + OOBABOOGA = "oobabooga" + OLLAMA = "ollama" + OLLAMA_CHAT = "ollama_chat" + DEEPINFRA = "deepinfra" + PERPLEXITY = "perplexity" + MISTRAL = "mistral" + GROQ = "groq" + NVIDIA_NIM = "nvidia_nim" + CEREBRAS = "cerebras" + AI21_CHAT = "ai21_chat" + VOLCENGINE = "volcengine" + CODESTRAL = "codestral" + TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" + DEEPSEEK = "deepseek" + SAMBANOVA = "sambanova" + MARITALK = "maritalk" + VOYAGE = "voyage" + CLOUDFLARE = "cloudflare" + XINFERENCE = "xinference" + FIREWORKS_AI = "fireworks_ai" + FRIENDLIAI = "friendliai" + WATSONX = "watsonx" + WATSONX_TEXT = "watsonx_text" + TRITON = "triton" + PREDIBASE = "predibase" + DATABRICKS = "databricks" + EMPOWER = "empower" + GITHUB = "github" + CUSTOM = "custom" + LITELLM_PROXY = "litellm_proxy" + HOSTED_VLLM = "hosted_vllm" + LM_STUDIO = "lm_studio" + GALADRIEL = "galadriel" + INFINITY = "infinity" + DEEPGRAM = "deepgram" + AIOHTTP_OPENAI = "aiohttp_openai" + LANGFUSE = "langfuse" + HUMANLOOP = "humanloop" + TOPAZ = "topaz" + ASSEMBLYAI = "assemblyai" + SNOWFLAKE = "snowflake" + + +# Create a set of all provider values for quick lookup +LlmProvidersSet = {provider.value for provider in LlmProviders} + + +class LiteLLMLoggingBaseClass: + """ + Base class for logging pre and post call + + Meant to simplify type checking for logging obj. + """ + + def pre_call(self, input, api_key, model=None, additional_args={}): + pass + + def post_call( + self, original_response, input=None, api_key=None, additional_args={} + ): + pass + + +class CustomHuggingfaceTokenizer(TypedDict): + identifier: str + revision: str # usually 'main' + auth_token: Optional[str] + + +class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum): + """ + Try using an enum for endpoints. This should make it easier to track what provider is supported for what endpoint. 
+ """ + + OPENAI = LlmProviders.OPENAI.value + TOPAZ = LlmProviders.TOPAZ.value + + +class HttpHandlerRequestFields(TypedDict, total=False): + data: dict # request body + params: dict # query params + files: dict # file uploads + content: Any # raw content + + +class ProviderSpecificHeader(TypedDict): + custom_llm_provider: str + extra_headers: dict + + +class SelectTokenizerResponse(TypedDict): + type: Literal["openai_tokenizer", "huggingface_tokenizer"] + tokenizer: Any + + +class LiteLLMBatch(Batch): + _hidden_params: dict = {} + usage: Optional[Usage] = None + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def json(self, **kwargs): # type: ignore + try: + return self.model_dump() # noqa + except Exception: + # if using pydantic v1 + return self.dict() + + +class RawRequestTypedDict(TypedDict, total=False): + raw_request_api_base: Optional[str] + raw_request_body: Optional[dict] + raw_request_headers: Optional[dict] + error: Optional[str] + + +class CredentialBase(BaseModel): + credential_name: str + credential_info: dict + + +class CredentialItem(CredentialBase): + credential_values: dict + + +class CreateCredentialItem(CredentialBase): + credential_values: Optional[dict] = None + model_id: Optional[str] = None + + @model_validator(mode="before") + @classmethod + def check_credential_params(cls, values): + if not values.get("credential_values") and not values.get("model_id"): + raise ValueError("Either credential_values or model_id must be set") + return values |