path: root/.venv/lib/python3.12/site-packages/litellm/llms/openai
author    S. Solomon Darnell    2025-03-28 21:52:21 -0500
committer S. Solomon Darnell    2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/openai
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-master.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/openai')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_audio_transformation.py      48
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_transformation.py            405
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_handler.py              3
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_transformation.py       156
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/common_utils.py                       208
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/completion/handler.py                 319
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/completion/transformation.py          158
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/completion/utils.py                   50
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py                   120
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/fine_tuning/handler.py                282
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/handler.py           244
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/transformation.py    82
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/openai.py                             2870
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/realtime/handler.py                   73
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/responses/transformation.py           216
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/transcriptions/handler.py             210
16 files changed, 5444 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_audio_transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_audio_transformation.py
new file mode 100644
index 00000000..581ffea2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_audio_transformation.py
@@ -0,0 +1,48 @@
+"""
+Support for the GPT-4o audio model family
+
+OpenAI Doc: https://platform.openai.com/docs/guides/audio/quickstart?audio-generation-quickstart-example=audio-in&lang=python
+"""
+
+import litellm
+
+from .gpt_transformation import OpenAIGPTConfig
+
+
+class OpenAIGPTAudioConfig(OpenAIGPTConfig):
+ """
+ Reference: https://platform.openai.com/docs/guides/audio
+ """
+
+ @classmethod
+ def get_config(cls):
+ return super().get_config()
+
+ def get_supported_openai_params(self, model: str) -> list:
+ """
+ Get the supported OpenAI params for the `gpt-audio` models
+
+ """
+
+ all_openai_params = super().get_supported_openai_params(model=model)
+ audio_specific_params = ["audio"]
+ return all_openai_params + audio_specific_params
+
+ def is_model_gpt_audio_model(self, model: str) -> bool:
+ if model in litellm.open_ai_chat_completion_models and "audio" in model:
+ return True
+ return False
+
+ def _map_openai_params(
+ self,
+ non_default_params: dict,
+ optional_params: dict,
+ model: str,
+ drop_params: bool,
+ ) -> dict:
+ return super()._map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model=model,
+ drop_params=drop_params,
+ )
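A minimal usage sketch of the OpenAIGPTAudioConfig defined above, assuming the vendored litellm package in this virtualenv is importable; the model name is illustrative.

from litellm.llms.openai.chat.gpt_audio_transformation import OpenAIGPTAudioConfig

config = OpenAIGPTAudioConfig()
params = config.get_supported_openai_params(model="gpt-4o-audio-preview")  # illustrative model name
assert "audio" in params  # the audio-specific param is appended to the base GPT chat params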
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_transformation.py
new file mode 100644
index 00000000..c765f979
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/gpt_transformation.py
@@ -0,0 +1,405 @@
+"""
+Support for the GPT model family
+"""
+
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ AsyncIterator,
+ Iterator,
+ List,
+ Optional,
+ Union,
+ cast,
+)
+
+import httpx
+
+import litellm
+from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
+from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+ AllMessageValues,
+ ChatCompletionImageObject,
+ ChatCompletionImageUrlObject,
+)
+from litellm.types.utils import ModelResponse, ModelResponseStream
+from litellm.utils import convert_to_model_response_object
+
+from ..common_utils import OpenAIError
+
+if TYPE_CHECKING:
+ from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+ LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+ LiteLLMLoggingObj = Any
+
+
+class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
+ """
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
+
+ The class `OpenAIGPTConfig` provides configuration for OpenAI's Chat API interface. Below are the parameters:
+
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
+
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
+
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
+
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
+
+ - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
+
+ - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
+
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
+
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
+
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
+ """
+
+ frequency_penalty: Optional[int] = None
+ function_call: Optional[Union[str, dict]] = None
+ functions: Optional[list] = None
+ logit_bias: Optional[dict] = None
+ max_tokens: Optional[int] = None
+ n: Optional[int] = None
+ presence_penalty: Optional[int] = None
+ stop: Optional[Union[str, list]] = None
+ temperature: Optional[int] = None
+ top_p: Optional[int] = None
+ response_format: Optional[dict] = None
+
+ def __init__(
+ self,
+ frequency_penalty: Optional[int] = None,
+ function_call: Optional[Union[str, dict]] = None,
+ functions: Optional[list] = None,
+ logit_bias: Optional[dict] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[int] = None,
+ stop: Optional[Union[str, list]] = None,
+ temperature: Optional[int] = None,
+ top_p: Optional[int] = None,
+ response_format: Optional[dict] = None,
+ ) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return super().get_config()
+
+ def get_supported_openai_params(self, model: str) -> list:
+ base_params = [
+ "frequency_penalty",
+ "logit_bias",
+ "logprobs",
+ "top_logprobs",
+ "max_tokens",
+ "max_completion_tokens",
+ "modalities",
+ "prediction",
+ "n",
+ "presence_penalty",
+ "seed",
+ "stop",
+ "stream",
+ "stream_options",
+ "temperature",
+ "top_p",
+ "tools",
+ "tool_choice",
+ "function_call",
+ "functions",
+ "max_retries",
+ "extra_headers",
+ "parallel_tool_calls",
+ "audio",
+ ] # works across all models
+
+ model_specific_params = []
+ if (
+ model != "gpt-3.5-turbo-16k" and model != "gpt-4"
+ ): # gpt-4 does not support 'response_format'
+ model_specific_params.append("response_format")
+
+ if (
+ model in litellm.open_ai_chat_completion_models
+ ) or model in litellm.open_ai_text_completion_models:
+ model_specific_params.append(
+ "user"
+ ) # user is not a param supported by all openai-compatible endpoints - e.g. azure ai
+ return base_params + model_specific_params
+
+ def _map_openai_params(
+ self,
+ non_default_params: dict,
+ optional_params: dict,
+ model: str,
+ drop_params: bool,
+ ) -> dict:
+ """
+ If any supported_openai_params are in non_default_params, add them to optional_params, so they are used in the API call.
+
+ Args:
+ non_default_params (dict): Non-default parameters to filter.
+ optional_params (dict): Optional parameters to update.
+ model (str): Model name for parameter support check.
+
+ Returns:
+ dict: Updated optional_params with supported non-default parameters.
+ """
+ supported_openai_params = self.get_supported_openai_params(model)
+ for param, value in non_default_params.items():
+ if param in supported_openai_params:
+ optional_params[param] = value
+ return optional_params
+
+ def map_openai_params(
+ self,
+ non_default_params: dict,
+ optional_params: dict,
+ model: str,
+ drop_params: bool,
+ ) -> dict:
+ return self._map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model=model,
+ drop_params=drop_params,
+ )
+
+ def _transform_messages(
+ self, messages: List[AllMessageValues], model: str
+ ) -> List[AllMessageValues]:
+ """OpenAI no longer supports image_url as a string, so we need to convert it to a dict"""
+ for message in messages:
+ message_content = message.get("content")
+ if message_content and isinstance(message_content, list):
+ for content_item in message_content:
+ if content_item.get("type") == "image_url":
+ content_item = cast(ChatCompletionImageObject, content_item)
+ if isinstance(content_item["image_url"], str):
+ content_item["image_url"] = {
+ "url": content_item["image_url"],
+ }
+ elif isinstance(content_item["image_url"], dict):
+ litellm_specific_params = {"format"}
+ new_image_url_obj = ChatCompletionImageUrlObject(
+ **{ # type: ignore
+ k: v
+ for k, v in content_item["image_url"].items()
+ if k not in litellm_specific_params
+ }
+ )
+ content_item["image_url"] = new_image_url_obj
+ return messages
+
+ def transform_request(
+ self,
+ model: str,
+ messages: List[AllMessageValues],
+ optional_params: dict,
+ litellm_params: dict,
+ headers: dict,
+ ) -> dict:
+ """
+ Transform the overall request to be sent to the API.
+
+ Returns:
+ dict: The transformed request. Sent as the body of the API call.
+ """
+ messages = self._transform_messages(messages=messages, model=model)
+ return {
+ "model": model,
+ "messages": messages,
+ **optional_params,
+ }
+
+ def transform_response(
+ self,
+ model: str,
+ raw_response: httpx.Response,
+ model_response: ModelResponse,
+ logging_obj: LiteLLMLoggingObj,
+ request_data: dict,
+ messages: List[AllMessageValues],
+ optional_params: dict,
+ litellm_params: dict,
+ encoding: Any,
+ api_key: Optional[str] = None,
+ json_mode: Optional[bool] = None,
+ ) -> ModelResponse:
+ """
+ Transform the response from the API.
+
+ Returns:
+ dict: The transformed response.
+ """
+
+ ## LOGGING
+ logging_obj.post_call(
+ input=messages,
+ api_key=api_key,
+ original_response=raw_response.text,
+ additional_args={"complete_input_dict": request_data},
+ )
+
+ ## RESPONSE OBJECT
+ try:
+ completion_response = raw_response.json()
+ except Exception as e:
+ response_headers = getattr(raw_response, "headers", None)
+ raise OpenAIError(
+ message="Unable to get json response - {}, Original Response: {}".format(
+ str(e), raw_response.text
+ ),
+ status_code=raw_response.status_code,
+ headers=response_headers,
+ )
+ raw_response_headers = dict(raw_response.headers)
+ final_response_obj = convert_to_model_response_object(
+ response_object=completion_response,
+ model_response_object=model_response,
+ hidden_params={"headers": raw_response_headers},
+ _response_headers=raw_response_headers,
+ )
+
+ return cast(ModelResponse, final_response_obj)
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+ ) -> BaseLLMException:
+ return OpenAIError(
+ status_code=status_code,
+ message=error_message,
+ headers=cast(httpx.Headers, headers),
+ )
+
+ def get_complete_url(
+ self,
+ api_base: Optional[str],
+ model: str,
+ optional_params: dict,
+ litellm_params: dict,
+ stream: Optional[bool] = None,
+ ) -> str:
+ """
+ Get the complete URL for the API call.
+
+ Returns:
+ str: The complete URL for the API call.
+ """
+ if api_base is None:
+ api_base = "https://api.openai.com"
+ endpoint = "chat/completions"
+
+ # Remove trailing slash from api_base if present
+ api_base = api_base.rstrip("/")
+
+ # Check if endpoint is already in the api_base
+ if endpoint in api_base:
+ return api_base
+
+ return f"{api_base}/{endpoint}"
+
+ def validate_environment(
+ self,
+ headers: dict,
+ model: str,
+ messages: List[AllMessageValues],
+ optional_params: dict,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ ) -> dict:
+ if api_key is not None:
+ headers["Authorization"] = f"Bearer {api_key}"
+
+ # Ensure Content-Type is set to application/json
+ if "content-type" not in headers and "Content-Type" not in headers:
+ headers["Content-Type"] = "application/json"
+
+ return headers
+
+ def get_models(
+ self, api_key: Optional[str] = None, api_base: Optional[str] = None
+ ) -> List[str]:
+ """
+ Calls OpenAI's `/v1/models` endpoint and returns the list of models.
+ """
+
+ if api_base is None:
+ api_base = "https://api.openai.com"
+ if api_key is None:
+ api_key = get_secret_str("OPENAI_API_KEY")
+
+ response = litellm.module_level_client.get(
+ url=f"{api_base}/v1/models",
+ headers={"Authorization": f"Bearer {api_key}"},
+ )
+
+ if response.status_code != 200:
+ raise Exception(f"Failed to get models: {response.text}")
+
+ models = response.json()["data"]
+ return [model["id"] for model in models]
+
+ @staticmethod
+ def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+ return (
+ api_key
+ or litellm.api_key
+ or litellm.openai_key
+ or get_secret_str("OPENAI_API_KEY")
+ )
+
+ @staticmethod
+ def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
+ return (
+ api_base
+ or litellm.api_base
+ or get_secret_str("OPENAI_API_BASE")
+ or "https://api.openai.com/v1"
+ )
+
+ @staticmethod
+ def get_base_model(model: str) -> str:
+ return model
+
+ def get_model_response_iterator(
+ self,
+ streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
+ sync_stream: bool,
+ json_mode: Optional[bool] = False,
+ ) -> Any:
+ return OpenAIChatCompletionStreamingHandler(
+ streaming_response=streaming_response,
+ sync_stream=sync_stream,
+ json_mode=json_mode,
+ )
+
+
+class OpenAIChatCompletionStreamingHandler(BaseModelResponseIterator):
+
+ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+ try:
+ return ModelResponseStream(
+ id=chunk["id"],
+ object="chat.completion.chunk",
+ created=chunk["created"],
+ model=chunk["model"],
+ choices=chunk["choices"],
+ )
+ except Exception as e:
+ raise e
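A small sketch of how the OpenAIGPTConfig above builds the request URL and headers (values are illustrative; assumes the vendored package is importable): get_complete_url normalizes the base URL, and validate_environment injects auth headers.

from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig

config = OpenAIGPTConfig()
url = config.get_complete_url(
    api_base="https://api.openai.com/v1/",
    model="gpt-4o",
    optional_params={},
    litellm_params={},
)
# url == "https://api.openai.com/v1/chat/completions" (trailing slash stripped, endpoint appended)
headers = config.validate_environment(
    headers={}, model="gpt-4o", messages=[], optional_params={}, api_key="sk-example"  # placeholder key
)
# headers now carries "Authorization" and "Content-Type: application/json"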
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_handler.py
new file mode 100644
index 00000000..d141498c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_handler.py
@@ -0,0 +1,3 @@
+"""
+LLM Calling done in `openai/openai.py`
+"""
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_transformation.py
new file mode 100644
index 00000000..b2ffda6e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/chat/o_series_transformation.py
@@ -0,0 +1,156 @@
+"""
+Support for o1/o3 model family
+
+https://platform.openai.com/docs/guides/reasoning
+
+Translations handled by LiteLLM:
+- modalities: image => drop param (if user opts in to dropping param)
+- role: system ==> translate to role 'user'
+- streaming => faked by LiteLLM
+- Tools, response_format => drop param (if user opts in to dropping param)
+- Logprobs => drop param (if user opts in to dropping param)
+"""
+
+from typing import List, Optional
+
+import litellm
+from litellm import verbose_logger
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
+from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
+from litellm.utils import (
+ supports_function_calling,
+ supports_parallel_function_calling,
+ supports_response_schema,
+ supports_system_messages,
+)
+
+from .gpt_transformation import OpenAIGPTConfig
+
+
+class OpenAIOSeriesConfig(OpenAIGPTConfig):
+ """
+ Reference: https://platform.openai.com/docs/guides/reasoning
+ """
+
+ @classmethod
+ def get_config(cls):
+ return super().get_config()
+
+ def translate_developer_role_to_system_role(
+ self, messages: List[AllMessageValues]
+ ) -> List[AllMessageValues]:
+ """
+ O-series models support the `developer` role, so messages are returned unchanged.
+ """
+ return messages
+
+ def get_supported_openai_params(self, model: str) -> list:
+ """
+ Get the supported OpenAI params for the given model
+
+ """
+
+ all_openai_params = super().get_supported_openai_params(model=model)
+ non_supported_params = [
+ "logprobs",
+ "top_p",
+ "presence_penalty",
+ "frequency_penalty",
+ "top_logprobs",
+ ]
+
+ o_series_only_param = ["reasoning_effort"]
+
+ all_openai_params.extend(o_series_only_param)
+
+ try:
+ model, custom_llm_provider, api_base, api_key = get_llm_provider(
+ model=model
+ )
+ except Exception:
+ verbose_logger.debug(
+ f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check"
+ )
+ custom_llm_provider = "openai"
+
+ _supports_function_calling = supports_function_calling(
+ model, custom_llm_provider
+ )
+ _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+ _supports_parallel_tool_calls = supports_parallel_function_calling(
+ model, custom_llm_provider
+ )
+
+ if not _supports_function_calling:
+ non_supported_params.append("tools")
+ non_supported_params.append("tool_choice")
+ non_supported_params.append("function_call")
+ non_supported_params.append("functions")
+
+ if not _supports_parallel_tool_calls:
+ non_supported_params.append("parallel_tool_calls")
+
+ if not _supports_response_schema:
+ non_supported_params.append("response_format")
+
+ return [
+ param for param in all_openai_params if param not in non_supported_params
+ ]
+
+ def map_openai_params(
+ self,
+ non_default_params: dict,
+ optional_params: dict,
+ model: str,
+ drop_params: bool,
+ ):
+ if "max_tokens" in non_default_params:
+ optional_params["max_completion_tokens"] = non_default_params.pop(
+ "max_tokens"
+ )
+ if "temperature" in non_default_params:
+ temperature_value: Optional[float] = non_default_params.pop("temperature")
+ if temperature_value is not None:
+ if temperature_value == 1:
+ optional_params["temperature"] = temperature_value
+ else:
+ ## UNSUPPORTED TEMPERATURE VALUE
+ if litellm.drop_params is True or drop_params is True:
+ pass
+ else:
+ raise litellm.utils.UnsupportedParamsError(
+ message="O-series models don't support temperature={}. Only temperature=1 is supported. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
+ temperature_value
+ ),
+ status_code=400,
+ )
+
+ return super()._map_openai_params(
+ non_default_params, optional_params, model, drop_params
+ )
+
+ def is_model_o_series_model(self, model: str) -> bool:
+ if model in litellm.open_ai_chat_completion_models and (
+ "o1" in model or "o3" in model
+ ):
+ return True
+ return False
+
+ def _transform_messages(
+ self, messages: List[AllMessageValues], model: str
+ ) -> List[AllMessageValues]:
+ """
+ Handles limitations of the o-series (o1/o3) model family.
+ - modalities: image => drop param (if user opts in to dropping param)
+ - role: system ==> translate to role 'user'
+ """
+ _supports_system_messages = supports_system_messages(model, "openai")
+ for i, message in enumerate(messages):
+ if message["role"] == "system" and not _supports_system_messages:
+ new_message = ChatCompletionUserMessage(
+ content=message["content"], role="user"
+ )
+ messages[i] = new_message # Replace the old message with the new one
+
+ messages = super()._transform_messages(messages, model)
+ return messages
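A sketch of the o-series parameter mapping implemented above (model name illustrative; assumes the package is importable): max_tokens is renamed to max_completion_tokens, and a non-default temperature is dropped only because drop_params=True.

from litellm.llms.openai.chat.o_series_transformation import OpenAIOSeriesConfig

config = OpenAIOSeriesConfig()
mapped = config.map_openai_params(
    non_default_params={"max_tokens": 256, "temperature": 0.2},
    optional_params={},
    model="o1-mini",  # illustrative
    drop_params=True,
)
# mapped == {"max_completion_tokens": 256}; temperature=0.2 was dropped (temperature=1 would have been kept)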
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/common_utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/common_utils.py
new file mode 100644
index 00000000..55da16d6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/common_utils.py
@@ -0,0 +1,208 @@
+"""
+Common helpers / utils shared across all OpenAI endpoints
+"""
+
+import hashlib
+import json
+from typing import Any, Dict, List, Literal, Optional, Union
+
+import httpx
+import openai
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+
+import litellm
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
+
+
+class OpenAIError(BaseLLMException):
+ def __init__(
+ self,
+ status_code: int,
+ message: str,
+ request: Optional[httpx.Request] = None,
+ response: Optional[httpx.Response] = None,
+ headers: Optional[Union[dict, httpx.Headers]] = None,
+ body: Optional[dict] = None,
+ ):
+ self.status_code = status_code
+ self.message = message
+ self.headers = headers
+ if request:
+ self.request = request
+ else:
+ self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+ if response:
+ self.response = response
+ else:
+ self.response = httpx.Response(
+ status_code=status_code, request=self.request
+ )
+ super().__init__(
+ status_code=status_code,
+ message=self.message,
+ headers=self.headers,
+ request=self.request,
+ response=self.response,
+ body=body,
+ )
+
+
+####### Error Handling Utils for OpenAI API #######################
+###################################################################
+def drop_params_from_unprocessable_entity_error(
+ e: Union[openai.UnprocessableEntityError, httpx.HTTPStatusError],
+ data: Dict[str, Any],
+) -> Dict[str, Any]:
+ """
+ Helper function to read OpenAI UnprocessableEntityError and drop the params that raised an error from the error message.
+
+ Args:
+ e (UnprocessableEntityError): The UnprocessableEntityError exception
+ data (Dict[str, Any]): The original data dictionary containing all parameters
+
+ Returns:
+ Dict[str, Any]: A new dictionary with invalid parameters removed
+ """
+ invalid_params: List[str] = []
+ if isinstance(e, httpx.HTTPStatusError):
+ error_json = e.response.json()
+ error_message = error_json.get("error", {})
+ error_body = error_message
+ else:
+ error_body = e.body
+ if (
+ error_body is not None
+ and isinstance(error_body, dict)
+ and error_body.get("message")
+ ):
+ message = error_body.get("message", {})
+ if isinstance(message, str):
+ try:
+ message = json.loads(message)
+ except json.JSONDecodeError:
+ message = {"detail": message}
+ detail = message.get("detail")
+
+ if isinstance(detail, List) and len(detail) > 0 and isinstance(detail[0], dict):
+ for error_dict in detail:
+ if (
+ error_dict.get("loc")
+ and isinstance(error_dict.get("loc"), list)
+ and len(error_dict.get("loc")) == 2
+ ):
+ invalid_params.append(error_dict["loc"][1])
+
+ new_data = {k: v for k, v in data.items() if k not in invalid_params}
+
+ return new_data
+
+
+class BaseOpenAILLM:
+ """
+ Base class for OpenAI LLMs for getting their httpx clients and SSL verification settings
+ """
+
+ @staticmethod
+ def get_cached_openai_client(
+ client_initialization_params: dict, client_type: Literal["openai", "azure"]
+ ) -> Optional[Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]]:
+ """Retrieves the OpenAI client from the in-memory cache based on the client initialization parameters"""
+ _cache_key = BaseOpenAILLM.get_openai_client_cache_key(
+ client_initialization_params=client_initialization_params,
+ client_type=client_type,
+ )
+ _cached_client = litellm.in_memory_llm_clients_cache.get_cache(_cache_key)
+ return _cached_client
+
+ @staticmethod
+ def set_cached_openai_client(
+ openai_client: Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI],
+ client_type: Literal["openai", "azure"],
+ client_initialization_params: dict,
+ ):
+ """Stores the OpenAI client in the in-memory cache for _DEFAULT_TTL_FOR_HTTPX_CLIENTS SECONDS"""
+ _cache_key = BaseOpenAILLM.get_openai_client_cache_key(
+ client_initialization_params=client_initialization_params,
+ client_type=client_type,
+ )
+ litellm.in_memory_llm_clients_cache.set_cache(
+ key=_cache_key,
+ value=openai_client,
+ ttl=_DEFAULT_TTL_FOR_HTTPX_CLIENTS,
+ )
+
+ @staticmethod
+ def get_openai_client_cache_key(
+ client_initialization_params: dict, client_type: Literal["openai", "azure"]
+ ) -> str:
+ """Creates a cache key for the OpenAI client based on the client initialization parameters"""
+ hashed_api_key = None
+ if client_initialization_params.get("api_key") is not None:
+ hash_object = hashlib.sha256(
+ client_initialization_params.get("api_key", "").encode()
+ )
+ # Hexadecimal representation of the hash
+ hashed_api_key = hash_object.hexdigest()
+
+ # Create a more readable cache key using a list of key-value pairs
+ key_parts = [
+ f"hashed_api_key={hashed_api_key}",
+ f"is_async={client_initialization_params.get('is_async')}",
+ ]
+
+ LITELLM_CLIENT_SPECIFIC_PARAMS = [
+ "timeout",
+ "max_retries",
+ "organization",
+ "api_base",
+ ]
+ openai_client_fields = (
+ BaseOpenAILLM.get_openai_client_initialization_param_fields(
+ client_type=client_type
+ )
+ + LITELLM_CLIENT_SPECIFIC_PARAMS
+ )
+
+ for param in openai_client_fields:
+ key_parts.append(f"{param}={client_initialization_params.get(param)}")
+
+ _cache_key = ",".join(key_parts)
+ return _cache_key
+
+ @staticmethod
+ def get_openai_client_initialization_param_fields(
+ client_type: Literal["openai", "azure"]
+ ) -> List[str]:
+ """Returns a list of fields that are used to initialize the OpenAI client"""
+ import inspect
+
+ from openai import AzureOpenAI, OpenAI
+
+ if client_type == "openai":
+ signature = inspect.signature(OpenAI.__init__)
+ else:
+ signature = inspect.signature(AzureOpenAI.__init__)
+
+ # Extract parameter names, excluding 'self'
+ param_names = [param for param in signature.parameters if param != "self"]
+ return param_names
+
+ @staticmethod
+ def _get_async_http_client() -> Optional[httpx.AsyncClient]:
+ if litellm.aclient_session is not None:
+ return litellm.aclient_session
+
+ return httpx.AsyncClient(
+ limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100),
+ verify=litellm.ssl_verify,
+ )
+
+ @staticmethod
+ def _get_sync_http_client() -> Optional[httpx.Client]:
+ if litellm.client_session is not None:
+ return litellm.client_session
+ return httpx.Client(
+ limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100),
+ verify=litellm.ssl_verify,
+ )
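A sketch of drop_params_from_unprocessable_entity_error with a fabricated FastAPI-style 422 payload (purely illustrative), showing how the offending parameter is pruned from the request data.

import httpx
from litellm.llms.openai.common_utils import drop_params_from_unprocessable_entity_error

request = httpx.Request("POST", "https://example.invalid/v1/chat/completions")
response = httpx.Response(
    422,
    request=request,
    json={"error": {"message": '{"detail": [{"loc": ["body", "tools"], "msg": "extra fields not permitted"}]}'}},
)
error = httpx.HTTPStatusError("Unprocessable Entity", request=request, response=response)
data = {"model": "gpt-4o", "messages": [], "tools": [{"type": "function"}]}
cleaned = drop_params_from_unprocessable_entity_error(error, data)
# cleaned == {"model": "gpt-4o", "messages": []} -- "tools" was read from loc[1] and removed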
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/handler.py
new file mode 100644
index 00000000..2e60f55b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/handler.py
@@ -0,0 +1,319 @@
+import json
+from typing import Callable, List, Optional, Union
+
+from openai import AsyncOpenAI, OpenAI
+
+import litellm
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+from litellm.llms.base import BaseLLM
+from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
+from litellm.types.utils import LlmProviders, ModelResponse, TextCompletionResponse
+from litellm.utils import ProviderConfigManager
+
+from ..common_utils import OpenAIError
+from .transformation import OpenAITextCompletionConfig
+
+
+class OpenAITextCompletion(BaseLLM):
+ openai_text_completion_global_config = OpenAITextCompletionConfig()
+
+ def __init__(self) -> None:
+ super().__init__()
+
+ def validate_environment(self, api_key):
+ headers = {
+ "content-type": "application/json",
+ }
+ if api_key:
+ headers["Authorization"] = f"Bearer {api_key}"
+ return headers
+
+ def completion(
+ self,
+ model_response: ModelResponse,
+ api_key: str,
+ model: str,
+ messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+ timeout: float,
+ custom_llm_provider: str,
+ logging_obj: LiteLLMLoggingObj,
+ optional_params: dict,
+ print_verbose: Optional[Callable] = None,
+ api_base: Optional[str] = None,
+ acompletion: bool = False,
+ litellm_params=None,
+ logger_fn=None,
+ client=None,
+ organization: Optional[str] = None,
+ headers: Optional[dict] = None,
+ ):
+ try:
+ if headers is None:
+ headers = self.validate_environment(api_key=api_key)
+ if model is None or messages is None:
+ raise OpenAIError(status_code=422, message="Missing model or messages")
+
+ # don't send max retries to the api, if set
+
+ provider_config = ProviderConfigManager.get_provider_text_completion_config(
+ model=model,
+ provider=LlmProviders(custom_llm_provider),
+ )
+
+ data = provider_config.transform_text_completion_request(
+ model=model,
+ messages=messages,
+ optional_params=optional_params,
+ headers=headers,
+ )
+ max_retries = data.pop("max_retries", 2)
+ ## LOGGING
+ logging_obj.pre_call(
+ input=messages,
+ api_key=api_key,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ "complete_input_dict": data,
+ },
+ )
+ if acompletion is True:
+ if optional_params.get("stream", False):
+ return self.async_streaming(
+ logging_obj=logging_obj,
+ api_base=api_base,
+ api_key=api_key,
+ data=data,
+ headers=headers,
+ model_response=model_response,
+ model=model,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ organization=organization,
+ )
+ else:
+ return self.acompletion(api_base=api_base, data=data, headers=headers, model_response=model_response, api_key=api_key, logging_obj=logging_obj, model=model, timeout=timeout, max_retries=max_retries, organization=organization, client=client) # type: ignore
+ elif optional_params.get("stream", False):
+ return self.streaming(
+ logging_obj=logging_obj,
+ api_base=api_base,
+ api_key=api_key,
+ data=data,
+ headers=headers,
+ model_response=model_response,
+ model=model,
+ timeout=timeout,
+ max_retries=max_retries, # type: ignore
+ client=client,
+ organization=organization,
+ )
+ else:
+ if client is None:
+ openai_client = OpenAI(
+ api_key=api_key,
+ base_url=api_base,
+ http_client=litellm.client_session,
+ timeout=timeout,
+ max_retries=max_retries, # type: ignore
+ organization=organization,
+ )
+ else:
+ openai_client = client
+
+ raw_response = openai_client.completions.with_raw_response.create(**data) # type: ignore
+ response = raw_response.parse()
+ response_json = response.model_dump()
+
+ ## LOGGING
+ logging_obj.post_call(
+ api_key=api_key,
+ original_response=response_json,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ },
+ )
+
+ ## RESPONSE OBJECT
+ return TextCompletionResponse(**response_json)
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+
+ async def acompletion(
+ self,
+ logging_obj,
+ api_base: str,
+ data: dict,
+ headers: dict,
+ model_response: ModelResponse,
+ api_key: str,
+ model: str,
+ timeout: float,
+ max_retries: int,
+ organization: Optional[str] = None,
+ client=None,
+ ):
+ try:
+ if client is None:
+ openai_aclient = AsyncOpenAI(
+ api_key=api_key,
+ base_url=api_base,
+ http_client=litellm.aclient_session,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ )
+ else:
+ openai_aclient = client
+
+ raw_response = await openai_aclient.completions.with_raw_response.create(
+ **data
+ )
+ response = raw_response.parse()
+ response_json = response.model_dump()
+
+ ## LOGGING
+ logging_obj.post_call(
+ api_key=api_key,
+ original_response=response,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ },
+ )
+ ## RESPONSE OBJECT
+ response_obj = TextCompletionResponse(**response_json)
+ response_obj._hidden_params.original_response = json.dumps(response_json)
+ return response_obj
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+
+ def streaming(
+ self,
+ logging_obj,
+ api_key: str,
+ data: dict,
+ headers: dict,
+ model_response: ModelResponse,
+ model: str,
+ timeout: float,
+ api_base: Optional[str] = None,
+ max_retries=None,
+ client=None,
+ organization=None,
+ ):
+
+ if client is None:
+ openai_client = OpenAI(
+ api_key=api_key,
+ base_url=api_base,
+ http_client=litellm.client_session,
+ timeout=timeout,
+ max_retries=max_retries, # type: ignore
+ organization=organization,
+ )
+ else:
+ openai_client = client
+
+ try:
+ raw_response = openai_client.completions.with_raw_response.create(**data)
+ response = raw_response.parse()
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+ streamwrapper = CustomStreamWrapper(
+ completion_stream=response,
+ model=model,
+ custom_llm_provider="text-completion-openai",
+ logging_obj=logging_obj,
+ stream_options=data.get("stream_options", None),
+ )
+
+ try:
+ for chunk in streamwrapper:
+ yield chunk
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+
+ async def async_streaming(
+ self,
+ logging_obj,
+ api_key: str,
+ data: dict,
+ headers: dict,
+ model_response: ModelResponse,
+ model: str,
+ timeout: float,
+ max_retries: int,
+ api_base: Optional[str] = None,
+ client=None,
+ organization=None,
+ ):
+ if client is None:
+ openai_client = AsyncOpenAI(
+ api_key=api_key,
+ base_url=api_base,
+ http_client=litellm.aclient_session,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ )
+ else:
+ openai_client = client
+
+ raw_response = await openai_client.completions.with_raw_response.create(**data)
+ response = raw_response.parse()
+ streamwrapper = CustomStreamWrapper(
+ completion_stream=response,
+ model=model,
+ custom_llm_provider="text-completion-openai",
+ logging_obj=logging_obj,
+ stream_options=data.get("stream_options", None),
+ )
+
+ try:
+ async for transformed_chunk in streamwrapper:
+ yield transformed_chunk
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
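A minimal sketch of the header construction used by OpenAITextCompletion before issuing a request (the key value is a placeholder).

from litellm.llms.openai.completion.handler import OpenAITextCompletion

handler = OpenAITextCompletion()
headers = handler.validate_environment(api_key="sk-example")
# headers == {"content-type": "application/json", "Authorization": "Bearer sk-example"}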
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/transformation.py
new file mode 100644
index 00000000..1aef72d3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/transformation.py
@@ -0,0 +1,158 @@
+"""
+Support for the GPT model family
+"""
+
+from typing import List, Optional, Union
+
+from litellm.llms.base_llm.completion.transformation import BaseTextCompletionConfig
+from litellm.types.llms.openai import AllMessageValues, OpenAITextCompletionUserMessage
+from litellm.types.utils import Choices, Message, ModelResponse, TextCompletionResponse
+
+from ..chat.gpt_transformation import OpenAIGPTConfig
+from .utils import _transform_prompt
+
+
+class OpenAITextCompletionConfig(BaseTextCompletionConfig, OpenAIGPTConfig):
+ """
+ Reference: https://platform.openai.com/docs/api-reference/completions/create
+
+ The class `OpenAITextCompletionConfig` provides configuration for the OpenAI's text completion API interface. Below are the parameters:
+
+ - `best_of` (integer or null): This optional parameter generates server-side completions and returns the one with the highest log probability per token.
+
+ - `echo` (boolean or null): This optional parameter will echo back the prompt in addition to the completion.
+
+ - `frequency_penalty` (number or null): Defaults to 0. It is a number from -2.0 to 2.0, where positive values decrease the model's likelihood of repeating the same line.
+
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
+
+ - `logprobs` (integer or null): This optional parameter includes the log probabilities on the most likely tokens as well as the chosen tokens.
+
+ - `max_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the completion.
+
+ - `n` (integer or null): This optional parameter sets how many completions to generate for each prompt.
+
+ - `presence_penalty` (number or null): Defaults to 0 and can be between -2.0 and 2.0. Positive values increase the model's likelihood to talk about new topics.
+
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
+
+ - `suffix` (string or null): Defines the suffix that comes after a completion of inserted text.
+
+ - `temperature` (number or null): This optional parameter defines the sampling temperature to use.
+
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
+ """
+
+ best_of: Optional[int] = None
+ echo: Optional[bool] = None
+ frequency_penalty: Optional[int] = None
+ logit_bias: Optional[dict] = None
+ logprobs: Optional[int] = None
+ max_tokens: Optional[int] = None
+ n: Optional[int] = None
+ presence_penalty: Optional[int] = None
+ stop: Optional[Union[str, list]] = None
+ suffix: Optional[str] = None
+
+ def __init__(
+ self,
+ best_of: Optional[int] = None,
+ echo: Optional[bool] = None,
+ frequency_penalty: Optional[int] = None,
+ logit_bias: Optional[dict] = None,
+ logprobs: Optional[int] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[int] = None,
+ stop: Optional[Union[str, list]] = None,
+ suffix: Optional[str] = None,
+ temperature: Optional[float] = None,
+ top_p: Optional[float] = None,
+ ) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return super().get_config()
+
+ def convert_to_chat_model_response_object(
+ self,
+ response_object: Optional[TextCompletionResponse] = None,
+ model_response_object: Optional[ModelResponse] = None,
+ ):
+ try:
+ ## RESPONSE OBJECT
+ if response_object is None or model_response_object is None:
+ raise ValueError("Error in response object format")
+ choice_list = []
+ for idx, choice in enumerate(response_object["choices"]):
+ message = Message(
+ content=choice["text"],
+ role="assistant",
+ )
+ choice = Choices(
+ finish_reason=choice["finish_reason"],
+ index=idx,
+ message=message,
+ logprobs=choice.get("logprobs", None),
+ )
+ choice_list.append(choice)
+ model_response_object.choices = choice_list
+
+ if "usage" in response_object:
+ setattr(model_response_object, "usage", response_object["usage"])
+
+ if "id" in response_object:
+ model_response_object.id = response_object["id"]
+
+ if "model" in response_object:
+ model_response_object.model = response_object["model"]
+
+ model_response_object._hidden_params["original_response"] = (
+ response_object # track original response, if users make a litellm.text_completion() request, we can return the original response
+ )
+ return model_response_object
+ except Exception as e:
+ raise e
+
+ def get_supported_openai_params(self, model: str) -> List:
+ return [
+ "functions",
+ "function_call",
+ "temperature",
+ "top_p",
+ "n",
+ "stream",
+ "stream_options",
+ "stop",
+ "max_tokens",
+ "presence_penalty",
+ "frequency_penalty",
+ "logit_bias",
+ "user",
+ "response_format",
+ "seed",
+ "tools",
+ "tool_choice",
+ "max_retries",
+ "logprobs",
+ "top_logprobs",
+ "extra_headers",
+ ]
+
+ def transform_text_completion_request(
+ self,
+ model: str,
+ messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+ optional_params: dict,
+ headers: dict,
+ ) -> dict:
+ prompt = _transform_prompt(messages)
+ return {
+ "model": model,
+ "prompt": prompt,
+ **optional_params,
+ }
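A sketch of the request body produced by transform_text_completion_request (model and params are illustrative): chat-style messages are flattened into the text-completion `prompt` field.

from litellm.llms.openai.completion.transformation import OpenAITextCompletionConfig

config = OpenAITextCompletionConfig()
body = config.transform_text_completion_request(
    model="gpt-3.5-turbo-instruct",  # illustrative
    messages=[{"role": "user", "content": "Write a haiku"}],
    optional_params={"max_tokens": 32, "temperature": 0.7},
    headers={},
)
# body == {"model": "gpt-3.5-turbo-instruct", "prompt": "Write a haiku", "max_tokens": 32, "temperature": 0.7}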
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/utils.py
new file mode 100644
index 00000000..8b3efb4c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/completion/utils.py
@@ -0,0 +1,50 @@
+from typing import List, Union, cast
+
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+ convert_content_list_to_str,
+)
+from litellm.types.llms.openai import (
+ AllMessageValues,
+ AllPromptValues,
+ OpenAITextCompletionUserMessage,
+)
+
+
+def is_tokens_or_list_of_tokens(value: List):
+ # Check if it's a list of integers (tokens)
+ if isinstance(value, list) and all(isinstance(item, int) for item in value):
+ return True
+ # Check if it's a list of lists of integers (list of tokens)
+ if isinstance(value, list) and all(
+ isinstance(item, list) and all(isinstance(i, int) for i in item)
+ for item in value
+ ):
+ return True
+ return False
+
+
+def _transform_prompt(
+ messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
+) -> AllPromptValues:
+ if len(messages) == 1: # base case
+ message_content = messages[0].get("content")
+ if (
+ message_content
+ and isinstance(message_content, list)
+ and is_tokens_or_list_of_tokens(message_content)
+ ):
+ openai_prompt: AllPromptValues = cast(AllPromptValues, message_content)
+ else:
+ openai_prompt = ""
+ content = convert_content_list_to_str(cast(AllMessageValues, messages[0]))
+ openai_prompt += content
+ else:
+ prompt_str_list: List[str] = []
+ for m in messages:
+ try: # token prompts (list of int / list of list of int) are only expected in single-message arrays
+ content = convert_content_list_to_str(cast(AllMessageValues, m))
+ prompt_str_list.append(content)
+ except Exception as e:
+ raise e
+ openai_prompt = prompt_str_list
+ return openai_prompt
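A sketch of _transform_prompt behavior (assuming the module is importable): a single message collapses to a string prompt, while multiple messages become a list of strings.

from litellm.llms.openai.completion.utils import _transform_prompt

single = _transform_prompt([{"role": "user", "content": "Say hi"}])
# single == "Say hi"
several = _transform_prompt(
    [{"role": "user", "content": "first"}, {"role": "user", "content": "second"}]
)
# several == ["first", "second"]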
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
new file mode 100644
index 00000000..0c26fd74
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
@@ -0,0 +1,120 @@
+"""
+Helper util for handling openai-specific cost calculation
+- e.g.: prompt caching
+"""
+
+from typing import Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger
+from litellm.types.utils import CallTypes, Usage
+from litellm.utils import get_model_info
+
+
+def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_second"]:
+ if call_type == CallTypes.atranscription or call_type == CallTypes.transcription:
+ return "cost_per_second"
+ else:
+ return "cost_per_token"
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+ """
+ Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+ Input:
+ - model: str, the model name without provider prefix
+ - usage: LiteLLM Usage block, containing prompt caching information
+
+ Returns:
+ Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+ """
+ ## GET MODEL INFO
+ model_info = get_model_info(model=model, custom_llm_provider="openai")
+ ## CALCULATE INPUT COST
+ ### Non-cached text tokens
+ non_cached_text_tokens = usage.prompt_tokens
+ cached_tokens: Optional[int] = None
+ if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
+ cached_tokens = usage.prompt_tokens_details.cached_tokens
+ non_cached_text_tokens = non_cached_text_tokens - cached_tokens
+ prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
+ ## Prompt Caching cost calculation
+ if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
+ # Note: cached prompt tokens (usage.prompt_tokens_details.cached_tokens) are billed at the cache-read rate when the model defines one
+ prompt_cost += cached_tokens * (
+ model_info.get("cache_read_input_token_cost", 0) or 0
+ )
+
+ _audio_tokens: Optional[int] = (
+ usage.prompt_tokens_details.audio_tokens
+ if usage.prompt_tokens_details is not None
+ else None
+ )
+ _audio_cost_per_token: Optional[float] = model_info.get(
+ "input_cost_per_audio_token"
+ )
+ if _audio_tokens is not None and _audio_cost_per_token is not None:
+ audio_cost: float = _audio_tokens * _audio_cost_per_token
+ prompt_cost += audio_cost
+
+ ## CALCULATE OUTPUT COST
+ completion_cost: float = (
+ usage["completion_tokens"] * model_info["output_cost_per_token"]
+ )
+ _output_cost_per_audio_token: Optional[float] = model_info.get(
+ "output_cost_per_audio_token"
+ )
+ _output_audio_tokens: Optional[int] = (
+ usage.completion_tokens_details.audio_tokens
+ if usage.completion_tokens_details is not None
+ else None
+ )
+ if _output_cost_per_audio_token is not None and _output_audio_tokens is not None:
+ audio_cost = _output_audio_tokens * _output_cost_per_audio_token
+ completion_cost += audio_cost
+
+ return prompt_cost, completion_cost
+
+
+def cost_per_second(
+ model: str, custom_llm_provider: Optional[str], duration: float = 0.0
+) -> Tuple[float, float]:
+ """
+ Calculates the cost per second for a given model and response duration.
+
+ Input:
+ - model: str, the model name without provider prefix
+ - custom_llm_provider: str, the custom llm provider
+ - duration: float, the duration of the response in seconds
+
+ Returns:
+ Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+ """
+ ## GET MODEL INFO
+ model_info = get_model_info(
+ model=model, custom_llm_provider=custom_llm_provider or "openai"
+ )
+ prompt_cost = 0.0
+ completion_cost = 0.0
+ ## Speech / Audio cost calculation
+ if (
+ "output_cost_per_second" in model_info
+ and model_info["output_cost_per_second"] is not None
+ ):
+ verbose_logger.debug(
+ f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
+ )
+ ## COST PER SECOND ##
+ completion_cost = model_info["output_cost_per_second"] * duration
+ elif (
+ "input_cost_per_second" in model_info
+ and model_info["input_cost_per_second"] is not None
+ ):
+ verbose_logger.debug(
+ f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
+ )
+ ## COST PER SECOND ##
+ prompt_cost = model_info["input_cost_per_second"] * duration
+ completion_cost = 0.0
+
+ return prompt_cost, completion_cost
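A sketch of the cost split computed by cost_per_token (model name illustrative; per-token rates come from litellm's model cost map): cached prompt tokens, when reported, are billed at the cache-read rate.

from litellm.llms.openai.cost_calculation import cost_per_token
from litellm.types.utils import Usage

usage = Usage(prompt_tokens=1000, completion_tokens=200, total_tokens=1200)
prompt_cost, completion_cost = cost_per_token(model="gpt-4o", usage=usage)
# prompt_cost = 1000 * input_cost_per_token; completion_cost = 200 * output_cost_per_token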
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/fine_tuning/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/fine_tuning/handler.py
new file mode 100644
index 00000000..97b237c7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/fine_tuning/handler.py
@@ -0,0 +1,282 @@
+from typing import Any, Coroutine, Optional, Union
+
+import httpx
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+from openai.types.fine_tuning import FineTuningJob
+
+from litellm._logging import verbose_logger
+
+
+class OpenAIFineTuningAPI:
+ """
+ OpenAI methods to support fine-tuning jobs
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+
+ def get_openai_client(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = None,
+ _is_async: bool = False,
+ api_version: Optional[str] = None,
+ litellm_params: Optional[dict] = None,
+ ) -> Optional[
+ Union[
+ OpenAI,
+ AsyncOpenAI,
+ AzureOpenAI,
+ AsyncAzureOpenAI,
+ ]
+ ]:
+ received_args = locals()
+ openai_client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = None
+ if client is None:
+ data = {}
+ for k, v in received_args.items():
+ if k == "self" or k == "client" or k == "_is_async":
+ pass
+ elif k == "api_base" and v is not None:
+ data["base_url"] = v
+ elif v is not None:
+ data[k] = v
+ if _is_async is True:
+ openai_client = AsyncOpenAI(**data)
+ else:
+ openai_client = OpenAI(**data) # type: ignore
+ else:
+ openai_client = client
+
+ return openai_client
+
+ async def acreate_fine_tuning_job(
+ self,
+ create_fine_tuning_job_data: dict,
+ openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ ) -> FineTuningJob:
+ response = await openai_client.fine_tuning.jobs.create(
+ **create_fine_tuning_job_data
+ )
+ return response
+
+ def create_fine_tuning_job(
+ self,
+ _is_async: bool,
+ create_fine_tuning_job_data: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ api_version: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = None,
+ ) -> Union[FineTuningJob, Coroutine[Any, Any, FineTuningJob]]:
+ openai_client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ api_version=api_version,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, (AsyncOpenAI, AsyncAzureOpenAI)):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.acreate_fine_tuning_job( # type: ignore
+ create_fine_tuning_job_data=create_fine_tuning_job_data,
+ openai_client=openai_client,
+ )
+ verbose_logger.debug(
+ "creating fine tuning job, args= %s", create_fine_tuning_job_data
+ )
+ response = openai_client.fine_tuning.jobs.create(**create_fine_tuning_job_data)
+ return response
+
+ async def acancel_fine_tuning_job(
+ self,
+ fine_tuning_job_id: str,
+ openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ ) -> FineTuningJob:
+ response = await openai_client.fine_tuning.jobs.cancel(
+ fine_tuning_job_id=fine_tuning_job_id
+ )
+ return response
+
+ def cancel_fine_tuning_job(
+ self,
+ _is_async: bool,
+ fine_tuning_job_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ api_version: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = None,
+ ):
+ openai_client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ api_version=api_version,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, (AsyncOpenAI, AsyncAzureOpenAI)):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.acancel_fine_tuning_job( # type: ignore
+ fine_tuning_job_id=fine_tuning_job_id,
+ openai_client=openai_client,
+ )
+ verbose_logger.debug("canceling fine tuning job, args= %s", fine_tuning_job_id)
+ response = openai_client.fine_tuning.jobs.cancel(
+ fine_tuning_job_id=fine_tuning_job_id
+ )
+ return response
+
+ async def alist_fine_tuning_jobs(
+ self,
+ openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ after: Optional[str] = None,
+ limit: Optional[int] = None,
+ ):
+ response = await openai_client.fine_tuning.jobs.list(after=after, limit=limit) # type: ignore
+ return response
+
+ def list_fine_tuning_jobs(
+ self,
+ _is_async: bool,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ api_version: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = None,
+ after: Optional[str] = None,
+ limit: Optional[int] = None,
+ ):
+ openai_client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ api_version=api_version,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, (AsyncOpenAI, AsyncAzureOpenAI)):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.alist_fine_tuning_jobs( # type: ignore
+ after=after,
+ limit=limit,
+ openai_client=openai_client,
+ )
+ verbose_logger.debug("list fine tuning job, after= %s, limit= %s", after, limit)
+ response = openai_client.fine_tuning.jobs.list(after=after, limit=limit) # type: ignore
+ return response
+
+ async def aretrieve_fine_tuning_job(
+ self,
+ fine_tuning_job_id: str,
+ openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
+ ) -> FineTuningJob:
+ response = await openai_client.fine_tuning.jobs.retrieve(
+ fine_tuning_job_id=fine_tuning_job_id
+ )
+ return response
+
+ def retrieve_fine_tuning_job(
+ self,
+ _is_async: bool,
+ fine_tuning_job_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ api_version: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = None,
+ ):
+ openai_client: Optional[
+ Union[OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI]
+ ] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ api_version=api_version,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, (AsyncOpenAI, AsyncAzureOpenAI)):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.aretrieve_fine_tuning_job( # type: ignore
+ fine_tuning_job_id=fine_tuning_job_id,
+ openai_client=openai_client,
+ )
+ verbose_logger.debug("retrieving fine tuning job, id= %s", fine_tuning_job_id)
+ response = openai_client.fine_tuning.jobs.retrieve(
+ fine_tuning_job_id=fine_tuning_job_id
+ )
+ return response
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/handler.py
new file mode 100644
index 00000000..f738115a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/handler.py
@@ -0,0 +1,244 @@
+"""
+OpenAI Image Variations Handler
+"""
+
+from typing import Callable, Optional
+
+import httpx
+from openai import AsyncOpenAI, OpenAI
+
+import litellm
+from litellm.types.utils import FileTypes, ImageResponse, LlmProviders
+from litellm.utils import ProviderConfigManager
+
+from ...base_llm.image_variations.transformation import BaseImageVariationConfig
+from ...custom_httpx.llm_http_handler import LiteLLMLoggingObj
+from ..common_utils import OpenAIError
+
+
+class OpenAIImageVariationsHandler:
+ def get_sync_client(
+ self,
+ client: Optional[OpenAI],
+ init_client_params: dict,
+ ):
+ if client is None:
+ openai_client = OpenAI(
+ **init_client_params,
+ )
+ else:
+ openai_client = client
+ return openai_client
+
+ def get_async_client(
+ self, client: Optional[AsyncOpenAI], init_client_params: dict
+ ) -> AsyncOpenAI:
+ if client is None:
+ openai_client = AsyncOpenAI(
+ **init_client_params,
+ )
+ else:
+ openai_client = client
+ return openai_client
+
+ async def async_image_variations(
+ self,
+ api_key: str,
+ api_base: str,
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ data: dict,
+ headers: dict,
+ model: Optional[str],
+ timeout: float,
+ max_retries: int,
+ logging_obj: LiteLLMLoggingObj,
+ model_response: ImageResponse,
+ optional_params: dict,
+ litellm_params: dict,
+ image: FileTypes,
+ provider_config: BaseImageVariationConfig,
+ ) -> ImageResponse:
+ try:
+ init_client_params = {
+ "api_key": api_key,
+ "base_url": api_base,
+ "http_client": litellm.client_session,
+ "timeout": timeout,
+ "max_retries": max_retries, # type: ignore
+ "organization": organization,
+ }
+
+ client = self.get_async_client(
+ client=client, init_client_params=init_client_params
+ )
+
+ raw_response = await client.images.with_raw_response.create_variation(**data) # type: ignore
+ response = raw_response.parse()
+ response_json = response.model_dump()
+
+ ## LOGGING
+ logging_obj.post_call(
+ api_key=api_key,
+ original_response=response_json,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ },
+ )
+
+ ## RESPONSE OBJECT
+ return provider_config.transform_response_image_variation(
+ model=model,
+ model_response=ImageResponse(**response_json),
+ raw_response=httpx.Response(
+ status_code=200,
+ request=httpx.Request(
+ method="GET", url="https://litellm.ai"
+ ), # mock request object
+ ),
+ logging_obj=logging_obj,
+ request_data=data,
+ image=image,
+ optional_params=optional_params,
+ litellm_params=litellm_params,
+ encoding=None,
+ api_key=api_key,
+ )
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+
+ def image_variations(
+ self,
+ model_response: ImageResponse,
+ api_key: str,
+ api_base: str,
+ model: Optional[str],
+ image: FileTypes,
+ timeout: float,
+ custom_llm_provider: str,
+ logging_obj: LiteLLMLoggingObj,
+ optional_params: dict,
+ litellm_params: dict,
+ print_verbose: Optional[Callable] = None,
+ logger_fn=None,
+ client=None,
+ organization: Optional[str] = None,
+ headers: Optional[dict] = None,
+ ) -> ImageResponse:
+ try:
+ provider_config = ProviderConfigManager.get_provider_image_variation_config(
+ model=model or "", # openai defaults to dall-e-2
+ provider=LlmProviders.OPENAI,
+ )
+
+ if provider_config is None:
+ raise ValueError(
+ f"image variation provider not found: {custom_llm_provider}."
+ )
+
+ max_retries = optional_params.pop("max_retries", 2)
+
+ data = provider_config.transform_request_image_variation(
+ model=model,
+ image=image,
+ optional_params=optional_params,
+ headers=headers or {},
+ )
+ json_data = data.get("data")
+ if not json_data:
+ raise ValueError(
+ f"data field is required, for openai image variations. Got={data}"
+ )
+ ## LOGGING
+ logging_obj.pre_call(
+ input="",
+ api_key=api_key,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ "complete_input_dict": data,
+ },
+ )
+ if litellm_params.get("async_call", False):
+ return self.async_image_variations(
+ api_base=api_base,
+ data=json_data,
+ headers=headers or {},
+ model_response=model_response,
+ api_key=api_key,
+ logging_obj=logging_obj,
+ model=model,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ provider_config=provider_config,
+ image=image,
+ optional_params=optional_params,
+ litellm_params=litellm_params,
+ ) # type: ignore
+
+ init_client_params = {
+ "api_key": api_key,
+ "base_url": api_base,
+ "http_client": litellm.client_session,
+ "timeout": timeout,
+ "max_retries": max_retries, # type: ignore
+ "organization": organization,
+ }
+
+ client = self.get_sync_client(
+ client=client, init_client_params=init_client_params
+ )
+
+ raw_response = client.images.with_raw_response.create_variation(**json_data) # type: ignore
+ response = raw_response.parse()
+ response_json = response.model_dump()
+
+ ## LOGGING
+ logging_obj.post_call(
+ api_key=api_key,
+ original_response=response_json,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ },
+ )
+
+ ## RESPONSE OBJECT
+ return provider_config.transform_response_image_variation(
+ model=model,
+ model_response=ImageResponse(**response_json),
+ raw_response=httpx.Response(
+ status_code=200,
+ request=httpx.Request(
+ method="GET", url="https://litellm.ai"
+ ), # mock request object
+ ),
+ logging_obj=logging_obj,
+ request_data=json_data,
+ image=image,
+ optional_params=optional_params,
+ litellm_params=litellm_params,
+ encoding=None,
+ api_key=api_key,
+ )
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/transformation.py
new file mode 100644
index 00000000..96d1a302
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/image_variations/transformation.py
@@ -0,0 +1,82 @@
+from typing import Any, List, Optional, Union
+
+from aiohttp import ClientResponse
+from httpx import Headers, Response
+
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.base_llm.image_variations.transformation import LiteLLMLoggingObj
+from litellm.types.llms.openai import OpenAIImageVariationOptionalParams
+from litellm.types.utils import FileTypes, HttpHandlerRequestFields, ImageResponse
+
+from ...base_llm.image_variations.transformation import BaseImageVariationConfig
+from ..common_utils import OpenAIError
+
+
+class OpenAIImageVariationConfig(BaseImageVariationConfig):
+ def get_supported_openai_params(
+ self, model: str
+ ) -> List[OpenAIImageVariationOptionalParams]:
+ return ["n", "size", "response_format", "user"]
+
+ def map_openai_params(
+ self,
+ non_default_params: dict,
+ optional_params: dict,
+ model: str,
+ drop_params: bool,
+ ) -> dict:
+ optional_params.update(non_default_params)
+ return optional_params
+
+ def transform_request_image_variation(
+ self,
+ model: Optional[str],
+ image: FileTypes,
+ optional_params: dict,
+ headers: dict,
+ ) -> HttpHandlerRequestFields:
+ return {
+ "data": {
+ "image": image,
+ **optional_params,
+ }
+ }
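+ # Illustrative note (editor's sketch, not upstream code): for a file-like
+ # object `img` and optional_params={"n": 2, "size": "1024x1024"}, the method
+ # above would return {"data": {"image": img, "n": 2, "size": "1024x1024"}},
+ # i.e. the image plus all optional params nested under the "data" key.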
+
+ async def async_transform_response_image_variation(
+ self,
+ model: Optional[str],
+ raw_response: ClientResponse,
+ model_response: ImageResponse,
+ logging_obj: LiteLLMLoggingObj,
+ request_data: dict,
+ image: FileTypes,
+ optional_params: dict,
+ litellm_params: dict,
+ encoding: Any,
+ api_key: Optional[str] = None,
+ ) -> ImageResponse:
+ return model_response
+
+ def transform_response_image_variation(
+ self,
+ model: Optional[str],
+ raw_response: Response,
+ model_response: ImageResponse,
+ logging_obj: LiteLLMLoggingObj,
+ request_data: dict,
+ image: FileTypes,
+ optional_params: dict,
+ litellm_params: dict,
+ encoding: Any,
+ api_key: Optional[str] = None,
+ ) -> ImageResponse:
+ return model_response
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, Headers]
+ ) -> BaseLLMException:
+ return OpenAIError(
+ status_code=status_code,
+ message=error_message,
+ headers=headers,
+ )
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/openai.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/openai.py
new file mode 100644
index 00000000..deb70b48
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/openai.py
@@ -0,0 +1,2870 @@
+import time
+import types
+from typing import (
+ Any,
+ AsyncIterator,
+ Callable,
+ Coroutine,
+ Iterable,
+ Iterator,
+ List,
+ Literal,
+ Optional,
+ Union,
+ cast,
+)
+from urllib.parse import urlparse
+
+import httpx
+import openai
+from openai import AsyncOpenAI, OpenAI
+from openai.types.beta.assistant_deleted import AssistantDeleted
+from openai.types.file_deleted import FileDeleted
+from pydantic import BaseModel
+from typing_extensions import overload
+
+import litellm
+from litellm import LlmProviders
+from litellm._logging import verbose_logger
+from litellm.constants import DEFAULT_MAX_RETRIES
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.logging_utils import track_llm_api_timing
+from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
+from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
+from litellm.types.utils import (
+ EmbeddingResponse,
+ ImageResponse,
+ LiteLLMBatch,
+ ModelResponse,
+ ModelResponseStream,
+)
+from litellm.utils import (
+ CustomStreamWrapper,
+ ProviderConfigManager,
+ convert_to_model_response_object,
+)
+
+from ...types.llms.openai import *
+from ..base import BaseLLM
+from .chat.o_series_transformation import OpenAIOSeriesConfig
+from .common_utils import (
+ BaseOpenAILLM,
+ OpenAIError,
+ drop_params_from_unprocessable_entity_error,
+)
+
+openaiOSeriesConfig = OpenAIOSeriesConfig()
+
+
+class MistralEmbeddingConfig:
+ """
+ Reference: https://docs.mistral.ai/api/#operation/createEmbedding
+ """
+
+ def __init__(
+ self,
+ ) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return {
+ k: v
+ for k, v in cls.__dict__.items()
+ if not k.startswith("__")
+ and not isinstance(
+ v,
+ (
+ types.FunctionType,
+ types.BuiltinFunctionType,
+ classmethod,
+ staticmethod,
+ ),
+ )
+ and v is not None
+ }
+
+ def get_supported_openai_params(self):
+ return [
+ "encoding_format",
+ ]
+
+ def map_openai_params(self, non_default_params: dict, optional_params: dict):
+ for param, value in non_default_params.items():
+ if param == "encoding_format":
+ optional_params["encoding_format"] = value
+ return optional_params
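+ # Hedged usage sketch (assumed call pattern, for illustration only):
+ #   MistralEmbeddingConfig().map_openai_params({"encoding_format": "float"}, {})
+ # returns {"encoding_format": "float"}; any other non-default param is ignored
+ # because "encoding_format" is the only supported OpenAI param here.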
+
+
+class OpenAIConfig(BaseConfig):
+ """
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
+
+ The class `OpenAIConfig` provides configuration for OpenAI's Chat API interface. Below are the parameters:
+
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
+
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
+
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
+
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
+
+ - `max_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the chat completion. OpenAI has deprecated it in favor of `max_completion_tokens`, and it is not compatible with o1 series models.
+
+ - `max_completion_tokens` (integer or null): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
+
+ - `n` (integer or null): This optional parameter sets how many chat completion choices to generate for each input message.
+
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on whether they appear in the text so far, increasing the model's likelihood of talking about new topics.
+
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
+
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
+
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
+ """
+
+ frequency_penalty: Optional[int] = None
+ function_call: Optional[Union[str, dict]] = None
+ functions: Optional[list] = None
+ logit_bias: Optional[dict] = None
+ max_completion_tokens: Optional[int] = None
+ max_tokens: Optional[int] = None
+ n: Optional[int] = None
+ presence_penalty: Optional[int] = None
+ stop: Optional[Union[str, list]] = None
+ temperature: Optional[int] = None
+ top_p: Optional[int] = None
+ response_format: Optional[dict] = None
+
+ def __init__(
+ self,
+ frequency_penalty: Optional[int] = None,
+ function_call: Optional[Union[str, dict]] = None,
+ functions: Optional[list] = None,
+ logit_bias: Optional[dict] = None,
+ max_completion_tokens: Optional[int] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[int] = None,
+ stop: Optional[Union[str, list]] = None,
+ temperature: Optional[int] = None,
+ top_p: Optional[int] = None,
+ response_format: Optional[dict] = None,
+ ) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return super().get_config()
+
+ def get_supported_openai_params(self, model: str) -> list:
+ """
+ Returns the list of supported OpenAI parameters for a given OpenAI model.
+
+ - If an o-series model, returns o-series supported params
+ - If a gpt-audio model, returns gpt-audio supported params
+ - Else, returns gpt supported params
+
+ Args:
+ model (str): OpenAI model
+
+ Returns:
+ list: List of supported openai parameters
+ """
+ if openaiOSeriesConfig.is_model_o_series_model(model=model):
+ return openaiOSeriesConfig.get_supported_openai_params(model=model)
+ elif litellm.openAIGPTAudioConfig.is_model_gpt_audio_model(model=model):
+ return litellm.openAIGPTAudioConfig.get_supported_openai_params(model=model)
+ else:
+ return litellm.openAIGPTConfig.get_supported_openai_params(model=model)
+
+ def _map_openai_params(
+ self, non_default_params: dict, optional_params: dict, model: str
+ ) -> dict:
+ supported_openai_params = self.get_supported_openai_params(model)
+ for param, value in non_default_params.items():
+ if param in supported_openai_params:
+ optional_params[param] = value
+ return optional_params
+
+ def _transform_messages(
+ self, messages: List[AllMessageValues], model: str
+ ) -> List[AllMessageValues]:
+ return messages
+
+ def map_openai_params(
+ self,
+ non_default_params: dict,
+ optional_params: dict,
+ model: str,
+ drop_params: bool,
+ ) -> dict:
+ """ """
+ if openaiOSeriesConfig.is_model_o_series_model(model=model):
+ return openaiOSeriesConfig.map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model=model,
+ drop_params=drop_params,
+ )
+ elif litellm.openAIGPTAudioConfig.is_model_gpt_audio_model(model=model):
+ return litellm.openAIGPTAudioConfig.map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model=model,
+ drop_params=drop_params,
+ )
+
+ return litellm.openAIGPTConfig.map_openai_params(
+ non_default_params=non_default_params,
+ optional_params=optional_params,
+ model=model,
+ drop_params=drop_params,
+ )
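+ # Illustrative routing sketch (editor's assumption that "o1-mini" is detected
+ # as an o-series model): map_openai_params({"max_tokens": 10}, {}, "o1-mini",
+ # drop_params=False) is delegated to the o-series config, while the same call
+ # with model "gpt-4o" falls through to litellm.openAIGPTConfig.map_openai_params.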
+
+ def get_error_class(
+ self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+ ) -> BaseLLMException:
+ return OpenAIError(
+ status_code=status_code,
+ message=error_message,
+ headers=headers,
+ )
+
+ def transform_request(
+ self,
+ model: str,
+ messages: List[AllMessageValues],
+ optional_params: dict,
+ litellm_params: dict,
+ headers: dict,
+ ) -> dict:
+ messages = self._transform_messages(messages=messages, model=model)
+ return {"model": model, "messages": messages, **optional_params}
+
+ def transform_response(
+ self,
+ model: str,
+ raw_response: httpx.Response,
+ model_response: ModelResponse,
+ logging_obj: LiteLLMLoggingObj,
+ request_data: dict,
+ messages: List[AllMessageValues],
+ optional_params: dict,
+ litellm_params: dict,
+ encoding: Any,
+ api_key: Optional[str] = None,
+ json_mode: Optional[bool] = None,
+ ) -> ModelResponse:
+
+ logging_obj.post_call(original_response=raw_response.text)
+ logging_obj.model_call_details["response_headers"] = raw_response.headers
+ final_response_obj = cast(
+ ModelResponse,
+ convert_to_model_response_object(
+ response_object=raw_response.json(),
+ model_response_object=model_response,
+ hidden_params={"headers": raw_response.headers},
+ _response_headers=dict(raw_response.headers),
+ ),
+ )
+
+ return final_response_obj
+
+ def validate_environment(
+ self,
+ headers: dict,
+ model: str,
+ messages: List[AllMessageValues],
+ optional_params: dict,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ ) -> dict:
+ return {
+ "Authorization": f"Bearer {api_key}",
+ **headers,
+ }
+
+ def get_model_response_iterator(
+ self,
+ streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
+ sync_stream: bool,
+ json_mode: Optional[bool] = False,
+ ) -> Any:
+ return OpenAIChatCompletionResponseIterator(
+ streaming_response=streaming_response,
+ sync_stream=sync_stream,
+ json_mode=json_mode,
+ )
+
+
+class OpenAIChatCompletionResponseIterator(BaseModelResponseIterator):
+ def chunk_parser(self, chunk: dict) -> ModelResponseStream:
+ """
+ {'choices': [{'delta': {'content': '', 'role': 'assistant'}, 'finish_reason': None, 'index': 0, 'logprobs': None}], 'created': 1735763082, 'id': 'a83a2b0fbfaf4aab9c2c93cb8ba346d7', 'model': 'mistral-large', 'object': 'chat.completion.chunk'}
+ """
+ try:
+ return ModelResponseStream(**chunk)
+ except Exception as e:
+ raise e
+
+
+class OpenAIChatCompletion(BaseLLM, BaseOpenAILLM):
+
+ def __init__(self) -> None:
+ super().__init__()
+
+ def _set_dynamic_params_on_client(
+ self,
+ client: Union[OpenAI, AsyncOpenAI],
+ organization: Optional[str] = None,
+ max_retries: Optional[int] = None,
+ ):
+ if organization is not None:
+ client.organization = organization
+ if max_retries is not None:
+ client.max_retries = max_retries
+
+ def _get_openai_client(
+ self,
+ is_async: bool,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ timeout: Union[float, httpx.Timeout] = httpx.Timeout(None),
+ max_retries: Optional[int] = DEFAULT_MAX_RETRIES,
+ organization: Optional[str] = None,
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ) -> Optional[Union[OpenAI, AsyncOpenAI]]:
+ client_initialization_params: Dict = locals()
+ if client is None:
+ if not isinstance(max_retries, int):
+ raise OpenAIError(
+ status_code=422,
+ message="max retries must be an int. Passed in value: {}".format(
+ max_retries
+ ),
+ )
+ cached_client = self.get_cached_openai_client(
+ client_initialization_params=client_initialization_params,
+ client_type="openai",
+ )
+
+ if cached_client:
+ if isinstance(cached_client, OpenAI) or isinstance(
+ cached_client, AsyncOpenAI
+ ):
+ return cached_client
+ if is_async:
+ _new_client: Union[OpenAI, AsyncOpenAI] = AsyncOpenAI(
+ api_key=api_key,
+ base_url=api_base,
+ http_client=OpenAIChatCompletion._get_async_http_client(),
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ )
+ else:
+ _new_client = OpenAI(
+ api_key=api_key,
+ base_url=api_base,
+ http_client=OpenAIChatCompletion._get_sync_http_client(),
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ )
+
+ ## SAVE CACHE KEY
+ self.set_cached_openai_client(
+ openai_client=_new_client,
+ client_initialization_params=client_initialization_params,
+ client_type="openai",
+ )
+ return _new_client
+
+ else:
+ self._set_dynamic_params_on_client(
+ client=client,
+ organization=organization,
+ max_retries=max_retries,
+ )
+ return client
+
+ @track_llm_api_timing()
+ async def make_openai_chat_completion_request(
+ self,
+ openai_aclient: AsyncOpenAI,
+ data: dict,
+ timeout: Union[float, httpx.Timeout],
+ logging_obj: LiteLLMLoggingObj,
+ ) -> Tuple[dict, BaseModel]:
+ """
+ Helper that calls chat.completions.with_raw_response.create and returns the
+ response headers alongside the parsed response.
+ """
+ start_time = time.time()
+ try:
+ raw_response = (
+ await openai_aclient.chat.completions.with_raw_response.create(
+ **data, timeout=timeout
+ )
+ )
+ end_time = time.time()
+
+ if hasattr(raw_response, "headers"):
+ headers = dict(raw_response.headers)
+ else:
+ headers = {}
+ response = raw_response.parse()
+ return headers, response
+ except openai.APITimeoutError as e:
+ end_time = time.time()
+ time_delta = round(end_time - start_time, 2)
+ e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+ raise e
+ except Exception as e:
+ raise e
+
+ @track_llm_api_timing()
+ def make_sync_openai_chat_completion_request(
+ self,
+ openai_client: OpenAI,
+ data: dict,
+ timeout: Union[float, httpx.Timeout],
+ logging_obj: LiteLLMLoggingObj,
+ ) -> Tuple[dict, BaseModel]:
+ """
+ Helper that calls chat.completions.with_raw_response.create and returns the
+ response headers alongside the parsed response.
+ """
+ raw_response = None
+ try:
+ raw_response = openai_client.chat.completions.with_raw_response.create(
+ **data, timeout=timeout
+ )
+
+ if hasattr(raw_response, "headers"):
+ headers = dict(raw_response.headers)
+ else:
+ headers = {}
+ response = raw_response.parse()
+ return headers, response
+ except Exception as e:
+ if raw_response is not None:
+ raise Exception(
+ "error - {}, Received response - {}, Type of response - {}".format(
+ e, raw_response, type(raw_response)
+ )
+ )
+ else:
+ raise e
+
+ def mock_streaming(
+ self,
+ response: ModelResponse,
+ logging_obj: LiteLLMLoggingObj,
+ model: str,
+ stream_options: Optional[dict] = None,
+ ) -> CustomStreamWrapper:
+ completion_stream = MockResponseIterator(model_response=response)
+ streaming_response = CustomStreamWrapper(
+ completion_stream=completion_stream,
+ model=model,
+ custom_llm_provider="openai",
+ logging_obj=logging_obj,
+ stream_options=stream_options,
+ )
+
+ return streaming_response
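+ # Editorial note (hedged): mock_streaming is used when fake_stream is True,
+ # i.e. the provider returned a complete ModelResponse but the caller asked for
+ # a stream; MockResponseIterator replays that single response as chunks through
+ # CustomStreamWrapper so downstream streaming consumers work unchanged.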
+
+ def completion( # type: ignore # noqa: PLR0915
+ self,
+ model_response: ModelResponse,
+ timeout: Union[float, httpx.Timeout],
+ optional_params: dict,
+ litellm_params: dict,
+ logging_obj: Any,
+ model: Optional[str] = None,
+ messages: Optional[list] = None,
+ print_verbose: Optional[Callable] = None,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ dynamic_params: Optional[bool] = None,
+ azure_ad_token: Optional[str] = None,
+ acompletion: bool = False,
+ logger_fn=None,
+ headers: Optional[dict] = None,
+ custom_prompt_dict: dict = {},
+ client=None,
+ organization: Optional[str] = None,
+ custom_llm_provider: Optional[str] = None,
+ drop_params: Optional[bool] = None,
+ ):
+
+ super().completion()
+ try:
+ fake_stream: bool = False
+ inference_params = optional_params.copy()
+ stream_options: Optional[dict] = inference_params.pop(
+ "stream_options", None
+ )
+ stream: Optional[bool] = inference_params.pop("stream", False)
+ provider_config: Optional[BaseConfig] = None
+
+ if custom_llm_provider is not None and model is not None:
+ provider_config = ProviderConfigManager.get_provider_chat_config(
+ model=model, provider=LlmProviders(custom_llm_provider)
+ )
+
+ if provider_config:
+ fake_stream = provider_config.should_fake_stream(
+ model=model, custom_llm_provider=custom_llm_provider, stream=stream
+ )
+
+ if headers:
+ inference_params["extra_headers"] = headers
+ if model is None or messages is None:
+ raise OpenAIError(status_code=422, message="Missing model or messages")
+
+ if not isinstance(timeout, float) and not isinstance(
+ timeout, httpx.Timeout
+ ):
+ raise OpenAIError(
+ status_code=422,
+ message="Timeout needs to be a float or httpx.Timeout",
+ )
+
+ if custom_llm_provider is not None and custom_llm_provider != "openai":
+ model_response.model = f"{custom_llm_provider}/{model}"
+
+ for _ in range(
+ 2
+ ): # if the call fails because roles must alternate, retry once with reformatted messages
+
+ if provider_config is not None:
+ data = provider_config.transform_request(
+ model=model,
+ messages=messages,
+ optional_params=inference_params,
+ litellm_params=litellm_params,
+ headers=headers or {},
+ )
+ else:
+ data = OpenAIConfig().transform_request(
+ model=model,
+ messages=messages,
+ optional_params=inference_params,
+ litellm_params=litellm_params,
+ headers=headers or {},
+ )
+ try:
+ max_retries = data.pop("max_retries", 2)
+ if acompletion is True:
+ if stream is True and fake_stream is False:
+ return self.async_streaming(
+ logging_obj=logging_obj,
+ headers=headers,
+ data=data,
+ model=model,
+ api_base=api_base,
+ api_key=api_key,
+ api_version=api_version,
+ timeout=timeout,
+ client=client,
+ max_retries=max_retries,
+ organization=organization,
+ drop_params=drop_params,
+ stream_options=stream_options,
+ )
+ else:
+ return self.acompletion(
+ data=data,
+ headers=headers,
+ model=model,
+ logging_obj=logging_obj,
+ model_response=model_response,
+ api_base=api_base,
+ api_key=api_key,
+ api_version=api_version,
+ timeout=timeout,
+ client=client,
+ max_retries=max_retries,
+ organization=organization,
+ drop_params=drop_params,
+ fake_stream=fake_stream,
+ )
+ elif stream is True and fake_stream is False:
+ return self.streaming(
+ logging_obj=logging_obj,
+ headers=headers,
+ data=data,
+ model=model,
+ api_base=api_base,
+ api_key=api_key,
+ api_version=api_version,
+ timeout=timeout,
+ client=client,
+ max_retries=max_retries,
+ organization=organization,
+ stream_options=stream_options,
+ )
+ else:
+ if not isinstance(max_retries, int):
+ raise OpenAIError(
+ status_code=422, message="max retries must be an int"
+ )
+ openai_client: OpenAI = self._get_openai_client( # type: ignore
+ is_async=False,
+ api_key=api_key,
+ api_base=api_base,
+ api_version=api_version,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ ## LOGGING
+ logging_obj.pre_call(
+ input=messages,
+ api_key=openai_client.api_key,
+ additional_args={
+ "headers": headers,
+ "api_base": openai_client._base_url._uri_reference,
+ "acompletion": acompletion,
+ "complete_input_dict": data,
+ },
+ )
+
+ headers, response = (
+ self.make_sync_openai_chat_completion_request(
+ openai_client=openai_client,
+ data=data,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ )
+ )
+
+ logging_obj.model_call_details["response_headers"] = headers
+ stringified_response = response.model_dump()
+ logging_obj.post_call(
+ input=messages,
+ api_key=api_key,
+ original_response=stringified_response,
+ additional_args={"complete_input_dict": data},
+ )
+
+ final_response_obj = convert_to_model_response_object(
+ response_object=stringified_response,
+ model_response_object=model_response,
+ _response_headers=headers,
+ )
+ if fake_stream is True:
+ return self.mock_streaming(
+ response=cast(ModelResponse, final_response_obj),
+ logging_obj=logging_obj,
+ model=model,
+ stream_options=stream_options,
+ )
+
+ return final_response_obj
+ except openai.UnprocessableEntityError as e:
+ ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
+ if litellm.drop_params is True or drop_params is True:
+ inference_params = drop_params_from_unprocessable_entity_error(
+ e, inference_params
+ )
+ else:
+ raise e
+ # e.message
+ except Exception as e:
+ if print_verbose is not None:
+ print_verbose(f"openai.py: Received openai error - {str(e)}")
+ if (
+ "Conversation roles must alternate user/assistant" in str(e)
+ or "user and assistant roles should be alternating" in str(e)
+ ) and messages is not None:
+ if print_verbose is not None:
+ print_verbose("openai.py: REFORMATS THE MESSAGE!")
+ # reformat messages to ensure user/assistant roles alternate: if there are 2 consecutive 'user' or 2 consecutive 'assistant' messages, insert a blank message with the opposite role to ensure compatibility
+ new_messages = []
+ for i in range(len(messages) - 1): # type: ignore
+ new_messages.append(messages[i])
+ if messages[i]["role"] == messages[i + 1]["role"]:
+ if messages[i]["role"] == "user":
+ new_messages.append(
+ {"role": "assistant", "content": ""}
+ )
+ else:
+ new_messages.append({"role": "user", "content": ""})
+ new_messages.append(messages[-1])
+ messages = new_messages
+ elif (
+ "Last message must have role `user`" in str(e)
+ ) and messages is not None:
+ new_messages = messages
+ new_messages.append({"role": "user", "content": ""})
+ messages = new_messages
+ elif "unknown field: parameter index is not a valid field" in str(
+ e
+ ):
+ litellm.remove_index_from_tool_calls(messages=messages)
+ else:
+ raise e
+ except OpenAIError as e:
+ raise e
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ error_body = getattr(e, "body", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code,
+ message=error_text,
+ headers=error_headers,
+ body=error_body,
+ )
+
+ async def acompletion(
+ self,
+ data: dict,
+ model: str,
+ model_response: ModelResponse,
+ logging_obj: LiteLLMLoggingObj,
+ timeout: Union[float, httpx.Timeout],
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ organization: Optional[str] = None,
+ client=None,
+ max_retries=None,
+ headers=None,
+ drop_params: Optional[bool] = None,
+ stream_options: Optional[dict] = None,
+ fake_stream: bool = False,
+ ):
+ response = None
+ for _ in range(
+ 2
+ ): # if the call fails because roles must alternate, retry once with reformatted messages
+
+ try:
+ openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore
+ is_async=True,
+ api_key=api_key,
+ api_base=api_base,
+ api_version=api_version,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ ## LOGGING
+ logging_obj.pre_call(
+ input=data["messages"],
+ api_key=openai_aclient.api_key,
+ additional_args={
+ "headers": {
+ "Authorization": f"Bearer {openai_aclient.api_key}"
+ },
+ "api_base": openai_aclient._base_url._uri_reference,
+ "acompletion": True,
+ "complete_input_dict": data,
+ },
+ )
+
+ headers, response = await self.make_openai_chat_completion_request(
+ openai_aclient=openai_aclient,
+ data=data,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ )
+ stringified_response = response.model_dump()
+
+ logging_obj.post_call(
+ input=data["messages"],
+ api_key=api_key,
+ original_response=stringified_response,
+ additional_args={"complete_input_dict": data},
+ )
+ logging_obj.model_call_details["response_headers"] = headers
+ final_response_obj = convert_to_model_response_object(
+ response_object=stringified_response,
+ model_response_object=model_response,
+ hidden_params={"headers": headers},
+ _response_headers=headers,
+ )
+
+ if fake_stream is True:
+ return self.mock_streaming(
+ response=cast(ModelResponse, final_response_obj),
+ logging_obj=logging_obj,
+ model=model,
+ stream_options=stream_options,
+ )
+
+ return final_response_obj
+ except openai.UnprocessableEntityError as e:
+ ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
+ if litellm.drop_params is True or drop_params is True:
+ data = drop_params_from_unprocessable_entity_error(e, data)
+ else:
+ raise e
+ # e.message
+ except Exception as e:
+ exception_response = getattr(e, "response", None)
+ status_code = getattr(e, "status_code", 500)
+ exception_body = getattr(e, "body", None)
+ error_headers = getattr(e, "headers", None)
+ if error_headers is None and exception_response:
+ error_headers = getattr(exception_response, "headers", None)
+ message = getattr(e, "message", str(e))
+
+ raise OpenAIError(
+ status_code=status_code,
+ message=message,
+ headers=error_headers,
+ body=exception_body,
+ )
+
+ def streaming(
+ self,
+ logging_obj,
+ timeout: Union[float, httpx.Timeout],
+ data: dict,
+ model: str,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ organization: Optional[str] = None,
+ client=None,
+ max_retries=None,
+ headers=None,
+ stream_options: Optional[dict] = None,
+ ):
+ data["stream"] = True
+ data.update(
+ self.get_stream_options(stream_options=stream_options, api_base=api_base)
+ )
+
+ openai_client: OpenAI = self._get_openai_client( # type: ignore
+ is_async=False,
+ api_key=api_key,
+ api_base=api_base,
+ api_version=api_version,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ ## LOGGING
+ logging_obj.pre_call(
+ input=data["messages"],
+ api_key=api_key,
+ additional_args={
+ "headers": {"Authorization": f"Bearer {openai_client.api_key}"},
+ "api_base": openai_client._base_url._uri_reference,
+ "acompletion": False,
+ "complete_input_dict": data,
+ },
+ )
+ headers, response = self.make_sync_openai_chat_completion_request(
+ openai_client=openai_client,
+ data=data,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ )
+
+ logging_obj.model_call_details["response_headers"] = headers
+ streamwrapper = CustomStreamWrapper(
+ completion_stream=response,
+ model=model,
+ custom_llm_provider="openai",
+ logging_obj=logging_obj,
+ stream_options=data.get("stream_options", None),
+ _response_headers=headers,
+ )
+ return streamwrapper
+
+ async def async_streaming(
+ self,
+ timeout: Union[float, httpx.Timeout],
+ data: dict,
+ model: str,
+ logging_obj: LiteLLMLoggingObj,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ api_version: Optional[str] = None,
+ organization: Optional[str] = None,
+ client=None,
+ max_retries=None,
+ headers=None,
+ drop_params: Optional[bool] = None,
+ stream_options: Optional[dict] = None,
+ ):
+ response = None
+ data["stream"] = True
+ data.update(
+ self.get_stream_options(stream_options=stream_options, api_base=api_base)
+ )
+ for _ in range(2):
+ try:
+ openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore
+ is_async=True,
+ api_key=api_key,
+ api_base=api_base,
+ api_version=api_version,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ ## LOGGING
+ logging_obj.pre_call(
+ input=data["messages"],
+ api_key=api_key,
+ additional_args={
+ "headers": headers,
+ "api_base": api_base,
+ "acompletion": True,
+ "complete_input_dict": data,
+ },
+ )
+
+ headers, response = await self.make_openai_chat_completion_request(
+ openai_aclient=openai_aclient,
+ data=data,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ )
+ logging_obj.model_call_details["response_headers"] = headers
+ streamwrapper = CustomStreamWrapper(
+ completion_stream=response,
+ model=model,
+ custom_llm_provider="openai",
+ logging_obj=logging_obj,
+ stream_options=data.get("stream_options", None),
+ _response_headers=headers,
+ )
+ return streamwrapper
+ except openai.UnprocessableEntityError as e:
+ ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
+ if litellm.drop_params is True or drop_params is True:
+ data = drop_params_from_unprocessable_entity_error(e, data)
+ else:
+ raise e
+ except (
+ Exception
+ ) as e: # need to handle exceptions here; async exceptions don't get caught in sync functions.
+
+ if isinstance(e, OpenAIError):
+ raise e
+
+ error_headers = getattr(e, "headers", None)
+ status_code = getattr(e, "status_code", 500)
+ error_response = getattr(e, "response", None)
+ exception_body = getattr(e, "body", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ if response is not None and hasattr(response, "text"):
+ raise OpenAIError(
+ status_code=status_code,
+ message=f"{str(e)}\n\nOriginal Response: {response.text}", # type: ignore
+ headers=error_headers,
+ body=exception_body,
+ )
+ else:
+ if type(e).__name__ == "ReadTimeout":
+ raise OpenAIError(
+ status_code=408,
+ message=f"{type(e).__name__}",
+ headers=error_headers,
+ body=exception_body,
+ )
+ elif hasattr(e, "status_code"):
+ raise OpenAIError(
+ status_code=getattr(e, "status_code", 500),
+ message=str(e),
+ headers=error_headers,
+ body=exception_body,
+ )
+ else:
+ raise OpenAIError(
+ status_code=500,
+ message=f"{str(e)}",
+ headers=error_headers,
+ body=exception_body,
+ )
+
+ def get_stream_options(
+ self, stream_options: Optional[dict], api_base: Optional[str]
+ ) -> dict:
+ """
+ Pass `stream_options` to the data dict for OpenAI requests
+ """
+ if stream_options is not None:
+ return {"stream_options": stream_options}
+ else:
+ # by default litellm will include usage for openai endpoints
+ if api_base is None or urlparse(api_base).hostname == "api.openai.com":
+ return {"stream_options": {"include_usage": True}}
+ return {}
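+ # Illustrative examples of the branches above (assumed inputs, sketch only):
+ #   self.get_stream_options(None, "https://api.openai.com/v1") -> {"stream_options": {"include_usage": True}}
+ #   self.get_stream_options(None, "https://my-proxy.example/v1") -> {}
+ #   self.get_stream_options({"include_usage": False}, None) -> {"stream_options": {"include_usage": False}}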
+
+ # Embedding
+ @track_llm_api_timing()
+ async def make_openai_embedding_request(
+ self,
+ openai_aclient: AsyncOpenAI,
+ data: dict,
+ timeout: Union[float, httpx.Timeout],
+ logging_obj: LiteLLMLoggingObj,
+ ):
+ """
+ Helper that calls embeddings.with_raw_response.create and returns the
+ response headers alongside the parsed response.
+ """
+ try:
+ raw_response = await openai_aclient.embeddings.with_raw_response.create(
+ **data, timeout=timeout
+ ) # type: ignore
+ headers = dict(raw_response.headers)
+ response = raw_response.parse()
+ return headers, response
+ except Exception as e:
+ raise e
+
+ @track_llm_api_timing()
+ def make_sync_openai_embedding_request(
+ self,
+ openai_client: OpenAI,
+ data: dict,
+ timeout: Union[float, httpx.Timeout],
+ logging_obj: LiteLLMLoggingObj,
+ ):
+ """
+ Helper that calls embeddings.with_raw_response.create and returns the
+ response headers alongside the parsed response.
+ """
+ try:
+ raw_response = openai_client.embeddings.with_raw_response.create(
+ **data, timeout=timeout
+ ) # type: ignore
+
+ headers = dict(raw_response.headers)
+ response = raw_response.parse()
+ return headers, response
+ except Exception as e:
+ raise e
+
+ async def aembedding(
+ self,
+ input: list,
+ data: dict,
+ model_response: EmbeddingResponse,
+ timeout: float,
+ logging_obj: LiteLLMLoggingObj,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ client: Optional[AsyncOpenAI] = None,
+ max_retries=None,
+ ):
+ try:
+ openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore
+ is_async=True,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+ headers, response = await self.make_openai_embedding_request(
+ openai_aclient=openai_aclient,
+ data=data,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ )
+ logging_obj.model_call_details["response_headers"] = headers
+ stringified_response = response.model_dump()
+ ## LOGGING
+ logging_obj.post_call(
+ input=input,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=stringified_response,
+ )
+ returned_response: EmbeddingResponse = convert_to_model_response_object(
+ response_object=stringified_response,
+ model_response_object=model_response,
+ response_type="embedding",
+ _response_headers=headers,
+ ) # type: ignore
+ return returned_response
+ except OpenAIError as e:
+ ## LOGGING
+ logging_obj.post_call(
+ input=input,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=str(e),
+ )
+ raise e
+ except Exception as e:
+ ## LOGGING
+ logging_obj.post_call(
+ input=input,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=str(e),
+ )
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+
+ def embedding( # type: ignore
+ self,
+ model: str,
+ input: list,
+ timeout: float,
+ logging_obj,
+ model_response: EmbeddingResponse,
+ optional_params: dict,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ client=None,
+ aembedding=None,
+ max_retries: Optional[int] = None,
+ ) -> EmbeddingResponse:
+ super().embedding()
+ try:
+ model = model
+ data = {"model": model, "input": input, **optional_params}
+ max_retries = max_retries or litellm.DEFAULT_MAX_RETRIES
+ if not isinstance(max_retries, int):
+ raise OpenAIError(status_code=422, message="max retries must be an int")
+ ## LOGGING
+ logging_obj.pre_call(
+ input=input,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data, "api_base": api_base},
+ )
+
+ if aembedding is True:
+ return self.aembedding( # type: ignore
+ data=data,
+ input=input,
+ logging_obj=logging_obj,
+ model_response=model_response,
+ api_base=api_base,
+ api_key=api_key,
+ timeout=timeout,
+ client=client,
+ max_retries=max_retries,
+ )
+
+ openai_client: OpenAI = self._get_openai_client( # type: ignore
+ is_async=False,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+
+ ## embedding CALL
+ headers: Optional[Dict] = None
+ headers, sync_embedding_response = self.make_sync_openai_embedding_request(
+ openai_client=openai_client,
+ data=data,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ ) # type: ignore
+
+ ## LOGGING
+ logging_obj.model_call_details["response_headers"] = headers
+ logging_obj.post_call(
+ input=input,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=sync_embedding_response,
+ )
+ response: EmbeddingResponse = convert_to_model_response_object(
+ response_object=sync_embedding_response.model_dump(),
+ model_response_object=model_response,
+ _response_headers=headers,
+ response_type="embedding",
+ ) # type: ignore
+ return response
+ except OpenAIError as e:
+ raise e
+ except Exception as e:
+ status_code = getattr(e, "status_code", 500)
+ error_headers = getattr(e, "headers", None)
+ error_text = getattr(e, "text", str(e))
+ error_response = getattr(e, "response", None)
+ if error_headers is None and error_response:
+ error_headers = getattr(error_response, "headers", None)
+ raise OpenAIError(
+ status_code=status_code, message=error_text, headers=error_headers
+ )
+
+ async def aimage_generation(
+ self,
+ prompt: str,
+ data: dict,
+ model_response: ModelResponse,
+ timeout: float,
+ logging_obj: Any,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ client=None,
+ max_retries=None,
+ ):
+ response = None
+ try:
+
+ openai_aclient = self._get_openai_client(
+ is_async=True,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+
+ response = await openai_aclient.images.generate(**data, timeout=timeout) # type: ignore
+ stringified_response = response.model_dump()
+ ## LOGGING
+ logging_obj.post_call(
+ input=prompt,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=stringified_response,
+ )
+ return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="image_generation") # type: ignore
+ except Exception as e:
+ ## LOGGING
+ logging_obj.post_call(
+ input=prompt,
+ api_key=api_key,
+ original_response=str(e),
+ )
+ raise e
+
+ def image_generation(
+ self,
+ model: Optional[str],
+ prompt: str,
+ timeout: float,
+ optional_params: dict,
+ logging_obj: Any,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ model_response: Optional[ImageResponse] = None,
+ client=None,
+ aimg_generation=None,
+ ) -> ImageResponse:
+ data = {}
+ try:
+ model = model
+ data = {"model": model, "prompt": prompt, **optional_params}
+ max_retries = data.pop("max_retries", 2)
+ if not isinstance(max_retries, int):
+ raise OpenAIError(status_code=422, message="max retries must be an int")
+
+ if aimg_generation is True:
+ return self.aimage_generation(data=data, prompt=prompt, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries) # type: ignore
+
+ openai_client: OpenAI = self._get_openai_client( # type: ignore
+ is_async=False,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+
+ ## LOGGING
+ logging_obj.pre_call(
+ input=prompt,
+ api_key=openai_client.api_key,
+ additional_args={
+ "headers": {"Authorization": f"Bearer {openai_client.api_key}"},
+ "api_base": openai_client._base_url._uri_reference,
+ "acompletion": True,
+ "complete_input_dict": data,
+ },
+ )
+
+ ## COMPLETION CALL
+ _response = openai_client.images.generate(**data, timeout=timeout) # type: ignore
+
+ response = _response.model_dump()
+ ## LOGGING
+ logging_obj.post_call(
+ input=prompt,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=response,
+ )
+ return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation") # type: ignore
+ except OpenAIError as e:
+
+ ## LOGGING
+ logging_obj.post_call(
+ input=prompt,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=str(e),
+ )
+ raise e
+ except Exception as e:
+ ## LOGGING
+ logging_obj.post_call(
+ input=prompt,
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=str(e),
+ )
+ if hasattr(e, "status_code"):
+ raise OpenAIError(
+ status_code=getattr(e, "status_code", 500), message=str(e)
+ )
+ else:
+ raise OpenAIError(status_code=500, message=str(e))
+
+ def audio_speech(
+ self,
+ model: str,
+ input: str,
+ voice: str,
+ optional_params: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ organization: Optional[str],
+ project: Optional[str],
+ max_retries: int,
+ timeout: Union[float, httpx.Timeout],
+ aspeech: Optional[bool] = None,
+ client=None,
+ ) -> HttpxBinaryResponseContent:
+
+ if aspeech is not None and aspeech is True:
+ return self.async_audio_speech(
+ model=model,
+ input=input,
+ voice=voice,
+ optional_params=optional_params,
+ api_key=api_key,
+ api_base=api_base,
+ organization=organization,
+ project=project,
+ max_retries=max_retries,
+ timeout=timeout,
+ client=client,
+ ) # type: ignore
+
+ openai_client = self._get_openai_client(
+ is_async=False,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+
+ response = cast(OpenAI, openai_client).audio.speech.create(
+ model=model,
+ voice=voice, # type: ignore
+ input=input,
+ **optional_params,
+ )
+ return HttpxBinaryResponseContent(response=response.response)
+
+ async def async_audio_speech(
+ self,
+ model: str,
+ input: str,
+ voice: str,
+ optional_params: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ organization: Optional[str],
+ project: Optional[str],
+ max_retries: int,
+ timeout: Union[float, httpx.Timeout],
+ client=None,
+ ) -> HttpxBinaryResponseContent:
+
+ openai_client = cast(
+ AsyncOpenAI,
+ self._get_openai_client(
+ is_async=True,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ ),
+ )
+
+ response = await openai_client.audio.speech.create(
+ model=model,
+ voice=voice, # type: ignore
+ input=input,
+ **optional_params,
+ )
+
+ return HttpxBinaryResponseContent(response=response.response)
+
+
+class OpenAIFilesAPI(BaseLLM):
+ """
+ OpenAI methods to support files:
+ - create_file()
+ - retrieve_file()
+ - list_files()
+ - delete_file()
+ - file_content()
+ - update_file()
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+
+ def get_openai_client(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ _is_async: bool = False,
+ ) -> Optional[Union[OpenAI, AsyncOpenAI]]:
+ received_args = locals()
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
+ if client is None:
+ data = {}
+ for k, v in received_args.items():
+ if k == "self" or k == "client" or k == "_is_async":
+ pass
+ elif k == "api_base" and v is not None:
+ data["base_url"] = v
+ elif v is not None:
+ data[k] = v
+ if _is_async is True:
+ openai_client = AsyncOpenAI(**data)
+ else:
+ openai_client = OpenAI(**data) # type: ignore
+ else:
+ openai_client = client
+
+ return openai_client
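+ # Hedged sketch of the argument mapping above (example values are assumptions):
+ # get_openai_client(api_key="sk-...", api_base="https://example.com/v1",
+ #                   timeout=600.0, max_retries=None, organization=None)
+ # builds OpenAI(api_key="sk-...", base_url="https://example.com/v1", timeout=600.0);
+ # self/client/_is_async and any None-valued args are dropped from the kwargs.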
+
+ async def acreate_file(
+ self,
+ create_file_data: CreateFileRequest,
+ openai_client: AsyncOpenAI,
+ ) -> FileObject:
+ response = await openai_client.files.create(**create_file_data)
+ return response
+
+ def create_file(
+ self,
+ _is_async: bool,
+ create_file_data: CreateFileRequest,
+ api_base: str,
+ api_key: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ) -> Union[FileObject, Coroutine[Any, Any, FileObject]]:
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.acreate_file( # type: ignore
+ create_file_data=create_file_data, openai_client=openai_client
+ )
+ response = openai_client.files.create(**create_file_data)
+ return response
+
+ async def afile_content(
+ self,
+ file_content_request: FileContentRequest,
+ openai_client: AsyncOpenAI,
+ ) -> HttpxBinaryResponseContent:
+ response = await openai_client.files.content(**file_content_request)
+ return HttpxBinaryResponseContent(response=response.response)
+
+ def file_content(
+ self,
+ _is_async: bool,
+ file_content_request: FileContentRequest,
+ api_base: str,
+ api_key: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ) -> Union[
+ HttpxBinaryResponseContent, Coroutine[Any, Any, HttpxBinaryResponseContent]
+ ]:
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.afile_content( # type: ignore
+ file_content_request=file_content_request,
+ openai_client=openai_client,
+ )
+ response = cast(OpenAI, openai_client).files.content(**file_content_request)
+
+ return HttpxBinaryResponseContent(response=response.response)
+
+ async def aretrieve_file(
+ self,
+ file_id: str,
+ openai_client: AsyncOpenAI,
+ ) -> FileObject:
+ response = await openai_client.files.retrieve(file_id=file_id)
+ return response
+
+ def retrieve_file(
+ self,
+ _is_async: bool,
+ file_id: str,
+ api_base: str,
+ api_key: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ):
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.aretrieve_file( # type: ignore
+ file_id=file_id,
+ openai_client=openai_client,
+ )
+ response = openai_client.files.retrieve(file_id=file_id)
+
+ return response
+
+ async def adelete_file(
+ self,
+ file_id: str,
+ openai_client: AsyncOpenAI,
+ ) -> FileDeleted:
+ response = await openai_client.files.delete(file_id=file_id)
+ return response
+
+ def delete_file(
+ self,
+ _is_async: bool,
+ file_id: str,
+ api_base: str,
+ api_key: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ):
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.adelete_file( # type: ignore
+ file_id=file_id,
+ openai_client=openai_client,
+ )
+ response = openai_client.files.delete(file_id=file_id)
+
+ return response
+
+ async def alist_files(
+ self,
+ openai_client: AsyncOpenAI,
+ purpose: Optional[str] = None,
+ ):
+ if isinstance(purpose, str):
+ response = await openai_client.files.list(purpose=purpose)
+ else:
+ response = await openai_client.files.list()
+ return response
+
+ def list_files(
+ self,
+ _is_async: bool,
+ api_base: str,
+ api_key: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ purpose: Optional[str] = None,
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ):
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.alist_files( # type: ignore
+ purpose=purpose,
+ openai_client=openai_client,
+ )
+
+ if isinstance(purpose, str):
+ response = openai_client.files.list(purpose=purpose)
+ else:
+ response = openai_client.files.list()
+
+ return response
+
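Every wrapper in this file follows the same sync/async dispatch pattern: the caller passes `_is_async`, the method builds an `OpenAI` or `AsyncOpenAI` client via `get_openai_client`, and then either calls the SDK directly or returns the coroutine from the `a`-prefixed helper. The following is a minimal sketch of driving `retrieve_file` both ways; the class name `OpenAIFilesAPI` (defined earlier in this diff), the file id, and the key are assumptions for illustration only.

```
import asyncio

# Assumed import: the files wrapper class defined earlier in this file.
from litellm.llms.openai.openai import OpenAIFilesAPI

files_api = OpenAIFilesAPI()

common_kwargs = dict(
    file_id="file-abc123",                  # placeholder id
    api_base="https://api.openai.com/v1",
    api_key="sk-...",                       # placeholder key
    timeout=600.0,
    max_retries=2,
    organization=None,
)

# Sync path: a FileObject is returned directly.
file_obj = files_api.retrieve_file(_is_async=False, **common_kwargs)

# Async path: the same method returns a coroutine for the caller to await.
file_obj = asyncio.run(files_api.retrieve_file(_is_async=True, **common_kwargs))
```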
+
+class OpenAIBatchesAPI(BaseLLM):
+ """
+ OpenAI methods to support batches
+ - create_batch()
+ - retrieve_batch()
+ - cancel_batch()
+ - list_batches()
+ """
+
+ def __init__(self) -> None:
+ super().__init__()
+
+ def get_openai_client(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ _is_async: bool = False,
+ ) -> Optional[Union[OpenAI, AsyncOpenAI]]:
+ received_args = locals()
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = None
+ if client is None:
+ data = {}
+ for k, v in received_args.items():
+ if k == "self" or k == "client" or k == "_is_async":
+ pass
+ elif k == "api_base" and v is not None:
+ data["base_url"] = v
+ elif v is not None:
+ data[k] = v
+ if _is_async is True:
+ openai_client = AsyncOpenAI(**data)
+ else:
+ openai_client = OpenAI(**data) # type: ignore
+ else:
+ openai_client = client
+
+ return openai_client
+
+ async def acreate_batch(
+ self,
+ create_batch_data: CreateBatchRequest,
+ openai_client: AsyncOpenAI,
+ ) -> LiteLLMBatch:
+ response = await openai_client.batches.create(**create_batch_data)
+ return LiteLLMBatch(**response.model_dump())
+
+ def create_batch(
+ self,
+ _is_async: bool,
+ create_batch_data: CreateBatchRequest,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.acreate_batch( # type: ignore
+ create_batch_data=create_batch_data, openai_client=openai_client
+ )
+ response = cast(OpenAI, openai_client).batches.create(**create_batch_data)
+
+ return LiteLLMBatch(**response.model_dump())
+
+ async def aretrieve_batch(
+ self,
+ retrieve_batch_data: RetrieveBatchRequest,
+ openai_client: AsyncOpenAI,
+ ) -> LiteLLMBatch:
+ verbose_logger.debug("retrieving batch, args= %s", retrieve_batch_data)
+ response = await openai_client.batches.retrieve(**retrieve_batch_data)
+ return LiteLLMBatch(**response.model_dump())
+
+ def retrieve_batch(
+ self,
+ _is_async: bool,
+ retrieve_batch_data: RetrieveBatchRequest,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ):
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.aretrieve_batch( # type: ignore
+ retrieve_batch_data=retrieve_batch_data, openai_client=openai_client
+ )
+ response = cast(OpenAI, openai_client).batches.retrieve(**retrieve_batch_data)
+ return LiteLLMBatch(**response.model_dump())
+
+ async def acancel_batch(
+ self,
+ cancel_batch_data: CancelBatchRequest,
+ openai_client: AsyncOpenAI,
+ ) -> Batch:
+ verbose_logger.debug("async cancelling batch, args= %s", cancel_batch_data)
+ response = await openai_client.batches.cancel(**cancel_batch_data)
+ return response
+
+ def cancel_batch(
+ self,
+ _is_async: bool,
+ cancel_batch_data: CancelBatchRequest,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ):
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.acancel_batch( # type: ignore
+ cancel_batch_data=cancel_batch_data, openai_client=openai_client
+ )
+
+ response = openai_client.batches.cancel(**cancel_batch_data)
+ return response
+
+ async def alist_batches(
+ self,
+ openai_client: AsyncOpenAI,
+ after: Optional[str] = None,
+ limit: Optional[int] = None,
+ ):
+ verbose_logger.debug("listing batches, after= %s, limit= %s", after, limit)
+ response = await openai_client.batches.list(after=after, limit=limit) # type: ignore
+ return response
+
+ def list_batches(
+ self,
+ _is_async: bool,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ after: Optional[str] = None,
+ limit: Optional[int] = None,
+ client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
+ ):
+ openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ _is_async=_is_async,
+ )
+ if openai_client is None:
+ raise ValueError(
+ "OpenAI client is not initialized. Make sure api_key is passed or OPENAI_API_KEY is set in the environment."
+ )
+
+ if _is_async is True:
+ if not isinstance(openai_client, AsyncOpenAI):
+ raise ValueError(
+ "OpenAI client is not an instance of AsyncOpenAI. Make sure you passed an AsyncOpenAI client."
+ )
+ return self.alist_batches( # type: ignore
+ openai_client=openai_client, after=after, limit=limit
+ )
+ response = openai_client.batches.list(after=after, limit=limit) # type: ignore
+ return response
+
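The batch wrappers above splat the `CreateBatchRequest` / `RetrieveBatchRequest` dicts straight into the OpenAI SDK, so their keys mirror `client.batches.create` / `client.batches.retrieve`. Below is a hedged sketch of creating and then polling a batch; the input file id and key are placeholders, and in practice `litellm.create_batch()` fills these arguments in.

```
from litellm.llms.openai.openai import OpenAIBatchesAPI

batches_api = OpenAIBatchesAPI()

conn = dict(
    api_key="sk-...",                        # placeholder key
    api_base="https://api.openai.com/v1",
    timeout=600.0,
    max_retries=2,
    organization=None,
)

batch = batches_api.create_batch(
    _is_async=False,
    create_batch_data={
        "input_file_id": "file-abc123",      # a .jsonl file previously uploaded with purpose="batch"
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",
    },
    **conn,
)

# Poll the batch later using the id returned above.
status = batches_api.retrieve_batch(
    _is_async=False,
    retrieve_batch_data={"batch_id": batch.id},
    **conn,
)
```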
+
+class OpenAIAssistantsAPI(BaseLLM):
+ def __init__(self) -> None:
+ super().__init__()
+
+ def get_openai_client(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[OpenAI] = None,
+ ) -> OpenAI:
+ received_args = locals()
+ if client is None:
+ data = {}
+ for k, v in received_args.items():
+ if k == "self" or k == "client":
+ pass
+ elif k == "api_base" and v is not None:
+ data["base_url"] = v
+ elif v is not None:
+ data[k] = v
+ openai_client = OpenAI(**data) # type: ignore
+ else:
+ openai_client = client
+
+ return openai_client
+
+ def async_get_openai_client(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI] = None,
+ ) -> AsyncOpenAI:
+ received_args = locals()
+ if client is None:
+ data = {}
+ for k, v in received_args.items():
+ if k == "self" or k == "client":
+ pass
+ elif k == "api_base" and v is not None:
+ data["base_url"] = v
+ elif v is not None:
+ data[k] = v
+ openai_client = AsyncOpenAI(**data) # type: ignore
+ else:
+ openai_client = client
+
+ return openai_client
+
+ ### ASSISTANTS ###
+
+ async def async_get_assistants(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ order: Optional[str] = "desc",
+ limit: Optional[int] = 20,
+ before: Optional[str] = None,
+ after: Optional[str] = None,
+ ) -> AsyncCursorPage[Assistant]:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ request_params = {
+ "order": order,
+ "limit": limit,
+ }
+ if before:
+ request_params["before"] = before
+ if after:
+ request_params["after"] = after
+
+ response = await openai_client.beta.assistants.list(**request_params) # type: ignore
+
+ return response
+
+ # fmt: off
+
+ @overload
+ def get_assistants(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ aget_assistants: Literal[True],
+ ) -> Coroutine[None, None, AsyncCursorPage[Assistant]]:
+ ...
+
+ @overload
+ def get_assistants(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[OpenAI],
+ aget_assistants: Optional[Literal[False]],
+ ) -> SyncCursorPage[Assistant]:
+ ...
+
+ # fmt: on
+
+ def get_assistants(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client=None,
+ aget_assistants=None,
+ order: Optional[str] = "desc",
+ limit: Optional[int] = 20,
+ before: Optional[str] = None,
+ after: Optional[str] = None,
+ ):
+ if aget_assistants is not None and aget_assistants is True:
+ return self.async_get_assistants(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ order=order,
+ limit=limit,
+ before=before,
+ after=after,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ request_params = {
+ "order": order,
+ "limit": limit,
+ }
+
+ if before:
+ request_params["before"] = before
+ if after:
+ request_params["after"] = after
+
+ response = openai_client.beta.assistants.list(**request_params) # type: ignore
+
+ return response
+
+ # Create Assistant
+ async def async_create_assistants(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ create_assistant_data: dict,
+ ) -> Assistant:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = await openai_client.beta.assistants.create(**create_assistant_data)
+
+ return response
+
+ def create_assistants(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ create_assistant_data: dict,
+ client=None,
+ async_create_assistants=None,
+ ):
+ if async_create_assistants is not None and async_create_assistants is True:
+ return self.async_create_assistants(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ create_assistant_data=create_assistant_data,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = openai_client.beta.assistants.create(**create_assistant_data)
+ return response
+
+ # Delete Assistant
+ async def async_delete_assistant(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ assistant_id: str,
+ ) -> AssistantDeleted:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = await openai_client.beta.assistants.delete(assistant_id=assistant_id)
+
+ return response
+
+ def delete_assistant(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ assistant_id: str,
+ client=None,
+ async_delete_assistants=None,
+ ):
+ if async_delete_assistants is not None and async_delete_assistants is True:
+ return self.async_delete_assistant(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ assistant_id=assistant_id,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = openai_client.beta.assistants.delete(assistant_id=assistant_id)
+ return response
+
+ ### MESSAGES ###
+
+ async def a_add_message(
+ self,
+ thread_id: str,
+ message_data: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI] = None,
+ ) -> OpenAIMessage:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ thread_message: OpenAIMessage = await openai_client.beta.threads.messages.create( # type: ignore
+ thread_id, **message_data # type: ignore
+ )
+
+ if getattr(thread_message, "status", None) is None:
+ thread_message.status = "completed"
+ return OpenAIMessage(**thread_message.dict())
+
+ # fmt: off
+
+ @overload
+ def add_message(
+ self,
+ thread_id: str,
+ message_data: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ a_add_message: Literal[True],
+ ) -> Coroutine[None, None, OpenAIMessage]:
+ ...
+
+ @overload
+ def add_message(
+ self,
+ thread_id: str,
+ message_data: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[OpenAI],
+ a_add_message: Optional[Literal[False]],
+ ) -> OpenAIMessage:
+ ...
+
+ # fmt: on
+
+ def add_message(
+ self,
+ thread_id: str,
+ message_data: dict,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client=None,
+ a_add_message: Optional[bool] = None,
+ ):
+ if a_add_message is not None and a_add_message is True:
+ return self.a_add_message(
+ thread_id=thread_id,
+ message_data=message_data,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ thread_message: OpenAIMessage = openai_client.beta.threads.messages.create( # type: ignore
+ thread_id, **message_data # type: ignore
+ )
+
+ if getattr(thread_message, "status", None) is None:
+ thread_message.status = "completed"
+ return OpenAIMessage(**thread_message.dict())
+
+ async def async_get_messages(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI] = None,
+ ) -> AsyncCursorPage[OpenAIMessage]:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = await openai_client.beta.threads.messages.list(thread_id=thread_id)
+
+ return response
+
+ # fmt: off
+
+ @overload
+ def get_messages(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ aget_messages: Literal[True],
+ ) -> Coroutine[None, None, AsyncCursorPage[OpenAIMessage]]:
+ ...
+
+ @overload
+ def get_messages(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[OpenAI],
+ aget_messages: Optional[Literal[False]],
+ ) -> SyncCursorPage[OpenAIMessage]:
+ ...
+
+ # fmt: on
+
+ def get_messages(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client=None,
+ aget_messages=None,
+ ):
+ if aget_messages is not None and aget_messages is True:
+ return self.async_get_messages(
+ thread_id=thread_id,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = openai_client.beta.threads.messages.list(thread_id=thread_id)
+
+ return response
+
+ ### THREADS ###
+
+ async def async_create_thread(
+ self,
+ metadata: Optional[dict],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
+ ) -> Thread:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ data = {}
+ if messages is not None:
+ data["messages"] = messages # type: ignore
+ if metadata is not None:
+ data["metadata"] = metadata # type: ignore
+
+ message_thread = await openai_client.beta.threads.create(**data) # type: ignore
+
+ return Thread(**message_thread.dict())
+
+ # fmt: off
+
+ @overload
+ def create_thread(
+ self,
+ metadata: Optional[dict],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
+ client: Optional[AsyncOpenAI],
+ acreate_thread: Literal[True],
+ ) -> Coroutine[None, None, Thread]:
+ ...
+
+ @overload
+ def create_thread(
+ self,
+ metadata: Optional[dict],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
+ client: Optional[OpenAI],
+ acreate_thread: Optional[Literal[False]],
+ ) -> Thread:
+ ...
+
+ # fmt: on
+
+ def create_thread(
+ self,
+ metadata: Optional[dict],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ messages: Optional[Iterable[OpenAICreateThreadParamsMessage]],
+ client=None,
+ acreate_thread=None,
+ ):
+ """
+ Here's an example:
+ ```
+ from litellm.llms.openai.openai import OpenAIAssistantsAPI, MessageData
+
+ # create thread
+ message: MessageData = {"role": "user", "content": "Hey, how's it going?"}
+ openai_api.create_thread(messages=[message])
+ ```
+ """
+ if acreate_thread is not None and acreate_thread is True:
+ return self.async_create_thread(
+ metadata=metadata,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ messages=messages,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ data = {}
+ if messages is not None:
+ data["messages"] = messages # type: ignore
+ if metadata is not None:
+ data["metadata"] = metadata # type: ignore
+
+ message_thread = openai_client.beta.threads.create(**data) # type: ignore
+
+ return Thread(**message_thread.dict())
+
+ async def async_get_thread(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ ) -> Thread:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = await openai_client.beta.threads.retrieve(thread_id=thread_id)
+
+ return Thread(**response.dict())
+
+ # fmt: off
+
+ @overload
+ def get_thread(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ aget_thread: Literal[True],
+ ) -> Coroutine[None, None, Thread]:
+ ...
+
+ @overload
+ def get_thread(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[OpenAI],
+ aget_thread: Optional[Literal[False]],
+ ) -> Thread:
+ ...
+
+ # fmt: on
+
+ def get_thread(
+ self,
+ thread_id: str,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client=None,
+ aget_thread=None,
+ ):
+ if aget_thread is not None and aget_thread is True:
+ return self.async_get_thread(
+ thread_id=thread_id,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = openai_client.beta.threads.retrieve(thread_id=thread_id)
+
+ return Thread(**response.dict())
+
+ def delete_thread(self):
+ pass
+
+ ### RUNS ###
+
+ async def arun_thread(
+ self,
+ thread_id: str,
+ assistant_id: str,
+ additional_instructions: Optional[str],
+ instructions: Optional[str],
+ metadata: Optional[Dict],
+ model: Optional[str],
+ stream: Optional[bool],
+ tools: Optional[Iterable[AssistantToolParam]],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client: Optional[AsyncOpenAI],
+ ) -> Run:
+ openai_client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ response = await openai_client.beta.threads.runs.create_and_poll( # type: ignore
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ additional_instructions=additional_instructions,
+ instructions=instructions,
+ metadata=metadata,
+ model=model,
+ tools=tools,
+ )
+
+ return response
+
+ def async_run_thread_stream(
+ self,
+ client: AsyncOpenAI,
+ thread_id: str,
+ assistant_id: str,
+ additional_instructions: Optional[str],
+ instructions: Optional[str],
+ metadata: Optional[Dict],
+ model: Optional[str],
+ tools: Optional[Iterable[AssistantToolParam]],
+ event_handler: Optional[AssistantEventHandler],
+ ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
+ data: Dict[str, Any] = {
+ "thread_id": thread_id,
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "instructions": instructions,
+ "metadata": metadata,
+ "model": model,
+ "tools": tools,
+ }
+ if event_handler is not None:
+ data["event_handler"] = event_handler
+ return client.beta.threads.runs.stream(**data) # type: ignore
+
+ def run_thread_stream(
+ self,
+ client: OpenAI,
+ thread_id: str,
+ assistant_id: str,
+ additional_instructions: Optional[str],
+ instructions: Optional[str],
+ metadata: Optional[Dict],
+ model: Optional[str],
+ tools: Optional[Iterable[AssistantToolParam]],
+ event_handler: Optional[AssistantEventHandler],
+ ) -> AssistantStreamManager[AssistantEventHandler]:
+ data: Dict[str, Any] = {
+ "thread_id": thread_id,
+ "assistant_id": assistant_id,
+ "additional_instructions": additional_instructions,
+ "instructions": instructions,
+ "metadata": metadata,
+ "model": model,
+ "tools": tools,
+ }
+ if event_handler is not None:
+ data["event_handler"] = event_handler
+ return client.beta.threads.runs.stream(**data) # type: ignore
+
+ # fmt: off
+
+ @overload
+ def run_thread(
+ self,
+ thread_id: str,
+ assistant_id: str,
+ additional_instructions: Optional[str],
+ instructions: Optional[str],
+ metadata: Optional[Dict],
+ model: Optional[str],
+ stream: Optional[bool],
+ tools: Optional[Iterable[AssistantToolParam]],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client,
+ arun_thread: Literal[True],
+ event_handler: Optional[AssistantEventHandler],
+ ) -> Coroutine[None, None, Run]:
+ ...
+
+ @overload
+ def run_thread(
+ self,
+ thread_id: str,
+ assistant_id: str,
+ additional_instructions: Optional[str],
+ instructions: Optional[str],
+ metadata: Optional[Dict],
+ model: Optional[str],
+ stream: Optional[bool],
+ tools: Optional[Iterable[AssistantToolParam]],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client,
+ arun_thread: Optional[Literal[False]],
+ event_handler: Optional[AssistantEventHandler],
+ ) -> Run:
+ ...
+
+ # fmt: on
+
+ def run_thread(
+ self,
+ thread_id: str,
+ assistant_id: str,
+ additional_instructions: Optional[str],
+ instructions: Optional[str],
+ metadata: Optional[Dict],
+ model: Optional[str],
+ stream: Optional[bool],
+ tools: Optional[Iterable[AssistantToolParam]],
+ api_key: Optional[str],
+ api_base: Optional[str],
+ timeout: Union[float, httpx.Timeout],
+ max_retries: Optional[int],
+ organization: Optional[str],
+ client=None,
+ arun_thread=None,
+ event_handler: Optional[AssistantEventHandler] = None,
+ ):
+ if arun_thread is not None and arun_thread is True:
+ if stream is not None and stream is True:
+ _client = self.async_get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ return self.async_run_thread_stream(
+ client=_client,
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ additional_instructions=additional_instructions,
+ instructions=instructions,
+ metadata=metadata,
+ model=model,
+ tools=tools,
+ event_handler=event_handler,
+ )
+ return self.arun_thread(
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ additional_instructions=additional_instructions,
+ instructions=instructions,
+ metadata=metadata,
+ model=model,
+ stream=stream,
+ tools=tools,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+ openai_client = self.get_openai_client(
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ organization=organization,
+ client=client,
+ )
+
+ if stream is not None and stream is True:
+ return self.run_thread_stream(
+ client=openai_client,
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ additional_instructions=additional_instructions,
+ instructions=instructions,
+ metadata=metadata,
+ model=model,
+ tools=tools,
+ event_handler=event_handler,
+ )
+
+ response = openai_client.beta.threads.runs.create_and_poll( # type: ignore
+ thread_id=thread_id,
+ assistant_id=assistant_id,
+ additional_instructions=additional_instructions,
+ instructions=instructions,
+ metadata=metadata,
+ model=model,
+ tools=tools,
+ )
+
+ return response
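Putting the assistants wrappers together, here is a hedged sketch of the sync flow: create a thread, add a user message, then run it against an existing assistant. The assistant id and API key are placeholders, and the top-level litellm assistants helpers normally supply these arguments. Passing `acreate_thread=True`, `a_add_message=True`, or `arun_thread=True` makes the same methods return coroutines instead.

```
from litellm.llms.openai.openai import OpenAIAssistantsAPI

assistants_api = OpenAIAssistantsAPI()

conn = dict(
    api_key="sk-...",          # placeholder key
    api_base=None,
    timeout=600.0,
    max_retries=2,
    organization=None,
    client=None,
)

thread = assistants_api.create_thread(
    metadata=None,
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    acreate_thread=False,
    **conn,
)

assistants_api.add_message(
    thread_id=thread.id,
    message_data={"role": "user", "content": "Summarize our conversation so far."},
    a_add_message=False,
    **conn,
)

run = assistants_api.run_thread(
    thread_id=thread.id,
    assistant_id="asst_abc123",   # placeholder assistant id
    additional_instructions=None,
    instructions=None,
    metadata=None,
    model=None,
    stream=False,
    tools=None,
    arun_thread=False,
    event_handler=None,
    **conn,
)
print(run.status)
```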
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/realtime/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/realtime/handler.py
new file mode 100644
index 00000000..83398ad1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/realtime/handler.py
@@ -0,0 +1,73 @@
+"""
+This file contains the logic for calling OpenAI's `/v1/realtime` endpoint.
+
+This requires websockets, and is currently only supported on LiteLLM Proxy.
+"""
+
+from typing import Any, Optional
+
+from ....litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
+from ....litellm_core_utils.realtime_streaming import RealTimeStreaming
+from ..openai import OpenAIChatCompletion
+
+
+class OpenAIRealtime(OpenAIChatCompletion):
+ def _construct_url(self, api_base: str, model: str) -> str:
+ """
+ Example output:
+ "BACKEND_WS_URL = "wss://localhost:8080/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"";
+ """
+ api_base = api_base.replace("https://", "wss://")
+ api_base = api_base.replace("http://", "ws://")
+ return f"{api_base}/v1/realtime?model={model}"
+
+ async def async_realtime(
+ self,
+ model: str,
+ websocket: Any,
+ logging_obj: LiteLLMLogging,
+ api_base: Optional[str] = None,
+ api_key: Optional[str] = None,
+ client: Optional[Any] = None,
+ timeout: Optional[float] = None,
+ ):
+ import websockets
+
+ if api_base is None:
+ raise ValueError("api_base is required for Azure OpenAI calls")
+ if api_key is None:
+ raise ValueError("api_key is required for Azure OpenAI calls")
+
+ url = self._construct_url(api_base, model)
+
+ try:
+ async with websockets.connect( # type: ignore
+ url,
+ extra_headers={
+ "Authorization": f"Bearer {api_key}", # type: ignore
+ "OpenAI-Beta": "realtime=v1",
+ },
+ ) as backend_ws:
+ realtime_streaming = RealTimeStreaming(
+ websocket, backend_ws, logging_obj
+ )
+ await realtime_streaming.bidirectional_forward()
+
+ except websockets.exceptions.InvalidStatusCode as e: # type: ignore
+ await websocket.close(code=e.status_code, reason=str(e))
+ except Exception as e:
+ try:
+ await websocket.close(
+ code=1011, reason=f"Internal server error: {str(e)}"
+ )
+ except RuntimeError as close_error:
+ if "already completed" in str(close_error) or "websocket.close" in str(
+ close_error
+ ):
+ # The WebSocket is already closed or the response is completed, so we can ignore this error
+ pass
+ else:
+ # If it's a different RuntimeError, we might want to log it or handle it differently
+ raise Exception(
+ f"Unexpected error while closing WebSocket: {close_error}"
+ )
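For reference, a small sketch of the URL rewrite `_construct_url` performs. The handler itself is driven by the LiteLLM Proxy rather than called directly, and instantiating `OpenAIRealtime()` with no arguments is an assumption here.

```
from litellm.llms.openai.realtime.handler import OpenAIRealtime

url = OpenAIRealtime()._construct_url(
    api_base="https://api.openai.com",
    model="gpt-4o-realtime-preview-2024-10-01",
)
print(url)
# wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01
```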
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/responses/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/responses/transformation.py
new file mode 100644
index 00000000..e062c0c9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/responses/transformation.py
@@ -0,0 +1,216 @@
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast
+
+import httpx
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import *
+from litellm.types.router import GenericLiteLLMParams
+
+from ..common_utils import OpenAIError
+
+if TYPE_CHECKING:
+ from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+ LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+ LiteLLMLoggingObj = Any
+
+
+class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
+ def get_supported_openai_params(self, model: str) -> list:
+ """
+ All OpenAI Responses API params are supported
+ """
+ return [
+ "input",
+ "model",
+ "include",
+ "instructions",
+ "max_output_tokens",
+ "metadata",
+ "parallel_tool_calls",
+ "previous_response_id",
+ "reasoning",
+ "store",
+ "stream",
+ "temperature",
+ "text",
+ "tool_choice",
+ "tools",
+ "top_p",
+ "truncation",
+ "user",
+ "extra_headers",
+ "extra_query",
+ "extra_body",
+ "timeout",
+ ]
+
+ def map_openai_params(
+ self,
+ response_api_optional_params: ResponsesAPIOptionalRequestParams,
+ model: str,
+ drop_params: bool,
+ ) -> Dict:
+ """No mapping applied since inputs are in OpenAI spec already"""
+ return dict(response_api_optional_params)
+
+ def transform_responses_api_request(
+ self,
+ model: str,
+ input: Union[str, ResponseInputParam],
+ response_api_optional_request_params: Dict,
+ litellm_params: GenericLiteLLMParams,
+ headers: dict,
+ ) -> Dict:
+ """No transform applied since inputs are in OpenAI spec already"""
+ return dict(
+ ResponsesAPIRequestParams(
+ model=model, input=input, **response_api_optional_request_params
+ )
+ )
+
+ def transform_response_api_response(
+ self,
+ model: str,
+ raw_response: httpx.Response,
+ logging_obj: LiteLLMLoggingObj,
+ ) -> ResponsesAPIResponse:
+ """No transform applied since outputs are in OpenAI spec already"""
+ try:
+ raw_response_json = raw_response.json()
+ except Exception:
+ raise OpenAIError(
+ message=raw_response.text, status_code=raw_response.status_code
+ )
+ return ResponsesAPIResponse(**raw_response_json)
+
+ def validate_environment(
+ self,
+ headers: dict,
+ model: str,
+ api_key: Optional[str] = None,
+ ) -> dict:
+ api_key = (
+ api_key
+ or litellm.api_key
+ or litellm.openai_key
+ or get_secret_str("OPENAI_API_KEY")
+ )
+ headers.update(
+ {
+ "Authorization": f"Bearer {api_key}",
+ }
+ )
+ return headers
+
+ def get_complete_url(
+ self,
+ api_base: Optional[str],
+ model: str,
+ stream: Optional[bool] = None,
+ ) -> str:
+ """
+ Get the endpoint for OpenAI responses API
+ """
+ api_base = (
+ api_base
+ or litellm.api_base
+ or get_secret_str("OPENAI_API_BASE")
+ or "https://api.openai.com/v1"
+ )
+
+ # Remove trailing slashes
+ api_base = api_base.rstrip("/")
+
+ return f"{api_base}/responses"
+
+ def transform_streaming_response(
+ self,
+ model: str,
+ parsed_chunk: dict,
+ logging_obj: LiteLLMLoggingObj,
+ ) -> ResponsesAPIStreamingResponse:
+ """
+ Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
+ """
+ # Convert the dictionary to a properly typed ResponsesAPIStreamingResponse
+ verbose_logger.debug("Raw OpenAI Chunk=%s", parsed_chunk)
+ event_type = str(parsed_chunk.get("type"))
+ event_pydantic_model = OpenAIResponsesAPIConfig.get_event_model_class(
+ event_type=event_type
+ )
+ return event_pydantic_model(**parsed_chunk)
+
+ @staticmethod
+ def get_event_model_class(event_type: str) -> Any:
+ """
+ Returns the appropriate event model class based on the event type.
+
+ Args:
+ event_type (str): The type of event from the response chunk
+
+ Returns:
+ Any: The corresponding event model class
+
+ Raises:
+ ValueError: If the event type is unknown
+ """
+ event_models = {
+ ResponsesAPIStreamEvents.RESPONSE_CREATED: ResponseCreatedEvent,
+ ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS: ResponseInProgressEvent,
+ ResponsesAPIStreamEvents.RESPONSE_COMPLETED: ResponseCompletedEvent,
+ ResponsesAPIStreamEvents.RESPONSE_FAILED: ResponseFailedEvent,
+ ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE: ResponseIncompleteEvent,
+ ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED: OutputItemAddedEvent,
+ ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE: OutputItemDoneEvent,
+ ResponsesAPIStreamEvents.CONTENT_PART_ADDED: ContentPartAddedEvent,
+ ResponsesAPIStreamEvents.CONTENT_PART_DONE: ContentPartDoneEvent,
+ ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA: OutputTextDeltaEvent,
+ ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED: OutputTextAnnotationAddedEvent,
+ ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE: OutputTextDoneEvent,
+ ResponsesAPIStreamEvents.REFUSAL_DELTA: RefusalDeltaEvent,
+ ResponsesAPIStreamEvents.REFUSAL_DONE: RefusalDoneEvent,
+ ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA: FunctionCallArgumentsDeltaEvent,
+ ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE: FunctionCallArgumentsDoneEvent,
+ ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS: FileSearchCallInProgressEvent,
+ ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING: FileSearchCallSearchingEvent,
+ ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED: FileSearchCallCompletedEvent,
+ ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS: WebSearchCallInProgressEvent,
+ ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING: WebSearchCallSearchingEvent,
+ ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED: WebSearchCallCompletedEvent,
+ ResponsesAPIStreamEvents.ERROR: ErrorEvent,
+ }
+
+ model_class = event_models.get(cast(ResponsesAPIStreamEvents, event_type))
+ if not model_class:
+ raise ValueError(f"Unknown event type: {event_type}")
+
+ return model_class
+
+ def should_fake_stream(
+ self,
+ model: Optional[str],
+ stream: Optional[bool],
+ custom_llm_provider: Optional[str] = None,
+ ) -> bool:
+ if stream is not True:
+ return False
+ if model is not None:
+ try:
+ if (
+ litellm.utils.supports_native_streaming(
+ model=model,
+ custom_llm_provider=custom_llm_provider,
+ )
+ is False
+ ):
+ return True
+ except Exception as e:
+ verbose_logger.debug(
+ f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
+ )
+ return False
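A short sketch of the two lookups this config performs: resolving the `/responses` endpoint and mapping a streaming chunk's `type` to its pydantic event class. The model name is a placeholder, and the printed URL assumes no overriding `api_base` is configured.

```
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.types.llms.openai import ResponsesAPIStreamEvents

config = OpenAIResponsesAPIConfig()

# Falls back to https://api.openai.com/v1 when no api_base is configured,
# then appends /responses.
print(config.get_complete_url(api_base=None, model="gpt-4o"))

# Streaming chunks carry a "type" string; production code passes that raw
# string, the enum member is used here for clarity.
event_cls = OpenAIResponsesAPIConfig.get_event_model_class(
    event_type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
)
print(event_cls.__name__)  # OutputTextDeltaEvent
```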
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/transcriptions/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/transcriptions/handler.py
new file mode 100644
index 00000000..d9dd3c12
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/transcriptions/handler.py
@@ -0,0 +1,210 @@
+from typing import Optional, Union
+
+import httpx
+from openai import AsyncOpenAI, OpenAI
+from pydantic import BaseModel
+
+import litellm
+from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.types.utils import FileTypes
+from litellm.utils import (
+ TranscriptionResponse,
+ convert_to_model_response_object,
+ extract_duration_from_srt_or_vtt,
+)
+
+from ..openai import OpenAIChatCompletion
+
+
+class OpenAIAudioTranscription(OpenAIChatCompletion):
+ # Audio Transcriptions
+ async def make_openai_audio_transcriptions_request(
+ self,
+ openai_aclient: AsyncOpenAI,
+ data: dict,
+ timeout: Union[float, httpx.Timeout],
+ ):
+ """
+ Helper that always calls
+ openai_aclient.audio.transcriptions.with_raw_response.create, so response
+ headers can be returned alongside the parsed transcription.
+ """
+ try:
+ raw_response = (
+ await openai_aclient.audio.transcriptions.with_raw_response.create(
+ **data, timeout=timeout
+ )
+ ) # type: ignore
+ headers = dict(raw_response.headers)
+ response = raw_response.parse()
+
+ return headers, response
+ except Exception as e:
+ raise e
+
+ def make_sync_openai_audio_transcriptions_request(
+ self,
+ openai_client: OpenAI,
+ data: dict,
+ timeout: Union[float, httpx.Timeout],
+ ):
+ """
+ Helper to:
+ - call openai_client.audio.transcriptions.with_raw_response when litellm.return_response_headers is True
+ - call openai_client.audio.transcriptions.create by default
+ """
+ try:
+ if litellm.return_response_headers is True:
+ raw_response = (
+ openai_client.audio.transcriptions.with_raw_response.create(
+ **data, timeout=timeout
+ )
+ ) # type: ignore
+ headers = dict(raw_response.headers)
+ response = raw_response.parse()
+ return headers, response
+ else:
+ response = openai_client.audio.transcriptions.create(**data, timeout=timeout) # type: ignore
+ return None, response
+ except Exception as e:
+ raise e
+
+ def audio_transcriptions(
+ self,
+ model: str,
+ audio_file: FileTypes,
+ optional_params: dict,
+ model_response: TranscriptionResponse,
+ timeout: float,
+ max_retries: int,
+ logging_obj: LiteLLMLoggingObj,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ client=None,
+ atranscription: bool = False,
+ ) -> TranscriptionResponse:
+ data = {"model": model, "file": audio_file, **optional_params}
+
+ if "response_format" not in data or (
+ data["response_format"] == "text" or data["response_format"] == "json"
+ ):
+ data["response_format"] = (
+ "verbose_json" # ensures 'duration' is received - used for cost calculation
+ )
+
+ if atranscription is True:
+ return self.async_audio_transcriptions( # type: ignore
+ audio_file=audio_file,
+ data=data,
+ model_response=model_response,
+ timeout=timeout,
+ api_key=api_key,
+ api_base=api_base,
+ client=client,
+ max_retries=max_retries,
+ logging_obj=logging_obj,
+ )
+
+ openai_client: OpenAI = self._get_openai_client( # type: ignore
+ is_async=False,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+
+ ## LOGGING
+ logging_obj.pre_call(
+ input=None,
+ api_key=openai_client.api_key,
+ additional_args={
+ "api_base": openai_client._base_url._uri_reference,
+ "atranscription": True,
+ "complete_input_dict": data,
+ },
+ )
+ _, response = self.make_sync_openai_audio_transcriptions_request(
+ openai_client=openai_client,
+ data=data,
+ timeout=timeout,
+ )
+
+ if isinstance(response, BaseModel):
+ stringified_response = response.model_dump()
+ else:
+ stringified_response = TranscriptionResponse(text=response).model_dump()
+
+ ## LOGGING
+ logging_obj.post_call(
+ input=get_audio_file_name(audio_file),
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=stringified_response,
+ )
+ hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+ final_response: TranscriptionResponse = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
+ return final_response
+
+ async def async_audio_transcriptions(
+ self,
+ audio_file: FileTypes,
+ data: dict,
+ model_response: TranscriptionResponse,
+ timeout: float,
+ logging_obj: LiteLLMLoggingObj,
+ api_key: Optional[str] = None,
+ api_base: Optional[str] = None,
+ client=None,
+ max_retries=None,
+ ):
+ try:
+ openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore
+ is_async=True,
+ api_key=api_key,
+ api_base=api_base,
+ timeout=timeout,
+ max_retries=max_retries,
+ client=client,
+ )
+
+ ## LOGGING
+ logging_obj.pre_call(
+ input=None,
+ api_key=openai_aclient.api_key,
+ additional_args={
+ "api_base": openai_aclient._base_url._uri_reference,
+ "atranscription": True,
+ "complete_input_dict": data,
+ },
+ )
+ headers, response = await self.make_openai_audio_transcriptions_request(
+ openai_aclient=openai_aclient,
+ data=data,
+ timeout=timeout,
+ )
+ logging_obj.model_call_details["response_headers"] = headers
+ if isinstance(response, BaseModel):
+ stringified_response = response.model_dump()
+ else:
+ duration = extract_duration_from_srt_or_vtt(response)
+ stringified_response = TranscriptionResponse(text=response).model_dump()
+ stringified_response["duration"] = duration
+ ## LOGGING
+ logging_obj.post_call(
+ input=get_audio_file_name(audio_file),
+ api_key=api_key,
+ additional_args={"complete_input_dict": data},
+ original_response=stringified_response,
+ )
+ hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+ return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription") # type: ignore
+ except Exception as e:
+ ## LOGGING
+ logging_obj.post_call(
+ input=get_audio_file_name(audio_file),
+ api_key=api_key,
+ original_response=str(e),
+ )
+ raise e
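Usage-wise, this handler is what `litellm.transcription()` routes to for OpenAI models. The sketch below shows the effect of the `response_format` upgrade above, where `"text"`/`"json"` are sent as `"verbose_json"` so a duration is available for cost tracking; the local file path and key handling are placeholders.

```
import litellm

with open("speech.wav", "rb") as audio_file:   # placeholder local file
    result = litellm.transcription(
        model="whisper-1",
        file=audio_file,
        response_format="json",   # sent as "verbose_json" by the handler
    )
print(result.text)
```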