author     S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py  120
1 file changed, 120 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
new file mode 100644
index 00000000..0c26fd74
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
@@ -0,0 +1,120 @@
+"""
+Helper util for handling openai-specific cost calculation
+- e.g.: prompt caching
+"""
+
+from typing import Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger
+from litellm.types.utils import CallTypes, Usage
+from litellm.utils import get_model_info
+
+
+def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_second"]:
+    if call_type == CallTypes.atranscription or call_type == CallTypes.transcription:
+        return "cost_per_second"
+    else:
+        return "cost_per_token"
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+    """
+    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - usage: LiteLLM Usage block, containing prompt caching information
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+    """
+    ## GET MODEL INFO
+    model_info = get_model_info(model=model, custom_llm_provider="openai")
+
+    ## CALCULATE INPUT COST
+    ### Non-cached text tokens
+    non_cached_text_tokens = usage.prompt_tokens
+    cached_tokens: Optional[int] = None
+    if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
+        cached_tokens = usage.prompt_tokens_details.cached_tokens
+        non_cached_text_tokens = non_cached_text_tokens - cached_tokens
+    prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
+
+    ## Prompt caching cost calculation
+    if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
+        # Cached prompt tokens (read from usage.prompt_tokens_details above) are
+        # billed at the cheaper cache-read rate instead of the regular input rate.
+        prompt_cost += cached_tokens * (
+            model_info.get("cache_read_input_token_cost", 0) or 0
+        )
+
+    _audio_tokens: Optional[int] = (
+        usage.prompt_tokens_details.audio_tokens
+        if usage.prompt_tokens_details is not None
+        else None
+    )
+    _audio_cost_per_token: Optional[float] = model_info.get(
+        "input_cost_per_audio_token"
+    )
+    if _audio_tokens is not None and _audio_cost_per_token is not None:
+        audio_cost: float = _audio_tokens * _audio_cost_per_token
+        prompt_cost += audio_cost
+
+    ## CALCULATE OUTPUT COST
+    completion_cost: float = (
+        usage["completion_tokens"] * model_info["output_cost_per_token"]
+    )
+    _output_cost_per_audio_token: Optional[float] = model_info.get(
+        "output_cost_per_audio_token"
+    )
+    _output_audio_tokens: Optional[int] = (
+        usage.completion_tokens_details.audio_tokens
+        if usage.completion_tokens_details is not None
+        else None
+    )
+    if _output_cost_per_audio_token is not None and _output_audio_tokens is not None:
+        audio_cost = _output_audio_tokens * _output_cost_per_audio_token
+        completion_cost += audio_cost
+
+    return prompt_cost, completion_cost
+
+
+def cost_per_second(
+    model: str, custom_llm_provider: Optional[str], duration: float = 0.0
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per second for a given model and response duration.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, the custom llm provider
+        - duration: float, the duration of the response in seconds
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+    """
+    ## GET MODEL INFO
+    model_info = get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider or "openai"
+    )
+    prompt_cost = 0.0
+    completion_cost = 0.0
+
+    ## Speech / audio cost calculation
+    if (
+        "output_cost_per_second" in model_info
+        and model_info["output_cost_per_second"] is not None
+    ):
+        verbose_logger.debug(
+            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
+        )
+        ## COST PER SECOND ##
+        completion_cost = model_info["output_cost_per_second"] * duration
+    elif (
+        "input_cost_per_second" in model_info
+        and model_info["input_cost_per_second"] is not None
+    ):
+        verbose_logger.debug(
+            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
+        )
+        ## COST PER SECOND ##
+        prompt_cost = model_info["input_cost_per_second"] * duration
+        completion_cost = 0.0
+
+    return prompt_cost, completion_cost
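
To make the prompt-caching arithmetic in cost_per_token concrete, here is a minimal standalone sketch of the input-cost computation. The function name sketch_prompt_cost and the per-token rates are illustrative placeholders for this note, not litellm API or real OpenAI prices.

    from typing import Optional


    def sketch_prompt_cost(
        prompt_tokens: int,
        cached_tokens: Optional[int],
        input_cost_per_token: float,
        cache_read_input_token_cost: Optional[float],
    ) -> float:
        # Cached tokens are carved out of the prompt and billed at the
        # cache-read rate; the remainder is billed at the regular input rate.
        non_cached_text_tokens = prompt_tokens - (cached_tokens or 0)
        cost = non_cached_text_tokens * input_cost_per_token
        if cached_tokens and cache_read_input_token_cost is not None:
            cost += cached_tokens * cache_read_input_token_cost
        return cost


    # 1,000 prompt tokens, 600 of them served from the prompt cache, made-up rates:
    # 400 * 1e-05 + 600 * 5e-06 = 0.004 + 0.003 ~= 0.007 USD
    print(sketch_prompt_cost(1000, 600, 1e-05, 5e-06))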
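
For the duration-based path, a hedged usage sketch of the functions this diff adds: it assumes litellm is installed, and that "whisper-1" carries an input_cost_per_second entry in litellm's model-cost map (an assumption about the cost map, not something the diff itself guarantees).

    from litellm.llms.openai.cost_calculation import cost_per_second, cost_router
    from litellm.types.utils import CallTypes

    # Transcription call types are routed to duration-based billing by cost_router.
    assert cost_router(CallTypes.transcription) == "cost_per_second"

    # With per-second *input* pricing, only the prompt side accrues cost;
    # completion_cost stays 0.0 (see the elif branch in cost_per_second).
    prompt_cost, completion_cost = cost_per_second(
        model="whisper-1", custom_llm_provider="openai", duration=90.0
    )
    print(f"prompt: ${prompt_cost:.6f}, completion: ${completion_cost:.6f}")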