path: root/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
author    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py  120
1 file changed, 120 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
new file mode 100644
index 00000000..0c26fd74
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai/cost_calculation.py
@@ -0,0 +1,120 @@
+"""
+Helper util for handling openai-specific cost calculation
+- e.g.: prompt caching
+"""
+
+from typing import Literal, Optional, Tuple
+
+from litellm._logging import verbose_logger
+from litellm.types.utils import CallTypes, Usage
+from litellm.utils import get_model_info
+
+
+def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_second"]:
+ if call_type == CallTypes.atranscription or call_type == CallTypes.transcription:
+ return "cost_per_second"
+ else:
+ return "cost_per_token"
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+ """
+ Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+ Input:
+ - model: str, the model name without provider prefix
+ - usage: LiteLLM Usage block, containing OpenAI prompt caching information
+
+ Returns:
+ Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+ """
+ ## GET MODEL INFO
+ model_info = get_model_info(model=model, custom_llm_provider="openai")
+ ## CALCULATE INPUT COST
+ ### Non-cached text tokens
+ non_cached_text_tokens = usage.prompt_tokens
+ cached_tokens: Optional[int] = None
+ if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
+ cached_tokens = usage.prompt_tokens_details.cached_tokens
+ non_cached_text_tokens = non_cached_text_tokens - cached_tokens
+ prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
+ ## Prompt Caching cost calculation
+ if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
+ # Bill cached prompt tokens (usage.prompt_tokens_details.cached_tokens) at the discounted cache-read rate
+ prompt_cost += cached_tokens * (
+ model_info.get("cache_read_input_token_cost", 0) or 0
+ )
+
+ _audio_tokens: Optional[int] = (
+ usage.prompt_tokens_details.audio_tokens
+ if usage.prompt_tokens_details is not None
+ else None
+ )
+ _audio_cost_per_token: Optional[float] = model_info.get(
+ "input_cost_per_audio_token"
+ )
+ if _audio_tokens is not None and _audio_cost_per_token is not None:
+ audio_cost: float = _audio_tokens * _audio_cost_per_token
+ prompt_cost += audio_cost
+
+ ## CALCULATE OUTPUT COST
+ completion_cost: float = (
+ usage["completion_tokens"] * model_info["output_cost_per_token"]
+ )
+ _output_cost_per_audio_token: Optional[float] = model_info.get(
+ "output_cost_per_audio_token"
+ )
+ _output_audio_tokens: Optional[int] = (
+ usage.completion_tokens_details.audio_tokens
+ if usage.completion_tokens_details is not None
+ else None
+ )
+ if _output_cost_per_audio_token is not None and _output_audio_tokens is not None:
+ audio_cost = _output_audio_tokens * _output_cost_per_audio_token
+ completion_cost += audio_cost
+
+ return prompt_cost, completion_cost
+
+
+def cost_per_second(
+ model: str, custom_llm_provider: Optional[str], duration: float = 0.0
+) -> Tuple[float, float]:
+ """
+ Calculates the cost for a given model based on the duration of the request, in seconds.
+
+ Input:
+ - model: str, the model name without provider prefix
+ - custom_llm_provider: str, the custom llm provider
+ - duration: float, the duration of the response in seconds
+
+ Returns:
+ Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+ """
+ ## GET MODEL INFO
+ model_info = get_model_info(
+ model=model, custom_llm_provider=custom_llm_provider or "openai"
+ )
+ prompt_cost = 0.0
+ completion_cost = 0.0
+ ## Speech / Audio cost calculation
+ if (
+ "output_cost_per_second" in model_info
+ and model_info["output_cost_per_second"] is not None
+ ):
+ verbose_logger.debug(
+ f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
+ )
+ ## COST PER SECOND ##
+ completion_cost = model_info["output_cost_per_second"] * duration
+ elif (
+ "input_cost_per_second" in model_info
+ and model_info["input_cost_per_second"] is not None
+ ):
+ verbose_logger.debug(
+ f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
+ )
+ ## COST PER SECOND ##
+ prompt_cost = model_info["input_cost_per_second"] * duration
+ completion_cost = 0.0
+
+ return prompt_cost, completion_cost
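
Below is a brief usage sketch (not part of the diff above) for the token-based path. The model name "gpt-4o-mini" and the token counts are illustrative assumptions, and the sketch assumes this vendored litellm install is importable; the Usage fields follow the OpenAI usage schema referenced in the code above.

from litellm.types.utils import CallTypes, Usage
from litellm.llms.openai.cost_calculation import cost_per_token, cost_router

# Illustrative token counts; real values come from the API response's usage block.
usage = Usage(prompt_tokens=1200, completion_tokens=300, total_tokens=1500)

# Chat/completion calls are priced per token rather than per second.
if cost_router(CallTypes.completion) == "cost_per_token":
    prompt_usd, completion_usd = cost_per_token(model="gpt-4o-mini", usage=usage)
    print(f"prompt: ${prompt_usd:.6f}  completion: ${completion_usd:.6f}")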
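
And a corresponding sketch for the duration-based path used by (a)transcription calls. Here "whisper-1" and the 42.5-second duration are assumptions; whether the prompt or completion side carries the cost depends on which of input_cost_per_second / output_cost_per_second is present in the installed model cost map.

from litellm.types.utils import CallTypes
from litellm.llms.openai.cost_calculation import cost_per_second, cost_router

# Transcription calls route to per-second pricing.
if cost_router(CallTypes.transcription) == "cost_per_second":
    prompt_usd, completion_usd = cost_per_second(
        model="whisper-1", custom_llm_provider="openai", duration=42.5
    )
    print(f"transcription cost: ${prompt_usd + completion_usd:.6f}")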