Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/azure/cost_calculation.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/litellm/llms/azure/cost_calculation.py | 61
1 files changed, 61 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/azure/cost_calculation.py b/.venv/lib/python3.12/site-packages/litellm/llms/azure/cost_calculation.py
new file mode 100644
index 00000000..96c58d95
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/azure/cost_calculation.py
@@ -0,0 +1,61 @@
+"""
+Helper util for handling azure openai-specific cost calculation
+- e.g.: prompt caching
+"""
+
+from typing import Optional, Tuple
+
+from litellm._logging import verbose_logger
+from litellm.types.utils import Usage
+from litellm.utils import get_model_info
+
+
+def cost_per_token(
+    model: str, usage: Usage, response_time_ms: Optional[float] = 0.0
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - usage: LiteLLM Usage block, containing anthropic caching information
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+    """
+    ## GET MODEL INFO
+    model_info = get_model_info(model=model, custom_llm_provider="azure")
+    cached_tokens: Optional[int] = None
+    ## CALCULATE INPUT COST
+    non_cached_text_tokens = usage.prompt_tokens
+    if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
+        cached_tokens = usage.prompt_tokens_details.cached_tokens
+        non_cached_text_tokens = non_cached_text_tokens - cached_tokens
+    prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
+
+    ## CALCULATE OUTPUT COST
+    completion_cost: float = (
+        usage["completion_tokens"] * model_info["output_cost_per_token"]
+    )
+
+    ## Prompt Caching cost calculation
+    if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
+        # Note: We read ._cache_read_input_tokens from the Usage - since cost_calculator.py standardizes the cache read tokens on usage._cache_read_input_tokens
+        prompt_cost += cached_tokens * (
+            model_info.get("cache_read_input_token_cost", 0) or 0
+        )
+
+    ## Speech / Audio cost calculation
+    if (
+        "output_cost_per_second" in model_info
+        and model_info["output_cost_per_second"] is not None
+        and response_time_ms is not None
+    ):
+        verbose_logger.debug(
+            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
+        )
+        ## COST PER SECOND ##
+        prompt_cost = 0
+        completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000
+
+    return prompt_cost, completion_cost
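
For context, a minimal sketch (not part of the diff) of how this helper might be called. It assumes litellm is installed, that "gpt-4o-mini" is present in litellm's Azure cost map, and that a PromptTokensDetailsWrapper type is available in litellm.types.utils (as in recent litellm versions); the token counts are made-up illustration values.

    # Hypothetical usage sketch for cost_per_token; values are illustrative only.
    from litellm.types.utils import PromptTokensDetailsWrapper, Usage
    from litellm.llms.azure.cost_calculation import cost_per_token

    usage = Usage(
        prompt_tokens=1200,
        completion_tokens=300,
        total_tokens=1500,
        # 1000 of the 1200 prompt tokens were served from the prompt cache
        prompt_tokens_details=PromptTokensDetailsWrapper(cached_tokens=1000),
    )

    # Per the function above: 200 non-cached prompt tokens are billed at the
    # input rate, the 1000 cached tokens at the cache-read rate (if the model
    # defines cache_read_input_token_cost), and 300 completion tokens at the
    # output rate.
    prompt_cost, completion_cost = cost_per_token(model="gpt-4o-mini", usage=usage)
    print(f"prompt: ${prompt_cost:.6f}, completion: ${completion_cost:.6f}")

Note that for models priced per second of output (the speech/audio branch), the function discards the token-based figures and instead returns output_cost_per_second * response_time_ms / 1000 as the completion cost.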