aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/litellm/llms/databricks
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/databricks')
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/handler.py84
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/transformation.py106
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/common_utils.py82
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/cost_calculator.py66
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/handler.py49
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/transformation.py48
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/exceptions.py12
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/databricks/streaming_utils.py166
8 files changed, 613 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/handler.py
new file mode 100644
index 00000000..abb71474
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/handler.py
@@ -0,0 +1,84 @@
+"""
+Handles the chat completion request for Databricks
+"""
+
+from typing import Callable, List, Optional, Union, cast
+
+from httpx._config import Timeout
+
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import CustomStreamingDecoder
+from litellm.utils import ModelResponse
+
+from ...openai_like.chat.handler import OpenAILikeChatHandler
+from ..common_utils import DatabricksBase
+from .transformation import DatabricksConfig
+
+
+class DatabricksChatCompletion(OpenAILikeChatHandler, DatabricksBase):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ def completion(
+ self,
+ *,
+ model: str,
+ messages: list,
+ api_base: str,
+ custom_llm_provider: str,
+ custom_prompt_dict: dict,
+ model_response: ModelResponse,
+ print_verbose: Callable,
+ encoding,
+ api_key: Optional[str],
+ logging_obj,
+ optional_params: dict,
+ acompletion=None,
+ litellm_params=None,
+ logger_fn=None,
+ headers: Optional[dict] = None,
+ timeout: Optional[Union[float, Timeout]] = None,
+ client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+ custom_endpoint: Optional[bool] = None,
+ streaming_decoder: Optional[CustomStreamingDecoder] = None,
+ fake_stream: bool = False,
+ ):
+ messages = DatabricksConfig()._transform_messages(
+ messages=cast(List[AllMessageValues], messages), model=model
+ )
+ api_base, headers = self.databricks_validate_environment(
+ api_base=api_base,
+ api_key=api_key,
+ endpoint_type="chat_completions",
+ custom_endpoint=custom_endpoint,
+ headers=headers,
+ )
+
+ if optional_params.get("stream") is True:
+ fake_stream = DatabricksConfig()._should_fake_stream(optional_params)
+ else:
+ fake_stream = False
+
+ return super().completion(
+ model=model,
+ messages=messages,
+ api_base=api_base,
+ custom_llm_provider=custom_llm_provider,
+ custom_prompt_dict=custom_prompt_dict,
+ model_response=model_response,
+ print_verbose=print_verbose,
+ encoding=encoding,
+ api_key=api_key,
+ logging_obj=logging_obj,
+ optional_params=optional_params,
+ acompletion=acompletion,
+ litellm_params=litellm_params,
+ logger_fn=logger_fn,
+ headers=headers,
+ timeout=timeout,
+ client=client,
+ custom_endpoint=True,
+ streaming_decoder=streaming_decoder,
+ fake_stream=fake_stream,
+ )
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/transformation.py
new file mode 100644
index 00000000..94e02034
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/chat/transformation.py
@@ -0,0 +1,106 @@
+"""
+Translates from OpenAI's `/v1/chat/completions` to Databricks' `/chat/completions`
+"""
+
+from typing import List, Optional, Union
+
+from pydantic import BaseModel
+
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+ handle_messages_with_content_list_to_str_conversion,
+ strip_name_from_messages,
+)
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import ProviderField
+
+from ...openai_like.chat.transformation import OpenAILikeChatConfig
+
+
+class DatabricksConfig(OpenAILikeChatConfig):
+ """
+ Reference: https://docs.databricks.com/en/machine-learning/foundation-models/api-reference.html#chat-request
+ """
+
+ max_tokens: Optional[int] = None
+ temperature: Optional[int] = None
+ top_p: Optional[int] = None
+ top_k: Optional[int] = None
+ stop: Optional[Union[List[str], str]] = None
+ n: Optional[int] = None
+
+ def __init__(
+ self,
+ max_tokens: Optional[int] = None,
+ temperature: Optional[int] = None,
+ top_p: Optional[int] = None,
+ top_k: Optional[int] = None,
+ stop: Optional[Union[List[str], str]] = None,
+ n: Optional[int] = None,
+ ) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return super().get_config()
+
+ def get_required_params(self) -> List[ProviderField]:
+ """For a given provider, return it's required fields with a description"""
+ return [
+ ProviderField(
+ field_name="api_key",
+ field_type="string",
+ field_description="Your Databricks API Key.",
+ field_value="dapi...",
+ ),
+ ProviderField(
+ field_name="api_base",
+ field_type="string",
+ field_description="Your Databricks API Base.",
+ field_value="https://adb-..",
+ ),
+ ]
+
+ def get_supported_openai_params(self, model: Optional[str] = None) -> list:
+ return [
+ "stream",
+ "stop",
+ "temperature",
+ "top_p",
+ "max_tokens",
+ "max_completion_tokens",
+ "n",
+ "response_format",
+ "tools",
+ "tool_choice",
+ ]
+
+ def _should_fake_stream(self, optional_params: dict) -> bool:
+ """
+ Databricks doesn't support 'response_format' while streaming
+ """
+ if optional_params.get("response_format") is not None:
+ return True
+
+ return False
+
+ def _transform_messages(
+ self, messages: List[AllMessageValues], model: str
+ ) -> List[AllMessageValues]:
+ """
+ Databricks does not support:
+ - content in list format.
+ - 'name' in user message.
+ """
+ new_messages = []
+ for idx, message in enumerate(messages):
+ if isinstance(message, BaseModel):
+ _message = message.model_dump(exclude_none=True)
+ else:
+ _message = message
+ new_messages.append(_message)
+ new_messages = handle_messages_with_content_list_to_str_conversion(new_messages)
+ new_messages = strip_name_from_messages(new_messages)
+ return super()._transform_messages(messages=new_messages, model=model)
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/common_utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/common_utils.py
new file mode 100644
index 00000000..e8481e25
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/common_utils.py
@@ -0,0 +1,82 @@
+from typing import Literal, Optional, Tuple
+
+from .exceptions import DatabricksError
+
+
+class DatabricksBase:
+ def _get_databricks_credentials(
+ self, api_key: Optional[str], api_base: Optional[str], headers: Optional[dict]
+ ) -> Tuple[str, dict]:
+ headers = headers or {"Content-Type": "application/json"}
+ try:
+ from databricks.sdk import WorkspaceClient
+
+ databricks_client = WorkspaceClient()
+
+ api_base = api_base or f"{databricks_client.config.host}/serving-endpoints"
+
+ if api_key is None:
+ databricks_auth_headers: dict[str, str] = (
+ databricks_client.config.authenticate()
+ )
+ headers = {**databricks_auth_headers, **headers}
+
+ return api_base, headers
+ except ImportError:
+ raise DatabricksError(
+ status_code=400,
+ message=(
+ "If the Databricks base URL and API key are not set, the databricks-sdk "
+ "Python library must be installed. Please install the databricks-sdk, set "
+ "{LLM_PROVIDER}_API_BASE and {LLM_PROVIDER}_API_KEY environment variables, "
+ "or provide the base URL and API key as arguments."
+ ),
+ )
+
+ def databricks_validate_environment(
+ self,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ endpoint_type: Literal["chat_completions", "embeddings"],
+ custom_endpoint: Optional[bool],
+ headers: Optional[dict],
+ ) -> Tuple[str, dict]:
+ if api_key is None and headers is None:
+ if custom_endpoint is not None:
+ raise DatabricksError(
+ status_code=400,
+ message="Missing API Key - A call is being made to LLM Provider but no key is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params",
+ )
+ else:
+ api_base, headers = self._get_databricks_credentials(
+ api_base=api_base, api_key=api_key, headers=headers
+ )
+
+ if api_base is None:
+ if custom_endpoint:
+ raise DatabricksError(
+ status_code=400,
+ message="Missing API Base - A call is being made to LLM Provider but no api base is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params",
+ )
+ else:
+ api_base, headers = self._get_databricks_credentials(
+ api_base=api_base, api_key=api_key, headers=headers
+ )
+
+ if headers is None:
+ headers = {
+ "Authorization": "Bearer {}".format(api_key),
+ "Content-Type": "application/json",
+ }
+ else:
+ if api_key is not None:
+ headers.update({"Authorization": "Bearer {}".format(api_key)})
+
+ if api_key is not None:
+ headers["Authorization"] = f"Bearer {api_key}"
+
+ if endpoint_type == "chat_completions" and custom_endpoint is not True:
+ api_base = "{}/chat/completions".format(api_base)
+ elif endpoint_type == "embeddings" and custom_endpoint is not True:
+ api_base = "{}/embeddings".format(api_base)
+ return api_base, headers
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/cost_calculator.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/cost_calculator.py
new file mode 100644
index 00000000..5558e133
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/cost_calculator.py
@@ -0,0 +1,66 @@
+"""
+Helper util for handling databricks-specific cost calculation
+- e.g.: handling 'dbrx-instruct-*'
+"""
+
+from typing import Tuple
+
+from litellm.types.utils import Usage
+from litellm.utils import get_model_info
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+ """
+ Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+ Input:
+ - model: str, the model name without provider prefix
+ - usage: LiteLLM Usage block, containing anthropic caching information
+
+ Returns:
+ Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+ """
+ base_model = model
+ if model.startswith("databricks/dbrx-instruct") or model.startswith(
+ "dbrx-instruct"
+ ):
+ base_model = "databricks-dbrx-instruct"
+ elif model.startswith("databricks/meta-llama-3.1-70b-instruct") or model.startswith(
+ "meta-llama-3.1-70b-instruct"
+ ):
+ base_model = "databricks-meta-llama-3-1-70b-instruct"
+ elif model.startswith(
+ "databricks/meta-llama-3.1-405b-instruct"
+ ) or model.startswith("meta-llama-3.1-405b-instruct"):
+ base_model = "databricks-meta-llama-3-1-405b-instruct"
+ elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+ "mixtral-8x7b-instruct-v0.1"
+ ):
+ base_model = "databricks-mixtral-8x7b-instruct"
+ elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
+ "mixtral-8x7b-instruct-v0.1"
+ ):
+ base_model = "databricks-mixtral-8x7b-instruct"
+ elif model.startswith("databricks/bge-large-en") or model.startswith(
+ "bge-large-en"
+ ):
+ base_model = "databricks-bge-large-en"
+ elif model.startswith("databricks/gte-large-en") or model.startswith(
+ "gte-large-en"
+ ):
+ base_model = "databricks-gte-large-en"
+ elif model.startswith("databricks/llama-2-70b-chat") or model.startswith(
+ "llama-2-70b-chat"
+ ):
+ base_model = "databricks-llama-2-70b-chat"
+ ## GET MODEL INFO
+ model_info = get_model_info(model=base_model, custom_llm_provider="databricks")
+
+ ## CALCULATE INPUT COST
+
+ prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+
+ ## CALCULATE OUTPUT COST
+ completion_cost = usage["completion_tokens"] * model_info["output_cost_per_token"]
+
+ return prompt_cost, completion_cost
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/handler.py
new file mode 100644
index 00000000..2eabcdbc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/handler.py
@@ -0,0 +1,49 @@
+"""
+Calling logic for Databricks embeddings
+"""
+
+from typing import Optional
+
+from litellm.utils import EmbeddingResponse
+
+from ...openai_like.embedding.handler import OpenAILikeEmbeddingHandler
+from ..common_utils import DatabricksBase
+
+
+class DatabricksEmbeddingHandler(OpenAILikeEmbeddingHandler, DatabricksBase):
+ def embedding(
+ self,
+ model: str,
+ input: list,
+ timeout: float,
+ logging_obj,
+ api_key: Optional[str],
+ api_base: Optional[str],
+ optional_params: dict,
+ model_response: Optional[EmbeddingResponse] = None,
+ client=None,
+ aembedding=None,
+ custom_endpoint: Optional[bool] = None,
+ headers: Optional[dict] = None,
+ ) -> EmbeddingResponse:
+ api_base, headers = self.databricks_validate_environment(
+ api_base=api_base,
+ api_key=api_key,
+ endpoint_type="embeddings",
+ custom_endpoint=custom_endpoint,
+ headers=headers,
+ )
+ return super().embedding(
+ model=model,
+ input=input,
+ timeout=timeout,
+ logging_obj=logging_obj,
+ api_key=api_key,
+ api_base=api_base,
+ optional_params=optional_params,
+ model_response=model_response,
+ client=client,
+ aembedding=aembedding,
+ custom_endpoint=True,
+ headers=headers,
+ )
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/transformation.py
new file mode 100644
index 00000000..53e3b30d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/embed/transformation.py
@@ -0,0 +1,48 @@
+"""
+Translates from OpenAI's `/v1/embeddings` to Databricks' `/embeddings`
+"""
+
+import types
+from typing import Optional
+
+
+class DatabricksEmbeddingConfig:
+ """
+ Reference: https://learn.microsoft.com/en-us/azure/databricks/machine-learning/foundation-models/api-reference#--embedding-task
+ """
+
+ instruction: Optional[str] = (
+ None # An optional instruction to pass to the embedding model. BGE Authors recommend 'Represent this sentence for searching relevant passages:' for retrieval queries
+ )
+
+ def __init__(self, instruction: Optional[str] = None) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return {
+ k: v
+ for k, v in cls.__dict__.items()
+ if not k.startswith("__")
+ and not isinstance(
+ v,
+ (
+ types.FunctionType,
+ types.BuiltinFunctionType,
+ classmethod,
+ staticmethod,
+ ),
+ )
+ and v is not None
+ }
+
+ def get_supported_openai_params(
+ self,
+ ): # no optional openai embedding params supported
+ return []
+
+ def map_openai_params(self, non_default_params: dict, optional_params: dict):
+ return optional_params
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/exceptions.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/exceptions.py
new file mode 100644
index 00000000..8bb3d435
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/exceptions.py
@@ -0,0 +1,12 @@
+import httpx
+
+
+class DatabricksError(Exception):
+ def __init__(self, status_code, message):
+ self.status_code = status_code
+ self.message = message
+ self.request = httpx.Request(method="POST", url="https://docs.databricks.com/")
+ self.response = httpx.Response(status_code=status_code, request=self.request)
+ super().__init__(
+ self.message
+ ) # Call the base class constructor with the parameters it needs
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/databricks/streaming_utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/streaming_utils.py
new file mode 100644
index 00000000..2db53df9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/databricks/streaming_utils.py
@@ -0,0 +1,166 @@
+import json
+from typing import Optional
+
+import litellm
+from litellm import verbose_logger
+from litellm.types.llms.openai import (
+ ChatCompletionToolCallChunk,
+ ChatCompletionToolCallFunctionChunk,
+ ChatCompletionUsageBlock,
+)
+from litellm.types.utils import GenericStreamingChunk, Usage
+
+
+class ModelResponseIterator:
+ def __init__(self, streaming_response, sync_stream: bool):
+ self.streaming_response = streaming_response
+
+ def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
+ try:
+ processed_chunk = litellm.ModelResponseStream(**chunk)
+
+ text = ""
+ tool_use: Optional[ChatCompletionToolCallChunk] = None
+ is_finished = False
+ finish_reason = ""
+ usage: Optional[ChatCompletionUsageBlock] = None
+
+ if processed_chunk.choices[0].delta.content is not None: # type: ignore
+ text = processed_chunk.choices[0].delta.content # type: ignore
+
+ if (
+ processed_chunk.choices[0].delta.tool_calls is not None # type: ignore
+ and len(processed_chunk.choices[0].delta.tool_calls) > 0 # type: ignore
+ and processed_chunk.choices[0].delta.tool_calls[0].function is not None # type: ignore
+ and processed_chunk.choices[0].delta.tool_calls[0].function.arguments # type: ignore
+ is not None
+ ):
+ tool_use = ChatCompletionToolCallChunk(
+ id=processed_chunk.choices[0].delta.tool_calls[0].id, # type: ignore
+ type="function",
+ function=ChatCompletionToolCallFunctionChunk(
+ name=processed_chunk.choices[0]
+ .delta.tool_calls[0] # type: ignore
+ .function.name,
+ arguments=processed_chunk.choices[0]
+ .delta.tool_calls[0] # type: ignore
+ .function.arguments,
+ ),
+ index=processed_chunk.choices[0].delta.tool_calls[0].index,
+ )
+
+ if processed_chunk.choices[0].finish_reason is not None:
+ is_finished = True
+ finish_reason = processed_chunk.choices[0].finish_reason
+
+ usage_chunk: Optional[Usage] = getattr(processed_chunk, "usage", None)
+ if usage_chunk is not None:
+
+ usage = ChatCompletionUsageBlock(
+ prompt_tokens=usage_chunk.prompt_tokens,
+ completion_tokens=usage_chunk.completion_tokens,
+ total_tokens=usage_chunk.total_tokens,
+ )
+
+ return GenericStreamingChunk(
+ text=text,
+ tool_use=tool_use,
+ is_finished=is_finished,
+ finish_reason=finish_reason,
+ usage=usage,
+ index=0,
+ )
+ except json.JSONDecodeError:
+ raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
+
+ # Sync iterator
+ def __iter__(self):
+ self.response_iterator = self.streaming_response
+ return self
+
+ def __next__(self):
+ if not hasattr(self, "response_iterator"):
+ self.response_iterator = self.streaming_response
+ try:
+ chunk = self.response_iterator.__next__()
+ except StopIteration:
+ raise StopIteration
+ except ValueError as e:
+ raise RuntimeError(f"Error receiving chunk from stream: {e}")
+
+ try:
+ chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
+ chunk = chunk.strip()
+ if len(chunk) > 0:
+ json_chunk = json.loads(chunk)
+ return self.chunk_parser(chunk=json_chunk)
+ else:
+ return GenericStreamingChunk(
+ text="",
+ is_finished=False,
+ finish_reason="",
+ usage=None,
+ index=0,
+ tool_use=None,
+ )
+ except StopIteration:
+ raise StopIteration
+ except ValueError as e:
+ verbose_logger.debug(
+ f"Error parsing chunk: {e},\nReceived chunk: {chunk}. Defaulting to empty chunk here."
+ )
+ return GenericStreamingChunk(
+ text="",
+ is_finished=False,
+ finish_reason="",
+ usage=None,
+ index=0,
+ tool_use=None,
+ )
+
+ # Async iterator
+ def __aiter__(self):
+ self.async_response_iterator = self.streaming_response.__aiter__()
+ return self
+
+ async def __anext__(self):
+ try:
+ chunk = await self.async_response_iterator.__anext__()
+ except StopAsyncIteration:
+ raise StopAsyncIteration
+ except ValueError as e:
+ raise RuntimeError(f"Error receiving chunk from stream: {e}")
+ except Exception as e:
+ raise RuntimeError(f"Error receiving chunk from stream: {e}")
+
+ try:
+ chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
+ chunk = chunk.strip()
+ if chunk == "[DONE]":
+ raise StopAsyncIteration
+ if len(chunk) > 0:
+ json_chunk = json.loads(chunk)
+ return self.chunk_parser(chunk=json_chunk)
+ else:
+ return GenericStreamingChunk(
+ text="",
+ is_finished=False,
+ finish_reason="",
+ usage=None,
+ index=0,
+ tool_use=None,
+ )
+ except StopAsyncIteration:
+ raise StopAsyncIteration
+ except ValueError as e:
+ verbose_logger.debug(
+ f"Error parsing chunk: {e},\nReceived chunk: {chunk}. Defaulting to empty chunk here."
+ )
+ return GenericStreamingChunk(
+ text="",
+ is_finished=False,
+ finish_reason="",
+ usage=None,
+ index=0,
+ tool_use=None,
+ )