author     S. Solomon Darnell   2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell   2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py | 228
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/transformation.py    | 265
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/types.py             |  62
3 files changed, 555 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
new file mode 100644
index 00000000..3ef40703
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
@@ -0,0 +1,228 @@
+from typing import Literal, Optional, Union
+
+import httpx
+
+import litellm
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObject
+from litellm.llms.custom_httpx.http_handler import (
+ AsyncHTTPHandler,
+ HTTPHandler,
+ _get_httpx_client,
+ get_async_httpx_client,
+)
+from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError
+from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+from litellm.types.llms.vertex_ai import *
+from litellm.types.utils import EmbeddingResponse
+
+from .types import *
+
+
+class VertexEmbedding(VertexBase):
+ def __init__(self) -> None:
+ super().__init__()
+
+ def embedding(
+ self,
+ model: str,
+ input: Union[list, str],
+ print_verbose,
+ model_response: EmbeddingResponse,
+ optional_params: dict,
+ logging_obj: LiteLLMLoggingObject,
+ custom_llm_provider: Literal[
+ "vertex_ai", "vertex_ai_beta", "gemini"
+ ], # if it's vertex_ai or gemini (google ai studio)
+ timeout: Optional[Union[float, httpx.Timeout]],
+ api_key: Optional[str] = None,
+ encoding=None,
+ aembedding=False,
+ api_base: Optional[str] = None,
+ client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
+ vertex_project: Optional[str] = None,
+ vertex_location: Optional[str] = None,
+ vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
+ gemini_api_key: Optional[str] = None,
+ extra_headers: Optional[dict] = None,
+ ) -> EmbeddingResponse:
+ if aembedding is True:
+ return self.async_embedding( # type: ignore
+ model=model,
+ input=input,
+ logging_obj=logging_obj,
+ model_response=model_response,
+ optional_params=optional_params,
+ encoding=encoding,
+ custom_llm_provider=custom_llm_provider,
+ timeout=timeout,
+ api_base=api_base,
+ vertex_project=vertex_project,
+ vertex_location=vertex_location,
+ vertex_credentials=vertex_credentials,
+ gemini_api_key=gemini_api_key,
+ extra_headers=extra_headers,
+ )
+
+ should_use_v1beta1_features = self.is_using_v1beta1_features(
+ optional_params=optional_params
+ )
+
+ _auth_header, vertex_project = self._ensure_access_token(
+ credentials=vertex_credentials,
+ project_id=vertex_project,
+ custom_llm_provider=custom_llm_provider,
+ )
+ auth_header, api_base = self._get_token_and_url(
+ model=model,
+ gemini_api_key=gemini_api_key,
+ auth_header=_auth_header,
+ vertex_project=vertex_project,
+ vertex_location=vertex_location,
+ vertex_credentials=vertex_credentials,
+ stream=False,
+ custom_llm_provider=custom_llm_provider,
+ api_base=api_base,
+ should_use_v1beta1_features=should_use_v1beta1_features,
+ mode="embedding",
+ )
+ headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
+ vertex_request: VertexEmbeddingRequest = (
+ litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
+ input=input, optional_params=optional_params, model=model
+ )
+ )
+
+ _client_params = {}
+ if timeout:
+ _client_params["timeout"] = timeout
+ if client is None or not isinstance(client, HTTPHandler):
+ client = _get_httpx_client(params=_client_params)
+ else:
+ client = client # type: ignore
+ ## LOGGING
+ logging_obj.pre_call(
+ input=vertex_request,
+ api_key="",
+ additional_args={
+ "complete_input_dict": vertex_request,
+ "api_base": api_base,
+ "headers": headers,
+ },
+ )
+
+ try:
+ response = client.post(api_base, headers=headers, json=vertex_request) # type: ignore
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err:
+ error_code = err.response.status_code
+ raise VertexAIError(status_code=error_code, message=err.response.text)
+ except httpx.TimeoutException:
+ raise VertexAIError(status_code=408, message="Timeout error occurred.")
+
+ _json_response = response.json()
+ ## LOGGING POST-CALL
+ logging_obj.post_call(
+ input=input, api_key=None, original_response=_json_response
+ )
+
+ model_response = (
+ litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
+ response=_json_response, model=model, model_response=model_response
+ )
+ )
+
+ return model_response
+
+ async def async_embedding(
+ self,
+ model: str,
+ input: Union[list, str],
+ model_response: litellm.EmbeddingResponse,
+ logging_obj: LiteLLMLoggingObject,
+ optional_params: dict,
+ custom_llm_provider: Literal[
+ "vertex_ai", "vertex_ai_beta", "gemini"
+ ], # if it's vertex_ai or gemini (google ai studio)
+ timeout: Optional[Union[float, httpx.Timeout]],
+ api_base: Optional[str] = None,
+ client: Optional[AsyncHTTPHandler] = None,
+ vertex_project: Optional[str] = None,
+ vertex_location: Optional[str] = None,
+ vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
+ gemini_api_key: Optional[str] = None,
+ extra_headers: Optional[dict] = None,
+ encoding=None,
+ ) -> litellm.EmbeddingResponse:
+ """
+ Async embedding implementation
+ """
+ should_use_v1beta1_features = self.is_using_v1beta1_features(
+ optional_params=optional_params
+ )
+ _auth_header, vertex_project = await self._ensure_access_token_async(
+ credentials=vertex_credentials,
+ project_id=vertex_project,
+ custom_llm_provider=custom_llm_provider,
+ )
+ auth_header, api_base = self._get_token_and_url(
+ model=model,
+ gemini_api_key=gemini_api_key,
+ auth_header=_auth_header,
+ vertex_project=vertex_project,
+ vertex_location=vertex_location,
+ vertex_credentials=vertex_credentials,
+ stream=False,
+ custom_llm_provider=custom_llm_provider,
+ api_base=api_base,
+ should_use_v1beta1_features=should_use_v1beta1_features,
+ mode="embedding",
+ )
+ headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
+ vertex_request: VertexEmbeddingRequest = (
+ litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
+ input=input, optional_params=optional_params, model=model
+ )
+ )
+
+ _async_client_params = {}
+ if timeout:
+ _async_client_params["timeout"] = timeout
+ if client is None or not isinstance(client, AsyncHTTPHandler):
+ client = get_async_httpx_client(
+ params=_async_client_params, llm_provider=litellm.LlmProviders.VERTEX_AI
+ )
+ else:
+ client = client # type: ignore
+ ## LOGGING
+ logging_obj.pre_call(
+ input=vertex_request,
+ api_key="",
+ additional_args={
+ "complete_input_dict": vertex_request,
+ "api_base": api_base,
+ "headers": headers,
+ },
+ )
+
+ try:
+ response = await client.post(api_base, headers=headers, json=vertex_request) # type: ignore
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err:
+ error_code = err.response.status_code
+ raise VertexAIError(status_code=error_code, message=err.response.text)
+ except httpx.TimeoutException:
+ raise VertexAIError(status_code=408, message="Timeout error occurred.")
+
+ _json_response = response.json()
+ ## LOGGING POST-CALL
+ logging_obj.post_call(
+ input=input, api_key=None, original_response=_json_response
+ )
+
+ model_response = (
+ litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
+ response=_json_response, model=model, model_response=model_response
+ )
+ )
+
+ return model_response
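
Note: VertexEmbedding is not normally constructed directly; litellm's public embedding() entry point routes "vertex_ai/" models to the handler above. A minimal usage sketch, assuming valid Google Cloud credentials are configured; the project id and location are illustrative placeholders:

```python
import litellm

# Routed to VertexEmbedding.embedding() via the "vertex_ai/" model prefix.
response = litellm.embedding(
    model="vertex_ai/text-embedding-004",
    input=["hello world"],
    vertex_project="my-gcp-project",   # illustrative placeholder
    vertex_location="us-central1",     # illustrative placeholder
)
print(len(response.data[0]["embedding"]))  # length of the embedding vector
```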
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/transformation.py
new file mode 100644
index 00000000..d9e84fca
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/transformation.py
@@ -0,0 +1,265 @@
+import types
+from typing import List, Literal, Optional, Union
+
+from pydantic import BaseModel
+
+from litellm.types.utils import EmbeddingResponse, Usage
+
+from .types import *
+
+
+class VertexAITextEmbeddingConfig(BaseModel):
+ """
+ Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput
+
+ Args:
+        auto_truncate: Optional[bool] If True, will truncate input text to fit within the model's max input length.
+        task_type: Optional[str] The type of task to be performed. The default is "RETRIEVAL_QUERY".
+        title: Optional[str] The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
+ """
+
+ auto_truncate: Optional[bool] = None
+ task_type: Optional[
+ Literal[
+ "RETRIEVAL_QUERY",
+ "RETRIEVAL_DOCUMENT",
+ "SEMANTIC_SIMILARITY",
+ "CLASSIFICATION",
+ "CLUSTERING",
+ "QUESTION_ANSWERING",
+ "FACT_VERIFICATION",
+ ]
+ ] = None
+ title: Optional[str] = None
+
+ def __init__(
+ self,
+ auto_truncate: Optional[bool] = None,
+ task_type: Optional[
+ Literal[
+ "RETRIEVAL_QUERY",
+ "RETRIEVAL_DOCUMENT",
+ "SEMANTIC_SIMILARITY",
+ "CLASSIFICATION",
+ "CLUSTERING",
+ "QUESTION_ANSWERING",
+ "FACT_VERIFICATION",
+ ]
+ ] = None,
+ title: Optional[str] = None,
+ ) -> None:
+ locals_ = locals().copy()
+ for key, value in locals_.items():
+ if key != "self" and value is not None:
+ setattr(self.__class__, key, value)
+
+ @classmethod
+ def get_config(cls):
+ return {
+ k: v
+ for k, v in cls.__dict__.items()
+ if not k.startswith("__")
+ and not isinstance(
+ v,
+ (
+ types.FunctionType,
+ types.BuiltinFunctionType,
+ classmethod,
+ staticmethod,
+ ),
+ )
+ and v is not None
+ }
+
+ def get_supported_openai_params(self):
+ return ["dimensions"]
+
+ def map_openai_params(
+ self, non_default_params: dict, optional_params: dict, kwargs: dict
+ ):
+ for param, value in non_default_params.items():
+ if param == "dimensions":
+ optional_params["outputDimensionality"] = value
+
+ if "input_type" in kwargs:
+ optional_params["task_type"] = kwargs.pop("input_type")
+ return optional_params, kwargs
+
+ def get_mapped_special_auth_params(self) -> dict:
+ """
+ Common auth params across bedrock/vertex_ai/azure/watsonx
+ """
+ return {"project": "vertex_project", "region_name": "vertex_location"}
+
+ def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
+ mapped_params = self.get_mapped_special_auth_params()
+
+ for param, value in non_default_params.items():
+ if param in mapped_params:
+ optional_params[mapped_params[param]] = value
+ return optional_params
+
+ def transform_openai_request_to_vertex_embedding_request(
+ self, input: Union[list, str], optional_params: dict, model: str
+ ) -> VertexEmbeddingRequest:
+ """
+ Transforms an openai request to a vertex embedding request.
+ """
+ if model.isdigit():
+ return self._transform_openai_request_to_fine_tuned_embedding_request(
+ input, optional_params, model
+ )
+
+ vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
+ vertex_text_embedding_input_list: List[TextEmbeddingInput] = []
+ task_type: Optional[TaskType] = optional_params.get("task_type")
+ title = optional_params.get("title")
+
+ if isinstance(input, str):
+ input = [input] # Convert single string to list for uniform processing
+
+ for text in input:
+ embedding_input = self.create_embedding_input(
+ content=text, task_type=task_type, title=title
+ )
+ vertex_text_embedding_input_list.append(embedding_input)
+
+ vertex_request["instances"] = vertex_text_embedding_input_list
+ vertex_request["parameters"] = EmbeddingParameters(**optional_params)
+
+ return vertex_request
+
+ def _transform_openai_request_to_fine_tuned_embedding_request(
+ self, input: Union[list, str], optional_params: dict, model: str
+ ) -> VertexEmbeddingRequest:
+ """
+ Transforms an openai request to a vertex fine-tuned embedding request.
+
+ Vertex Doc: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
+ Sample Request:
+
+ ```json
+ {
+ "instances" : [
+ {
+ "inputs": "How would the Future of AI in 10 Years look?",
+ "parameters": {
+ "max_new_tokens": 128,
+ "temperature": 1.0,
+ "top_p": 0.9,
+ "top_k": 10
+ }
+ }
+ ]
+ }
+ ```
+ """
+ vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
+ vertex_text_embedding_input_list: List[TextEmbeddingFineTunedInput] = []
+ if isinstance(input, str):
+ input = [input] # Convert single string to list for uniform processing
+
+ for text in input:
+ embedding_input = TextEmbeddingFineTunedInput(inputs=text)
+ vertex_text_embedding_input_list.append(embedding_input)
+
+ vertex_request["instances"] = vertex_text_embedding_input_list
+ vertex_request["parameters"] = TextEmbeddingFineTunedParameters(
+ **optional_params
+ )
+
+ return vertex_request
+
+ def create_embedding_input(
+ self,
+ content: str,
+ task_type: Optional[TaskType] = None,
+ title: Optional[str] = None,
+ ) -> TextEmbeddingInput:
+ """
+ Creates a TextEmbeddingInput object.
+
+ Vertex requires a List of TextEmbeddingInput objects. This helper function creates a single TextEmbeddingInput object.
+
+ Args:
+ content (str): The content to be embedded.
+            task_type (Optional[TaskType]): The type of task to be performed.
+            title (Optional[str]): The title of the document to be embedded.
+
+ Returns:
+ TextEmbeddingInput: A TextEmbeddingInput object.
+ """
+ text_embedding_input = TextEmbeddingInput(content=content)
+ if task_type is not None:
+ text_embedding_input["task_type"] = task_type
+ if title is not None:
+ text_embedding_input["title"] = title
+ return text_embedding_input
+
+ def transform_vertex_response_to_openai(
+ self, response: dict, model: str, model_response: EmbeddingResponse
+ ) -> EmbeddingResponse:
+ """
+ Transforms a vertex embedding response to an openai response.
+ """
+ if model.isdigit():
+ return self._transform_vertex_response_to_openai_for_fine_tuned_models(
+ response, model, model_response
+ )
+
+ _predictions = response["predictions"]
+
+ embedding_response = []
+ input_tokens: int = 0
+        for idx, element in enumerate(_predictions):
+            embedding = element["embeddings"]
+ embedding_response.append(
+ {
+ "object": "embedding",
+ "index": idx,
+ "embedding": embedding["values"],
+ }
+ )
+ input_tokens += embedding["statistics"]["token_count"]
+
+ model_response.object = "list"
+ model_response.data = embedding_response
+ model_response.model = model
+ usage = Usage(
+ prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
+ )
+ setattr(model_response, "usage", usage)
+ return model_response
+
+ def _transform_vertex_response_to_openai_for_fine_tuned_models(
+ self, response: dict, model: str, model_response: EmbeddingResponse
+ ) -> EmbeddingResponse:
+ """
+ Transforms a vertex fine-tuned model embedding response to an openai response format.
+ """
+ _predictions = response["predictions"]
+
+ embedding_response = []
+ # For fine-tuned models, we don't get token counts in the response
+ input_tokens = 0
+
+ for idx, embedding_values in enumerate(_predictions):
+ embedding_response.append(
+ {
+ "object": "embedding",
+ "index": idx,
+ "embedding": embedding_values[
+ 0
+ ], # The embedding values are nested one level deeper
+ }
+ )
+
+ model_response.object = "list"
+ model_response.data = embedding_response
+ model_response.model = model
+ usage = Usage(
+ prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
+ )
+ setattr(model_response, "usage", usage)
+ return model_response
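
The two transformations above can be exercised without a network call. A minimal sketch using the module-level config instance the handler references; the response payload is fabricated to match the "predictions" shape parsed in transform_vertex_response_to_openai (values and token counts are made up):

```python
import litellm
from litellm.types.utils import EmbeddingResponse

config = litellm.vertexAITextEmbeddingConfig  # instance used by embedding_handler.py

# OpenAI-style input -> Vertex request body
vertex_request = config.transform_openai_request_to_vertex_embedding_request(
    input=["first doc", "second doc"],
    optional_params={"task_type": "RETRIEVAL_DOCUMENT"},
    model="text-embedding-004",
)
# vertex_request["instances"] is a list of TextEmbeddingInput dicts

# Fabricated Vertex response -> OpenAI-style EmbeddingResponse
fake_response = {
    "predictions": [
        {"embeddings": {"values": [0.1, 0.2, 0.3], "statistics": {"token_count": 2}}},
        {"embeddings": {"values": [0.4, 0.5, 0.6], "statistics": {"token_count": 3}}},
    ]
}
openai_response = config.transform_vertex_response_to_openai(
    response=fake_response,
    model="text-embedding-004",
    model_response=EmbeddingResponse(),
)
print(openai_response.usage.prompt_tokens)  # 5 (summed token_count statistics)
```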
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/types.py b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/types.py
new file mode 100644
index 00000000..c0c53b17
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings/types.py
@@ -0,0 +1,62 @@
+"""
+Types for Vertex Embeddings Requests
+"""
+
+from enum import Enum
+from typing import List, Optional, TypedDict, Union
+
+
+class TaskType(str, Enum):
+ RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
+ RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
+ SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
+ CLASSIFICATION = "CLASSIFICATION"
+ CLUSTERING = "CLUSTERING"
+ QUESTION_ANSWERING = "QUESTION_ANSWERING"
+ FACT_VERIFICATION = "FACT_VERIFICATION"
+ CODE_RETRIEVAL_QUERY = "CODE_RETRIEVAL_QUERY"
+
+
+class TextEmbeddingInput(TypedDict, total=False):
+ content: str
+ task_type: Optional[TaskType]
+ title: Optional[str]
+
+
+# Fine-tuned models require a different input format
+# Ref: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
+class TextEmbeddingFineTunedInput(TypedDict, total=False):
+ inputs: str
+
+
+class TextEmbeddingFineTunedParameters(TypedDict, total=False):
+ max_new_tokens: Optional[int]
+ temperature: Optional[float]
+ top_p: Optional[float]
+ top_k: Optional[int]
+
+
+class EmbeddingParameters(TypedDict, total=False):
+ auto_truncate: Optional[bool]
+ output_dimensionality: Optional[int]
+
+
+class VertexEmbeddingRequest(TypedDict, total=False):
+ instances: Union[List[TextEmbeddingInput], List[TextEmbeddingFineTunedInput]]
+ parameters: Optional[Union[EmbeddingParameters, TextEmbeddingFineTunedParameters]]
+
+
+# Example usage:
+# example_request: VertexEmbeddingRequest = {
+# "instances": [
+# {
+# "content": "I would like embeddings for this text!",
+# "task_type": "RETRIEVAL_DOCUMENT",
+# "title": "document title"
+# }
+# ],
+# "parameters": {
+# "auto_truncate": True,
+# "output_dimensionality": None
+# }
+# }
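+#
+# A hypothetical companion example for a fine-tuned model (not in the source;
+# it mirrors the sample request documented in transformation.py and the
+# TextEmbeddingFineTunedInput/TextEmbeddingFineTunedParameters shapes above):
+# fine_tuned_request: VertexEmbeddingRequest = {
+#     "instances": [
+#         {"inputs": "How would the Future of AI in 10 Years look?"}
+#     ],
+#     "parameters": {
+#         "max_new_tokens": 128,
+#         "temperature": 1.0
+#     }
+# }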