diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/vertex_embeddings')
3 files changed, 555 insertions, 0 deletions
# --- .venv/.../litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py (new file in this diff) ---
from typing import Literal, Optional, Union

import httpx

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObject
from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    HTTPHandler,
    _get_httpx_client,
    get_async_httpx_client,
)
from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
from litellm.types.llms.vertex_ai import *
from litellm.types.utils import EmbeddingResponse

from .types import *


class VertexEmbedding(VertexBase):
    """Sync and async embedding calls against Vertex AI / Gemini text-embedding endpoints.

    Auth, URL construction, and header handling come from :class:`VertexBase`;
    request/response shaping is delegated to
    ``litellm.vertexAITextEmbeddingConfig``.
    """

    def __init__(self) -> None:
        super().__init__()

    def embedding(
        self,
        model: str,
        input: Union[list, str],
        print_verbose,
        model_response: EmbeddingResponse,
        optional_params: dict,
        logging_obj: LiteLLMLoggingObject,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_key: Optional[str] = None,
        encoding=None,
        aembedding=False,
        api_base: Optional[str] = None,
        client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
    ) -> EmbeddingResponse:
        """Synchronous embedding entry point.

        NOTE: when ``aembedding`` is True this returns the *coroutine* from
        :meth:`async_embedding` (hence the ``# type: ignore``) — callers are
        expected to await it.

        Raises:
            VertexAIError: on a non-2xx HTTP status or a request timeout.
        """
        if aembedding is True:
            return self.async_embedding(  # type: ignore
                model=model,
                input=input,
                logging_obj=logging_obj,
                model_response=model_response,
                optional_params=optional_params,
                encoding=encoding,
                custom_llm_provider=custom_llm_provider,
                timeout=timeout,
                api_base=api_base,
                vertex_project=vertex_project,
                vertex_location=vertex_location,
                vertex_credentials=vertex_credentials,
                gemini_api_key=gemini_api_key,
                extra_headers=extra_headers,
            )

        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )

        # Resolve credentials first; _get_token_and_url then builds the final
        # auth header and endpoint URL (provider-dependent).
        _auth_header, vertex_project = self._ensure_access_token(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        vertex_request: VertexEmbeddingRequest = (
            litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
                input=input, optional_params=optional_params, model=model
            )
        )

        _client_params = {}
        if timeout:
            _client_params["timeout"] = timeout
        # Reuse the caller's HTTPHandler when one is supplied; otherwise build
        # our own. (Removed the original dead `else: client = client` no-op.)
        if client is None or not isinstance(client, HTTPHandler):
            client = _get_httpx_client(params=_client_params)

        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            # Chain the httpx cause so the original traceback is preserved.
            raise VertexAIError(
                status_code=error_code, message=err.response.text
            ) from err
        except httpx.TimeoutException as err:
            raise VertexAIError(
                status_code=408, message="Timeout error occurred."
            ) from err

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response

    async def async_embedding(
        self,
        model: str,
        input: Union[list, str],
        model_response: litellm.EmbeddingResponse,
        logging_obj: LiteLLMLoggingObject,
        optional_params: dict,
        custom_llm_provider: Literal[
            "vertex_ai", "vertex_ai_beta", "gemini"
        ],  # if it's vertex_ai or gemini (google ai studio)
        timeout: Optional[Union[float, httpx.Timeout]],
        api_base: Optional[str] = None,
        client: Optional[AsyncHTTPHandler] = None,
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
        gemini_api_key: Optional[str] = None,
        extra_headers: Optional[dict] = None,
        encoding=None,
    ) -> litellm.EmbeddingResponse:
        """
        Async embedding implementation.

        Mirrors :meth:`embedding` step for step, using the async token helper
        and the shared async httpx client.

        Raises:
            VertexAIError: on a non-2xx HTTP status or a request timeout.
        """
        should_use_v1beta1_features = self.is_using_v1beta1_features(
            optional_params=optional_params
        )
        _auth_header, vertex_project = await self._ensure_access_token_async(
            credentials=vertex_credentials,
            project_id=vertex_project,
            custom_llm_provider=custom_llm_provider,
        )
        auth_header, api_base = self._get_token_and_url(
            model=model,
            gemini_api_key=gemini_api_key,
            auth_header=_auth_header,
            vertex_project=vertex_project,
            vertex_location=vertex_location,
            vertex_credentials=vertex_credentials,
            stream=False,
            custom_llm_provider=custom_llm_provider,
            api_base=api_base,
            should_use_v1beta1_features=should_use_v1beta1_features,
            mode="embedding",
        )
        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
        vertex_request: VertexEmbeddingRequest = (
            litellm.vertexAITextEmbeddingConfig.transform_openai_request_to_vertex_embedding_request(
                input=input, optional_params=optional_params, model=model
            )
        )

        _async_client_params = {}
        if timeout:
            _async_client_params["timeout"] = timeout
        # Reuse the caller's AsyncHTTPHandler when one is supplied; otherwise
        # obtain the shared provider client. (Removed the dead `else` no-op.)
        if client is None or not isinstance(client, AsyncHTTPHandler):
            client = get_async_httpx_client(
                params=_async_client_params, llm_provider=litellm.LlmProviders.VERTEX_AI
            )

        ## LOGGING
        logging_obj.pre_call(
            input=vertex_request,
            api_key="",
            additional_args={
                "complete_input_dict": vertex_request,
                "api_base": api_base,
                "headers": headers,
            },
        )

        try:
            response = await client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise VertexAIError(
                status_code=error_code, message=err.response.text
            ) from err
        except httpx.TimeoutException as err:
            raise VertexAIError(
                status_code=408, message="Timeout error occurred."
            ) from err

        _json_response = response.json()
        ## LOGGING POST-CALL
        logging_obj.post_call(
            input=input, api_key=None, original_response=_json_response
        )

        model_response = (
            litellm.vertexAITextEmbeddingConfig.transform_vertex_response_to_openai(
                response=_json_response, model=model, model_response=model_response
            )
        )

        return model_response


# --- .venv/.../litellm/llms/vertex_ai/vertex_embeddings/transformation.py (new file in this diff) ---
import types
from typing import List, Literal, Optional, Union

from pydantic import BaseModel

from litellm.types.utils import EmbeddingResponse, Usage

from .types import *
class VertexAITextEmbeddingConfig(BaseModel):
    """
    Request/response transformation config for Vertex AI text embeddings.

    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#TextEmbeddingInput

    Args:
        auto_truncate: Optional(bool) If True, will truncate input text to fit within the model's max input length.
        task_type: Optional(str) The type of task to be performed. The default is "RETRIEVAL_QUERY".
        title: Optional(str) The title of the document to be embedded. (only valid with task_type=RETRIEVAL_DOCUMENT).
    """

    auto_truncate: Optional[bool] = None
    task_type: Optional[
        Literal[
            "RETRIEVAL_QUERY",
            "RETRIEVAL_DOCUMENT",
            "SEMANTIC_SIMILARITY",
            "CLASSIFICATION",
            "CLUSTERING",
            "QUESTION_ANSWERING",
            "FACT_VERIFICATION",
        ]
    ] = None
    title: Optional[str] = None

    def __init__(
        self,
        auto_truncate: Optional[bool] = None,
        task_type: Optional[
            Literal[
                "RETRIEVAL_QUERY",
                "RETRIEVAL_DOCUMENT",
                "SEMANTIC_SIMILARITY",
                "CLASSIFICATION",
                "CLUSTERING",
                "QUESTION_ANSWERING",
                "FACT_VERIFICATION",
            ]
        ] = None,
        title: Optional[str] = None,
    ) -> None:
        # NOTE(review): this writes non-None args onto the *class* via
        # setattr(self.__class__, ...) and never calls pydantic's
        # super().__init__() — litellm's shared config-class convention, but
        # it means state is shared across all instances; confirm intended.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        # Snapshot of class-level config: every non-dunder, non-callable,
        # non-None class attribute (i.e. whatever __init__ set above).
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        # Only the OpenAI `dimensions` param maps onto Vertex embeddings.
        return ["dimensions"]

    def map_openai_params(
        self, non_default_params: dict, optional_params: dict, kwargs: dict
    ):
        """Map OpenAI-style params onto Vertex param names.

        Mutates and returns ``optional_params``; pops ``input_type`` out of
        ``kwargs`` (Cohere-style alias for Vertex ``task_type``).
        """
        for param, value in non_default_params.items():
            if param == "dimensions":
                # NOTE(review): writes camelCase `outputDimensionality`, but
                # EmbeddingParameters (types.py) declares snake_case
                # `output_dimensionality` — the camelCase key is what ends up
                # in the request dict at runtime; confirm which one the API
                # expects and align the TypedDict.
                optional_params["outputDimensionality"] = value

        if "input_type" in kwargs:
            optional_params["task_type"] = kwargs.pop("input_type")
        return optional_params, kwargs

    def get_mapped_special_auth_params(self) -> dict:
        """
        Common auth params across bedrock/vertex_ai/azure/watsonx
        """
        return {"project": "vertex_project", "region_name": "vertex_location"}

    def map_special_auth_params(self, non_default_params: dict, optional_params: dict):
        # Rename provider-agnostic auth params (project/region_name) to the
        # vertex_* names; mutates and returns optional_params.
        mapped_params = self.get_mapped_special_auth_params()

        for param, value in non_default_params.items():
            if param in mapped_params:
                optional_params[mapped_params[param]] = value
        return optional_params

    def transform_openai_request_to_vertex_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex embedding request.
        """
        # An all-digit model name is treated as a fine-tuned model endpoint
        # ID, which takes a different instance payload shape.
        if model.isdigit():
            return self._transform_openai_request_to_fine_tuned_embedding_request(
                input, optional_params, model
            )

        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingInput] = []
        task_type: Optional[TaskType] = optional_params.get("task_type")
        title = optional_params.get("title")

        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = self.create_embedding_input(
                content=text, task_type=task_type, title=title
            )
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        # NOTE(review): all of optional_params is forwarded (TypedDicts don't
        # filter keys at runtime), so task_type/title also land in
        # "parameters" alongside the instances — confirm the API ignores them.
        vertex_request["parameters"] = EmbeddingParameters(**optional_params)

        return vertex_request

    def _transform_openai_request_to_fine_tuned_embedding_request(
        self, input: Union[list, str], optional_params: dict, model: str
    ) -> VertexEmbeddingRequest:
        """
        Transforms an openai request to a vertex fine-tuned embedding request.

        Vertex Doc: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
        Sample Request:

        ```json
        {
          "instances" : [
            {
              "inputs": "How would the Future of AI in 10 Years look?",
              "parameters": {
                "max_new_tokens": 128,
                "temperature": 1.0,
                "top_p": 0.9,
                "top_k": 10
              }
            }
          ]
        }
        ```
        """
        vertex_request: VertexEmbeddingRequest = VertexEmbeddingRequest()
        vertex_text_embedding_input_list: List[TextEmbeddingFineTunedInput] = []
        if isinstance(input, str):
            input = [input]  # Convert single string to list for uniform processing

        for text in input:
            embedding_input = TextEmbeddingFineTunedInput(inputs=text)
            vertex_text_embedding_input_list.append(embedding_input)

        vertex_request["instances"] = vertex_text_embedding_input_list
        vertex_request["parameters"] = TextEmbeddingFineTunedParameters(
            **optional_params
        )

        return vertex_request

    def create_embedding_input(
        self,
        content: str,
        task_type: Optional[TaskType] = None,
        title: Optional[str] = None,
    ) -> TextEmbeddingInput:
        """
        Creates a TextEmbeddingInput object.

        Vertex requires a List of TextEmbeddingInput objects. This helper function creates a single TextEmbeddingInput object.

        Args:
            content (str): The content to be embedded.
            task_type (Optional[TaskType]): The type of task to be performed.
            title (Optional[str]): The title of the document to be embedded.

        Returns:
            TextEmbeddingInput: A TextEmbeddingInput object.
        """
        text_embedding_input = TextEmbeddingInput(content=content)
        # Only set the optional keys when present so they are omitted from the
        # JSON payload entirely rather than sent as null.
        if task_type is not None:
            text_embedding_input["task_type"] = task_type
        if title is not None:
            text_embedding_input["title"] = title
        return text_embedding_input

    def transform_vertex_response_to_openai(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex embedding response to an openai response.

        Mutates and returns ``model_response``. Raises KeyError if the
        response lacks the expected predictions/embeddings structure.
        """
        if model.isdigit():
            return self._transform_vertex_response_to_openai_for_fine_tuned_models(
                response, model, model_response
            )

        _predictions = response["predictions"]

        embedding_response = []
        input_tokens: int = 0
        for idx, element in enumerate(_predictions):

            embedding = element["embeddings"]
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding["values"],
                }
            )
            # Vertex reports per-input token statistics; sum them for usage.
            input_tokens += embedding["statistics"]["token_count"]

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model
        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)
        return model_response

    def _transform_vertex_response_to_openai_for_fine_tuned_models(
        self, response: dict, model: str, model_response: EmbeddingResponse
    ) -> EmbeddingResponse:
        """
        Transforms a vertex fine-tuned model embedding response to an openai response format.

        Mutates and returns ``model_response``.
        """
        _predictions = response["predictions"]

        embedding_response = []
        # For fine-tuned models, we don't get token counts in the response
        input_tokens = 0

        for idx, embedding_values in enumerate(_predictions):
            embedding_response.append(
                {
                    "object": "embedding",
                    "index": idx,
                    "embedding": embedding_values[
                        0
                    ],  # The embedding values are nested one level deeper
                }
            )

        model_response.object = "list"
        model_response.data = embedding_response
        model_response.model = model
        usage = Usage(
            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
        )
        setattr(model_response, "usage", usage)
        return model_response


# --- .venv/.../litellm/llms/vertex_ai/vertex_embeddings/types.py (new file in this diff) ---
"""
Types for Vertex Embeddings Requests
"""

from enum import Enum
from typing import List, Optional, TypedDict, Union


class TaskType(str, Enum):
    # Embedding task types accepted by the Vertex text-embeddings API.
    RETRIEVAL_QUERY = "RETRIEVAL_QUERY"
    RETRIEVAL_DOCUMENT = "RETRIEVAL_DOCUMENT"
    SEMANTIC_SIMILARITY = "SEMANTIC_SIMILARITY"
    CLASSIFICATION = "CLASSIFICATION"
    CLUSTERING = "CLUSTERING"
    QUESTION_ANSWERING = "QUESTION_ANSWERING"
    FACT_VERIFICATION = "FACT_VERIFICATION"
    CODE_RETRIEVAL_QUERY = "CODE_RETRIEVAL_QUERY"


class TextEmbeddingInput(TypedDict, total=False):
    # One instance in a standard embedding request.
    content: str
    task_type: Optional[TaskType]
    title: Optional[str]


# Fine-tuned models require a different input format
# Ref: https://console.cloud.google.com/vertex-ai/model-garden?hl=en&project=adroit-crow-413218&pageState=(%22galleryStateKey%22:(%22f%22:(%22g%22:%5B%5D,%22o%22:%5B%5D),%22s%22:%22%22))
class TextEmbeddingFineTunedInput(TypedDict, total=False):
    inputs: str


class TextEmbeddingFineTunedParameters(TypedDict, total=False):
    # Generation-style parameters forwarded to fine-tuned embedding endpoints.
    max_new_tokens: Optional[int]
    temperature: Optional[float]
    top_p: Optional[float]
    top_k: Optional[int]


class EmbeddingParameters(TypedDict, total=False):
    # NOTE(review): transformation.py's map_openai_params writes the
    # camelCase key `outputDimensionality` into the request, not
    # `output_dimensionality` as declared here (the REST API also documents
    # camelCase `autoTruncate`/`outputDimensionality`) — confirm and align.
    auto_truncate: Optional[bool]
    output_dimensionality: Optional[int]


class VertexEmbeddingRequest(TypedDict, total=False):
    # Top-level request body: standard or fine-tuned instances + parameters.
    instances: Union[List[TextEmbeddingInput], List[TextEmbeddingFineTunedInput]]
    parameters: Optional[Union[EmbeddingParameters, TextEmbeddingFineTunedParameters]]


# Example usage:
# example_request: VertexEmbeddingRequest = {
#     "instances": [
#         {
#             "content": "I would like embeddings for this text!",
#             "task_type": "RETRIEVAL_DOCUMENT",
#             "title": "document title"
#         }
#     ],
#     "parameters": {
#         "auto_truncate": True,
#         "output_dimensionality": None
#     }
# }