author     S. Solomon Darnell   2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell   2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/files/transformation.py
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/files/transformation.py')
-rw-r--r--   .venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/files/transformation.py   163
1 file changed, 163 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/files/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/files/transformation.py
new file mode 100644
index 00000000..a124e205
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/files/transformation.py
@@ -0,0 +1,163 @@
+import json
+import uuid
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from litellm.llms.vertex_ai.common_utils import (
+    _convert_vertex_datetime_to_openai_datetime,
+)
+from litellm.llms.vertex_ai.gemini.transformation import _transform_request_body
+from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+    VertexGeminiConfig,
+)
+from litellm.types.llms.openai import CreateFileRequest, FileObject, FileTypes, PathLike
+
+
+class VertexAIFilesTransformation(VertexGeminiConfig):
+    """
+    Transforms OpenAI /v1/files/* requests to VertexAI /v1/files/* requests
+    """
+
+    def transform_openai_file_content_to_vertex_ai_file_content(
+        self, openai_file_content: Optional[FileTypes] = None
+    ) -> Tuple[str, str]:
+        """
+        Transforms OpenAI FileContentRequest to VertexAI FileContentRequest
+        """
+
+        if openai_file_content is None:
+            raise ValueError("contents of file are None")
+        # Read the content of the file
+        file_content = self._get_content_from_openai_file(openai_file_content)
+
+        # Split into lines and parse each line as JSON
+        openai_jsonl_content = [
+            json.loads(line) for line in file_content.splitlines() if line.strip()
+        ]
+        vertex_jsonl_content = (
+            self._transform_openai_jsonl_content_to_vertex_ai_jsonl_content(
+                openai_jsonl_content
+            )
+        )
+        vertex_jsonl_string = "\n".join(
+            json.dumps(item) for item in vertex_jsonl_content
+        )
+        object_name = self._get_gcs_object_name(
+            openai_jsonl_content=openai_jsonl_content
+        )
+        return vertex_jsonl_string, object_name
+
+    def _transform_openai_jsonl_content_to_vertex_ai_jsonl_content(
+        self, openai_jsonl_content: List[Dict[str, Any]]
+    ):
+        """
+        Transforms OpenAI JSONL content to VertexAI JSONL content
+
+        jsonl body for vertex is {"request": <request_body>}
+        Example Vertex jsonl
+        {"request":{"contents": [{"role": "user", "parts": [{"text": "What is the relation between the following video and image samples?"}, {"fileData": {"fileUri": "gs://cloud-samples-data/generative-ai/video/animals.mp4", "mimeType": "video/mp4"}}, {"fileData": {"fileUri": "gs://cloud-samples-data/generative-ai/image/cricket.jpeg", "mimeType": "image/jpeg"}}]}]}}
+        {"request":{"contents": [{"role": "user", "parts": [{"text": "Describe what is happening in this video."}, {"fileData": {"fileUri": "gs://cloud-samples-data/generative-ai/video/another_video.mov", "mimeType": "video/mov"}}]}]}}
+        """
+
+        vertex_jsonl_content = []
+        for _openai_jsonl_content in openai_jsonl_content:
+            openai_request_body = _openai_jsonl_content.get("body") or {}
+            vertex_request_body = _transform_request_body(
+                messages=openai_request_body.get("messages", []),
+                model=openai_request_body.get("model", ""),
+                optional_params=self._map_openai_to_vertex_params(openai_request_body),
+                custom_llm_provider="vertex_ai",
+                litellm_params={},
+                cached_content=None,
+            )
+            vertex_jsonl_content.append({"request": vertex_request_body})
+        return vertex_jsonl_content
+
+    def _get_gcs_object_name(
+        self,
+        openai_jsonl_content: List[Dict[str, Any]],
+    ) -> str:
+        """
+        Gets a unique GCS object name for the VertexAI batch prediction job
+
+        named as: litellm-vertex-{model}-{uuid}
+        """
+        _model = openai_jsonl_content[0].get("body", {}).get("model", "")
+        if "publishers/google/models" not in _model:
+            _model = f"publishers/google/models/{_model}"
+        object_name = f"litellm-vertex-files/{_model}/{uuid.uuid4()}"
+        return object_name
+
+    def _map_openai_to_vertex_params(
+        self,
+        openai_request_body: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        wrapper to call VertexGeminiConfig.map_openai_params
+        """
+        _model = openai_request_body.get("model", "")
+        vertex_params = self.map_openai_params(
+            model=_model,
+            non_default_params=openai_request_body,
+            optional_params={},
+            drop_params=False,
+        )
+        return vertex_params
+
+    def _get_content_from_openai_file(self, openai_file_content: FileTypes) -> str:
+        """
+        Helper to extract content from various OpenAI file types and return as string.
+
+        Handles:
+        - Direct content (str, bytes, IO[bytes])
+        - Tuple formats: (filename, content, [content_type], [headers])
+        - PathLike objects
+        """
+        content: Union[str, bytes] = b""
+        # Extract file content from tuple if necessary
+        if isinstance(openai_file_content, tuple):
+            # Take the second element which is always the file content
+            file_content = openai_file_content[1]
+        else:
+            file_content = openai_file_content
+
+        # Handle different file content types
+        if isinstance(file_content, str):
+            # String content can be used directly
+            content = file_content
+        elif isinstance(file_content, bytes):
+            # Bytes content can be decoded
+            content = file_content
+        elif isinstance(file_content, PathLike):  # PathLike
+            with open(str(file_content), "rb") as f:
+                content = f.read()
+        elif hasattr(file_content, "read"):  # IO[bytes]
+            # File-like objects need to be read
+            content = file_content.read()
+
+        # Ensure content is string
+        if isinstance(content, bytes):
+            content = content.decode("utf-8")
+
+        return content
+
+    def transform_gcs_bucket_response_to_openai_file_object(
+        self, create_file_data: CreateFileRequest, gcs_upload_response: Dict[str, Any]
+    ) -> FileObject:
+        """
+        Transforms GCS Bucket upload file response to OpenAI FileObject
+        """
+        gcs_id = gcs_upload_response.get("id", "")
+        # Remove the last numeric ID from the path
+        gcs_id = "/".join(gcs_id.split("/")[:-1]) if gcs_id else ""
+
+        return FileObject(
+            purpose=create_file_data.get("purpose", "batch"),
+            id=f"gs://{gcs_id}",
+            filename=gcs_upload_response.get("name", ""),
+            created_at=_convert_vertex_datetime_to_openai_datetime(
+                vertex_datetime=gcs_upload_response.get("timeCreated", "")
+            ),
+            status="uploaded",
+            bytes=gcs_upload_response.get("size", 0),
+            object="file",
+        )
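
For orientation, the sketch below illustrates, outside of litellm itself, the JSONL reshaping that the new VertexAIFilesTransformation class performs: each OpenAI batch line's "body" is converted into a Gemini-style request and wrapped as {"request": ...} before the lines are re-joined into a single JSONL string. The message-to-contents mapping here is a deliberately simplified stand-in for litellm's _transform_request_body, and the model name, prompt, and helper function are made-up sample data, not part of the committed file.

# Illustrative only: a simplified stand-in for the transformation added in this commit.
# The real code delegates message conversion to litellm's _transform_request_body;
# here a minimal text-only mapping is used instead. Sample data below is hypothetical.
import json

def openai_batch_line_to_vertex_line(openai_line: dict) -> dict:
    # Pull the chat-completions body out of the OpenAI batch line
    body = openai_line.get("body") or {}
    # Map OpenAI chat messages to Gemini-style "contents" (text parts only)
    contents = [
        {
            "role": "user" if message.get("role") == "user" else "model",
            "parts": [{"text": message.get("content", "")}],
        }
        for message in body.get("messages", [])
    ]
    # Vertex batch JSONL wraps each request body as {"request": ...}
    return {"request": {"contents": contents}}

openai_jsonl = [
    {
        "custom_id": "req-1",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gemini-1.5-flash",  # hypothetical model name
            "messages": [{"role": "user", "content": "Describe this video."}],
        },
    }
]

vertex_jsonl_string = "\n".join(
    json.dumps(openai_batch_line_to_vertex_line(line)) for line in openai_jsonl
)
print(vertex_jsonl_string)
# {"request": {"contents": [{"role": "user", "parts": [{"text": "Describe this video."}]}]}}

In the committed code, the same pass also derives a unique GCS object name of the form litellm-vertex-files/publishers/google/models/{model}/{uuid} (see _get_gcs_object_name above), which is returned alongside the JSONL string so the caller can upload the payload to the bucket used for the Vertex batch prediction job.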