about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/text_to_speech
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/text_to_speech')
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py243
1 files changed, 243 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py
new file mode 100644
index 00000000..18bc72db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py
@@ -0,0 +1,243 @@
+from typing import Optional, TypedDict, Union
+
+import httpx
+
+import litellm
+from litellm.llms.custom_httpx.http_handler import (
+    _get_httpx_client,
+    get_async_httpx_client,
+)
+from litellm.llms.openai.openai import HttpxBinaryResponseContent
+from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+
+
+class VertexInput(TypedDict, total=False):
+    text: Optional[str]
+    ssml: Optional[str]
+
+
+class VertexVoice(TypedDict, total=False):
+    languageCode: str
+    name: str
+
+
+class VertexAudioConfig(TypedDict, total=False):
+    audioEncoding: str
+    speakingRate: str
+
+
+class VertexTextToSpeechRequest(TypedDict, total=False):
+    input: VertexInput
+    voice: VertexVoice
+    audioConfig: Optional[VertexAudioConfig]
+
+
+class VertexTextToSpeechAPI(VertexLLM):
+    """
+    Vertex methods to support for batches
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def audio_speech(
+        self,
+        logging_obj,
+        vertex_project: Optional[str],
+        vertex_location: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
+        api_base: Optional[str],
+        timeout: Union[float, httpx.Timeout],
+        model: str,
+        input: str,
+        voice: Optional[dict] = None,
+        _is_async: Optional[bool] = False,
+        optional_params: Optional[dict] = None,
+        kwargs: Optional[dict] = None,
+    ) -> HttpxBinaryResponseContent:
+        import base64
+
+        ####### Authenticate with Vertex AI ########
+        _auth_header, vertex_project = self._ensure_access_token(
+            credentials=vertex_credentials,
+            project_id=vertex_project,
+            custom_llm_provider="vertex_ai_beta",
+        )
+
+        auth_header, _ = self._get_token_and_url(
+            model="",
+            auth_header=_auth_header,
+            gemini_api_key=None,
+            vertex_credentials=vertex_credentials,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            stream=False,
+            custom_llm_provider="vertex_ai_beta",
+            api_base=api_base,
+        )
+
+        headers = {
+            "Authorization": f"Bearer {auth_header}",
+            "x-goog-user-project": vertex_project,
+            "Content-Type": "application/json",
+            "charset": "UTF-8",
+        }
+
+        ######### End of Authentication ###########
+
+        ####### Build the request ################
+        # API Ref: https://cloud.google.com/text-to-speech/docs/reference/rest/v1/text/synthesize
+        kwargs = kwargs or {}
+        optional_params = optional_params or {}
+
+        vertex_input = VertexInput(text=input)
+        validate_vertex_input(vertex_input, kwargs, optional_params)
+
+        # required param
+        if voice is not None:
+            vertex_voice = VertexVoice(**voice)
+        elif "voice" in kwargs:
+            vertex_voice = VertexVoice(**kwargs["voice"])
+        else:
+            # use defaults to not fail the request
+            vertex_voice = VertexVoice(
+                languageCode="en-US",
+                name="en-US-Studio-O",
+            )
+
+        if "audioConfig" in kwargs:
+            vertex_audio_config = VertexAudioConfig(**kwargs["audioConfig"])
+        else:
+            # use defaults to not fail the request
+            vertex_audio_config = VertexAudioConfig(
+                audioEncoding="LINEAR16",
+                speakingRate="1",
+            )
+
+        request = VertexTextToSpeechRequest(
+            input=vertex_input,
+            voice=vertex_voice,
+            audioConfig=vertex_audio_config,
+        )
+
+        url = "https://texttospeech.googleapis.com/v1/text:synthesize"
+        ########## End of building request ############
+
+        ########## Log the request for debugging / logging ############
+        logging_obj.pre_call(
+            input=[],
+            api_key="",
+            additional_args={
+                "complete_input_dict": request,
+                "api_base": url,
+                "headers": headers,
+            },
+        )
+
+        ########## End of logging ############
+        ####### Send the request ###################
+        if _is_async is True:
+            return self.async_audio_speech(  # type:ignore
+                logging_obj=logging_obj, url=url, headers=headers, request=request
+            )
+        sync_handler = _get_httpx_client()
+
+        response = sync_handler.post(
+            url=url,
+            headers=headers,
+            json=request,  # type: ignore
+        )
+        if response.status_code != 200:
+            raise Exception(
+                f"Request failed with status code {response.status_code}, {response.text}"
+            )
+        ############ Process the response ############
+        _json_response = response.json()
+
+        response_content = _json_response["audioContent"]
+
+        # Decode base64 to get binary content
+        binary_data = base64.b64decode(response_content)
+
+        # Create an httpx.Response object
+        response = httpx.Response(
+            status_code=200,
+            content=binary_data,
+        )
+
+        # Initialize the HttpxBinaryResponseContent instance
+        http_binary_response = HttpxBinaryResponseContent(response)
+        return http_binary_response
+
+    async def async_audio_speech(
+        self,
+        logging_obj,
+        url: str,
+        headers: dict,
+        request: VertexTextToSpeechRequest,
+    ) -> HttpxBinaryResponseContent:
+        import base64
+
+        async_handler = get_async_httpx_client(
+            llm_provider=litellm.LlmProviders.VERTEX_AI
+        )
+
+        response = await async_handler.post(
+            url=url,
+            headers=headers,
+            json=request,  # type: ignore
+        )
+
+        if response.status_code != 200:
+            raise Exception(
+                f"Request did not return a 200 status code: {response.status_code}, {response.text}"
+            )
+
+        _json_response = response.json()
+
+        response_content = _json_response["audioContent"]
+
+        # Decode base64 to get binary content
+        binary_data = base64.b64decode(response_content)
+
+        # Create an httpx.Response object
+        response = httpx.Response(
+            status_code=200,
+            content=binary_data,
+        )
+
+        # Initialize the HttpxBinaryResponseContent instance
+        http_binary_response = HttpxBinaryResponseContent(response)
+        return http_binary_response
+
+
+def validate_vertex_input(
+    input_data: VertexInput, kwargs: dict, optional_params: dict
+) -> None:
+    # Remove None values
+    if input_data.get("text") is None:
+        input_data.pop("text", None)
+    if input_data.get("ssml") is None:
+        input_data.pop("ssml", None)
+
+    # Check if use_ssml is set
+    use_ssml = kwargs.get("use_ssml", optional_params.get("use_ssml", False))
+
+    if use_ssml:
+        if "text" in input_data:
+            input_data["ssml"] = input_data.pop("text")
+        elif "ssml" not in input_data:
+            raise ValueError("SSML input is required when use_ssml is True.")
+    else:
+        # LiteLLM will auto-detect if text is in ssml format
+        # check if "text" is an ssml - in this case we should pass it as ssml instead of text
+        if input_data:
+            _text = input_data.get("text", None) or ""
+            if "<speak>" in _text:
+                input_data["ssml"] = input_data.pop("text")
+
+    if not input_data:
+        raise ValueError("Either 'text' or 'ssml' must be provided.")
+    if "text" in input_data and "ssml" in input_data:
+        raise ValueError("Only one of 'text' or 'ssml' should be provided, not both.")