author    | S. Solomon Darnell | 2025-03-28 21:52:21 -0500
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500
commit    | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/openai_like
parent    | cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/openai_like')
4 files changed, 738 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/chat/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/chat/handler.py
new file mode 100644
index 00000000..821fc9b7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/chat/handler.py
@@ -0,0 +1,403 @@
+"""
+OpenAI-like chat completion handler
+
+For handling OpenAI-like chat completions, like IBM WatsonX, etc.
+"""
+
+import json
+from typing import Any, Callable, Optional, Union
+
+import httpx
+
+import litellm
+from litellm import LlmProviders
+from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.databricks.streaming_utils import ModelResponseIterator
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm.llms.openai.openai import OpenAIConfig
+from litellm.types.utils import CustomStreamingDecoder, ModelResponse
+from litellm.utils import CustomStreamWrapper, ProviderConfigManager
+
+from ..common_utils import OpenAILikeBase, OpenAILikeError
+from .transformation import OpenAILikeChatConfig
+
+
+async def make_call(
+    client: Optional[AsyncHTTPHandler],
+    api_base: str,
+    headers: dict,
+    data: str,
+    model: str,
+    messages: list,
+    logging_obj,
+    streaming_decoder: Optional[CustomStreamingDecoder] = None,
+    fake_stream: bool = False,
+):
+    if client is None:
+        client = litellm.module_level_aclient
+
+    response = await client.post(
+        api_base, headers=headers, data=data, stream=not fake_stream
+    )
+
+    if streaming_decoder is not None:
+        completion_stream: Any = streaming_decoder.aiter_bytes(
+            response.aiter_bytes(chunk_size=1024)
+        )
+    elif fake_stream:
+        model_response = ModelResponse(**response.json())
+        completion_stream = MockResponseIterator(model_response=model_response)
+    else:
+        completion_stream = ModelResponseIterator(
+            streaming_response=response.aiter_lines(), sync_stream=False
+        )
+    # LOGGING
+    logging_obj.post_call(
+        input=messages,
+        api_key="",
+        original_response=completion_stream,  # Pass the completion stream for logging
+        additional_args={"complete_input_dict": data},
+    )
+
+    return completion_stream
+
+
+def make_sync_call(
+    client: Optional[HTTPHandler],
+    api_base: str,
+    headers: dict,
+    data: str,
+    model: str,
+    messages: list,
+    logging_obj,
+    streaming_decoder: Optional[CustomStreamingDecoder] = None,
+    fake_stream: bool = False,
+    timeout: Optional[Union[float, httpx.Timeout]] = None,
+):
+    if client is None:
+        client = litellm.module_level_client  # Create a new client if none provided
+
+    response = client.post(
+        api_base, headers=headers, data=data, stream=not fake_stream, timeout=timeout
+    )
+
+    if response.status_code != 200:
+        raise OpenAILikeError(status_code=response.status_code, message=response.read())
+
+    if streaming_decoder is not None:
+        completion_stream = streaming_decoder.iter_bytes(
+            response.iter_bytes(chunk_size=1024)
+        )
+    elif fake_stream:
+        model_response = ModelResponse(**response.json())
+        completion_stream = MockResponseIterator(model_response=model_response)
+    else:
+        completion_stream = ModelResponseIterator(
+            streaming_response=response.iter_lines(), sync_stream=True
+        )
+
+    # LOGGING
+    logging_obj.post_call(
+        input=messages,
+        api_key="",
+        original_response="first stream response received",
+        additional_args={"complete_input_dict": data},
+    )
+
+    return completion_stream
+
+
+class OpenAILikeChatHandler(OpenAILikeBase):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    async def acompletion_stream_function(
+        self,
+        model: str,
+        messages: list,
+        custom_llm_provider: str,
+        api_base: str,
+        custom_prompt_dict: dict,
+        model_response: ModelResponse,
+        print_verbose: Callable,
+        encoding,
+        api_key,
+        logging_obj,
+        stream,
+        data: dict,
+        optional_params=None,
+        litellm_params=None,
+        logger_fn=None,
+        headers={},
+        client: Optional[AsyncHTTPHandler] = None,
+        streaming_decoder: Optional[CustomStreamingDecoder] = None,
+        fake_stream: bool = False,
+    ) -> CustomStreamWrapper:
+        data["stream"] = True
+        completion_stream = await make_call(
+            client=client,
+            api_base=api_base,
+            headers=headers,
+            data=json.dumps(data),
+            model=model,
+            messages=messages,
+            logging_obj=logging_obj,
+            streaming_decoder=streaming_decoder,
+        )
+        streamwrapper = CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            logging_obj=logging_obj,
+        )
+
+        return streamwrapper
+
+    async def acompletion_function(
+        self,
+        model: str,
+        messages: list,
+        api_base: str,
+        custom_prompt_dict: dict,
+        model_response: ModelResponse,
+        custom_llm_provider: str,
+        print_verbose: Callable,
+        client: Optional[AsyncHTTPHandler],
+        encoding,
+        api_key,
+        logging_obj,
+        stream,
+        data: dict,
+        base_model: Optional[str],
+        optional_params: dict,
+        litellm_params=None,
+        logger_fn=None,
+        headers={},
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        json_mode: bool = False,
+    ) -> ModelResponse:
+        if timeout is None:
+            timeout = httpx.Timeout(timeout=600.0, connect=5.0)
+
+        if client is None:
+            client = litellm.module_level_aclient
+
+        try:
+            response = await client.post(
+                api_base, headers=headers, data=json.dumps(data), timeout=timeout
+            )
+            response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            raise OpenAILikeError(
+                status_code=e.response.status_code,
+                message=e.response.text,
+            )
+        except httpx.TimeoutException:
+            raise OpenAILikeError(status_code=408, message="Timeout error occurred.")
+        except Exception as e:
+            raise OpenAILikeError(status_code=500, message=str(e))
+
+        return OpenAILikeChatConfig._transform_response(
+            model=model,
+            response=response,
+            model_response=model_response,
+            stream=stream,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            api_key=api_key,
+            data=data,
+            messages=messages,
+            print_verbose=print_verbose,
+            encoding=encoding,
+            json_mode=json_mode,
+            custom_llm_provider=custom_llm_provider,
+            base_model=base_model,
+        )
+
+    def completion(
+        self,
+        *,
+        model: str,
+        messages: list,
+        api_base: str,
+        custom_llm_provider: str,
+        custom_prompt_dict: dict,
+        model_response: ModelResponse,
+        print_verbose: Callable,
+        encoding,
+        api_key: Optional[str],
+        logging_obj,
+        optional_params: dict,
+        acompletion=None,
+        litellm_params: dict = {},
+        logger_fn=None,
+        headers: Optional[dict] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        custom_endpoint: Optional[bool] = None,
+        streaming_decoder: Optional[
+            CustomStreamingDecoder
+        ] = None,  # if openai-compatible api needs custom stream decoder - e.g. sagemaker
+        fake_stream: bool = False,
+    ):
+        custom_endpoint = custom_endpoint or optional_params.pop(
+            "custom_endpoint", None
+        )
+        base_model: Optional[str] = optional_params.pop("base_model", None)
+        api_base, headers = self._validate_environment(
+            api_base=api_base,
+            api_key=api_key,
+            endpoint_type="chat_completions",
+            custom_endpoint=custom_endpoint,
+            headers=headers,
+        )
+
+        stream: bool = optional_params.pop("stream", None) or False
+        extra_body = optional_params.pop("extra_body", {})
+        json_mode = optional_params.pop("json_mode", None)
+        optional_params.pop("max_retries", None)
+        if not fake_stream:
+            optional_params["stream"] = stream
+
+        if messages is not None and custom_llm_provider is not None:
+            provider_config = ProviderConfigManager.get_provider_chat_config(
+                model=model, provider=LlmProviders(custom_llm_provider)
+            )
+            if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
+                provider_config, OpenAIConfig
+            ):
+                messages = provider_config._transform_messages(
+                    messages=messages, model=model
+                )
+
+        data = {
+            "model": model,
+            "messages": messages,
+            **optional_params,
+            **extra_body,
+        }
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=messages,
+            api_key=api_key,
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+        if acompletion is True:
+            if client is None or not isinstance(client, AsyncHTTPHandler):
+                client = None
+            if (
+                stream is True
+            ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
+                data["stream"] = stream
+                return self.acompletion_stream_function(
+                    model=model,
+                    messages=messages,
+                    data=data,
+                    api_base=api_base,
+                    custom_prompt_dict=custom_prompt_dict,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    encoding=encoding,
+                    api_key=api_key,
+                    logging_obj=logging_obj,
+                    optional_params=optional_params,
+                    stream=stream,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    headers=headers,
+                    client=client,
+                    custom_llm_provider=custom_llm_provider,
+                    streaming_decoder=streaming_decoder,
+                    fake_stream=fake_stream,
+                )
+            else:
+                return self.acompletion_function(
+                    model=model,
+                    messages=messages,
+                    data=data,
+                    api_base=api_base,
+                    custom_prompt_dict=custom_prompt_dict,
+                    custom_llm_provider=custom_llm_provider,
+                    model_response=model_response,
+                    print_verbose=print_verbose,
+                    encoding=encoding,
+                    api_key=api_key,
+                    logging_obj=logging_obj,
+                    optional_params=optional_params,
+                    stream=stream,
+                    litellm_params=litellm_params,
+                    logger_fn=logger_fn,
+                    headers=headers,
+                    timeout=timeout,
+                    base_model=base_model,
+                    client=client,
+                    json_mode=json_mode,
+                )
+        else:
+            ## COMPLETION CALL
+            if stream is True:
+                completion_stream = make_sync_call(
+                    client=(
+                        client
+                        if client is not None and isinstance(client, HTTPHandler)
+                        else None
+                    ),
+                    api_base=api_base,
+                    headers=headers,
+                    data=json.dumps(data),
+                    model=model,
+                    messages=messages,
+                    logging_obj=logging_obj,
+                    streaming_decoder=streaming_decoder,
+                    fake_stream=fake_stream,
+                    timeout=timeout,
+                )
+                # completion_stream.__iter__()
+                return CustomStreamWrapper(
+                    completion_stream=completion_stream,
+                    model=model,
+                    custom_llm_provider=custom_llm_provider,
+                    logging_obj=logging_obj,
+                )
+            else:
+                if client is None or not isinstance(client, HTTPHandler):
+                    client = HTTPHandler(timeout=timeout)  # type: ignore
+                try:
+                    response = client.post(
+                        url=api_base, headers=headers, data=json.dumps(data)
+                    )
+                    response.raise_for_status()
+
+                except httpx.HTTPStatusError as e:
+                    raise OpenAILikeError(
+                        status_code=e.response.status_code,
+                        message=e.response.text,
+                    )
+                except httpx.TimeoutException:
+                    raise OpenAILikeError(
+                        status_code=408, message="Timeout error occurred."
+                    )
+                except Exception as e:
+                    raise OpenAILikeError(status_code=500, message=str(e))
+                return OpenAILikeChatConfig._transform_response(
+                    model=model,
+                    response=response,
+                    model_response=model_response,
+                    stream=stream,
+                    logging_obj=logging_obj,
+                    optional_params=optional_params,
+                    api_key=api_key,
+                    data=data,
+                    messages=messages,
+                    print_verbose=print_verbose,
+                    encoding=encoding,
+                    json_mode=json_mode,
+                    custom_llm_provider=custom_llm_provider,
+                    base_model=base_model,
+                )
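For orientation, the sketch below shows how an OpenAI-compatible chat endpoint of the kind this handler targets is typically driven through litellm's public completion() API. The base URL, API key, and model name are placeholders, not values taken from this commit; providers built on OpenAILikeChatHandler (IBM watsonx, per the module docstring) accept the same request shape.

import litellm

# Placeholder endpoint, key, and model for any OpenAI-compatible server (e.g. a local vLLM instance).
response = litellm.completion(
    model="openai/my-hosted-model",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    api_base="http://localhost:8000/v1",
    api_key="sk-placeholder",
)
print(response.choices[0].message.content)

# stream=True yields OpenAI-style delta chunks.
for chunk in litellm.completion(
    model="openai/my-hosted-model",
    messages=[{"role": "user", "content": "Count to three."}],
    api_base="http://localhost:8000/v1",
    api_key="sk-placeholder",
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")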
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/chat/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/chat/transformation.py
new file mode 100644
index 00000000..37cfabda
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/chat/transformation.py
@@ -0,0 +1,126 @@
+"""
+OpenAI-like chat completion transformation
+"""
+
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
+
+import httpx
+
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import ChatCompletionAssistantMessage
+from litellm.types.utils import ModelResponse
+
+from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any
+
+
+class OpenAILikeChatConfig(OpenAIGPTConfig):
+    def _get_openai_compatible_provider_info(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: Optional[str] = None,
+    ) -> Tuple[Optional[str], Optional[str]]:
+        api_base = api_base or get_secret_str("OPENAI_LIKE_API_BASE")  # type: ignore
+        dynamic_api_key = (
+            api_key or get_secret_str("OPENAI_LIKE_API_KEY") or ""
+        )  # vllm does not require an api key
+        return api_base, dynamic_api_key
+
+    @staticmethod
+    def _convert_tool_response_to_message(
+        message: ChatCompletionAssistantMessage, json_mode: bool
+    ) -> ChatCompletionAssistantMessage:
+        """
+        if json_mode is true, convert the returned tool call response to a content with json str
+
+        e.g. input:
+
+        {"role": "assistant", "tool_calls": [{"id": "call_5ms4", "type": "function", "function": {"name": "json_tool_call", "arguments": "{\"key\": \"question\", \"value\": \"What is the capital of France?\"}"}}]}
+
+        output:
+
+        {"role": "assistant", "content": "{\"key\": \"question\", \"value\": \"What is the capital of France?\"}"}
+        """
+        if not json_mode:
+            return message
+
+        _tool_calls = message.get("tool_calls")
+
+        if _tool_calls is None or len(_tool_calls) != 1:
+            return message
+
+        message["content"] = _tool_calls[0]["function"].get("arguments") or ""
+        message["tool_calls"] = None
+
+        return message
+
+    @staticmethod
+    def _transform_response(
+        model: str,
+        response: httpx.Response,
+        model_response: ModelResponse,
+        stream: bool,
+        logging_obj: LiteLLMLoggingObj,
+        optional_params: dict,
+        api_key: Optional[str],
+        data: Union[dict, str],
+        messages: List,
+        print_verbose,
+        encoding,
+        json_mode: bool,
+        custom_llm_provider: str,
+        base_model: Optional[str],
+    ) -> ModelResponse:
+        response_json = response.json()
+        logging_obj.post_call(
+            input=messages,
+            api_key="",
+            original_response=response_json,
+            additional_args={"complete_input_dict": data},
+        )
+
+        if json_mode:
+            for choice in response_json["choices"]:
+                message = OpenAILikeChatConfig._convert_tool_response_to_message(
+                    choice.get("message"), json_mode
+                )
+                choice["message"] = message
+
+        returned_response = ModelResponse(**response_json)
+
+        returned_response.model = (
+            custom_llm_provider + "/" + (returned_response.model or "")
+        )
+
+        if base_model is not None:
+            returned_response._hidden_params["model"] = base_model
+        return returned_response
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+        replace_max_completion_tokens_with_max_tokens: bool = True,
+    ) -> dict:
+        mapped_params = super().map_openai_params(
+            non_default_params, optional_params, model, drop_params
+        )
+        if (
+            "max_completion_tokens" in non_default_params
+            and replace_max_completion_tokens_with_max_tokens
+        ):
+            mapped_params["max_tokens"] = non_default_params[
+                "max_completion_tokens"
+            ]  # most openai-compatible providers support 'max_tokens' not 'max_completion_tokens'
+            mapped_params.pop("max_completion_tokens", None)
+
+        return mapped_params
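The json_mode path above folds a single forced tool call back into plain message content. A minimal sketch of that behaviour, reusing the example payload from the docstring (the call id and question are illustrative only):

from litellm.llms.openai_like.chat.transformation import OpenAILikeChatConfig

tool_call_message = {
    "role": "assistant",
    "tool_calls": [
        {
            "id": "call_5ms4",
            "type": "function",
            "function": {
                "name": "json_tool_call",
                "arguments": '{"key": "question", "value": "What is the capital of France?"}',
            },
        }
    ],
}

converted = OpenAILikeChatConfig._convert_tool_response_to_message(
    tool_call_message, json_mode=True
)
print(converted["content"])     # the JSON string taken from the tool call's arguments
print(converted["tool_calls"])  # None - the tool call has been collapsed into content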
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/common_utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/common_utils.py
new file mode 100644
index 00000000..116277b6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/common_utils.py
@@ -0,0 +1,56 @@
+from typing import Literal, Optional, Tuple
+
+import httpx
+
+
+class OpenAILikeError(Exception):
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+        self.request = httpx.Request(method="POST", url="https://www.litellm.ai")
+        self.response = httpx.Response(status_code=status_code, request=self.request)
+        super().__init__(
+            self.message
+        )  # Call the base class constructor with the parameters it needs
+
+
+class OpenAILikeBase:
+    def __init__(self, **kwargs):
+        pass
+
+    def _validate_environment(
+        self,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        endpoint_type: Literal["chat_completions", "embeddings"],
+        headers: Optional[dict],
+        custom_endpoint: Optional[bool],
+    ) -> Tuple[str, dict]:
+        if api_key is None and headers is None:
+            raise OpenAILikeError(
+                status_code=400,
+                message="Missing API Key - A call is being made to LLM Provider but no key is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params",
+            )
+
+        if api_base is None:
+            raise OpenAILikeError(
+                status_code=400,
+                message="Missing API Base - A call is being made to LLM Provider but no api base is set either in the environment variables ({LLM_PROVIDER}_API_KEY) or via params",
+            )
+
+        if headers is None:
+            headers = {
+                "Content-Type": "application/json",
+            }
+
+        if (
+            api_key is not None and "Authorization" not in headers
+        ):  # [TODO] remove 'validate_environment' from OpenAI base. should use llm providers config for this only.
+            headers.update({"Authorization": "Bearer {}".format(api_key)})
+
+        if not custom_endpoint:
+            if endpoint_type == "chat_completions":
+                api_base = "{}/chat/completions".format(api_base)
+            elif endpoint_type == "embeddings":
+                api_base = "{}/embeddings".format(api_base)
+        return api_base, headers
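OpenAILikeBase._validate_environment is the piece the chat and embedding handlers share: it fills in default JSON and auth headers and appends the endpoint path unless a custom endpoint is requested. A small sketch with placeholder server and key values:

from litellm.llms.openai_like.common_utils import OpenAILikeBase, OpenAILikeError

base = OpenAILikeBase()

# Placeholder base URL and key; any OpenAI-compatible server behaves the same way here.
api_base, headers = base._validate_environment(
    api_key="sk-placeholder",
    api_base="http://localhost:8000/v1",
    endpoint_type="chat_completions",
    headers=None,
    custom_endpoint=False,
)
print(api_base)  # http://localhost:8000/v1/chat/completions
print(headers)   # {'Content-Type': 'application/json', 'Authorization': 'Bearer sk-placeholder'}

# With neither an api_key nor explicit headers, a 400 OpenAILikeError is raised.
try:
    base._validate_environment(
        api_key=None,
        api_base="http://localhost:8000/v1",
        endpoint_type="embeddings",
        headers=None,
        custom_endpoint=False,
    )
except OpenAILikeError as e:
    print(e.status_code, e.message)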
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/embedding/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/embedding/handler.py
new file mode 100644
index 00000000..95a4aa85
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/openai_like/embedding/handler.py
@@ -0,0 +1,153 @@
+# What is this?
+## Handler file for OpenAI-like endpoints.
+## Allows jina ai embedding calls - which don't allow 'encoding_format' in payload.
+
+import json
+from typing import Optional
+
+import httpx
+
+import litellm
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    get_async_httpx_client,
+)
+from litellm.types.utils import EmbeddingResponse
+
+from ..common_utils import OpenAILikeBase, OpenAILikeError
+
+
+class OpenAILikeEmbeddingHandler(OpenAILikeBase):
+    def __init__(self, **kwargs):
+        pass
+
+    async def aembedding(
+        self,
+        input: list,
+        data: dict,
+        model_response: EmbeddingResponse,
+        timeout: float,
+        api_key: str,
+        api_base: str,
+        logging_obj,
+        headers: dict,
+        client=None,
+    ) -> EmbeddingResponse:
+        response = None
+        try:
+            if client is None or not isinstance(client, AsyncHTTPHandler):
+                async_client = get_async_httpx_client(
+                    llm_provider=litellm.LlmProviders.OPENAI,
+                    params={"timeout": timeout},
+                )
+            else:
+                async_client = client
+            try:
+                response = await async_client.post(
+                    api_base,
+                    headers=headers,
+                    data=json.dumps(data),
+                )  # type: ignore
+
+                response.raise_for_status()
+
+                response_json = response.json()
+            except httpx.HTTPStatusError as e:
+                raise OpenAILikeError(
+                    status_code=e.response.status_code,
+                    message=e.response.text if e.response else str(e),
+                )
+            except httpx.TimeoutException:
+                raise OpenAILikeError(
+                    status_code=408, message="Timeout error occurred."
+                )
+            except Exception as e:
+                raise OpenAILikeError(status_code=500, message=str(e))
+
+            ## LOGGING
+            logging_obj.post_call(
+                input=input,
+                api_key=api_key,
+                additional_args={"complete_input_dict": data},
+                original_response=response_json,
+            )
+            return EmbeddingResponse(**response_json)
+        except Exception as e:
+            ## LOGGING
+            logging_obj.post_call(
+                input=input,
+                api_key=api_key,
+                original_response=str(e),
+            )
+            raise e
+
+    def embedding(
+        self,
+        model: str,
+        input: list,
+        timeout: float,
+        logging_obj,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        optional_params: dict,
+        model_response: Optional[EmbeddingResponse] = None,
+        client=None,
+        aembedding=None,
+        custom_endpoint: Optional[bool] = None,
+        headers: Optional[dict] = None,
+    ) -> EmbeddingResponse:
+        api_base, headers = self._validate_environment(
+            api_base=api_base,
+            api_key=api_key,
+            endpoint_type="embeddings",
+            headers=headers,
+            custom_endpoint=custom_endpoint,
+        )
+        model = model
+        data = {"model": model, "input": input, **optional_params}
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=input,
+            api_key=api_key,
+            additional_args={"complete_input_dict": data, "api_base": api_base},
+        )
+
+        if aembedding is True:
+            return self.aembedding(data=data, input=input, logging_obj=logging_obj, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, headers=headers)  # type: ignore
+        if client is None or isinstance(client, AsyncHTTPHandler):
+            self.client = HTTPHandler(timeout=timeout)  # type: ignore
+        else:
+            self.client = client
+
+        ## EMBEDDING CALL
+        try:
+            response = self.client.post(
+                api_base,
+                headers=headers,
+                data=json.dumps(data),
+            )  # type: ignore
+
+            response.raise_for_status()  # type: ignore
+
+            response_json = response.json()  # type: ignore
+        except httpx.HTTPStatusError as e:
+            raise OpenAILikeError(
+                status_code=e.response.status_code,
+                message=e.response.text,
+            )
+        except httpx.TimeoutException:
+            raise OpenAILikeError(status_code=408, message="Timeout error occurred.")
+        except Exception as e:
+            raise OpenAILikeError(status_code=500, message=str(e))
+
+        ## LOGGING
+        logging_obj.post_call(
+            input=input,
+            api_key=api_key,
+            additional_args={"complete_input_dict": data},
+            original_response=response_json,
+        )
+
+        return litellm.EmbeddingResponse(**response_json)
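Finally, a hedged sketch of an embedding request against an OpenAI-compatible endpoint, the call pattern the embedding handler above implements (its header comment singles out Jina AI, which rejects 'encoding_format'). The model name, base URL, and key below are placeholders, not values from this commit.

import litellm

# Placeholder values; point api_base at any OpenAI-compatible /v1 server.
embedding_response = litellm.embedding(
    model="openai/my-embedding-model",
    input=["hello world", "bonjour le monde"],
    api_base="http://localhost:8000/v1",
    api_key="sk-placeholder",
)
print(len(embedding_response.data))                   # one embedding object per input string
print(len(embedding_response.data[0]["embedding"]))   # dimensionality of the returned vector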