|  |  |  |
|---|---|---|
| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) |  |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud |  |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) |  |
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud')
3 files changed, 372 insertions, 0 deletions
```diff
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py
new file mode 100644
index 00000000..b0abdda5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py
@@ -0,0 +1,130 @@
+import json
+from typing import Callable, Optional, Union
+
+import litellm
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_httpx_client,
+)
+from litellm.utils import ModelResponse
+
+from .transformation import NLPCloudConfig
+
+nlp_config = NLPCloudConfig()
+
+
+def completion(
+    model: str,
+    messages: list,
+    api_base: str,
+    model_response: ModelResponse,
+    print_verbose: Callable,
+    encoding,
+    api_key,
+    logging_obj,
+    optional_params: dict,
+    litellm_params: dict,
+    logger_fn=None,
+    default_max_tokens_to_sample=None,
+    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    headers={},
+):
+    headers = nlp_config.validate_environment(
+        api_key=api_key,
+        headers=headers,
+        model=model,
+        messages=messages,
+        optional_params=optional_params,
+    )
+
+    ## Load Config
+    config = litellm.NLPCloudConfig.get_config()
+    for k, v in config.items():
+        if (
+            k not in optional_params
+        ):  # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
+            optional_params[k] = v
+
+    completion_url_fragment_1 = api_base
+    completion_url_fragment_2 = "/generation"
+    model = model
+
+    completion_url = completion_url_fragment_1 + model + completion_url_fragment_2
+    data = nlp_config.transform_request(
+        model=model,
+        messages=messages,
+        optional_params=optional_params,
+        litellm_params=litellm_params,
+        headers=headers,
+    )
+
+    ## LOGGING
+    logging_obj.pre_call(
+        input=None,
+        api_key=api_key,
+        additional_args={
+            "complete_input_dict": data,
+            "headers": headers,
+            "api_base": completion_url,
+        },
+    )
+    ## COMPLETION CALL
+    if client is None or not isinstance(client, HTTPHandler):
+        client = _get_httpx_client()
+
+    response = client.post(
+        completion_url,
+        headers=headers,
+        data=json.dumps(data),
+        stream=optional_params["stream"] if "stream" in optional_params else False,
+    )
+    if "stream" in optional_params and optional_params["stream"] is True:
+        return clean_and_iterate_chunks(response)
+    else:
+        return nlp_config.transform_response(
+            model=model,
+            raw_response=response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            api_key=api_key,
+            request_data=data,
+            messages=messages,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            encoding=encoding,
+        )
+
+
+# def clean_and_iterate_chunks(response):
+#     def process_chunk(chunk):
+#         print(f"received chunk: {chunk}")
+#         cleaned_chunk = chunk.decode("utf-8")
+#         # Perform further processing based on your needs
+#         return cleaned_chunk
+
+
+#     for line in response.iter_lines():
+#         if line:
+#             yield process_chunk(line)
+def clean_and_iterate_chunks(response):
+    buffer = b""
+
+    for chunk in response.iter_content(chunk_size=1024):
+        if not chunk:
+            break
+
+        buffer += chunk
+        while b"\x00" in buffer:
+            buffer = buffer.replace(b"\x00", b"")
+            yield buffer.decode("utf-8")
+            buffer = b""
+
+    # No more data expected, yield any remaining data in the buffer
+    if buffer:
+        yield buffer.decode("utf-8")
+
+
+def embedding():
+    # logic for parsing in - calling - parsing out model embedding calls
+    pass
```
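The handler above is not called directly by user code; it is reached through litellm's provider routing. A minimal usage sketch, assuming the usual `nlp_cloud/<model>` naming and the `NLP_CLOUD_API_KEY` environment variable (neither appears in this diff, so treat both as assumptions):

```python
# Hedged usage sketch: assumes litellm routes "nlp_cloud/..." models to the
# completion() handler added above and reads the key from NLP_CLOUD_API_KEY.
import os

import litellm

os.environ["NLP_CLOUD_API_KEY"] = "your-api-key"  # placeholder credential

response = litellm.completion(
    model="nlp_cloud/dolphin",  # assumed provider/model prefix
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    max_tokens=50,      # mapped to NLP Cloud's `max_length` by the config below
    temperature=0.7,
)
print(response.choices[0].message.content)
```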
```diff
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py
new file mode 100644
index 00000000..b7967249
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py
@@ -0,0 +1,227 @@
+import json
+import time
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+import httpx
+
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    convert_content_list_to_str,
+)
+from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.types.llms.openai import AllMessageValues
+from litellm.utils import ModelResponse, Usage
+
+from ..common_utils import NLPCloudError
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+
+    LoggingClass = LiteLLMLoggingObj
+else:
+    LoggingClass = Any
+
+
+class NLPCloudConfig(BaseConfig):
+    """
+    Reference: https://docs.nlpcloud.com/#generation
+
+    - `max_length` (int): Optional. The maximum number of tokens that the generated text should contain.
+
+    - `length_no_input` (boolean): Optional. Whether `min_length` and `max_length` should not include the length of the input text.
+
+    - `end_sequence` (string): Optional. A specific token that should be the end of the generated sequence.
+
+    - `remove_end_sequence` (boolean): Optional. Whether to remove the `end_sequence` string from the result.
+
+    - `remove_input` (boolean): Optional. Whether to remove the input text from the result.
+
+    - `bad_words` (list of strings): Optional. List of tokens that are not allowed to be generated.
+
+    - `temperature` (float): Optional. Temperature sampling. It modulates the next token probabilities.
+
+    - `top_p` (float): Optional. Top P sampling. Below 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+
+    - `top_k` (int): Optional. Top K sampling. The number of highest probability vocabulary tokens to keep for top k filtering.
+
+    - `repetition_penalty` (float): Optional. Prevents the same word from being repeated too many times.
+
+    - `num_beams` (int): Optional. Number of beams for beam search.
+
+    - `num_return_sequences` (int): Optional. The number of independently computed returned sequences.
+    """
+
+    max_length: Optional[int] = None
+    length_no_input: Optional[bool] = None
+    end_sequence: Optional[str] = None
+    remove_end_sequence: Optional[bool] = None
+    remove_input: Optional[bool] = None
+    bad_words: Optional[list] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    repetition_penalty: Optional[float] = None
+    num_beams: Optional[int] = None
+    num_return_sequences: Optional[int] = None
+
+    def __init__(
+        self,
+        max_length: Optional[int] = None,
+        length_no_input: Optional[bool] = None,
+        end_sequence: Optional[str] = None,
+        remove_end_sequence: Optional[bool] = None,
+        remove_input: Optional[bool] = None,
+        bad_words: Optional[list] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        repetition_penalty: Optional[float] = None,
+        num_beams: Optional[int] = None,
+        num_return_sequences: Optional[int] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return super().get_config()
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        headers = {
+            "accept": "application/json",
+            "content-type": "application/json",
+        }
+        if api_key:
+            headers["Authorization"] = f"Token {api_key}"
+        return headers
+
+    def get_supported_openai_params(self, model: str) -> List:
+        return [
+            "max_tokens",
+            "stream",
+            "temperature",
+            "top_p",
+            "presence_penalty",
+            "frequency_penalty",
+            "n",
+            "stop",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        for param, value in non_default_params.items():
+            if param == "max_tokens":
+                optional_params["max_length"] = value
+            if param == "stream":
+                optional_params["stream"] = value
+            if param == "temperature":
+                optional_params["temperature"] = value
+            if param == "top_p":
+                optional_params["top_p"] = value
+            if param == "presence_penalty":
+                optional_params["presence_penalty"] = value
+            if param == "frequency_penalty":
+                optional_params["frequency_penalty"] = value
+            if param == "n":
+                optional_params["num_return_sequences"] = value
+            if param == "stop":
+                optional_params["stop_sequences"] = value
+        return optional_params
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        return NLPCloudError(
+            status_code=status_code, message=error_message, headers=headers
+        )
+
+    def transform_request(
+        self,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        text = " ".join(convert_content_list_to_str(message) for message in messages)
+
+        data = {
+            "text": text,
+            **optional_params,
+        }
+
+        return data
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LoggingClass,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        ## LOGGING
+        logging_obj.post_call(
+            input=None,
+            api_key=api_key,
+            original_response=raw_response.text,
+            additional_args={"complete_input_dict": request_data},
+        )
+
+        ## RESPONSE OBJECT
+        try:
+            completion_response = raw_response.json()
+        except Exception:
+            raise NLPCloudError(
+                message=raw_response.text, status_code=raw_response.status_code
+            )
+        if "error" in completion_response:
+            raise NLPCloudError(
+                message=completion_response["error"],
+                status_code=raw_response.status_code,
+            )
+        else:
+            try:
+                if len(completion_response["generated_text"]) > 0:
+                    model_response.choices[0].message.content = (  # type: ignore
+                        completion_response["generated_text"]
+                    )
+            except Exception:
+                raise NLPCloudError(
+                    message=json.dumps(completion_response),
+                    status_code=raw_response.status_code,
+                )
+
+        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
+        prompt_tokens = completion_response["nb_input_tokens"]
+        completion_tokens = completion_response["nb_generated_tokens"]
+
+        model_response.created = int(time.time())
+        model_response.model = model
+        usage = Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
+        setattr(model_response, "usage", usage)
+        return model_response
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py
new file mode 100644
index 00000000..232f56c9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py
@@ -0,0 +1,15 @@
+from typing import Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+
+
+class NLPCloudError(BaseLLMException):
+    def __init__(
+        self,
+        status_code: int,
+        message: str,
+        headers: Optional[Union[dict, httpx.Headers]] = None,
+    ):
+        super().__init__(status_code=status_code, message=message, headers=headers)
```