author     S. Solomon Darnell    2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell    2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download   gn-ai-master.tar.gz
two version of R2R are here  (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py          130
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py   227
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py           15
3 files changed, 372 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py
new file mode 100644
index 00000000..b0abdda5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/handler.py
@@ -0,0 +1,130 @@
+import json
+from typing import Callable, Optional, Union
+
+import litellm
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_httpx_client,
+)
+from litellm.utils import ModelResponse
+
+from .transformation import NLPCloudConfig
+
+nlp_config = NLPCloudConfig()
+
+
+def completion(
+    model: str,
+    messages: list,
+    api_base: str,
+    model_response: ModelResponse,
+    print_verbose: Callable,
+    encoding,
+    api_key,
+    logging_obj,
+    optional_params: dict,
+    litellm_params: dict,
+    logger_fn=None,
+    default_max_tokens_to_sample=None,
+    client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    headers={},
+):
+    headers = nlp_config.validate_environment(
+        api_key=api_key,
+        headers=headers,
+        model=model,
+        messages=messages,
+        optional_params=optional_params,
+    )
+
+    ## Load Config
+    config = litellm.NLPCloudConfig.get_config()
+    for k, v in config.items():
+        if (
+            k not in optional_params
+        ):  # completion(top_k=3) > nlp_cloud_config(top_k=3) <- allows for dynamic variables to be passed in
+            optional_params[k] = v
+
+    # NLP Cloud generation endpoint: <api_base><model>/generation
+    completion_url = api_base + model + "/generation"
+    data = nlp_config.transform_request(
+        model=model,
+        messages=messages,
+        optional_params=optional_params,
+        litellm_params=litellm_params,
+        headers=headers,
+    )
+
+    ## LOGGING
+    logging_obj.pre_call(
+        input=None,
+        api_key=api_key,
+        additional_args={
+            "complete_input_dict": data,
+            "headers": headers,
+            "api_base": completion_url,
+        },
+    )
+    ## COMPLETION CALL
+    if client is None or not isinstance(client, HTTPHandler):
+        client = _get_httpx_client()
+
+    response = client.post(
+        completion_url,
+        headers=headers,
+        data=json.dumps(data),
+        stream=optional_params["stream"] if "stream" in optional_params else False,
+    )
+    if "stream" in optional_params and optional_params["stream"] is True:
+        return clean_and_iterate_chunks(response)
+    else:
+        return nlp_config.transform_response(
+            model=model,
+            raw_response=response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            api_key=api_key,
+            request_data=data,
+            messages=messages,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            encoding=encoding,
+        )
+
+
+# def clean_and_iterate_chunks(response):
+#     def process_chunk(chunk):
+#         print(f"received chunk: {chunk}")
+#         cleaned_chunk = chunk.decode("utf-8")
+#         # Perform further processing based on your needs
+#         return cleaned_chunk
+
+
+#     for line in response.iter_lines():
+#         if line:
+#             yield process_chunk(line)
+def clean_and_iterate_chunks(response):
+    # Strip NUL (b"\x00") delimiters from the streamed bytes and yield the
+    # accumulated text as UTF-8 whenever a delimiter is seen.
+    buffer = b""
+
+    for chunk in response.iter_content(chunk_size=1024):
+        if not chunk:
+            break
+
+        buffer += chunk
+        while b"\x00" in buffer:
+            buffer = buffer.replace(b"\x00", b"")
+            yield buffer.decode("utf-8")
+            buffer = b""
+
+    # No more data expected, yield any remaining data in the buffer
+    if buffer:
+        yield buffer.decode("utf-8")
+
+
+def embedding():
+    # logic for parsing in - calling - parsing out model embedding calls
+    pass
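
The handler above is normally driven by litellm's routing layer, which supplies the logging object, tokenizer encoding, and provider configuration. A minimal sketch of calling it directly is shown below; the _StubLogger class, the model name, the api_base value, and the API key are illustrative assumptions, not litellm internals or real NLP Cloud values.

    # Hedged sketch of invoking the completion() handler directly.
    from litellm.llms.nlp_cloud.chat.handler import completion
    from litellm.utils import ModelResponse


    class _StubLogger:
        # Stand-in exposing only the pre_call/post_call keywords the handler uses.
        def pre_call(self, input, api_key, additional_args=None):
            pass

        def post_call(self, input, api_key, original_response, additional_args=None):
            pass


    response = completion(
        model="finetuned-llama-3-70b",                # example model name
        messages=[{"role": "user", "content": "Hello!"}],
        api_base="https://api.nlpcloud.io/v1/gpu/",   # assumed base URL; "<model>/generation" is appended
        model_response=ModelResponse(),
        print_verbose=print,
        encoding=None,                                # unused by this provider's transform
        api_key="<NLP_CLOUD_API_KEY>",                # placeholder
        logging_obj=_StubLogger(),
        optional_params={"max_length": 64},
        litellm_params={},
    )
    print(response.choices[0].message.content)

Passing optional_params={"stream": True} would instead return the generator from clean_and_iterate_chunks, which yields decoded text pieces as they arrive.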
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py
new file mode 100644
index 00000000..b7967249
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/chat/transformation.py
@@ -0,0 +1,227 @@
+import json
+import time
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+import httpx
+
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    convert_content_list_to_str,
+)
+from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.types.llms.openai import AllMessageValues
+from litellm.utils import ModelResponse, Usage
+
+from ..common_utils import NLPCloudError
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+
+    LoggingClass = LiteLLMLoggingObj
+else:
+    LoggingClass = Any
+
+
+class NLPCloudConfig(BaseConfig):
+    """
+    Reference: https://docs.nlpcloud.com/#generation
+
+    - `max_length` (int): Optional. The maximum number of tokens that the generated text should contain.
+
+    - `length_no_input` (boolean): Optional. Whether `min_length` and `max_length` should not include the length of the input text.
+
+    - `end_sequence` (string): Optional. A specific token that should be the end of the generated sequence.
+
+    - `remove_end_sequence` (boolean): Optional. Whether to remove the `end_sequence` string from the result.
+
+    - `remove_input` (boolean): Optional. Whether to remove the input text from the result.
+
+    - `bad_words` (list of strings): Optional. List of tokens that are not allowed to be generated.
+
+    - `temperature` (float): Optional. Temperature sampling. It modulates the next token probabilities.
+
+    - `top_p` (float): Optional. Top P sampling. Below 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+
+    - `top_k` (int): Optional. Top K sampling. The number of highest probability vocabulary tokens to keep for top k filtering.
+
+    - `repetition_penalty` (float): Optional. Prevents the same word from being repeated too many times.
+
+    - `num_beams` (int): Optional. Number of beams for beam search.
+
+    - `num_return_sequences` (int): Optional. The number of independently computed returned sequences.
+    """
+
+    max_length: Optional[int] = None
+    length_no_input: Optional[bool] = None
+    end_sequence: Optional[str] = None
+    remove_end_sequence: Optional[bool] = None
+    remove_input: Optional[bool] = None
+    bad_words: Optional[list] = None
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    top_k: Optional[int] = None
+    repetition_penalty: Optional[float] = None
+    num_beams: Optional[int] = None
+    num_return_sequences: Optional[int] = None
+
+    def __init__(
+        self,
+        max_length: Optional[int] = None,
+        length_no_input: Optional[bool] = None,
+        end_sequence: Optional[str] = None,
+        remove_end_sequence: Optional[bool] = None,
+        remove_input: Optional[bool] = None,
+        bad_words: Optional[list] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        repetition_penalty: Optional[float] = None,
+        num_beams: Optional[int] = None,
+        num_return_sequences: Optional[int] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return super().get_config()
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        headers = {
+            "accept": "application/json",
+            "content-type": "application/json",
+        }
+        if api_key:
+            headers["Authorization"] = f"Token {api_key}"
+        return headers
+
+    def get_supported_openai_params(self, model: str) -> List:
+        return [
+            "max_tokens",
+            "stream",
+            "temperature",
+            "top_p",
+            "presence_penalty",
+            "frequency_penalty",
+            "n",
+            "stop",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        for param, value in non_default_params.items():
+            if param == "max_tokens":
+                optional_params["max_length"] = value
+            if param == "stream":
+                optional_params["stream"] = value
+            if param == "temperature":
+                optional_params["temperature"] = value
+            if param == "top_p":
+                optional_params["top_p"] = value
+            if param == "presence_penalty":
+                optional_params["presence_penalty"] = value
+            if param == "frequency_penalty":
+                optional_params["frequency_penalty"] = value
+            if param == "n":
+                optional_params["num_return_sequences"] = value
+            if param == "stop":
+                optional_params["stop_sequences"] = value
+        return optional_params
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        return NLPCloudError(
+            status_code=status_code, message=error_message, headers=headers
+        )
+
+    def transform_request(
+        self,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        text = " ".join(convert_content_list_to_str(message) for message in messages)
+
+        data = {
+            "text": text,
+            **optional_params,
+        }
+
+        return data
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LoggingClass,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        ## LOGGING
+        logging_obj.post_call(
+            input=None,
+            api_key=api_key,
+            original_response=raw_response.text,
+            additional_args={"complete_input_dict": request_data},
+        )
+
+        ## RESPONSE OBJECT
+        try:
+            completion_response = raw_response.json()
+        except Exception:
+            raise NLPCloudError(
+                message=raw_response.text, status_code=raw_response.status_code
+            )
+        if "error" in completion_response:
+            raise NLPCloudError(
+                message=completion_response["error"],
+                status_code=raw_response.status_code,
+            )
+        else:
+            try:
+                if len(completion_response["generated_text"]) > 0:
+                    model_response.choices[0].message.content = (  # type: ignore
+                        completion_response["generated_text"]
+                    )
+            except Exception:
+                raise NLPCloudError(
+                    message=json.dumps(completion_response),
+                    status_code=raw_response.status_code,
+                )
+
+        ## CALCULATING USAGE - NLP Cloud returns token counts (nb_input_tokens / nb_generated_tokens) in the response.
+        prompt_tokens = completion_response["nb_input_tokens"]
+        completion_tokens = completion_response["nb_generated_tokens"]
+
+        model_response.created = int(time.time())
+        model_response.model = model
+        usage = Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
+        setattr(model_response, "usage", usage)
+        return model_response
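
A small sketch of how the config class above maps OpenAI-style parameters and builds the request body; the parameter values, model name, and message text are made up for illustration.

    # Illustrative use of NLPCloudConfig as defined above.
    from litellm.llms.nlp_cloud.chat.transformation import NLPCloudConfig

    config = NLPCloudConfig()

    # OpenAI-style params are renamed to their NLP Cloud equivalents,
    # e.g. max_tokens -> max_length and n -> num_return_sequences.
    params = config.map_openai_params(
        non_default_params={"max_tokens": 128, "temperature": 0.2, "n": 1},
        optional_params={},
        model="finetuned-llama-3-70b",   # example model name
        drop_params=False,
    )
    # -> {"max_length": 128, "temperature": 0.2, "num_return_sequences": 1}

    # The request body is the flattened message text plus those params.
    body = config.transform_request(
        model="finetuned-llama-3-70b",
        messages=[{"role": "user", "content": "Summarize LiteLLM in one line."}],
        optional_params=params,
        litellm_params={},
        headers={},
    )
    # -> {"text": "Summarize LiteLLM in one line.", "max_length": 128, ...}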
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py
new file mode 100644
index 00000000..232f56c9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nlp_cloud/common_utils.py
@@ -0,0 +1,15 @@
+from typing import Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+
+
+class NLPCloudError(BaseLLMException):
+    def __init__(
+        self,
+        status_code: int,
+        message: str,
+        headers: Optional[Union[dict, httpx.Headers]] = None,
+    ):
+        super().__init__(status_code=status_code, message=message, headers=headers)
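
For completeness, a tiny hedged example of how this exception surfaces; transform_response in transformation.py raises it for non-JSON or error payloads, and the example assumes BaseLLMException stores its status_code and message arguments as attributes.

    # Minimal sketch: raising and inspecting NLPCloudError.
    from litellm.llms.nlp_cloud.common_utils import NLPCloudError

    try:
        raise NLPCloudError(status_code=401, message="Invalid token")
    except NLPCloudError as err:
        # Attributes assumed to be set by BaseLLMException.
        print(err.status_code, err.message)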