author     S. Solomon Darnell    2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell    2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim')
-rw-r--r--   .venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py    134
-rw-r--r--   .venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/embed.py    82
2 files changed, 216 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py b/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py
new file mode 100644
index 00000000..eedac6e3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py
@@ -0,0 +1,134 @@
+"""
+Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
+
+This is OpenAI compatible
+
+This file only contains param mapping logic
+
+API calling is done using the OpenAI SDK with an api_base
+"""
+
+from typing import Optional, Union
+
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+
+
+class NvidiaNimConfig(OpenAIGPTConfig):
+    """
+    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
+
+    The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:
+    """
+
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    frequency_penalty: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    max_tokens: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+
+    def __init__(
+        self,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        frequency_penalty: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return super().get_config()
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get the supported OpenAI params for the given model
+
+
+        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
+        """
+        if model in [
+            "google/recurrentgemma-2b",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "gemma-2-9b-it",
+        ]:
+            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
+        elif model == "nvidia/nemotron-4-340b-instruct":
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "max_tokens",
+                "max_completion_tokens",
+            ]
+        elif model == "nvidia/nemotron-4-340b-reward":
+            return [
+                "stream",
+            ]
+        elif model in ["google/codegemma-1.1-7b"]:
+            # most params - but no 'seed' :(
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "max_completion_tokens",
+                "stop",
+            ]
+        else:
+            # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
+            # "upstage/solar-10.7b-instruct",
+            # "snowflake/arctic",
+            # "seallms/seallm-7b-v2.5",
+            # "nvidia/llama3-chatqa-1.5-8b",
+            # "nvidia/llama3-chatqa-1.5-70b",
+            # "mistralai/mistral-large",
+            # "mistralai/mixtral-8x22b-instruct-v0.1",
+            # "mistralai/mixtral-8x7b-instruct-v0.1",
+            # "mistralai/mistral-7b-instruct-v0.3",
+            # "mistralai/mistral-7b-instruct-v0.2",
+            # "mistralai/codestral-22b-instruct-v0.1",
+            # "microsoft/phi-3-small-8k-instruct",
+            # "microsoft/phi-3-small-128k-instruct",
+            # "microsoft/phi-3-mini-4k-instruct",
+            # "microsoft/phi-3-mini-128k-instruct",
+            # "microsoft/phi-3-medium-4k-instruct",
+            # "microsoft/phi-3-medium-128k-instruct",
+            # "meta/llama3-70b-instruct",
+            # "meta/llama3-8b-instruct",
+            # "meta/llama2-70b",
+            # "meta/codellama-70b",
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "max_completion_tokens",
+                "stop",
+                "seed",
+            ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if param == "max_completion_tokens":
+                optional_params["max_tokens"] = value
+            elif param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
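
A quick sketch of how NvidiaNimConfig.map_openai_params behaves once the file above is in place (assuming the vendored module is importable from the path shown in the diff; the model names come from get_supported_openai_params, and the parameter values are illustrative):

    from litellm.llms.nvidia_nim.chat import NvidiaNimConfig

    config = NvidiaNimConfig()

    # For the default model family, 'max_completion_tokens' is rewritten to
    # 'max_tokens', and supported params such as 'seed' pass through unchanged.
    mapped = config.map_openai_params(
        non_default_params={"temperature": 0.2, "max_completion_tokens": 256, "seed": 42},
        optional_params={},
        model="meta/llama3-70b-instruct",
        drop_params=True,
    )
    print(mapped)  # {'temperature': 0.2, 'max_tokens': 256, 'seed': 42}

    # The reward model only supports 'stream', so other params are silently dropped.
    mapped = config.map_openai_params(
        non_default_params={"temperature": 0.2, "stream": True},
        optional_params={},
        model="nvidia/nemotron-4-340b-reward",
        drop_params=True,
    )
    print(mapped)  # {'stream': True}
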
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/embed.py b/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/embed.py
new file mode 100644
index 00000000..24c6cc34
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/embed.py
@@ -0,0 +1,82 @@
+"""
+Nvidia NIM embeddings endpoint: https://docs.api.nvidia.com/nim/reference/nvidia-nv-embedqa-e5-v5-infer
+
+This is OpenAI compatible
+
+This file only contains param mapping logic
+
+API calling is done using the OpenAI SDK with an api_base
+"""
+
+import types
+from typing import Optional
+
+
+class NvidiaNimEmbeddingConfig:
+    """
+    Reference: https://docs.api.nvidia.com/nim/reference/nvidia-nv-embedqa-e5-v5-infer
+    """
+
+    # OpenAI params
+    encoding_format: Optional[str] = None
+    user: Optional[str] = None
+
+    # Nvidia NIM params
+    input_type: Optional[str] = None
+    truncate: Optional[str] = None
+
+    def __init__(
+        self,
+        encoding_format: Optional[str] = None,
+        user: Optional[str] = None,
+        input_type: Optional[str] = None,
+        truncate: Optional[str] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params(
+        self,
+    ):
+        return ["encoding_format", "user", "dimensions"]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        kwargs: Optional[dict] = None,
+    ):
+        if "extra_body" not in optional_params:
+            optional_params["extra_body"] = {}
+        for k, v in non_default_params.items():
+            if k == "input_type":
+                optional_params["extra_body"].update({"input_type": v})
+            elif k == "truncate":
+                optional_params["extra_body"].update({"truncate": v})
+            else:
+                optional_params[k] = v
+
+        if kwargs is not None:
+            # pass kwargs in extra_body
+            optional_params["extra_body"].update(kwargs)
+        return optional_params
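
And a similar sketch for the embedding config (again assuming the vendored module is importable; values are illustrative). OpenAI-standard params are passed through as-is, while the NIM-specific 'input_type' and 'truncate' are routed into 'extra_body', which the OpenAI SDK forwards as extra fields in the request body:

    from litellm.llms.nvidia_nim.embed import NvidiaNimEmbeddingConfig

    config = NvidiaNimEmbeddingConfig()

    mapped = config.map_openai_params(
        non_default_params={
            "encoding_format": "float",  # standard OpenAI param: passed through
            "input_type": "query",       # NIM-specific: routed into extra_body
            "truncate": "END",           # NIM-specific: routed into extra_body
        },
        optional_params={},
    )
    print(mapped)
    # {'extra_body': {'input_type': 'query', 'truncate': 'END'}, 'encoding_format': 'float'}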