| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
| download | gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py | 134
1 file changed, 134 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py b/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py
new file mode 100644
index 00000000..eedac6e3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/nvidia_nim/chat.py
@@ -0,0 +1,134 @@
+"""
+Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
+
+This is OpenAI compatible
+
+This file only contains param mapping logic
+
+API calling is done using the OpenAI SDK with an api_base
+"""
+
+from typing import Optional, Union
+
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+
+
+class NvidiaNimConfig(OpenAIGPTConfig):
+    """
+    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
+
+    The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:
+    """
+
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    frequency_penalty: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    max_tokens: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+
+    def __init__(
+        self,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        frequency_penalty: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return super().get_config()
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get the supported OpenAI params for the given model
+
+
+        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
+        """
+        if model in [
+            "google/recurrentgemma-2b",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "gemma-2-9b-it",
+        ]:
+            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
+        elif model == "nvidia/nemotron-4-340b-instruct":
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "max_tokens",
+                "max_completion_tokens",
+            ]
+        elif model == "nvidia/nemotron-4-340b-reward":
+            return [
+                "stream",
+            ]
+        elif model in ["google/codegemma-1.1-7b"]:
+            # most params - but no 'seed' :(
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "max_completion_tokens",
+                "stop",
+            ]
+        else:
+            # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
+            # "upstage/solar-10.7b-instruct",
+            # "snowflake/arctic",
+            # "seallms/seallm-7b-v2.5",
+            # "nvidia/llama3-chatqa-1.5-8b",
+            # "nvidia/llama3-chatqa-1.5-70b",
+            # "mistralai/mistral-large",
+            # "mistralai/mixtral-8x22b-instruct-v0.1",
+            # "mistralai/mixtral-8x7b-instruct-v0.1",
+            # "mistralai/mistral-7b-instruct-v0.3",
+            # "mistralai/mistral-7b-instruct-v0.2",
+            # "mistralai/codestral-22b-instruct-v0.1",
+            # "microsoft/phi-3-small-8k-instruct",
+            # "microsoft/phi-3-small-128k-instruct",
+            # "microsoft/phi-3-mini-4k-instruct",
+            # "microsoft/phi-3-mini-128k-instruct",
+            # "microsoft/phi-3-medium-4k-instruct",
+            # "microsoft/phi-3-medium-128k-instruct",
+            # "meta/llama3-70b-instruct",
+            # "meta/llama3-8b-instruct",
+            # "meta/llama2-70b",
+            # "meta/codellama-70b",
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "max_completion_tokens",
+                "stop",
+                "seed",
+            ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if param == "max_completion_tokens":
+                optional_params["max_tokens"] = value
+            elif param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params
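For context, a minimal sketch of how the two hooks in this file behave. Calling them directly like this is an illustration only (litellm normally invokes them internally during request transformation); the model names come from the lists in the file above, and the extra `logprobs` parameter is a hypothetical unsupported input used to show the drop behavior:

```python
# Sketch: exercising NvidiaNimConfig by hand, assuming the litellm package
# added in this diff is importable from the environment.
from litellm.llms.nvidia_nim.chat import NvidiaNimConfig

config = NvidiaNimConfig()

# Per-model allow-list: the nemotron-4-340b-reward branch only returns "stream".
print(config.get_supported_openai_params(model="nvidia/nemotron-4-340b-reward"))
# -> ['stream']

# map_openai_params rewrites max_completion_tokens -> max_tokens and keeps
# only params in the model's supported list; anything else is dropped.
optional_params = config.map_openai_params(
    non_default_params={
        "max_completion_tokens": 256,  # rewritten to max_tokens
        "temperature": 0.2,            # in the default supported list
        "logprobs": True,              # hypothetical unsupported param, dropped
    },
    optional_params={},
    model="meta/llama3-70b-instruct",  # hits the DEFAULT case
    drop_params=False,
)
print(optional_params)
# -> {'max_tokens': 256, 'temperature': 0.2}
```

The `max_completion_tokens` -> `max_tokens` rewrite suggests the NIM OpenAI-compatible endpoint expects the older `max_tokens` parameter name, so the newer OpenAI spelling is translated rather than passed through.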