1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
"""
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
This is OpenAI compatible
This file only contains param mapping logic
API calling is done using the OpenAI SDK with an api_base
"""
from typing import Optional, Union
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
class NvidiaNimConfig(OpenAIGPTConfig):
    """
    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

    The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat
    Completions API interface. Below are the parameters:

    - `temperature`
    - `top_p`
    - `frequency_penalty`
    - `presence_penalty`
    - `max_tokens`
    - `stop`

    Each one defaults to ``None`` (unset). Values passed to ``__init__`` are
    stored on the class itself, not on the instance.
    """

    # Sampling values are fractional, hence float rather than int.
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, list]] = None

    def __init__(
        self,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
    ) -> None:
        """Record every non-None argument as a class-level attribute."""
        # Snapshot the arguments first so the loop variables below are not
        # picked up as if they were configuration values.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Written to the class (not the instance): the setting is
                # therefore visible through every instance of this class.
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        """Return the configuration as produced by the parent class."""
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the given model

        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference

        Args:
            model: Nvidia NIM model identifier, e.g. "google/gemma-2-9b-it".

        Returns:
            A new list of the OpenAI parameter names accepted for `model`.
        """
        if model in (
            "google/recurrentgemma-2b",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "gemma-2-9b-it",
        ):
            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]

        if model == "nvidia/nemotron-4-340b-instruct":
            return [
                "stream",
                "temperature",
                "top_p",
                "max_tokens",
                "max_completion_tokens",
            ]

        if model == "nvidia/nemotron-4-340b-reward":
            return [
                "stream",
            ]

        if model in ("google/codegemma-1.1-7b",):
            # most params - but no 'seed' :(
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
            ]

        # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
        # "upstage/solar-10.7b-instruct",
        # "snowflake/arctic",
        # "seallms/seallm-7b-v2.5",
        # "nvidia/llama3-chatqa-1.5-8b",
        # "nvidia/llama3-chatqa-1.5-70b",
        # "mistralai/mistral-large",
        # "mistralai/mixtral-8x22b-instruct-v0.1",
        # "mistralai/mixtral-8x7b-instruct-v0.1",
        # "mistralai/mistral-7b-instruct-v0.3",
        # "mistralai/mistral-7b-instruct-v0.2",
        # "mistralai/codestral-22b-instruct-v0.1",
        # "microsoft/phi-3-small-8k-instruct",
        # "microsoft/phi-3-small-128k-instruct",
        # "microsoft/phi-3-mini-4k-instruct",
        # "microsoft/phi-3-mini-128k-instruct",
        # "microsoft/phi-3-medium-4k-instruct",
        # "microsoft/phi-3-medium-128k-instruct",
        # "meta/llama3-70b-instruct",
        # "meta/llama3-8b-instruct",
        # "meta/llama2-70b",
        # "meta/codellama-70b",
        return [
            "stream",
            "temperature",
            "top_p",
            "frequency_penalty",
            "presence_penalty",
            "max_tokens",
            "max_completion_tokens",
            "stop",
            "seed",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy the supported entries of `non_default_params` into `optional_params`.

        `max_completion_tokens` is sent as `max_tokens`. Parameters that
        `get_supported_openai_params` does not list for `model` are skipped.

        Args:
            non_default_params: Caller-supplied OpenAI parameters.
            optional_params: Dict that receives the mapped parameters; it is
                mutated in place.
            model: Nvidia NIM model identifier.
            drop_params: Not consulted by this implementation.

        Returns:
            The same `optional_params` dict, after mutation.
        """
        supported_openai_params = self.get_supported_openai_params(model=model)
        # The token limit is always emitted under the name "max_tokens", so it
        # is allowed when the model accepts either spelling. Models that accept
        # neither (e.g. one that only supports "stream") do not get it.
        accepts_token_limit = (
            "max_tokens" in supported_openai_params
            or "max_completion_tokens" in supported_openai_params
        )
        for param, value in non_default_params.items():
            if param == "max_completion_tokens":
                if accepts_token_limit:
                    optional_params["max_tokens"] = value
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params
|