author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/constants.py
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download | gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/constants.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/litellm/constants.py | 443
1 file changed, 443 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/constants.py b/.venv/lib/python3.12/site-packages/litellm/constants.py
new file mode 100644
index 00000000..da66f897
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/constants.py
@@ -0,0 +1,443 @@
+from typing import List, Literal
+
+ROUTER_MAX_FALLBACKS = 5
+DEFAULT_BATCH_SIZE = 512
+DEFAULT_FLUSH_INTERVAL_SECONDS = 5
+DEFAULT_MAX_RETRIES = 2
+DEFAULT_FAILURE_THRESHOLD_PERCENT = (
+    0.5  # default cooldown a deployment if 50% of requests fail in a given minute
+)
+DEFAULT_REDIS_SYNC_INTERVAL = 1
+DEFAULT_COOLDOWN_TIME_SECONDS = 5
+DEFAULT_REPLICATE_POLLING_RETRIES = 5
+DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
+DEFAULT_IMAGE_TOKEN_COUNT = 250
+DEFAULT_IMAGE_WIDTH = 300
+DEFAULT_IMAGE_HEIGHT = 300
+MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
+SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
+#### RELIABILITY ####
+REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
+#### Networking settings ####
+request_timeout: float = 6000  # time in seconds
+STREAM_SSE_DONE_STRING: str = "[DONE]"
+
+LITELLM_CHAT_PROVIDERS = [
+    "openai",
+    "openai_like",
+    "xai",
+    "custom_openai",
+    "text-completion-openai",
+    "cohere",
+    "cohere_chat",
+    "clarifai",
+    "anthropic",
+    "anthropic_text",
+    "replicate",
+    "huggingface",
+    "together_ai",
+    "openrouter",
+    "vertex_ai",
+    "vertex_ai_beta",
+    "gemini",
+    "ai21",
+    "baseten",
+    "azure",
+    "azure_text",
+    "azure_ai",
+    "sagemaker",
+    "sagemaker_chat",
+    "bedrock",
+    "vllm",
+    "nlp_cloud",
+    "petals",
+    "oobabooga",
+    "ollama",
+    "ollama_chat",
+    "deepinfra",
+    "perplexity",
+    "mistral",
+    "groq",
+    "nvidia_nim",
+    "cerebras",
+    "ai21_chat",
+    "volcengine",
+    "codestral",
+    "text-completion-codestral",
+    "deepseek",
+    "sambanova",
+    "maritalk",
+    "cloudflare",
+    "fireworks_ai",
+    "friendliai",
+    "watsonx",
+    "watsonx_text",
+    "triton",
+    "predibase",
+    "databricks",
+    "empower",
+    "github",
+    "custom",
+    "litellm_proxy",
+    "hosted_vllm",
+    "lm_studio",
+    "galadriel",
+]
+
+
+OPENAI_CHAT_COMPLETION_PARAMS = [
+    "functions",
+    "function_call",
+    "temperature",
+    "top_p",
+    "n",
+    "stream",
+    "stream_options",
+    "stop",
+    "max_completion_tokens",
+    "modalities",
+    "prediction",
+    "audio",
+    "max_tokens",
+    "presence_penalty",
+    "frequency_penalty",
+    "logit_bias",
+    "user",
+    "request_timeout",
+    "api_base",
+    "api_version",
+    "api_key",
+    "deployment_id",
+    "organization",
+    "base_url",
+    "default_headers",
+    "timeout",
+    "response_format",
+    "seed",
+    "tools",
+    "tool_choice",
+    "max_retries",
+    "parallel_tool_calls",
+    "logprobs",
+    "top_logprobs",
+    "reasoning_effort",
+    "extra_headers",
+    "thinking",
+]
+
+openai_compatible_endpoints: List = [
+    "api.perplexity.ai",
+    "api.endpoints.anyscale.com/v1",
+    "api.deepinfra.com/v1/openai",
+    "api.mistral.ai/v1",
+    "codestral.mistral.ai/v1/chat/completions",
+    "codestral.mistral.ai/v1/fim/completions",
+    "api.groq.com/openai/v1",
+    "https://integrate.api.nvidia.com/v1",
+    "api.deepseek.com/v1",
+    "api.together.xyz/v1",
+    "app.empower.dev/api/v1",
+    "https://api.friendli.ai/serverless/v1",
+    "api.sambanova.ai/v1",
+    "api.x.ai/v1",
+    "api.galadriel.ai/v1",
+]
+
+
+openai_compatible_providers: List = [
+    "anyscale",
+    "mistral",
+    "groq",
+    "nvidia_nim",
+    "cerebras",
+    "sambanova",
+    "ai21_chat",
+    "ai21",
"volcengine", + "codestral", + "deepseek", + "deepinfra", + "perplexity", + "xinference", + "xai", + "together_ai", + "fireworks_ai", + "empower", + "friendliai", + "azure_ai", + "github", + "litellm_proxy", + "hosted_vllm", + "lm_studio", + "galadriel", +] +openai_text_completion_compatible_providers: List = ( + [ # providers that support `/v1/completions` + "together_ai", + "fireworks_ai", + "hosted_vllm", + ] +) +_openai_like_providers: List = [ + "predibase", + "databricks", + "watsonx", +] # private helper. similar to openai but require some custom auth / endpoint handling, so can't use the openai sdk +# well supported replicate llms +replicate_models: List = [ + # llama replicate supported LLMs + "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf", + "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52", + "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db", + # Vicuna + "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b", + "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe", + # Flan T-5 + "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f", + # Others + "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5", + "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad", +] + +clarifai_models: List = [ + "clarifai/meta.Llama-3.Llama-3-8B-Instruct", + "clarifai/gcp.generate.gemma-1_1-7b-it", + "clarifai/mistralai.completion.mixtral-8x22B", + "clarifai/cohere.generate.command-r-plus", + "clarifai/databricks.drbx.dbrx-instruct", + "clarifai/mistralai.completion.mistral-large", + "clarifai/mistralai.completion.mistral-medium", + "clarifai/mistralai.completion.mistral-small", + "clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1", + "clarifai/gcp.generate.gemma-2b-it", + "clarifai/gcp.generate.gemma-7b-it", + "clarifai/deci.decilm.deciLM-7B-instruct", + "clarifai/mistralai.completion.mistral-7B-Instruct", + "clarifai/gcp.generate.gemini-pro", + "clarifai/anthropic.completion.claude-v1", + "clarifai/anthropic.completion.claude-instant-1_2", + "clarifai/anthropic.completion.claude-instant", + "clarifai/anthropic.completion.claude-v2", + "clarifai/anthropic.completion.claude-2_1", + "clarifai/meta.Llama-2.codeLlama-70b-Python", + "clarifai/meta.Llama-2.codeLlama-70b-Instruct", + "clarifai/openai.completion.gpt-3_5-turbo-instruct", + "clarifai/meta.Llama-2.llama2-7b-chat", + "clarifai/meta.Llama-2.llama2-13b-chat", + "clarifai/meta.Llama-2.llama2-70b-chat", + "clarifai/openai.chat-completion.gpt-4-turbo", + "clarifai/microsoft.text-generation.phi-2", + "clarifai/meta.Llama-2.llama2-7b-chat-vllm", + "clarifai/upstage.solar.solar-10_7b-instruct", + "clarifai/openchat.openchat.openchat-3_5-1210", + "clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B", + "clarifai/gcp.generate.text-bison", + "clarifai/meta.Llama-2.llamaGuard-7b", + "clarifai/fblgit.una-cybertron.una-cybertron-7b-v2", + "clarifai/openai.chat-completion.GPT-4", + "clarifai/openai.chat-completion.GPT-3_5-turbo", + "clarifai/ai21.complete.Jurassic2-Grande", + "clarifai/ai21.complete.Jurassic2-Grande-Instruct", + "clarifai/ai21.complete.Jurassic2-Jumbo-Instruct", + "clarifai/ai21.complete.Jurassic2-Jumbo", + "clarifai/ai21.complete.Jurassic2-Large", + "clarifai/cohere.generate.cohere-generate-command", + 
"clarifai/wizardlm.generate.wizardCoder-Python-34B", + "clarifai/wizardlm.generate.wizardLM-70B", + "clarifai/tiiuae.falcon.falcon-40b-instruct", + "clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat", + "clarifai/gcp.generate.code-gecko", + "clarifai/gcp.generate.code-bison", + "clarifai/mistralai.completion.mistral-7B-OpenOrca", + "clarifai/mistralai.completion.openHermes-2-mistral-7B", + "clarifai/wizardlm.generate.wizardLM-13B", + "clarifai/huggingface-research.zephyr.zephyr-7B-alpha", + "clarifai/wizardlm.generate.wizardCoder-15B", + "clarifai/microsoft.text-generation.phi-1_5", + "clarifai/databricks.Dolly-v2.dolly-v2-12b", + "clarifai/bigcode.code.StarCoder", + "clarifai/salesforce.xgen.xgen-7b-8k-instruct", + "clarifai/mosaicml.mpt.mpt-7b-instruct", + "clarifai/anthropic.completion.claude-3-opus", + "clarifai/anthropic.completion.claude-3-sonnet", + "clarifai/gcp.generate.gemini-1_5-pro", + "clarifai/gcp.generate.imagen-2", + "clarifai/salesforce.blip.general-english-image-caption-blip-2", +] + + +huggingface_models: List = [ + "meta-llama/Llama-2-7b-hf", + "meta-llama/Llama-2-7b-chat-hf", + "meta-llama/Llama-2-13b-hf", + "meta-llama/Llama-2-13b-chat-hf", + "meta-llama/Llama-2-70b-hf", + "meta-llama/Llama-2-70b-chat-hf", + "meta-llama/Llama-2-7b", + "meta-llama/Llama-2-7b-chat", + "meta-llama/Llama-2-13b", + "meta-llama/Llama-2-13b-chat", + "meta-llama/Llama-2-70b", + "meta-llama/Llama-2-70b-chat", +] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers +empower_models = [ + "empower/empower-functions", + "empower/empower-functions-small", +] + +together_ai_models: List = [ + # llama llms - chat + "togethercomputer/llama-2-70b-chat", + # llama llms - language / instruct + "togethercomputer/llama-2-70b", + "togethercomputer/LLaMA-2-7B-32K", + "togethercomputer/Llama-2-7B-32K-Instruct", + "togethercomputer/llama-2-7b", + # falcon llms + "togethercomputer/falcon-40b-instruct", + "togethercomputer/falcon-7b-instruct", + # alpaca + "togethercomputer/alpaca-7b", + # chat llms + "HuggingFaceH4/starchat-alpha", + # code llms + "togethercomputer/CodeLlama-34b", + "togethercomputer/CodeLlama-34b-Instruct", + "togethercomputer/CodeLlama-34b-Python", + "defog/sqlcoder", + "NumbersStation/nsql-llama-2-7B", + "WizardLM/WizardCoder-15B-V1.0", + "WizardLM/WizardCoder-Python-34B-V1.0", + # language llms + "NousResearch/Nous-Hermes-Llama2-13b", + "Austism/chronos-hermes-13b", + "upstage/SOLAR-0-70b-16bit", + "WizardLM/WizardLM-70B-V1.0", +] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...) 
+
+
+baseten_models: List = [
+    "qvv0xeq",
+    "q841o8w",
+    "31dxrj3",
+]  # FALCON 7B  # WizardLM  # Mosaic ML
+
+BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
+    "cohere",
+    "anthropic",
+    "mistral",
+    "amazon",
+    "meta",
+    "llama",
+    "ai21",
+    "nova",
+    "deepseek_r1",
+]
+
+open_ai_embedding_models: List = ["text-embedding-ada-002"]
+cohere_embedding_models: List = [
+    "embed-english-v3.0",
+    "embed-english-light-v3.0",
+    "embed-multilingual-v3.0",
+    "embed-english-v2.0",
+    "embed-english-light-v2.0",
+    "embed-multilingual-v2.0",
+]
+bedrock_embedding_models: List = [
+    "amazon.titan-embed-text-v1",
+    "cohere.embed-english-v3",
+    "cohere.embed-multilingual-v3",
+]
+
+known_tokenizer_config = {
+    "mistralai/Mistral-7B-Instruct-v0.1": {
+        "tokenizer": {
+            "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+            "bos_token": "<s>",
+            "eos_token": "</s>",
+        },
+        "status": "success",
+    },
+    "meta-llama/Meta-Llama-3-8B-Instruct": {
+        "tokenizer": {
+            "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
+            "bos_token": "<|begin_of_text|>",
+            "eos_token": "",
+        },
+        "status": "success",
+    },
+    "deepseek-r1/deepseek-r1-7b-instruct": {
+        "tokenizer": {
+            "add_bos_token": True,
+            "add_eos_token": False,
+            "bos_token": {
+                "__type": "AddedToken",
+                "content": "<|begin▁of▁sentence|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+            },
+            "clean_up_tokenization_spaces": False,
+            "eos_token": {
+                "__type": "AddedToken",
+                "content": "<|end▁of▁sentence|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+            },
+            "legacy": True,
+            "model_max_length": 16384,
+            "pad_token": {
+                "__type": "AddedToken",
+                "content": "<|end▁of▁sentence|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+            },
+            "sp_model_kwargs": {},
+            "unk_token": None,
+            "tokenizer_class": "LlamaTokenizerFast",
+            "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
+        },
+        "status": "success",
+    },
+}
+
+
+OPENAI_FINISH_REASONS = ["stop", "length", "function_call", "content_filter", "null"]
+HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60  # 1 minute
+RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"  # default tool name used when converting response format to tool call
+
+########################### Logging Callback Constants ###########################
+AZURE_STORAGE_MSFT_VERSION = "2019-07-07"
+
+########################### LiteLLM Proxy Specific Constants ###########################
+########################################################################################
+MAX_SPENDLOG_ROWS_TO_QUERY = (
+    1_000_000  # if spendLogs has more than 1M rows, do not query the DB
+)
+# makes it clear this is a rate limit error for a litellm virtual key
+RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"
+
+# pass through route constants
+BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
+    "agents/",
+    "knowledgebases/",
+    "flows/",
+    "retrieveAndGenerate/",
+    "rerank/",
+    "generateQuery/",
+    "optimize-prompt/",
+]
+
+BATCH_STATUS_POLL_INTERVAL_SECONDS = 3600  # 1 hour
+BATCH_STATUS_POLL_MAX_ATTEMPTS = 24  # for 24 hours
+
+HEALTH_CHECK_TIMEOUT_SECONDS = 60  # 60 seconds
+
+UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"
+LITELLM_PROXY_ADMIN_NAME = "default_user_id"