model_list:
  - model_name: claude-3-5-sonnet
    litellm_params:
      model: claude-3-haiku-20240307
  # - model_name: gemini-1.5-flash-gemini
  #   litellm_params:
  #     model: vertex_ai_beta/gemini-1.5-flash
  #     api_base: https://gateway.ai.cloudflare.com/v1/fa4cdcab1f32b95ca3b53fd36043d691/test/google-vertex-ai/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.5-flash
  - litellm_params:
      api_base: http://0.0.0.0:8080
      api_key: ''
      model: gpt-4o
      rpm: 800
      input_cost_per_token: 300
    model_name: gpt-4o
  - model_name: llama3-70b-8192
    litellm_params:
      model: groq/llama3-70b-8192
  - model_name: fake-openai-endpoint
    litellm_params:
      model: predibase/llama-3-8b-instruct
      api_key: os.environ/PREDIBASE_API_KEY
      tenant_id: os.environ/PREDIBASE_TENANT_ID
      max_new_tokens: 256
  # - litellm_params:
  #     api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
  #     api_key: os.environ/AZURE_EUROPE_API_KEY
  #     model: azure/gpt-35-turbo
  #     rpm: 10
  #   model_name: gpt-3.5-turbo-fake-model
  - litellm_params:
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com
      api_key: os.environ/AZURE_API_KEY
      api_version: 2024-02-15-preview
      model: azure/chatgpt-v-2
      tpm: 100
    model_name: gpt-3.5-turbo
  - litellm_params:
      model: anthropic.claude-3-sonnet-20240229-v1:0
    model_name: bedrock-anthropic-claude-3
  - litellm_params:
      model: claude-3-haiku-20240307
    model_name: anthropic-claude-3
  - litellm_params:
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_key: os.environ/AZURE_API_KEY
      api_version: 2024-02-15-preview
      model: azure/chatgpt-v-2
      drop_params: True
      tpm: 100
    model_name: gpt-3.5-turbo
  - model_name: tts
    litellm_params:
      model: openai/tts-1
  - model_name: gpt-4-turbo-preview
    litellm_params:
      api_base: https://openai-france-1234.openai.azure.com
      api_key: os.environ/AZURE_FRANCE_API_KEY
      api_version: 2024-02-15-preview
      model: azure/gpt-turbo
  - model_name: text-embedding
    litellm_params:
      model: textembedding-gecko-multilingual@001
      vertex_project: my-project-9d5c
      vertex_location: us-central1
  - model_name: lbl/command-r-plus
    litellm_params:
      model: openai/lbl/command-r-plus
      api_key: "os.environ/VLLM_API_KEY"
      api_base: http://vllm-command:8000/v1
      rpm: 1000
      input_cost_per_token: 0
      output_cost_per_token: 0
    model_info:
      max_input_tokens: 80920

# litellm_settings:
#   callbacks: ["dynamic_rate_limiter"]
#   # success_callback: ["langfuse"]
#   # failure_callback: ["langfuse"]
#   # default_team_settings:
#   #   - team_id: proj1
#   #     success_callback: ["langfuse"]
#   #     langfuse_public_key: pk-lf-a65841e9-5192-4397-a679-cfff029fd5b0
#   #     langfuse_secret: sk-lf-d58c2891-3717-4f98-89dd-df44826215fd
#   #     langfuse_host: https://us.cloud.langfuse.com
#   #   - team_id: proj2
#   #     success_callback: ["langfuse"]
#   #     langfuse_public_key: pk-lf-3d789fd1-f49f-4e73-a7d9-1b4e11acbf9a
#   #     langfuse_secret: sk-lf-11b13aca-b0d4-4cde-9d54-721479dace6d
#   #     langfuse_host: https://us.cloud.langfuse.com

assistant_settings:
  custom_llm_provider: openai
  litellm_params:
    api_key: os.environ/OPENAI_API_KEY

router_settings:
  enable_pre_call_checks: true

litellm_settings:
  callbacks: ["s3"]

# general_settings:
#   # alerting: ["slack"]
#   enable_jwt_auth: True
#   litellm_jwtauth:
#     team_id_jwt_field: "client_id"
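
# Usage sketch (not part of the config itself; the file path and port below are
# assumptions): start the proxy against this file, e.g.
#
#   litellm --config /path/to/this_config.yaml
#
# then call any model_name above through the proxy's OpenAI-compatible endpoint,
# e.g. with the openai Python SDK (default port 4000 assumed; any api_key string
# should be accepted here since no master_key is set in general_settings):
#
#   from openai import OpenAI
#
#   client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-anything")
#   resp = client.chat.completions.create(
#       model="claude-3-5-sonnet",  # alias resolves to claude-3-haiku-20240307 via model_list
#       messages=[{"role": "user", "content": "Hello"}],
#   )
#   print(resp.choices[0].message.content)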