path: root/.venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py
author    S. Solomon Darnell    2025-03-28 21:52:21 -0500
committer S. Solomon Darnell    2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py  307
1 file changed, 307 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py b/.venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py
new file mode 100644
index 00000000..81ad1346
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/llms/deprecated_providers/aleph_alpha.py
@@ -0,0 +1,307 @@
+import json
+import time
+import types
+from typing import Callable, Optional
+
+import httpx  # type: ignore
+
+import litellm
+from litellm.utils import Choices, Message, ModelResponse, Usage
+
+
+class AlephAlphaError(Exception):
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+        self.request = httpx.Request(
+            method="POST", url="https://api.aleph-alpha.com/complete"
+        )
+        self.response = httpx.Response(status_code=status_code, request=self.request)
+        super().__init__(
+            self.message
+        )  # Call the base class constructor with the parameters it needs
+
+
+class AlephAlphaConfig:
+    """
+    Reference: https://docs.aleph-alpha.com/api/complete/
+
+    The `AlephAlphaConfig` class represents the configuration for the Aleph Alpha API. Here are the properties:
+
+    - `maximum_tokens` (integer, required): The maximum number of tokens to be generated by the completion. The sum of input tokens and maximum tokens may not exceed 2048.
+
+    - `minimum_tokens` (integer, optional; default value: 0): Generate at least this number of tokens before an end-of-text token is generated.
+
+    - `echo` (boolean, optional; default value: false): Whether to echo the prompt in the completion.
+
+    - `temperature` (number, nullable; default value: 0): Adjusts how creatively the model generates outputs. It is recommended to adjust only one of temperature, top_k, or top_p at a time rather than all of them together.
+
+    - `top_k` (integer, nullable; default value: 0): Introduces randomness into token generation by considering the top k most likely options.
+
+    - `top_p` (number, nullable; default value: 0): Adds randomness by considering the smallest set of tokens whose cumulative probability exceeds top_p.
+
+    - `presence_penalty`, `frequency_penalty`, `sequence_penalty` (number, nullable; default value: 0): Various penalties that can reduce repetition.
+
+    - `sequence_penalty_min_length` (integer; default value: 2): Minimum number of tokens to be considered as a sequence.
+
+    - `repetition_penalties_include_prompt`, `repetition_penalties_include_completion`, `use_multiplicative_presence_penalty`,`use_multiplicative_frequency_penalty`,`use_multiplicative_sequence_penalty` (boolean, nullable; default value: false): Various settings that adjust how the repetition penalties are applied.
+
+    - `penalty_bias` (string, nullable): Text used in addition to the penalized tokens for repetition penalties.
+
+    - `penalty_exceptions` (string[], nullable): Strings that may be generated without penalty.
+
+    - `penalty_exceptions_include_stop_sequences` (boolean, nullable; default value: true): Include all stop_sequences in penalty_exceptions.
+
+    - `best_of` (integer, nullable; default value: 1): The number of completions to be generated on the server side.
+
+    - `n` (integer, nullable; default value: 1): The number of completions to return.
+
+    - `logit_bias` (object, nullable): Adjust the logit scores before sampling.
+
+    - `log_probs` (integer, nullable): Number of top log probabilities for each token generated.
+
+    - `stop_sequences` (string[], nullable): List of strings that will stop generation if they're generated.
+
+    - `tokens` (boolean, nullable; default value: false): Flag indicating whether individual tokens of the completion should be returned or not.
+
+    - `raw_completion` (boolean; default value: false): If true, the raw completion of the model is returned.
+
+    - `disable_optimizations` (boolean, nullable; default value: false): Disables any applied optimizations to both your prompt and completion.
+
+    - `completion_bias_inclusion`, `completion_bias_exclusion` (string[], default value: []): Set of strings to bias the generation of tokens.
+
+    - `completion_bias_inclusion_first_token_only`, `completion_bias_exclusion_first_token_only` (boolean; default value: false): Consider only the first token for the completion_bias_inclusion/exclusion.
+
+    - `contextual_control_threshold` (number, nullable): If set, attention control parameters are also applied to tokens that are similar to the explicitly controlled tokens.
+
+    - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores.
+    """
+
+    maximum_tokens: Optional[int] = (
+        litellm.max_tokens
+    )  # aleph alpha requires max tokens
+    minimum_tokens: Optional[int] = None
+    echo: Optional[bool] = None
+    temperature: Optional[float] = None
+    top_k: Optional[int] = None
+    top_p: Optional[float] = None
+    presence_penalty: Optional[float] = None
+    frequency_penalty: Optional[float] = None
+    sequence_penalty: Optional[float] = None
+    sequence_penalty_min_length: Optional[int] = None
+    repetition_penalties_include_prompt: Optional[bool] = None
+    repetition_penalties_include_completion: Optional[bool] = None
+    use_multiplicative_presence_penalty: Optional[bool] = None
+    use_multiplicative_frequency_penalty: Optional[bool] = None
+    use_multiplicative_sequence_penalty: Optional[bool] = None
+    penalty_bias: Optional[str] = None
+    penalty_exceptions_include_stop_sequences: Optional[bool] = None
+    best_of: Optional[int] = None
+    n: Optional[int] = None
+    logit_bias: Optional[dict] = None
+    log_probs: Optional[int] = None
+    stop_sequences: Optional[list] = None
+    tokens: Optional[bool] = None
+    raw_completion: Optional[bool] = None
+    disable_optimizations: Optional[bool] = None
+    completion_bias_inclusion: Optional[list] = None
+    completion_bias_exclusion: Optional[list] = None
+    completion_bias_inclusion_first_token_only: Optional[bool] = None
+    completion_bias_exclusion_first_token_only: Optional[bool] = None
+    contextual_control_threshold: Optional[float] = None
+    control_log_additive: Optional[bool] = None
+
+    def __init__(
+        self,
+        maximum_tokens: Optional[int] = None,
+        minimum_tokens: Optional[int] = None,
+        echo: Optional[bool] = None,
+        temperature: Optional[float] = None,
+        top_k: Optional[int] = None,
+        top_p: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        sequence_penalty: Optional[float] = None,
+        sequence_penalty_min_length: Optional[int] = None,
+        repetition_penalties_include_prompt: Optional[bool] = None,
+        repetition_penalties_include_completion: Optional[bool] = None,
+        use_multiplicative_presence_penalty: Optional[bool] = None,
+        use_multiplicative_frequency_penalty: Optional[bool] = None,
+        use_multiplicative_sequence_penalty: Optional[bool] = None,
+        penalty_bias: Optional[str] = None,
+        penalty_exceptions_include_stop_sequences: Optional[bool] = None,
+        best_of: Optional[int] = None,
+        n: Optional[int] = None,
+        logit_bias: Optional[dict] = None,
+        log_probs: Optional[int] = None,
+        stop_sequences: Optional[list] = None,
+        tokens: Optional[bool] = None,
+        raw_completion: Optional[bool] = None,
+        disable_optimizations: Optional[bool] = None,
+        completion_bias_inclusion: Optional[list] = None,
+        completion_bias_exclusion: Optional[list] = None,
+        completion_bias_inclusion_first_token_only: Optional[bool] = None,
+        completion_bias_exclusion_first_token_only: Optional[bool] = None,
+        contextual_control_threshold: Optional[float] = None,
+        control_log_additive: Optional[bool] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
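+
+    # Usage sketch (illustrative): because __init__ writes non-None arguments
+    # onto the class itself, instantiating the config once sets process-wide
+    # defaults that get_config() later hands to completion(), e.g.:
+    #
+    #   litellm.AlephAlphaConfig(maximum_tokens=256, temperature=0.3)
+    #   assert litellm.AlephAlphaConfig.get_config()["maximum_tokens"] == 256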
+
+
+def validate_environment(api_key):
+    headers = {
+        "accept": "application/json",
+        "content-type": "application/json",
+    }
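+    # With an api_key the returned mapping also carries the bearer token, e.g.:
+    #   {"accept": "application/json", "content-type": "application/json",
+    #    "Authorization": "Bearer <api_key>"}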
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+    return headers
+
+
+def completion(
+    model: str,
+    messages: list,
+    api_base: str,
+    model_response: ModelResponse,
+    print_verbose: Callable,
+    encoding,
+    api_key,
+    logging_obj,
+    optional_params: dict,
+    litellm_params=None,
+    logger_fn=None,
+    default_max_tokens_to_sample=None,
+):
+    headers = validate_environment(api_key)
+
+    ## Load Config
+    config = litellm.AlephAlphaConfig.get_config()
+    for k, v in config.items():
+        if (
+            k not in optional_params
+        ):  # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
+            optional_params[k] = v
+
+    completion_url = api_base
+    model = model
+    prompt = ""
+    if "control" in model:  # follow the ###Instruction / ###Response format
+        for idx, message in enumerate(messages):
+            if "role" in message:
+                if (
+                    idx == 0
+                ):  # set first message as instruction (required), let later user messages be input
+                    prompt += f"###Instruction: {message['content']}"
+                else:
+                    if message["role"] == "system":
+                        prompt += f"###Instruction: {message['content']}"
+                    elif message["role"] == "user":
+                        prompt += f"###Input: {message['content']}"
+                    else:
+                        prompt += f"###Response: {message['content']}"
+            else:
+                prompt += f"{message['content']}"
+    else:
+        prompt = " ".join(message["content"] for message in messages)
+    data = {
+        "model": model,
+        "prompt": prompt,
+        **optional_params,
+    }
+
+    ## LOGGING
+    logging_obj.pre_call(
+        input=prompt,
+        api_key=api_key,
+        additional_args={"complete_input_dict": data},
+    )
+    ## COMPLETION CALL
+    response = litellm.module_level_client.post(
+        completion_url,
+        headers=headers,
+        data=json.dumps(data),
+        stream=optional_params["stream"] if "stream" in optional_params else False,
+    )
+    if "stream" in optional_params and optional_params["stream"] is True:
+        return response.iter_lines()
+    else:
+        ## LOGGING
+        logging_obj.post_call(
+            input=prompt,
+            api_key=api_key,
+            original_response=response.text,
+            additional_args={"complete_input_dict": data},
+        )
+        print_verbose(f"raw model_response: {response.text}")
+        ## RESPONSE OBJECT
+        completion_response = response.json()
+        if "error" in completion_response:
+            raise AlephAlphaError(
+                message=completion_response["error"],
+                status_code=response.status_code,
+            )
+        else:
+            try:
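+                # Assumed shape of a successful /complete response, e.g.:
+                #   {"model_version": "...",
+                #    "completions": [{"completion": " Blue.",
+                #                     "finish_reason": "maximum_tokens"}]}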
+                choices_list = []
+                for idx, item in enumerate(completion_response["completions"]):
+                    if len(item["completion"]) > 0:
+                        message_obj = Message(content=item["completion"])
+                    else:
+                        message_obj = Message(content=None)
+                    choice_obj = Choices(
+                        finish_reason=item["finish_reason"],
+                        index=idx + 1,
+                        message=message_obj,
+                    )
+                    choices_list.append(choice_obj)
+                model_response.choices = choices_list  # type: ignore
+            except Exception:
+                raise AlephAlphaError(
+                    message=json.dumps(completion_response),
+                    status_code=response.status_code,
+                )
+
+        ## CALCULATING USAGE
+        prompt_tokens = len(encoding.encode(prompt))
+        completion_tokens = len(
+            encoding.encode(
+                # content may be None when the completion was empty
+                model_response["choices"][0]["message"]["content"] or "",
+                disallowed_special=(),
+            )
+        )
+
+        model_response.created = int(time.time())
+        model_response.model = model
+        usage = Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
+        setattr(model_response, "usage", usage)
+        return model_response
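+
+# Hypothetical direct invocation of this handler (litellm's router normally
+# supplies these arguments); the model name, api_base, encoding and logging_obj
+# below are illustrative assumptions:
+#
+#   import tiktoken
+#   resp = completion(
+#       model="luminous-base-control",
+#       messages=[{"role": "user", "content": "Name a color."}],
+#       api_base="https://api.aleph-alpha.com/complete",
+#       model_response=ModelResponse(),
+#       print_verbose=print,
+#       encoding=tiktoken.get_encoding("cl100k_base"),
+#       api_key="<ALEPH_ALPHA_API_KEY>",
+#       logging_obj=logging_obj,  # a litellm Logging object
+#       optional_params={"maximum_tokens": 64},
+#   )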
+
+
+def embedding():
+    # logic for parsing in - calling - parsing out model embedding calls
+    pass
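+
+
+# Since this module sits under deprecated_providers, routing through litellm's
+# public entry point may no longer be supported; historically a call looked
+# roughly like this (model name illustrative):
+#
+#   import litellm
+#   resp = litellm.completion(
+#       model="luminous-base-control",
+#       messages=[{"role": "user", "content": "Hello"}],
+#       api_key="<ALEPH_ALPHA_API_KEY>",
+#   )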