author     S. Solomon Darnell   2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell   2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download   gn-ai-master.tar.gz

    two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers')
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/__init__.py | 135
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/_common.py | 241
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/black_forest_labs.py | 66
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cerebras.py | 6
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cohere.py | 15
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fal_ai.py | 90
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fireworks_ai.py | 6
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hf_inference.py | 122
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hyperbolic.py | 43
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/nebius.py | 41
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/novita.py | 26
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/replicate.py | 53
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/sambanova.py | 6
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/together.py | 59
14 files changed, 909 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/__init__.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/__init__.py
new file mode 100644
index 00000000..34003125
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/__init__.py
@@ -0,0 +1,135 @@
+from typing import Dict, Literal
+
+from ._common import TaskProviderHelper
+from .black_forest_labs import BlackForestLabsTextToImageTask
+from .cerebras import CerebrasConversationalTask
+from .cohere import CohereConversationalTask
+from .fal_ai import (
+    FalAIAutomaticSpeechRecognitionTask,
+    FalAITextToImageTask,
+    FalAITextToSpeechTask,
+    FalAITextToVideoTask,
+)
+from .fireworks_ai import FireworksAIConversationalTask
+from .hf_inference import HFInferenceBinaryInputTask, HFInferenceConversational, HFInferenceTask
+from .hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask
+from .nebius import NebiusConversationalTask, NebiusTextGenerationTask, NebiusTextToImageTask
+from .novita import NovitaConversationalTask, NovitaTextGenerationTask
+from .replicate import ReplicateTask, ReplicateTextToSpeechTask
+from .sambanova import SambanovaConversationalTask
+from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask
+
+
+PROVIDER_T = Literal[
+    "black-forest-labs",
+    "cerebras",
+    "cohere",
+    "fal-ai",
+    "fireworks-ai",
+    "hf-inference",
+    "hyperbolic",
+    "nebius",
+    "novita",
+    "replicate",
+    "sambanova",
+    "together",
+]
+
+PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
+    "black-forest-labs": {
+        "text-to-image": BlackForestLabsTextToImageTask(),
+    },
+    "cerebras": {
+        "conversational": CerebrasConversationalTask(),
+    },
+    "cohere": {
+        "conversational": CohereConversationalTask(),
+    },
+    "fal-ai": {
+        "automatic-speech-recognition": FalAIAutomaticSpeechRecognitionTask(),
+        "text-to-image": FalAITextToImageTask(),
+        "text-to-speech": FalAITextToSpeechTask(),
+        "text-to-video": FalAITextToVideoTask(),
+    },
+    "fireworks-ai": {
+        "conversational": FireworksAIConversationalTask(),
+    },
+    "hf-inference": {
+        "text-to-image": HFInferenceTask("text-to-image"),
+        "conversational": HFInferenceConversational(),
+        "text-generation": HFInferenceTask("text-generation"),
+        "text-classification": HFInferenceTask("text-classification"),
+        "question-answering": HFInferenceTask("question-answering"),
+        "audio-classification": HFInferenceBinaryInputTask("audio-classification"),
+        "automatic-speech-recognition": HFInferenceBinaryInputTask("automatic-speech-recognition"),
+        "fill-mask": HFInferenceTask("fill-mask"),
+        "feature-extraction": HFInferenceTask("feature-extraction"),
+        "image-classification": HFInferenceBinaryInputTask("image-classification"),
+        "image-segmentation": HFInferenceBinaryInputTask("image-segmentation"),
+        "document-question-answering": HFInferenceTask("document-question-answering"),
+        "image-to-text": HFInferenceBinaryInputTask("image-to-text"),
+        "object-detection": HFInferenceBinaryInputTask("object-detection"),
+        "audio-to-audio": HFInferenceBinaryInputTask("audio-to-audio"),
+        "zero-shot-image-classification": HFInferenceBinaryInputTask("zero-shot-image-classification"),
+        "zero-shot-classification": HFInferenceTask("zero-shot-classification"),
+        "image-to-image": HFInferenceBinaryInputTask("image-to-image"),
+        "sentence-similarity": HFInferenceTask("sentence-similarity"),
+        "table-question-answering": HFInferenceTask("table-question-answering"),
+        "tabular-classification": HFInferenceTask("tabular-classification"),
+        "text-to-speech": HFInferenceTask("text-to-speech"),
+        "token-classification": HFInferenceTask("token-classification"),
+        "translation": HFInferenceTask("translation"),
+        "summarization": HFInferenceTask("summarization"),
+        "visual-question-answering": HFInferenceBinaryInputTask("visual-question-answering"),
+    },
+    "hyperbolic": {
+        "text-to-image": HyperbolicTextToImageTask(),
+        "conversational": HyperbolicTextGenerationTask("conversational"),
+        "text-generation": HyperbolicTextGenerationTask("text-generation"),
+    },
+    "nebius": {
+        "text-to-image": NebiusTextToImageTask(),
+        "conversational": NebiusConversationalTask(),
+        "text-generation": NebiusTextGenerationTask(),
+    },
+    "novita": {
+        "text-generation": NovitaTextGenerationTask(),
+        "conversational": NovitaConversationalTask(),
+    },
+    "replicate": {
+        "text-to-image": ReplicateTask("text-to-image"),
+        "text-to-speech": ReplicateTextToSpeechTask(),
+        "text-to-video": ReplicateTask("text-to-video"),
+    },
+    "sambanova": {
+        "conversational": SambanovaConversationalTask(),
+    },
+    "together": {
+        "text-to-image": TogetherTextToImageTask(),
+        "conversational": TogetherConversationalTask(),
+        "text-generation": TogetherTextGenerationTask(),
+    },
+}
+
+
+def get_provider_helper(provider: PROVIDER_T, task: str) -> TaskProviderHelper:
+    """Get provider helper instance by name and task.
+
+    Args:
+        provider (str): Name of the provider
+        task (str): Name of the task
+
+    Returns:
+        TaskProviderHelper: Helper instance for the specified provider and task
+
+    Raises:
+        ValueError: If provider or task is not supported
+    """
+    if provider not in PROVIDERS:
+        raise ValueError(f"Provider '{provider}' not supported. Available providers: {list(PROVIDERS.keys())}")
+    if task not in PROVIDERS[provider]:
+        raise ValueError(
+            f"Task '{task}' not supported for provider '{provider}'. "
+            f"Available tasks: {list(PROVIDERS[provider].keys())}"
+        )
+    return PROVIDERS[provider][task]
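A minimal usage sketch of the registry above (illustrative only; provider and task names are taken from the PROVIDERS mapping defined in this file):

from huggingface_hub.inference._providers import get_provider_helper

helper = get_provider_helper("fal-ai", "text-to-image")
print(type(helper).__name__)  # FalAITextToImageTask

# Unknown providers or tasks raise ValueError listing the supported options:
try:
    get_provider_helper("fal-ai", "conversational")
except ValueError as err:
    print(err)  # Task 'conversational' not supported for provider 'fal-ai'. Available tasks: [...]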
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/_common.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/_common.py
new file mode 100644
index 00000000..a30b5cf3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/_common.py
@@ -0,0 +1,241 @@
+from functools import lru_cache
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub import constants
+from huggingface_hub.inference._common import RequestParameters
+from huggingface_hub.utils import build_hf_headers, get_token, logging
+
+
+logger = logging.get_logger(__name__)
+
+
+# Dev purposes only.
+# If you want to try to run inference for a new model locally before it's registered on huggingface.co
+# for a given Inference Provider, you can add it to the following dictionary.
+HARDCODED_MODEL_ID_MAPPING: Dict[str, Dict[str, str]] = {
+    # "HF model ID" => "Model ID on Inference Provider's side"
+    #
+    # Example:
+    # "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
+    "cerebras": {},
+    "cohere": {},
+    "fal-ai": {},
+    "fireworks-ai": {},
+    "hf-inference": {},
+    "hyperbolic": {},
+    "nebius": {},
+    "replicate": {},
+    "sambanova": {},
+    "together": {},
+}
+
+
+def filter_none(d: Dict[str, Any]) -> Dict[str, Any]:
+    return {k: v for k, v in d.items() if v is not None}
+
+
+class TaskProviderHelper:
+    """Base class for task-specific provider helpers."""
+
+    def __init__(self, provider: str, base_url: str, task: str) -> None:
+        self.provider = provider
+        self.task = task
+        self.base_url = base_url
+
+    def prepare_request(
+        self,
+        *,
+        inputs: Any,
+        parameters: Dict[str, Any],
+        headers: Dict,
+        model: Optional[str],
+        api_key: Optional[str],
+        extra_payload: Optional[Dict[str, Any]] = None,
+    ) -> RequestParameters:
+        """
+        Prepare the request to be sent to the provider.
+
+        Each step (api_key, model, headers, url, payload) can be customized in subclasses.
+        """
+        # api_key from user, or local token, or raise error
+        api_key = self._prepare_api_key(api_key)
+
+        # mapped model from HF model ID
+        mapped_model = self._prepare_mapped_model(model)
+
+        # default HF headers + user headers (to customize in subclasses)
+        headers = self._prepare_headers(headers, api_key)
+
+        # routed URL if HF token, or direct URL (to customize in '_prepare_route' in subclasses)
+        url = self._prepare_url(api_key, mapped_model)
+
+        # prepare payload (to customize in subclasses)
+        payload = self._prepare_payload_as_dict(inputs, parameters, mapped_model=mapped_model)
+        if payload is not None:
+            payload = recursive_merge(payload, extra_payload or {})
+
+        # body data (to customize in subclasses)
+        data = self._prepare_payload_as_bytes(inputs, parameters, mapped_model, extra_payload)
+
+        # check if both payload and data are set and return
+        if payload is not None and data is not None:
+            raise ValueError("Both payload and data cannot be set in the same request.")
+        if payload is None and data is None:
+            raise ValueError("Either payload or data must be set in the request.")
+        return RequestParameters(url=url, task=self.task, model=mapped_model, json=payload, data=data, headers=headers)
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        """
+        Return the response in the expected format.
+
+        Override this method in subclasses for customized response handling."""
+        return response
+
+    def _prepare_api_key(self, api_key: Optional[str]) -> str:
+        """Return the API key to use for the request.
+
+        Usually not overridden in subclasses."""
+        if api_key is None:
+            api_key = get_token()
+        if api_key is None:
+            raise ValueError(
+                f"You must provide an api_key to work with {self.provider} API or log in with `huggingface-cli login`."
+            )
+        return api_key
+
+    def _prepare_mapped_model(self, model: Optional[str]) -> str:
+        """Return the mapped model ID to use for the request.
+
+        Usually not overridden in subclasses."""
+        if model is None:
+            raise ValueError(f"Please provide an HF model ID supported by {self.provider}.")
+
+        # hardcoded mapping for local testing
+        if HARDCODED_MODEL_ID_MAPPING.get(self.provider, {}).get(model):
+            return HARDCODED_MODEL_ID_MAPPING[self.provider][model]
+
+        provider_mapping = _fetch_inference_provider_mapping(model).get(self.provider)
+        if provider_mapping is None:
+            raise ValueError(f"Model {model} is not supported by provider {self.provider}.")
+
+        if provider_mapping.task != self.task:
+            raise ValueError(
+                f"Model {model} is not supported for task {self.task} and provider {self.provider}. "
+                f"Supported task: {provider_mapping.task}."
+            )
+
+        if provider_mapping.status == "staging":
+            logger.warning(
+                f"Model {model} is in staging mode for provider {self.provider}. Meant for test purposes only."
+            )
+        return provider_mapping.provider_id
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
+        """Return the headers to use for the request.
+
+        Override this method in subclasses for customized headers.
+        """
+        return {**build_hf_headers(token=api_key), **headers}
+
+    def _prepare_url(self, api_key: str, mapped_model: str) -> str:
+        """Return the URL to use for the request.
+
+        Usually not overridden in subclasses."""
+        base_url = self._prepare_base_url(api_key)
+        route = self._prepare_route(mapped_model)
+        return f"{base_url.rstrip('/')}/{route.lstrip('/')}"
+
+    def _prepare_base_url(self, api_key: str) -> str:
+        """Return the base URL to use for the request.
+
+        Usually not overridden in subclasses."""
+        # Route to the proxy if the api_key is a HF TOKEN
+        if api_key.startswith("hf_"):
+            logger.info(f"Calling '{self.provider}' provider through Hugging Face router.")
+            return constants.INFERENCE_PROXY_TEMPLATE.format(provider=self.provider)
+        else:
+            logger.info(f"Calling '{self.provider}' provider directly.")
+            return self.base_url
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        """Return the route to use for the request.
+
+        Override this method in subclasses for customized routes.
+        """
+        return ""
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        """Return the payload to use for the request, as a dict.
+
+        Override this method in subclasses for customized payloads.
+        Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value.
+        """
+        return None
+
+    def _prepare_payload_as_bytes(
+        self, inputs: Any, parameters: Dict, mapped_model: str, extra_payload: Optional[Dict]
+    ) -> Optional[bytes]:
+        """Return the body to use for the request, as bytes.
+
+        Override this method in subclasses for customized body data.
+        Only one of `_prepare_payload_as_dict` and `_prepare_payload_as_bytes` should return a value.
+        """
+        return None
+
+
+class BaseConversationalTask(TaskProviderHelper):
+    """
+    Base class for conversational (chat completion) tasks.
+    The schema follows the OpenAI API format defined here: https://platform.openai.com/docs/api-reference/chat
+    """
+
+    def __init__(self, provider: str, base_url: str):
+        super().__init__(provider=provider, base_url=base_url, task="conversational")
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return "/v1/chat/completions"
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        return {"messages": inputs, **filter_none(parameters), "model": mapped_model}
+
+
+class BaseTextGenerationTask(TaskProviderHelper):
+    """
+    Base class for text-generation (completion) tasks.
+    The schema follows the OpenAI API format defined here: https://platform.openai.com/docs/api-reference/completions
+    """
+
+    def __init__(self, provider: str, base_url: str):
+        super().__init__(provider=provider, base_url=base_url, task="text-generation")
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return "/v1/completions"
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        return {"prompt": inputs, **filter_none(parameters), "model": mapped_model}
+
+
+@lru_cache(maxsize=None)
+def _fetch_inference_provider_mapping(model: str) -> Dict:
+    """
+    Fetch provider mappings for a model from the Hub.
+    """
+    from huggingface_hub.hf_api import HfApi
+
+    info = HfApi().model_info(model, expand=["inferenceProviderMapping"])
+    provider_mapping = info.inference_provider_mapping
+    if provider_mapping is None:
+        raise ValueError(f"No provider mapping found for model {model}")
+    return provider_mapping
+
+
+def recursive_merge(dict1: Dict, dict2: Dict) -> Dict:
+    return {
+        **dict1,
+        **{
+            key: recursive_merge(dict1[key], value)
+            if (key in dict1 and isinstance(dict1[key], dict) and isinstance(value, dict))
+            else value
+            for key, value in dict2.items()
+        },
+    }
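A small sketch of recursive_merge, which prepare_request above uses to fold extra_payload into the JSON payload (the values below are made up for illustration):

from huggingface_hub.inference._providers._common import recursive_merge

payload = {"prompt": "a cat", "image_size": {"width": 512, "height": 512}}
extra = {"image_size": {"height": 768}, "seed": 42}
print(recursive_merge(payload, extra))
# {'prompt': 'a cat', 'image_size': {'width': 512, 'height': 768}, 'seed': 42}
# Nested dicts are merged key by key; for plain values, the second dict wins on conflicts.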
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/black_forest_labs.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/black_forest_labs.py
new file mode 100644
index 00000000..14d8eb3d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/black_forest_labs.py
@@ -0,0 +1,66 @@
+import time
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import _as_dict
+from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
+from huggingface_hub.utils import logging
+from huggingface_hub.utils._http import get_session
+
+
+logger = logging.get_logger(__name__)
+
+MAX_POLLING_ATTEMPTS = 6
+POLLING_INTERVAL = 1.0
+
+
+class BlackForestLabsTextToImageTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(provider="black-forest-labs", base_url="https://api.us1.bfl.ai/v1", task="text-to-image")
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
+        headers = super()._prepare_headers(headers, api_key)
+        if not api_key.startswith("hf_"):
+            _ = headers.pop("authorization")
+            headers["X-Key"] = api_key
+        return headers
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return mapped_model
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        if "num_inference_steps" in parameters:
+            parameters["steps"] = parameters.pop("num_inference_steps")
+        if "guidance_scale" in parameters:
+            parameters["guidance"] = parameters.pop("guidance_scale")
+
+        return {"prompt": inputs, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        """
+        Polling mechanism for Black Forest Labs since the API is asynchronous.
+        """
+        url = _as_dict(response).get("polling_url")
+        session = get_session()
+        for _ in range(MAX_POLLING_ATTEMPTS):
+            time.sleep(POLLING_INTERVAL)
+
+            response = session.get(url, headers={"Content-Type": "application/json"})  # type: ignore
+            response.raise_for_status()  # type: ignore
+            response_json: Dict = response.json()  # type: ignore
+            status = response_json.get("status")
+            logger.info(
+                f"Polling generation result from {url}. Current status: {status}. "
+                f"Will retry after {POLLING_INTERVAL} seconds if not ready."
+            )
+
+            if (
+                status == "Ready"
+                and isinstance(response_json.get("result"), dict)
+                and (sample_url := response_json["result"].get("sample"))
+            ):
+                image_resp = session.get(sample_url)
+                image_resp.raise_for_status()
+                return image_resp.content
+
+        raise TimeoutError(f"Failed to get the image URL after {MAX_POLLING_ATTEMPTS} attempts.")
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cerebras.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cerebras.py
new file mode 100644
index 00000000..12b18158
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cerebras.py
@@ -0,0 +1,6 @@
+from huggingface_hub.inference._providers._common import BaseConversationalTask
+
+
+class CerebrasConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="cerebras", base_url="https://api.cerebras.ai")
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cohere.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cohere.py
new file mode 100644
index 00000000..2fbe6fbf
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/cohere.py
@@ -0,0 +1,15 @@
+from huggingface_hub.inference._providers._common import (
+    BaseConversationalTask,
+)
+
+
+_PROVIDER = "cohere"
+_BASE_URL = "https://api.cohere.com"
+
+
+class CohereConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return "/compatibility/v1/chat/completions"
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fal_ai.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fal_ai.py
new file mode 100644
index 00000000..e6d2e4bd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fal_ai.py
@@ -0,0 +1,90 @@
+import base64
+from abc import ABC
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import _as_dict
+from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
+from huggingface_hub.utils import get_session
+
+
+class FalAITask(TaskProviderHelper, ABC):
+    def __init__(self, task: str):
+        super().__init__(provider="fal-ai", base_url="https://fal.run", task=task)
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
+        headers = super()._prepare_headers(headers, api_key)
+        if not api_key.startswith("hf_"):
+            headers["authorization"] = f"Key {api_key}"
+        return headers
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return f"/{mapped_model}"
+
+
+class FalAIAutomaticSpeechRecognitionTask(FalAITask):
+    def __init__(self):
+        super().__init__("automatic-speech-recognition")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        if isinstance(inputs, str) and inputs.startswith(("http://", "https://")):
+            # If input is a URL, pass it directly
+            audio_url = inputs
+        else:
+            # If input is a file path, read it first
+            if isinstance(inputs, str):
+                with open(inputs, "rb") as f:
+                    inputs = f.read()
+
+            audio_b64 = base64.b64encode(inputs).decode()
+            content_type = "audio/mpeg"
+            audio_url = f"data:{content_type};base64,{audio_b64}"
+
+        return {"audio_url": audio_url, **filter_none(parameters)}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        text = _as_dict(response)["text"]
+        if not isinstance(text, str):
+            raise ValueError(f"Unexpected output format from FalAI API. Expected string, got {type(text)}.")
+        return text
+
+
+class FalAITextToImageTask(FalAITask):
+    def __init__(self):
+        super().__init__("text-to-image")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        if "width" in parameters and "height" in parameters:
+            parameters["image_size"] = {
+                "width": parameters.pop("width"),
+                "height": parameters.pop("height"),
+            }
+        return {"prompt": inputs, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        url = _as_dict(response)["images"][0]["url"]
+        return get_session().get(url).content
+
+
+class FalAITextToSpeechTask(FalAITask):
+    def __init__(self):
+        super().__init__("text-to-speech")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        return {"lyrics": inputs, **filter_none(parameters)}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        url = _as_dict(response)["audio"]["url"]
+        return get_session().get(url).content
+
+
+class FalAITextToVideoTask(FalAITask):
+    def __init__(self):
+        super().__init__("text-to-video")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        return {"prompt": inputs, **filter_none(parameters)}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        url = _as_dict(response)["video"]["url"]
+        return get_session().get(url).content
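A sketch of the payload built by FalAIAutomaticSpeechRecognitionTask above (the mapped model ID and audio bytes are placeholders; the mapped model is not used by this method, and the content type is hardcoded to audio/mpeg as in the code):

from huggingface_hub.inference._providers.fal_ai import FalAIAutomaticSpeechRecognitionTask

helper = FalAIAutomaticSpeechRecognitionTask()

# URLs are passed through untouched:
print(helper._prepare_payload_as_dict("https://example.com/sample.mp3", {}, "<mapped-model>"))
# {'audio_url': 'https://example.com/sample.mp3'}

# Raw bytes are embedded as a base64 data URL:
print(helper._prepare_payload_as_dict(b"\x00\x01", {}, "<mapped-model>"))
# {'audio_url': 'data:audio/mpeg;base64,AAE='}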
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fireworks_ai.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fireworks_ai.py
new file mode 100644
index 00000000..bac95c29
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/fireworks_ai.py
@@ -0,0 +1,6 @@
+from ._common import BaseConversationalTask
+
+
+class FireworksAIConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="fireworks-ai", base_url="https://api.fireworks.ai/inference")
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hf_inference.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hf_inference.py
new file mode 100644
index 00000000..2377f91b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hf_inference.py
@@ -0,0 +1,122 @@
+import json
+from functools import lru_cache
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from huggingface_hub import constants
+from huggingface_hub.inference._common import _b64_encode, _open_as_binary
+from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
+from huggingface_hub.utils import build_hf_headers, get_session, get_token, hf_raise_for_status
+
+
+class HFInferenceTask(TaskProviderHelper):
+    """Base class for HF Inference API tasks."""
+
+    def __init__(self, task: str):
+        super().__init__(
+            provider="hf-inference",
+            base_url=constants.INFERENCE_PROXY_TEMPLATE.format(provider="hf-inference"),
+            task=task,
+        )
+
+    def _prepare_api_key(self, api_key: Optional[str]) -> str:
+        # special case: for HF Inference we allow not providing an API key
+        return api_key or get_token()  # type: ignore[return-value]
+
+    def _prepare_mapped_model(self, model: Optional[str]) -> str:
+        if model is not None:
+            return model
+        model = _fetch_recommended_models().get(self.task)
+        if model is None:
+            raise ValueError(
+                f"Task {self.task} has no recommended model for HF Inference. Please specify a model"
+                " explicitly. Visit https://huggingface.co/tasks for more info."
+            )
+        return model
+
+    def _prepare_url(self, api_key: str, mapped_model: str) -> str:
+        # hf-inference provider can handle URLs (e.g. Inference Endpoints or TGI deployment)
+        if mapped_model.startswith(("http://", "https://")):
+            return mapped_model
+        return (
+            # Feature-extraction and sentence-similarity are the only cases where we handle models with several tasks.
+            f"{self.base_url}/pipeline/{self.task}/{mapped_model}"
+            if self.task in ("feature-extraction", "sentence-similarity")
+            # Otherwise, we use the default endpoint
+            else f"{self.base_url}/models/{mapped_model}"
+        )
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        if isinstance(inputs, bytes):
+            raise ValueError(f"Unexpected binary input for task {self.task}.")
+        if isinstance(inputs, Path):
+            raise ValueError(f"Unexpected path input for task {self.task} (got {inputs})")
+        return {"inputs": inputs, "parameters": filter_none(parameters)}
+
+
+class HFInferenceBinaryInputTask(HFInferenceTask):
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        return None
+
+    def _prepare_payload_as_bytes(
+        self, inputs: Any, parameters: Dict, mapped_model: str, extra_payload: Optional[Dict]
+    ) -> Optional[bytes]:
+        parameters = filter_none(parameters)
+        extra_payload = extra_payload or {}
+        has_parameters = len(parameters) > 0 or len(extra_payload) > 0
+
+        # Raise if not a binary object or a local path or a URL.
+        if not isinstance(inputs, (bytes, Path)) and not isinstance(inputs, str):
+            raise ValueError(f"Expected binary inputs or a local path or a URL. Got {inputs}")
+
+        # Send inputs as raw content when no parameters are provided
+        if not has_parameters:
+            with _open_as_binary(inputs) as data:
+                data_as_bytes = data if isinstance(data, bytes) else data.read()
+                return data_as_bytes
+
+        # Otherwise encode as b64
+        return json.dumps({"inputs": _b64_encode(inputs), "parameters": parameters, **extra_payload}).encode("utf-8")
+
+
+class HFInferenceConversational(HFInferenceTask):
+    def __init__(self):
+        super().__init__("text-generation")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        payload_model = parameters.get("model") or mapped_model
+
+        if payload_model is None or payload_model.startswith(("http://", "https://")):
+            payload_model = "dummy"
+
+        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+
+    def _prepare_url(self, api_key: str, mapped_model: str) -> str:
+        base_url = (
+            mapped_model
+            if mapped_model.startswith(("http://", "https://"))
+            else f"{constants.INFERENCE_PROXY_TEMPLATE.format(provider='hf-inference')}/models/{mapped_model}"
+        )
+        return _build_chat_completion_url(base_url)
+
+
+def _build_chat_completion_url(model_url: str) -> str:
+    # Strip trailing /
+    model_url = model_url.rstrip("/")
+
+    # If the URL already ends with /v1, only append /chat/completions
+    if model_url.endswith("/v1"):
+        model_url += "/chat/completions"
+
+    # Append /v1/chat/completions if not already present
+    if not model_url.endswith("/chat/completions"):
+        model_url += "/v1/chat/completions"
+
+    return model_url
+
+
+@lru_cache(maxsize=1)
+def _fetch_recommended_models() -> Dict[str, Optional[str]]:
+    response = get_session().get(f"{constants.ENDPOINT}/api/tasks", headers=build_hf_headers())
+    hf_raise_for_status(response)
+    return {task: next(iter(details["widgetModels"]), None) for task, details in response.json().items()}
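A sketch of the URL normalization performed by _build_chat_completion_url above (the endpoint URL is a placeholder):

from huggingface_hub.inference._providers.hf_inference import _build_chat_completion_url

print(_build_chat_completion_url("https://my-endpoint.example"))
# https://my-endpoint.example/v1/chat/completions
print(_build_chat_completion_url("https://my-endpoint.example/v1"))
# https://my-endpoint.example/v1/chat/completions
print(_build_chat_completion_url("https://my-endpoint.example/v1/chat/completions"))
# https://my-endpoint.example/v1/chat/completions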
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hyperbolic.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hyperbolic.py
new file mode 100644
index 00000000..919a3818
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/hyperbolic.py
@@ -0,0 +1,43 @@
+import base64
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import _as_dict
+from huggingface_hub.inference._providers._common import BaseConversationalTask, TaskProviderHelper, filter_none
+
+
+class HyperbolicTextToImageTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(provider="hyperbolic", base_url="https://api.hyperbolic.xyz", task="text-to-image")
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return "/v1/images/generations"
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        if "num_inference_steps" in parameters:
+            parameters["steps"] = parameters.pop("num_inference_steps")
+        if "guidance_scale" in parameters:
+            parameters["cfg_scale"] = parameters.pop("guidance_scale")
+        # For Hyperbolic, the width and height are required parameters
+        if "width" not in parameters:
+            parameters["width"] = 512
+        if "height" not in parameters:
+            parameters["height"] = 512
+        return {"prompt": inputs, "model_name": mapped_model, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        response_dict = _as_dict(response)
+        return base64.b64decode(response_dict["images"][0]["image"])
+
+
+class HyperbolicTextGenerationTask(BaseConversationalTask):
+    """
+    Special case for Hyperbolic, where the text-generation task is handled as a conversational task.
+    """
+
+    def __init__(self, task: str):
+        super().__init__(
+            provider="hyperbolic",
+            base_url="https://api.hyperbolic.xyz",
+        )
+        self.task = task
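A sketch showing that Hyperbolic's text-generation helper reuses the chat-completion route and payload inherited from BaseConversationalTask (the model ID is a placeholder):

from huggingface_hub.inference._providers.hyperbolic import HyperbolicTextGenerationTask

helper = HyperbolicTextGenerationTask("text-generation")
print(helper.task)                               # text-generation
print(helper._prepare_route("owner/model"))      # /v1/chat/completions
print(helper._prepare_payload_as_dict([{"role": "user", "content": "Hi"}], {}, "owner/model"))
# {'messages': [{'role': 'user', 'content': 'Hi'}], 'model': 'owner/model'}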
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/nebius.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/nebius.py
new file mode 100644
index 00000000..d6b37356
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/nebius.py
@@ -0,0 +1,41 @@
+import base64
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import _as_dict
+from huggingface_hub.inference._providers._common import (
+    BaseConversationalTask,
+    BaseTextGenerationTask,
+    TaskProviderHelper,
+    filter_none,
+)
+
+
+class NebiusTextGenerationTask(BaseTextGenerationTask):
+    def __init__(self):
+        super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")
+
+
+class NebiusConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")
+
+
+class NebiusTextToImageTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(task="text-to-image", provider="nebius", base_url="https://api.studio.nebius.ai")
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        return "/v1/images/generations"
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        if "guidance_scale" in parameters:
+            parameters.pop("guidance_scale")
+        if parameters.get("response_format") not in ("b64_json", "url"):
+            parameters["response_format"] = "b64_json"
+
+        return {"prompt": inputs, **parameters, "model": mapped_model}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        response_dict = _as_dict(response)
+        return base64.b64decode(response_dict["data"][0]["b64_json"])
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/novita.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/novita.py
new file mode 100644
index 00000000..3fc836a3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/novita.py
@@ -0,0 +1,26 @@
+from huggingface_hub.inference._providers._common import (
+    BaseConversationalTask,
+    BaseTextGenerationTask,
+)
+
+
+_PROVIDER = "novita"
+_BASE_URL = "https://api.novita.ai/v3/openai"
+
+
+class NovitaTextGenerationTask(BaseTextGenerationTask):
+    def __init__(self):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        # there is no v1/ route for novita
+        return "/completions"
+
+
+class NovitaConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        # there is no v1/ route for novita
+        return "/chat/completions"
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/replicate.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/replicate.py
new file mode 100644
index 00000000..dc84f69f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/replicate.py
@@ -0,0 +1,53 @@
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import _as_dict
+from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
+from huggingface_hub.utils import get_session
+
+
+_PROVIDER = "replicate"
+_BASE_URL = "https://api.replicate.com"
+
+
+class ReplicateTask(TaskProviderHelper):
+    def __init__(self, task: str):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL, task=task)
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
+        headers = super()._prepare_headers(headers, api_key)
+        headers["Prefer"] = "wait"
+        return headers
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        if ":" in mapped_model:
+            return "/v1/predictions"
+        return f"/v1/models/{mapped_model}/predictions"
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        payload: Dict[str, Any] = {"input": {"prompt": inputs, **filter_none(parameters)}}
+        if ":" in mapped_model:
+            version = mapped_model.split(":", 1)[1]
+            payload["version"] = version
+        return payload
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        response_dict = _as_dict(response)
+        if response_dict.get("output") is None:
+            raise TimeoutError(
+                f"Inference request timed out after 60 seconds. No output generated for model {response_dict.get('model')}. "
+                "The model might be in a cold state or starting up. Please try again later."
+            )
+        output_url = (
+            response_dict["output"] if isinstance(response_dict["output"], str) else response_dict["output"][0]
+        )
+        return get_session().get(output_url).content
+
+
+class ReplicateTextToSpeechTask(ReplicateTask):
+    def __init__(self):
+        super().__init__("text-to-speech")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        payload: Dict = super()._prepare_payload_as_dict(inputs, parameters, mapped_model)  # type: ignore[assignment]
+        payload["input"]["text"] = payload["input"].pop("prompt")  # rename "prompt" to "text" for TTS
+        return payload
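A sketch of how ReplicateTask above selects its route and payload depending on whether the mapped model ID pins a version with ":" (model IDs are placeholders):

from huggingface_hub.inference._providers.replicate import ReplicateTask

task = ReplicateTask("text-to-image")

print(task._prepare_route("owner/model"))
# /v1/models/owner/model/predictions
print(task._prepare_payload_as_dict("a cat", {}, "owner/model"))
# {'input': {'prompt': 'a cat'}}

print(task._prepare_route("owner/model:abc123"))
# /v1/predictions
print(task._prepare_payload_as_dict("a cat", {}, "owner/model:abc123"))
# {'input': {'prompt': 'a cat'}, 'version': 'abc123'}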
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/sambanova.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/sambanova.py
new file mode 100644
index 00000000..3678e942
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/sambanova.py
@@ -0,0 +1,6 @@
+from huggingface_hub.inference._providers._common import BaseConversationalTask
+
+
+class SambanovaConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="sambanova", base_url="https://api.sambanova.ai")
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/together.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/together.py
new file mode 100644
index 00000000..6e2c1eb4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_providers/together.py
@@ -0,0 +1,59 @@
+import base64
+from abc import ABC
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import _as_dict
+from huggingface_hub.inference._providers._common import (
+    BaseConversationalTask,
+    BaseTextGenerationTask,
+    TaskProviderHelper,
+    filter_none,
+)
+
+
+_PROVIDER = "together"
+_BASE_URL = "https://api.together.xyz"
+
+
+class TogetherTask(TaskProviderHelper, ABC):
+    """Base class for Together API tasks."""
+
+    def __init__(self, task: str):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL, task=task)
+
+    def _prepare_route(self, mapped_model: str) -> str:
+        if self.task == "text-to-image":
+            return "/v1/images/generations"
+        elif self.task == "conversational":
+            return "/v1/chat/completions"
+        elif self.task == "text-generation":
+            return "/v1/completions"
+        raise ValueError(f"Unsupported task '{self.task}' for Together API.")
+
+
+class TogetherTextGenerationTask(BaseTextGenerationTask):
+    def __init__(self):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+
+class TogetherConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+
+class TogetherTextToImageTask(TogetherTask):
+    def __init__(self):
+        super().__init__("text-to-image")
+
+    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        if "num_inference_steps" in parameters:
+            parameters["steps"] = parameters.pop("num_inference_steps")
+        if "guidance_scale" in parameters:
+            parameters["guidance"] = parameters.pop("guidance_scale")
+
+        return {"prompt": inputs, "response_format": "base64", **parameters, "model": mapped_model}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        response_dict = _as_dict(response)
+        return base64.b64decode(response_dict["data"][0]["b64_json"])
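A sketch of the parameter mapping done by TogetherTextToImageTask above (the model ID and values are placeholders; None-valued parameters are dropped by filter_none):

from huggingface_hub.inference._providers.together import TogetherTextToImageTask

task = TogetherTextToImageTask()
print(task._prepare_payload_as_dict(
    "a cat", {"num_inference_steps": 4, "guidance_scale": 7.5, "seed": None}, "owner/model"
))
# {'prompt': 'a cat', 'response_format': 'base64', 'steps': 4, 'guidance': 7.5, 'model': 'owner/model'}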