Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated')
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/__init__.py  0
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py  3628
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__init__.py  187
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_classification.py  43
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_to_audio.py  30
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py  114
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/base.py  161
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/chat_completion.py  301
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/depth_estimation.py  28
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/document_question_answering.py  80
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/feature_extraction.py  36
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/fill_mask.py  47
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_classification.py  43
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_segmentation.py  51
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_image.py  54
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_text.py  101
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/object_detection.py  58
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/question_answering.py  74
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/sentence_similarity.py  27
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/summarization.py  41
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/table_question_answering.py  62
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text2text_generation.py  42
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_classification.py  41
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_generation.py  168
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_audio.py  100
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_image.py  50
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_speech.py  100
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_video.py  46
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/token_classification.py  51
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/translation.py  49
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/video_classification.py  45
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/visual_question_answering.py  49
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_classification.py  45
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py  40
-rw-r--r--  .venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py  52
35 files changed, 6044 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/__init__.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py
new file mode 100644
index 00000000..df7bf4ea
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/_async_client.py
@@ -0,0 +1,3628 @@
+# coding=utf-8
+# Copyright 2023-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# WARNING
+# This entire file has been adapted from the sync-client code in `src/huggingface_hub/inference/_client.py`.
+# Any change in InferenceClient will be automatically reflected in AsyncInferenceClient.
+# To re-generate the code, run `make style` or `python ./utils/generate_async_inference_client.py --update`.
+# WARNING
+import asyncio
+import base64
+import logging
+import re
+import warnings
+from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload
+
+from huggingface_hub import constants
+from huggingface_hub.errors import InferenceTimeoutError
+from huggingface_hub.inference._common import (
+ TASKS_EXPECTING_IMAGES,
+ ContentT,
+ ModelStatus,
+ RequestParameters,
+ _async_stream_chat_completion_response,
+ _async_stream_text_generation_response,
+ _b64_encode,
+ _b64_to_image,
+ _bytes_to_dict,
+ _bytes_to_image,
+ _bytes_to_list,
+ _get_unsupported_text_generation_kwargs,
+ _import_numpy,
+ _open_as_binary,
+ _set_unsupported_text_generation_kwargs,
+ raise_text_generation_error,
+)
+from huggingface_hub.inference._generated.types import (
+ AudioClassificationOutputElement,
+ AudioClassificationOutputTransform,
+ AudioToAudioOutputElement,
+ AutomaticSpeechRecognitionOutput,
+ ChatCompletionInputGrammarType,
+ ChatCompletionInputStreamOptions,
+ ChatCompletionInputTool,
+ ChatCompletionInputToolChoiceClass,
+ ChatCompletionInputToolChoiceEnum,
+ ChatCompletionOutput,
+ ChatCompletionStreamOutput,
+ DocumentQuestionAnsweringOutputElement,
+ FillMaskOutputElement,
+ ImageClassificationOutputElement,
+ ImageClassificationOutputTransform,
+ ImageSegmentationOutputElement,
+ ImageSegmentationSubtask,
+ ImageToImageTargetSize,
+ ImageToTextOutput,
+ ObjectDetectionOutputElement,
+ Padding,
+ QuestionAnsweringOutputElement,
+ SummarizationOutput,
+ SummarizationTruncationStrategy,
+ TableQuestionAnsweringOutputElement,
+ TextClassificationOutputElement,
+ TextClassificationOutputTransform,
+ TextGenerationInputGrammarType,
+ TextGenerationOutput,
+ TextGenerationStreamOutput,
+ TextToSpeechEarlyStoppingEnum,
+ TokenClassificationAggregationStrategy,
+ TokenClassificationOutputElement,
+ TranslationOutput,
+ TranslationTruncationStrategy,
+ VisualQuestionAnsweringOutputElement,
+ ZeroShotClassificationOutputElement,
+ ZeroShotImageClassificationOutputElement,
+)
+from huggingface_hub.inference._providers import PROVIDER_T, HFInferenceTask, get_provider_helper
+from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+from huggingface_hub.utils._deprecation import _deprecate_arguments, _deprecate_method
+
+from .._common import _async_yield_from, _import_aiohttp
+
+
+if TYPE_CHECKING:
+ import numpy as np
+ from aiohttp import ClientResponse, ClientSession
+ from PIL.Image import Image
+
+logger = logging.getLogger(__name__)
+
+
+MODEL_KWARGS_NOT_USED_REGEX = re.compile(r"The following `model_kwargs` are not used by the model: \[(.*?)\]")
+
+
+class AsyncInferenceClient:
+ """
+ Initialize a new Inference Client.
+
+ [`InferenceClient`] aims to provide a unified experience to perform inference. The client can be used
+ seamlessly with either the (free) Inference API, self-hosted Inference Endpoints, or third-party Inference Providers.
+
+ Args:
+ model (`str`, `optional`):
+ The model to run inference with. Can be a model id hosted on the Hugging Face Hub, e.g. `meta-llama/Meta-Llama-3-8B-Instruct`
+ or a URL to a deployed Inference Endpoint. Defaults to None, in which case a recommended model is
+ automatically selected for the task.
+ Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
+ arguments are mutually exclusive. If using `base_url` for chat completion, the `/chat/completions` suffix
+ path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
+ documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
+ provider (`str`, *optional*):
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"`, or `"together"`.
+            Defaults to `"hf-inference"` (Hugging Face Serverless Inference API).
+ If model is a URL or `base_url` is passed, then `provider` is not used.
+ token (`str` or `bool`, *optional*):
+ Hugging Face token. Will default to the locally saved token if not provided.
+ Pass `token=False` if you don't want to send your token to the server.
+ Note: for better compatibility with OpenAI's client, `token` has been aliased as `api_key`. Those 2
+ arguments are mutually exclusive and have the exact same behavior.
+ timeout (`float`, `optional`):
+ The maximum number of seconds to wait for a response from the server. Loading a new model in Inference
+ API can take up to several minutes. Defaults to None, meaning it will loop until the server is available.
+ headers (`Dict[str, str]`, `optional`):
+ Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
+ Values in this dictionary will override the default values.
+ cookies (`Dict[str, str]`, `optional`):
+ Additional cookies to send to the server.
+        trust_env (`bool`, `optional`):
+ Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
+ proxies (`Any`, `optional`):
+ Proxies to use for the request.
+ base_url (`str`, `optional`):
+ Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
+ follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
+ api_key (`str`, `optional`):
+ Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClient`]
+ follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
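+
+    Example (a minimal sketch; the token value and endpoint URL below are placeholders):
+    ```py
+    >>> from huggingface_hub import AsyncInferenceClient
+    >>> client = AsyncInferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct", token="hf_***")
+    >>> # or, using the OpenAI-style aliases (mutually exclusive with `model` / `token`):
+    >>> client = AsyncInferenceClient(base_url="https://my-endpoint.example/v1/", api_key="hf_***")
+    ```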
+ """
+
+ def __init__(
+ self,
+ model: Optional[str] = None,
+ *,
+ provider: Optional[PROVIDER_T] = None,
+ token: Optional[str] = None,
+ timeout: Optional[float] = None,
+ headers: Optional[Dict[str, str]] = None,
+ cookies: Optional[Dict[str, str]] = None,
+ trust_env: bool = False,
+ proxies: Optional[Any] = None,
+ # OpenAI compatibility
+ base_url: Optional[str] = None,
+ api_key: Optional[str] = None,
+ ) -> None:
+ if model is not None and base_url is not None:
+ raise ValueError(
+ "Received both `model` and `base_url` arguments. Please provide only one of them."
+ " `base_url` is an alias for `model` to make the API compatible with OpenAI's client."
+ " If using `base_url` for chat completion, the `/chat/completions` suffix path will be appended to the base url."
+ " When passing a URL as `model`, the client will not append any suffix path to it."
+ )
+ if token is not None and api_key is not None:
+ raise ValueError(
+ "Received both `token` and `api_key` arguments. Please provide only one of them."
+ " `api_key` is an alias for `token` to make the API compatible with OpenAI's client."
+ " It has the exact same behavior as `token`."
+ )
+
+ self.model: Optional[str] = base_url or model
+ self.token: Optional[str] = token if token is not None else api_key
+ self.headers = headers if headers is not None else {}
+
+ # Configure provider
+ self.provider = provider if provider is not None else "hf-inference"
+
+ self.cookies = cookies
+ self.timeout = timeout
+ self.trust_env = trust_env
+ self.proxies = proxies
+
+ # Keep track of the sessions to close them properly
+ self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()
+
+ def __repr__(self):
+ return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
+
+ @overload
+ async def post( # type: ignore[misc]
+ self,
+ *,
+ json: Optional[Union[str, Dict, List]] = None,
+ data: Optional[ContentT] = None,
+ model: Optional[str] = None,
+ task: Optional[str] = None,
+ stream: Literal[False] = ...,
+ ) -> bytes: ...
+
+ @overload
+ async def post( # type: ignore[misc]
+ self,
+ *,
+ json: Optional[Union[str, Dict, List]] = None,
+ data: Optional[ContentT] = None,
+ model: Optional[str] = None,
+ task: Optional[str] = None,
+ stream: Literal[True] = ...,
+ ) -> AsyncIterable[bytes]: ...
+
+ @overload
+ async def post(
+ self,
+ *,
+ json: Optional[Union[str, Dict, List]] = None,
+ data: Optional[ContentT] = None,
+ model: Optional[str] = None,
+ task: Optional[str] = None,
+ stream: bool = False,
+ ) -> Union[bytes, AsyncIterable[bytes]]: ...
+
+ @_deprecate_method(
+ version="0.31.0",
+ message=(
+ "Making direct POST requests to the inference server is not supported anymore. "
+ "Please use task methods instead (e.g. `InferenceClient.chat_completion`). "
+ "If your use case is not supported, please open an issue in https://github.com/huggingface/huggingface_hub."
+ ),
+ )
+ async def post(
+ self,
+ *,
+ json: Optional[Union[str, Dict, List]] = None,
+ data: Optional[ContentT] = None,
+ model: Optional[str] = None,
+ task: Optional[str] = None,
+ stream: bool = False,
+ ) -> Union[bytes, AsyncIterable[bytes]]:
+ """
+ Make a POST request to the inference server.
+
+ This method is deprecated and will be removed in the future.
+ Please use task methods instead (e.g. `InferenceClient.chat_completion`).
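+
+        Example (a minimal sketch of the recommended replacement, here `chat_completion`):
+        ```py
+        # Must be run in an async context
+        >>> from huggingface_hub import AsyncInferenceClient
+        >>> client = AsyncInferenceClient()
+        >>> await client.chat_completion([{"role": "user", "content": "Hello!"}], max_tokens=20)
+        ```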
+ """
+ if self.provider != "hf-inference":
+ raise ValueError(
+ "Cannot use `post` with another provider than `hf-inference`. "
+ "`InferenceClient.post` is deprecated and should not be used directly anymore."
+ )
+ provider_helper = HFInferenceTask(task or "unknown")
+ mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+ url = provider_helper._prepare_url(self.token, mapped_model) # type: ignore[arg-type]
+ headers = provider_helper._prepare_headers(self.headers, self.token) # type: ignore[arg-type]
+ return await self._inner_post(
+ request_parameters=RequestParameters(
+ url=url,
+ task=task or "unknown",
+ model=model or "unknown",
+ json=json,
+ data=data,
+ headers=headers,
+ ),
+ stream=stream,
+ )
+
+ @overload
+ async def _inner_post( # type: ignore[misc]
+ self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
+ ) -> bytes: ...
+
+ @overload
+ async def _inner_post( # type: ignore[misc]
+ self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
+ ) -> AsyncIterable[bytes]: ...
+
+ @overload
+ async def _inner_post(
+ self, request_parameters: RequestParameters, *, stream: bool = False
+ ) -> Union[bytes, AsyncIterable[bytes]]: ...
+
+ async def _inner_post(
+ self, request_parameters: RequestParameters, *, stream: bool = False
+ ) -> Union[bytes, AsyncIterable[bytes]]:
+ """Make a request to the inference server."""
+
+ aiohttp = _import_aiohttp()
+
+ # TODO: this should be handled in provider helpers directly
+ if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
+ request_parameters.headers["Accept"] = "image/png"
+
+ while True:
+ with _open_as_binary(request_parameters.data) as data_as_binary:
+ # Do not use context manager as we don't want to close the connection immediately when returning
+ # a stream
+ session = self._get_client_session(headers=request_parameters.headers)
+
+ try:
+ response = await session.post(
+ request_parameters.url, json=request_parameters.json, data=data_as_binary, proxy=self.proxies
+ )
+ response_error_payload = None
+ if response.status != 200:
+ try:
+ response_error_payload = await response.json() # get payload before connection closed
+ except Exception:
+ pass
+ response.raise_for_status()
+ if stream:
+ return _async_yield_from(session, response)
+ else:
+ content = await response.read()
+ await session.close()
+ return content
+ except asyncio.TimeoutError as error:
+ await session.close()
+                # Convert any `TimeoutError` to an `InferenceTimeoutError`
+ raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
+ except aiohttp.ClientResponseError as error:
+ error.response_error_payload = response_error_payload
+ await session.close()
+ raise error
+ except Exception:
+ await session.close()
+ raise
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, exc_type, exc_value, traceback):
+ await self.close()
+
+ def __del__(self):
+ if len(self._sessions) > 0:
+ warnings.warn(
+ "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
+ "This can happen if you've stopped streaming data from the server before the stream was complete. "
+ "To close the client properly, you must call `await client.close()` "
+ "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
+ )
+
+ async def close(self):
+ """Close all open sessions.
+
+ By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
+ are streaming data from the server and you stop before the stream is complete, you must call this method to
+ close the session properly.
+
+ Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
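+
+        Example (an illustrative sketch):
+        ```py
+        # Must be run in an async context
+        >>> from huggingface_hub import AsyncInferenceClient
+        >>> async with AsyncInferenceClient() as client:
+        ...     ...  # make calls here; any open sessions are closed when the block exits
+        ```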
+ """
+ await asyncio.gather(*[session.close() for session in self._sessions.keys()])
+
+ async def audio_classification(
+ self,
+ audio: ContentT,
+ *,
+ model: Optional[str] = None,
+ top_k: Optional[int] = None,
+ function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
+ ) -> List[AudioClassificationOutputElement]:
+ """
+ Perform audio classification on the provided audio content.
+
+ Args:
+ audio (Union[str, Path, bytes, BinaryIO]):
+ The audio content to classify. It can be raw audio bytes, a local audio file, or a URL pointing to an
+ audio file.
+ model (`str`, *optional*):
+ The model to use for audio classification. Can be a model ID hosted on the Hugging Face Hub
+ or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for
+ audio classification will be used.
+ top_k (`int`, *optional*):
+ When specified, limits the output to the top K most probable classes.
+ function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
+ The function to apply to the model outputs in order to retrieve the scores.
+
+ Returns:
+ `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.audio_classification("audio.flac")
+ [
+ AudioClassificationOutputElement(score=0.4976358711719513, label='hap'),
+ AudioClassificationOutputElement(score=0.3677836060523987, label='neu'),
+ ...
+ ]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="audio-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=audio,
+ parameters={"function_to_apply": function_to_apply, "top_k": top_k},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return AudioClassificationOutputElement.parse_obj_as_list(response)
+
+ async def audio_to_audio(
+ self,
+ audio: ContentT,
+ *,
+ model: Optional[str] = None,
+ ) -> List[AudioToAudioOutputElement]:
+ """
+        Performs multiple tasks related to audio-to-audio, depending on the model (e.g. speech enhancement, source separation).
+
+ Args:
+ audio (Union[str, Path, bytes, BinaryIO]):
+ The audio content for the model. It can be raw audio bytes, a local audio file, or a URL pointing to an
+ audio file.
+ model (`str`, *optional*):
+ The model can be any model which takes an audio file and returns another audio file. Can be a model ID hosted on the Hugging Face Hub
+ or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for
+ audio_to_audio will be used.
+
+ Returns:
+            `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing the audio label, content-type, and audio content as a blob.
+
+ Raises:
+ `InferenceTimeoutError`:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> audio_output = await client.audio_to_audio("audio.flac")
+        >>> for i, item in enumerate(audio_output):
+        ...     with open(f"output_{i}.flac", "wb") as f:
+        ...         f.write(item.blob)
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="audio-to-audio")
+ request_parameters = provider_helper.prepare_request(
+ inputs=audio,
+ parameters={},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ audio_output = AudioToAudioOutputElement.parse_obj_as_list(response)
+ for item in audio_output:
+ item.blob = base64.b64decode(item.blob)
+ return audio_output
+
+ async def automatic_speech_recognition(
+ self,
+ audio: ContentT,
+ *,
+ model: Optional[str] = None,
+ extra_body: Optional[Dict] = None,
+ ) -> AutomaticSpeechRecognitionOutput:
+ """
+ Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
+
+ Args:
+ audio (Union[str, Path, bytes, BinaryIO]):
+ The content to transcribe. It can be raw audio bytes, local audio file, or a URL to an audio file.
+ model (`str`, *optional*):
+ The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. If not provided, the default recommended model for ASR will be used.
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
+ Returns:
+ [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+        >>> (await client.automatic_speech_recognition("hello_world.flac")).text
+ "hello world"
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
+ request_parameters = provider_helper.prepare_request(
+ inputs=audio,
+ parameters={**(extra_body or {})},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return AutomaticSpeechRecognitionOutput.parse_obj_as_instance(response)
+
+ @overload
+ async def chat_completion( # type: ignore
+ self,
+ messages: List[Dict],
+ *,
+ model: Optional[str] = None,
+ stream: Literal[False] = False,
+ frequency_penalty: Optional[float] = None,
+ logit_bias: Optional[List[float]] = None,
+ logprobs: Optional[bool] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[float] = None,
+ response_format: Optional[ChatCompletionInputGrammarType] = None,
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stream_options: Optional[ChatCompletionInputStreamOptions] = None,
+ temperature: Optional[float] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
+ tool_prompt: Optional[str] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
+ top_logprobs: Optional[int] = None,
+ top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
+ ) -> ChatCompletionOutput: ...
+
+ @overload
+ async def chat_completion( # type: ignore
+ self,
+ messages: List[Dict],
+ *,
+ model: Optional[str] = None,
+ stream: Literal[True] = True,
+ frequency_penalty: Optional[float] = None,
+ logit_bias: Optional[List[float]] = None,
+ logprobs: Optional[bool] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[float] = None,
+ response_format: Optional[ChatCompletionInputGrammarType] = None,
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stream_options: Optional[ChatCompletionInputStreamOptions] = None,
+ temperature: Optional[float] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
+ tool_prompt: Optional[str] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
+ top_logprobs: Optional[int] = None,
+ top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
+ ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
+
+ @overload
+ async def chat_completion(
+ self,
+ messages: List[Dict],
+ *,
+ model: Optional[str] = None,
+ stream: bool = False,
+ frequency_penalty: Optional[float] = None,
+ logit_bias: Optional[List[float]] = None,
+ logprobs: Optional[bool] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[float] = None,
+ response_format: Optional[ChatCompletionInputGrammarType] = None,
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stream_options: Optional[ChatCompletionInputStreamOptions] = None,
+ temperature: Optional[float] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
+ tool_prompt: Optional[str] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
+ top_logprobs: Optional[int] = None,
+ top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
+ ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
+
+ async def chat_completion(
+ self,
+ messages: List[Dict],
+ *,
+ model: Optional[str] = None,
+ stream: bool = False,
+ # Parameters from ChatCompletionInput (handled manually)
+ frequency_penalty: Optional[float] = None,
+ logit_bias: Optional[List[float]] = None,
+ logprobs: Optional[bool] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ presence_penalty: Optional[float] = None,
+ response_format: Optional[ChatCompletionInputGrammarType] = None,
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stream_options: Optional[ChatCompletionInputStreamOptions] = None,
+ temperature: Optional[float] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
+ tool_prompt: Optional[str] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
+ top_logprobs: Optional[int] = None,
+ top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
+ ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
+ """
+ A method for completing conversations using a specified language model.
+
+ <Tip>
+
+ The `client.chat_completion` method is aliased as `client.chat.completions.create` for compatibility with OpenAI's client.
+ Inputs and outputs are strictly the same and using either syntax will yield the same results.
+ Check out the [Inference guide](https://huggingface.co/docs/huggingface_hub/guides/inference#openai-compatibility)
+ for more details about OpenAI's compatibility.
+
+ </Tip>
+
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
+ Args:
+ messages (List of [`ChatCompletionInputMessage`]):
+ Conversation history consisting of roles and content pairs.
+ model (`str`, *optional*):
+ The model to use for chat-completion. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. If not provided, the default recommended model for chat-based text-generation will be used.
+ See https://huggingface.co/tasks/text-generation for more details.
+ If `model` is a model ID, it is passed to the server as the `model` parameter. If you want to define a
+ custom URL while setting `model` in the request payload, you must set `base_url` when initializing [`InferenceClient`].
+ frequency_penalty (`float`, *optional*):
+ Penalizes new tokens based on their existing frequency
+ in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
+ logit_bias (`List[float]`, *optional*):
+ Adjusts the likelihood of specific tokens appearing in the generated output.
+ logprobs (`bool`, *optional*):
+ Whether to return log probabilities of the output tokens or not. If true, returns the log
+ probabilities of each output token returned in the content of message.
+ max_tokens (`int`, *optional*):
+ Maximum number of tokens allowed in the response. Defaults to 100.
+ n (`int`, *optional*):
+ The number of completions to generate for each prompt.
+ presence_penalty (`float`, *optional*):
+ Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the
+ text so far, increasing the model's likelihood to talk about new topics.
+ response_format ([`ChatCompletionInputGrammarType`], *optional*):
+ Grammar constraints. Can be either a JSONSchema or a regex.
+ seed (Optional[`int`], *optional*):
+ Seed for reproducible control flow. Defaults to None.
+ stop (`List[str]`, *optional*):
+ Up to four strings which trigger the end of the response.
+ Defaults to None.
+ stream (`bool`, *optional*):
+ Enable realtime streaming of responses. Defaults to False.
+ stream_options ([`ChatCompletionInputStreamOptions`], *optional*):
+ Options for streaming completions.
+ temperature (`float`, *optional*):
+ Controls randomness of the generations. Lower values ensure
+ less random completions. Range: [0, 2]. Defaults to 1.0.
+ top_logprobs (`int`, *optional*):
+ An integer between 0 and 5 specifying the number of most likely tokens to return at each token
+ position, each with an associated log probability. logprobs must be set to true if this parameter is
+ used.
+ top_p (`float`, *optional*):
+ Fraction of the most likely next words to sample from.
+ Must be between 0 and 1. Defaults to 1.0.
+ tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*):
+ The tool to use for the completion. Defaults to "auto".
+ tool_prompt (`str`, *optional*):
+ A prompt to be appended before the tools.
+ tools (List of [`ChatCompletionInputTool`], *optional*):
+ A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
+ provide a list of functions the model may generate JSON inputs for.
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
+ Returns:
+ [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
+ Generated text returned from the server:
+ - if `stream=False`, the generated text is returned as a [`ChatCompletionOutput`] (default).
+ - if `stream=True`, the generated text is returned token by token as a sequence of [`ChatCompletionStreamOutput`].
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> messages = [{"role": "user", "content": "What is the capital of France?"}]
+ >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+ >>> await client.chat_completion(messages, max_tokens=100)
+ ChatCompletionOutput(
+ choices=[
+ ChatCompletionOutputComplete(
+ finish_reason='eos_token',
+ index=0,
+ message=ChatCompletionOutputMessage(
+ role='assistant',
+ content='The capital of France is Paris.',
+ name=None,
+ tool_calls=None
+ ),
+ logprobs=None
+ )
+ ],
+ created=1719907176,
+ id='',
+ model='meta-llama/Meta-Llama-3-8B-Instruct',
+ object='text_completion',
+ system_fingerprint='2.0.4-sha-f426a33',
+ usage=ChatCompletionOutputUsage(
+ completion_tokens=8,
+ prompt_tokens=17,
+ total_tokens=25
+ )
+ )
+ ```
+
+ Example using streaming:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> messages = [{"role": "user", "content": "What is the capital of France?"}]
+ >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+ >>> async for token in await client.chat_completion(messages, max_tokens=10, stream=True):
+ ... print(token)
+ ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content='The', role='assistant'), index=0, finish_reason=None)], created=1710498504)
+ ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' capital', role='assistant'), index=0, finish_reason=None)], created=1710498504)
+ (...)
+ ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(content=' may', role='assistant'), index=0, finish_reason=None)], created=1710498504)
+ ```
+
+ Example using OpenAI's syntax:
+ ```py
+ # Must be run in an async context
+ # instead of `from openai import OpenAI`
+ from huggingface_hub import AsyncInferenceClient
+
+ # instead of `client = OpenAI(...)`
+ client = AsyncInferenceClient(
+ base_url=...,
+ api_key=...,
+ )
+
+ output = await client.chat.completions.create(
+ model="meta-llama/Meta-Llama-3-8B-Instruct",
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Count to 10"},
+ ],
+ stream=True,
+ max_tokens=1024,
+ )
+
+        async for chunk in output:
+ print(chunk.choices[0].delta.content)
+ ```
+
+ Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="together", # Use Together AI provider
+ ... api_key="<together_api_key>", # Pass your Together API key directly
+ ... )
+ >>> client.chat_completion(
+ ... model="meta-llama/Meta-Llama-3-8B-Instruct",
+ ... messages=[{"role": "user", "content": "What is the capital of France?"}],
+ ... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
+ ... )
+ ```
+
+ Example using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="sambanova", # Use Sambanova provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> client.chat_completion(
+ ... model="meta-llama/Meta-Llama-3-8B-Instruct",
+ ... messages=[{"role": "user", "content": "What is the capital of France?"}],
+ ... )
+ ```
+
+ Example using Image + Text as input:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+
+ # provide a remote URL
+        >>> image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+        # or a base64-encoded image
+        >>> import base64
+        >>> image_path = "/path/to/image.jpeg"
+ >>> with open(image_path, "rb") as f:
+ ... base64_image = base64.b64encode(f.read()).decode("utf-8")
+ >>> image_url = f"data:image/jpeg;base64,{base64_image}"
+
+ >>> client = AsyncInferenceClient("meta-llama/Llama-3.2-11B-Vision-Instruct")
+ >>> output = await client.chat.completions.create(
+ ... messages=[
+ ... {
+ ... "role": "user",
+ ... "content": [
+ ... {
+ ... "type": "image_url",
+ ... "image_url": {"url": image_url},
+ ... },
+ ... {
+ ... "type": "text",
+ ... "text": "Describe this image in one sentence.",
+ ... },
+ ... ],
+ ... },
+ ... ],
+ ... )
+ >>> output
+ The image depicts the iconic Statue of Liberty situated in New York Harbor, New York, on a clear day.
+ ```
+
+ Example using tools:
+ ```py
+ # Must be run in an async context
+ >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
+ >>> messages = [
+ ... {
+ ... "role": "system",
+ ... "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
+ ... },
+ ... {
+ ... "role": "user",
+ ... "content": "What's the weather like the next 3 days in San Francisco, CA?",
+ ... },
+ ... ]
+ >>> tools = [
+ ... {
+ ... "type": "function",
+ ... "function": {
+ ... "name": "get_current_weather",
+ ... "description": "Get the current weather",
+ ... "parameters": {
+ ... "type": "object",
+ ... "properties": {
+ ... "location": {
+ ... "type": "string",
+ ... "description": "The city and state, e.g. San Francisco, CA",
+ ... },
+ ... "format": {
+ ... "type": "string",
+ ... "enum": ["celsius", "fahrenheit"],
+ ... "description": "The temperature unit to use. Infer this from the users location.",
+ ... },
+ ... },
+ ... "required": ["location", "format"],
+ ... },
+ ... },
+ ... },
+ ... {
+ ... "type": "function",
+ ... "function": {
+ ... "name": "get_n_day_weather_forecast",
+ ... "description": "Get an N-day weather forecast",
+ ... "parameters": {
+ ... "type": "object",
+ ... "properties": {
+ ... "location": {
+ ... "type": "string",
+ ... "description": "The city and state, e.g. San Francisco, CA",
+ ... },
+ ... "format": {
+ ... "type": "string",
+ ... "enum": ["celsius", "fahrenheit"],
+ ... "description": "The temperature unit to use. Infer this from the users location.",
+ ... },
+ ... "num_days": {
+ ... "type": "integer",
+ ... "description": "The number of days to forecast",
+ ... },
+ ... },
+ ... "required": ["location", "format", "num_days"],
+ ... },
+ ... },
+ ... },
+ ... ]
+
+ >>> response = await client.chat_completion(
+ ... model="meta-llama/Meta-Llama-3-70B-Instruct",
+ ... messages=messages,
+ ... tools=tools,
+ ... tool_choice="auto",
+ ... max_tokens=500,
+ ... )
+ >>> response.choices[0].message.tool_calls[0].function
+ ChatCompletionOutputFunctionDefinition(
+ arguments={
+ 'location': 'San Francisco, CA',
+ 'format': 'fahrenheit',
+ 'num_days': 3
+ },
+ name='get_n_day_weather_forecast',
+ description=None
+ )
+ ```
+
+ Example using response_format:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
+ >>> messages = [
+ ... {
+ ... "role": "user",
+ ... "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I saw and when?",
+ ... },
+ ... ]
+ >>> response_format = {
+ ... "type": "json",
+ ... "value": {
+ ... "properties": {
+ ... "location": {"type": "string"},
+ ... "activity": {"type": "string"},
+ ... "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5},
+ ... "animals": {"type": "array", "items": {"type": "string"}},
+ ... },
+ ... "required": ["location", "activity", "animals_seen", "animals"],
+ ... },
+ ... }
+ >>> response = await client.chat_completion(
+ ... messages=messages,
+ ... response_format=response_format,
+ ... max_tokens=500,
+        ... )
+ >>> response.choices[0].message.content
+ '{\n\n"activity": "bike ride",\n"animals": ["puppy", "cat", "raccoon"],\n"animals_seen": 3,\n"location": "park"}'
+ ```
+ """
+ # Get the provider helper
+ provider_helper = get_provider_helper(self.provider, task="conversational")
+
+ # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently.
+ # `self.model` takes precedence over 'model' argument for building URL.
+ # `model` takes precedence for payload value.
+ model_id_or_url = self.model or model
+ payload_model = model or self.model
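+        # Illustrative example (hypothetical values): with AsyncInferenceClient(base_url="https://my-endpoint.example")
+        # and a call to chat_completion(..., model="my-org/my-model"), the request is sent to the base URL while the
+        # JSON payload carries "model": "my-org/my-model".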
+
+ # Prepare the payload
+ parameters = {
+ "model": payload_model,
+ "frequency_penalty": frequency_penalty,
+ "logit_bias": logit_bias,
+ "logprobs": logprobs,
+ "max_tokens": max_tokens,
+ "n": n,
+ "presence_penalty": presence_penalty,
+ "response_format": response_format,
+ "seed": seed,
+ "stop": stop,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tool_prompt": tool_prompt,
+ "tools": tools,
+ "top_logprobs": top_logprobs,
+ "top_p": top_p,
+ "stream": stream,
+ "stream_options": stream_options,
+ **(extra_body or {}),
+ }
+ request_parameters = provider_helper.prepare_request(
+ inputs=messages,
+ parameters=parameters,
+ headers=self.headers,
+ model=model_id_or_url,
+ api_key=self.token,
+ )
+ data = await self._inner_post(request_parameters, stream=stream)
+
+ if stream:
+ return _async_stream_chat_completion_response(data) # type: ignore[arg-type]
+
+ return ChatCompletionOutput.parse_obj_as_instance(data) # type: ignore[arg-type]
+
+ async def document_question_answering(
+ self,
+ image: ContentT,
+ question: str,
+ *,
+ model: Optional[str] = None,
+ doc_stride: Optional[int] = None,
+ handle_impossible_answer: Optional[bool] = None,
+ lang: Optional[str] = None,
+ max_answer_len: Optional[int] = None,
+ max_question_len: Optional[int] = None,
+ max_seq_len: Optional[int] = None,
+ top_k: Optional[int] = None,
+ word_boxes: Optional[List[Union[List[float], str]]] = None,
+ ) -> List[DocumentQuestionAnsweringOutputElement]:
+ """
+ Answer questions on document images.
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The input image for the context. It can be raw bytes, an image file, or a URL to an online image.
+ question (`str`):
+ Question to be answered.
+ model (`str`, *optional*):
+ The model to use for the document question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended document question answering model will be used.
+ Defaults to None.
+ doc_stride (`int`, *optional*):
+                If the words in the document are too long to fit with the question for the model, it will be split into
+ several chunks with some overlap. This argument controls the size of that overlap.
+ handle_impossible_answer (`bool`, *optional*):
+                Whether to accept impossible as an answer.
+ lang (`str`, *optional*):
+                Language to use while running OCR. Defaults to English.
+ max_answer_len (`int`, *optional*):
+ The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
+ max_question_len (`int`, *optional*):
+ The maximum length of the question after tokenization. It will be truncated if needed.
+ max_seq_len (`int`, *optional*):
+ The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+ model. The context will be split in several chunks (using doc_stride as overlap) if needed.
+ top_k (`int`, *optional*):
+                The number of answers to return (will be chosen by order of likelihood). Can return fewer than top_k
+ answers if there are not enough options available within the context.
+            word_boxes (`List[Union[List[float], str]]`, *optional*):
+ A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
+ step and use the provided bounding boxes instead.
+ Returns:
+ `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
+ [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
+ ```
+ """
+ inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+ provider_helper = get_provider_helper(self.provider, task="document-question-answering")
+ request_parameters = provider_helper.prepare_request(
+ inputs=inputs,
+ parameters={
+ "doc_stride": doc_stride,
+ "handle_impossible_answer": handle_impossible_answer,
+ "lang": lang,
+ "max_answer_len": max_answer_len,
+ "max_question_len": max_question_len,
+ "max_seq_len": max_seq_len,
+ "top_k": top_k,
+ "word_boxes": word_boxes,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return DocumentQuestionAnsweringOutputElement.parse_obj_as_list(response)
+
+ async def feature_extraction(
+ self,
+ text: str,
+ *,
+ normalize: Optional[bool] = None,
+ prompt_name: Optional[str] = None,
+ truncate: Optional[bool] = None,
+ truncation_direction: Optional[Literal["Left", "Right"]] = None,
+ model: Optional[str] = None,
+ ) -> "np.ndarray":
+ """
+ Generate embeddings for a given text.
+
+ Args:
+ text (`str`):
+ The text to embed.
+ model (`str`, *optional*):
+                The model to use for the feature extraction task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+                a deployed Inference Endpoint. If not provided, the default recommended feature extraction model will be used.
+ Defaults to None.
+ normalize (`bool`, *optional*):
+ Whether to normalize the embeddings or not.
+                Only available on a server powered by Text-Embedding-Inference.
+ prompt_name (`str`, *optional*):
+                The name of the prompt that should be used for encoding. If not set, no prompt will be applied.
+ Must be a key in the `Sentence Transformers` configuration `prompts` dictionary.
+                For example, if ``prompt_name`` is "query" and the ``prompts`` dictionary is {"query": "query: ", ...},
+                then the sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
+                because the prompt text is prepended to any text to encode.
+ truncate (`bool`, *optional*):
+                Whether to truncate the input text if it is longer than the model's maximum length.
+                Only available on a server powered by Text-Embedding-Inference.
+ truncation_direction (`Literal["Left", "Right"]`, *optional*):
+ Which side of the input should be truncated when `truncate=True` is passed.
+
+ Returns:
+ `np.ndarray`: The embedding representing the input text as a float32 numpy array.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.feature_extraction("Hi, who are you?")
+ array([[ 2.424802 , 2.93384 , 1.1750331 , ..., 1.240499, -0.13776633, -0.7889173 ],
+ [-0.42943227, -0.6364878 , -1.693462 , ..., 0.41978157, -2.4336355 , 0.6162071 ],
+ ...,
+ [ 0.28552425, -0.928395 , -1.2077185 , ..., 0.76810825, -2.1069427 , 0.6236161 ]], dtype=float32)
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="feature-extraction")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={
+ "normalize": normalize,
+ "prompt_name": prompt_name,
+ "truncate": truncate,
+ "truncation_direction": truncation_direction,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ np = _import_numpy()
+ return np.array(_bytes_to_dict(response), dtype="float32")
+
+ async def fill_mask(
+ self,
+ text: str,
+ *,
+ model: Optional[str] = None,
+ targets: Optional[List[str]] = None,
+ top_k: Optional[int] = None,
+ ) -> List[FillMaskOutputElement]:
+ """
+ Fill in a hole with a missing word (token to be precise).
+
+ Args:
+ text (`str`):
+                A string to be filled in; it must contain the mask token (check the model card for its exact name, e.g. [MASK]).
+ model (`str`, *optional*):
+ The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
+            targets (`List[str]`, *optional*):
+ When passed, the model will limit the scores to the passed targets instead of looking up in the whole
+ vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
+ resulting token will be used (with a warning, and that might be slower).
+ top_k (`int`, *optional*):
+ When passed, overrides the number of predictions to return.
+ Returns:
+ `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+ probability, token reference, and completed text.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.fill_mask("The goal of life is <mask>.")
+ [
+ FillMaskOutputElement(score=0.06897063553333282, token=11098, token_str=' happiness', sequence='The goal of life is happiness.'),
+ FillMaskOutputElement(score=0.06554922461509705, token=45075, token_str=' immortality', sequence='The goal of life is immortality.')
+ ]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="fill-mask")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={"targets": targets, "top_k": top_k},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return FillMaskOutputElement.parse_obj_as_list(response)
+
+ async def image_classification(
+ self,
+ image: ContentT,
+ *,
+ model: Optional[str] = None,
+ function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
+ top_k: Optional[int] = None,
+ ) -> List[ImageClassificationOutputElement]:
+ """
+ Perform image classification on the given image using the specified model.
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The image to classify. It can be raw bytes, an image file, or a URL to an online image.
+ model (`str`, *optional*):
+ The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
+ deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
+ function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
+ The function to apply to the model outputs in order to retrieve the scores.
+ top_k (`int`, *optional*):
+ When specified, limits the output to the top K most probable classes.
+ Returns:
+ `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.image_classification("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
+ [ImageClassificationOutputElement(label='Blenheim spaniel', score=0.9779096841812134), ...]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="image-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={"function_to_apply": function_to_apply, "top_k": top_k},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return ImageClassificationOutputElement.parse_obj_as_list(response)
+
+ async def image_segmentation(
+ self,
+ image: ContentT,
+ *,
+ model: Optional[str] = None,
+ mask_threshold: Optional[float] = None,
+ overlap_mask_area_threshold: Optional[float] = None,
+ subtask: Optional["ImageSegmentationSubtask"] = None,
+ threshold: Optional[float] = None,
+ ) -> List[ImageSegmentationOutputElement]:
+ """
+ Perform image segmentation on the given image using the specified model.
+
+ <Tip warning={true}>
+
+ You must have `PIL` installed if you want to work with images (`pip install Pillow`).
+
+ </Tip>
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The image to segment. It can be raw bytes, an image file, or a URL to an online image.
+ model (`str`, *optional*):
+ The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
+ deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will be used.
+ mask_threshold (`float`, *optional*):
+ Threshold to use when turning the predicted masks into binary values.
+ overlap_mask_area_threshold (`float`, *optional*):
+ Mask overlap threshold to eliminate small, disconnected segments.
+ subtask (`"ImageSegmentationSubtask"`, *optional*):
+ Segmentation task to be performed, depending on model capabilities.
+ threshold (`float`, *optional*):
+ Probability threshold to filter out predicted masks.
+ Returns:
+ `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.image_segmentation("cat.jpg")
+ [ImageSegmentationOutputElement(score=0.989008, label='LABEL_184', mask=<PIL.PngImagePlugin.PngImageFile image mode=L size=400x300 at 0x7FDD2B129CC0>), ...]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="audio-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={
+ "mask_threshold": mask_threshold,
+ "overlap_mask_area_threshold": overlap_mask_area_threshold,
+ "subtask": subtask,
+ "threshold": threshold,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ output = ImageSegmentationOutputElement.parse_obj_as_list(response)
+ for item in output:
+ item.mask = _b64_to_image(item.mask) # type: ignore [assignment]
+ return output
+
+ async def image_to_image(
+ self,
+ image: ContentT,
+ prompt: Optional[str] = None,
+ *,
+ negative_prompt: Optional[str] = None,
+ num_inference_steps: Optional[int] = None,
+ guidance_scale: Optional[float] = None,
+ model: Optional[str] = None,
+ target_size: Optional[ImageToImageTargetSize] = None,
+ **kwargs,
+ ) -> "Image":
+ """
+ Perform image-to-image translation using a specified model.
+
+ <Tip warning={true}>
+
+ You must have `PIL` installed if you want to work with images (`pip install Pillow`).
+
+ </Tip>
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
+ prompt (`str`, *optional*):
+ The text prompt to guide the image generation.
+ negative_prompt (`str`, *optional*):
+ One prompt to guide what NOT to include in image generation.
+ num_inference_steps (`int`, *optional*):
+ For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+ quality image at the expense of slower inference.
+ guidance_scale (`float`, *optional*):
+ For diffusion models. A higher guidance scale value encourages the model to generate images closely
+ linked to the text prompt at the expense of lower image quality.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+ target_size (`ImageToImageTargetSize`, *optional*):
+ The size in pixels of the output image.
+
+ Returns:
+ `Image`: The translated image.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> image = await client.image_to_image("cat.jpg", prompt="turn the cat into a tiger")
+ >>> image.save("tiger.jpg")
+ ```
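+
+ Example with additional diffusion parameters (illustrative; the values shown are only a reasonable starting point, not recommended defaults):
+ ```py
+ >>> image = await client.image_to_image(
+ ... "cat.jpg",
+ ... prompt="turn the cat into a tiger",
+ ... negative_prompt="blurry, low quality",
+ ... num_inference_steps=30,
+ ... guidance_scale=7.5,
+ ... )
+ >>> image.save("tiger.jpg")
+ ```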
+ """
+ provider_helper = get_provider_helper(self.provider, task="image-to-image")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={
+ "prompt": prompt,
+ "negative_prompt": negative_prompt,
+ "target_size": target_size,
+ "num_inference_steps": num_inference_steps,
+ "guidance_scale": guidance_scale,
+ **kwargs,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return _bytes_to_image(response)
+
+ async def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
+ """
+ Takes an input image and returns text.
+
+ Models can have very different outputs depending on your use case (image captioning, optical character recognition
+ (OCR), Pix2Struct, etc). Please have a look to the model card to learn more about a model's specificities.
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+
+ Returns:
+ [`ImageToTextOutput`]: The generated text.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.image_to_text("cat.jpg")
+ 'a cat standing in a grassy field '
+ >>> await client.image_to_text("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg")
+ 'a dog laying on the grass next to a flower pot '
+ ```
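+
+ Example reading the structured output (illustrative; assumes you want the `generated_text` field of the returned [`ImageToTextOutput`]):
+ ```py
+ >>> output = await client.image_to_text("cat.jpg")
+ >>> output.generated_text
+ 'a cat standing in a grassy field '
+ ```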
+ """
+ provider_helper = get_provider_helper(self.provider, task="image-to-text")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ output = ImageToTextOutput.parse_obj(response)
+ return output[0] if isinstance(output, list) else output
+
+ async def object_detection(
+ self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
+ ) -> List[ObjectDetectionOutputElement]:
+ """
+ Perform object detection on the given image using the specified model.
+
+ <Tip warning={true}>
+
+ You must have `PIL` installed if you want to work with images (`pip install Pillow`).
+
+ </Tip>
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The image to detect objects on. It can be raw bytes, an image file, or a URL to an online image.
+ model (`str`, *optional*):
+ The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
+ deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR) will be used.
+ threshold (`float`, *optional*):
+ The probability necessary to make a prediction.
+ Returns:
+ `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+ `ValueError`:
+ If the request output is not a List.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.object_detection("people.jpg")
+ [ObjectDetectionOutputElement(score=0.9486683011054993, label='person', box=ObjectDetectionBoundingBox(xmin=59, ymin=39, xmax=420, ymax=510)), ...]
+ ```
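+
+ Example keeping only high-confidence detections with `threshold` (illustrative; output values are indicative only):
+ ```py
+ >>> await client.object_detection("people.jpg", threshold=0.9)
+ [ObjectDetectionOutputElement(score=0.9486683011054993, label='person', box=ObjectDetectionBoundingBox(xmin=59, ymin=39, xmax=420, ymax=510)), ...]
+ ```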
+ """
+ provider_helper = get_provider_helper(self.provider, task="object-detection")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={"threshold": threshold},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return ObjectDetectionOutputElement.parse_obj_as_list(response)
+
+ async def question_answering(
+ self,
+ question: str,
+ context: str,
+ *,
+ model: Optional[str] = None,
+ align_to_words: Optional[bool] = None,
+ doc_stride: Optional[int] = None,
+ handle_impossible_answer: Optional[bool] = None,
+ max_answer_len: Optional[int] = None,
+ max_question_len: Optional[int] = None,
+ max_seq_len: Optional[int] = None,
+ top_k: Optional[int] = None,
+ ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+ """
+ Retrieve the answer to a question from a given text.
+
+ Args:
+ question (`str`):
+ Question to be answered.
+ context (`str`):
+ The context of the question.
+ model (`str`, *optional*):
+ The model to use for the question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint.
+ align_to_words (`bool`, *optional*):
+ Attempts to align the answer to real words. Improves quality on space-separated languages. Might hurt
+ on non-space-separated languages (like Japanese or Chinese).
+ doc_stride (`int`, *optional*):
+ If the context is too long to fit with the question for the model, it will be split in several chunks
+ with some overlap. This argument controls the size of that overlap.
+ handle_impossible_answer (`bool`, *optional*):
+ Whether to accept impossible as an answer.
+ max_answer_len (`int`, *optional*):
+ The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
+ max_question_len (`int`, *optional*):
+ The maximum length of the question after tokenization. It will be truncated if needed.
+ max_seq_len (`int`, *optional*):
+ The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+ model. The context will be split in several chunks (using doc_stride as overlap) if needed.
+ top_k (`int`, *optional*):
+ The number of answers to return (will be chosen by order of likelihood). Note that fewer than
+ top_k answers are returned if there are not enough options available within the context.
+
+ Returns:
+ Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+ When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
+ When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.question_answering(question="What's my name?", context="My name is Clara and I live in Berkeley.")
+ QuestionAnsweringOutputElement(answer='Clara', end=16, score=0.9326565265655518, start=11)
+ ```
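+
+ Example requesting several answers with `top_k` (illustrative; a list is returned when `top_k` is greater than 1, scores shown are indicative only):
+ ```py
+ >>> await client.question_answering(
+ ... question="What's my name?",
+ ... context="My name is Clara and I live in Berkeley.",
+ ... top_k=2,
+ ... )
+ [QuestionAnsweringOutputElement(answer='Clara', end=16, score=0.93, start=11), QuestionAnsweringOutputElement(answer='Clara and I live in Berkeley', end=39, score=0.02, start=11)]
+ ```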
+ """
+ provider_helper = get_provider_helper(self.provider, task="question-answering")
+ request_parameters = provider_helper.prepare_request(
+ inputs=None,
+ parameters={
+ "align_to_words": align_to_words,
+ "doc_stride": doc_stride,
+ "handle_impossible_answer": handle_impossible_answer,
+ "max_answer_len": max_answer_len,
+ "max_question_len": max_question_len,
+ "max_seq_len": max_seq_len,
+ "top_k": top_k,
+ },
+ extra_payload={"question": question, "context": context},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ # Parse the response as a single `QuestionAnsweringOutputElement` when top_k is 1 or not provided, or a list of `QuestionAnsweringOutputElement` to ensure backward compatibility.
+ output = QuestionAnsweringOutputElement.parse_obj(response)
+ return output
+
+ async def sentence_similarity(
+ self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
+ ) -> List[float]:
+ """
+ Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
+
+ Args:
+ sentence (`str`):
+ The main sentence to compare to others.
+ other_sentences (`List[str]`):
+ The list of sentences to compare to.
+ model (`str`, *optional*):
+ The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended sentence similarity model will be used.
+ Defaults to None.
+
+ Returns:
+ `List[float]`: The similarity scores between the source sentence and each of the other sentences.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.sentence_similarity(
+ ... "Machine learning is so easy.",
+ ... other_sentences=[
+ ... "Deep learning is so straightforward.",
+ ... "This is so difficult, like rocket science.",
+ ... "I can't believe how much I struggled with this.",
+ ... ],
+ ... )
+ [0.7785726189613342, 0.45876261591911316, 0.2906220555305481]
+ ```
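+
+ Example with an explicit model (illustrative; assumes "sentence-transformers/all-MiniLM-L6-v2" as an example sentence-similarity model, score is indicative only):
+ ```py
+ >>> await client.sentence_similarity(
+ ... "Machine learning is so easy.",
+ ... other_sentences=["Deep learning is so straightforward."],
+ ... model="sentence-transformers/all-MiniLM-L6-v2",
+ ... )
+ [0.77]
+ ```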
+ """
+ provider_helper = get_provider_helper(self.provider, task="sentence-similarity")
+ request_parameters = provider_helper.prepare_request(
+ inputs=None,
+ parameters={},
+ extra_payload={"source_sentence": sentence, "sentences": other_sentences},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return _bytes_to_list(response)
+
+ async def summarization(
+ self,
+ text: str,
+ *,
+ model: Optional[str] = None,
+ clean_up_tokenization_spaces: Optional[bool] = None,
+ generate_parameters: Optional[Dict[str, Any]] = None,
+ truncation: Optional["SummarizationTruncationStrategy"] = None,
+ ) -> SummarizationOutput:
+ """
+ Generate a summary of a given text using a specified model.
+
+ Args:
+ text (`str`):
+ The input text to summarize.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. If not provided, the default recommended model for summarization will be used.
+ clean_up_tokenization_spaces (`bool`, *optional*):
+ Whether to clean up the potential extra spaces in the text output.
+ generate_parameters (`Dict[str, Any]`, *optional*):
+ Additional parametrization of the text generation algorithm.
+ truncation (`"SummarizationTruncationStrategy"`, *optional*):
+ The truncation strategy to use.
+ Returns:
+ [`SummarizationOutput`]: The generated summary text.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.summarization("The Eiffel tower...")
+ SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
+ ```
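+
+ Example with an explicit model (illustrative; assumes "facebook/bart-large-cnn" as an example summarization model, output is indicative only):
+ ```py
+ >>> await client.summarization("The Eiffel tower...", model="facebook/bart-large-cnn")
+ SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
+ ```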
+ """
+ parameters = {
+ "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+ "generate_parameters": generate_parameters,
+ "truncation": truncation,
+ }
+ provider_helper = get_provider_helper(self.provider, task="summarization")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters=parameters,
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return SummarizationOutput.parse_obj_as_list(response)[0]
+
+ async def table_question_answering(
+ self,
+ table: Dict[str, Any],
+ query: str,
+ *,
+ model: Optional[str] = None,
+ padding: Optional["Padding"] = None,
+ sequential: Optional[bool] = None,
+ truncation: Optional[bool] = None,
+ ) -> TableQuestionAnsweringOutputElement:
+ """
+ Retrieve the answer to a question from information given in a table.
+
+ Args:
+ table (`Dict[str, Any]`):
+ A table of data represented as a dict of lists where entries are headers and the lists are all the
+ values; all lists must have the same size.
+ query (`str`):
+ The query in plain text that you want to ask the table.
+ model (`str`, *optional*):
+ The model to use for the table-question-answering task. Can be a model ID hosted on the Hugging Face
+ Hub or a URL to a deployed Inference Endpoint.
+ padding (`"Padding"`, *optional*):
+ Activates and controls padding.
+ sequential (`bool`, *optional*):
+ Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
+ inference to be done sequentially to extract relations within sequences, given their conversational
+ nature.
+ truncation (`bool`, *optional*):
+ Activates and controls truncation.
+
+ Returns:
+ [`TableQuestionAnsweringOutputElement`]: a table question answering output containing the answer, coordinates, cells and the aggregator used.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> query = "How many stars does the transformers repository have?"
+ >>> table = {"Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"]}
+ >>> await client.table_question_answering(table, query, model="google/tapas-base-finetuned-wtq")
+ TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE')
+ ```
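+
+ Example reading a single field from the output (illustrative; reuses the `table` and `query` defined above):
+ ```py
+ >>> output = await client.table_question_answering(table, query, model="google/tapas-base-finetuned-wtq")
+ >>> output.answer
+ '36542'
+ ```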
+ """
+ provider_helper = get_provider_helper(self.provider, task="table-question-answering")
+ request_parameters = provider_helper.prepare_request(
+ inputs=None,
+ parameters={"model": model, "padding": padding, "sequential": sequential, "truncation": truncation},
+ extra_payload={"query": query, "table": table},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
+
+ async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+ """
+ Classify a target category (a group) based on a set of attributes.
+
+ Args:
+ table (`Dict[str, Any]`):
+ Set of attributes to classify.
+ model (`str`, *optional*):
+ The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended tabular classification model will be used.
+ Defaults to None.
+
+ Returns:
+ `List`: a list of labels, one per row in the initial table.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> table = {
+ ... "fixed_acidity": ["7.4", "7.8", "10.3"],
+ ... "volatile_acidity": ["0.7", "0.88", "0.32"],
+ ... "citric_acid": ["0", "0", "0.45"],
+ ... "residual_sugar": ["1.9", "2.6", "6.4"],
+ ... "chlorides": ["0.076", "0.098", "0.073"],
+ ... "free_sulfur_dioxide": ["11", "25", "5"],
+ ... "total_sulfur_dioxide": ["34", "67", "13"],
+ ... "density": ["0.9978", "0.9968", "0.9976"],
+ ... "pH": ["3.51", "3.2", "3.23"],
+ ... "sulphates": ["0.56", "0.68", "0.82"],
+ ... "alcohol": ["9.4", "9.8", "12.6"],
+ ... }
+ >>> await client.tabular_classification(table=table, model="julien-c/wine-quality")
+ ["5", "5", "5"]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="tabular-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=None,
+ extra_payload={"table": table},
+ parameters={},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return _bytes_to_list(response)
+
+ async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+ """
+ Predict a numerical target value given a set of attributes/features in a table.
+
+ Args:
+ table (`Dict[str, Any]`):
+ Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
+ model (`str`, *optional*):
+ The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended tabular regression model will be used.
+ Defaults to None.
+
+ Returns:
+ `List`: a list of predicted numerical target values.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> table = {
+ ... "Height": ["11.52", "12.48", "12.3778"],
+ ... "Length1": ["23.2", "24", "23.9"],
+ ... "Length2": ["25.4", "26.3", "26.5"],
+ ... "Length3": ["30", "31.2", "31.1"],
+ ... "Species": ["Bream", "Bream", "Bream"],
+ ... "Width": ["4.02", "4.3056", "4.6961"],
+ ... }
+ >>> await client.tabular_regression(table, model="scikit-learn/Fish-Weight")
+ [110, 120, 130]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="tabular-regression")
+ request_parameters = provider_helper.prepare_request(
+ inputs=None,
+ parameters={},
+ extra_payload={"table": table},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return _bytes_to_list(response)
+
+ async def text_classification(
+ self,
+ text: str,
+ *,
+ model: Optional[str] = None,
+ top_k: Optional[int] = None,
+ function_to_apply: Optional["TextClassificationOutputTransform"] = None,
+ ) -> List[TextClassificationOutputElement]:
+ """
+ Perform text classification (e.g. sentiment-analysis) on the given text.
+
+ Args:
+ text (`str`):
+ A string to be classified.
+ model (`str`, *optional*):
+ The model to use for the text classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended text classification model will be used.
+ Defaults to None.
+ top_k (`int`, *optional*):
+ When specified, limits the output to the top K most probable classes.
+ function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
+ The function to apply to the model outputs in order to retrieve the scores.
+
+ Returns:
+ `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.text_classification("I like you")
+ [
+ TextClassificationOutputElement(label='POSITIVE', score=0.9998695850372314),
+ TextClassificationOutputElement(label='NEGATIVE', score=0.0001304351753788069),
+ ]
+ ```
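+
+ Example keeping only the most likely label with `top_k` (illustrative; output values are indicative only):
+ ```py
+ >>> await client.text_classification("I like you", top_k=1)
+ [
+ TextClassificationOutputElement(label='POSITIVE', score=0.9998695850372314),
+ ]
+ ```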
+ """
+ provider_helper = get_provider_helper(self.provider, task="text-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={
+ "function_to_apply": function_to_apply,
+ "top_k": top_k,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return TextClassificationOutputElement.parse_obj_as_list(response)[0] # type: ignore [return-value]
+
+ @overload
+ async def text_generation( # type: ignore
+ self,
+ prompt: str,
+ *,
+ details: Literal[False] = ...,
+ stream: Literal[False] = ...,
+ model: Optional[str] = None,
+ # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
+ adapter_id: Optional[str] = None,
+ best_of: Optional[int] = None,
+ decoder_input_details: Optional[bool] = None,
+ do_sample: Optional[bool] = False, # Manual default value
+ frequency_penalty: Optional[float] = None,
+ grammar: Optional[TextGenerationInputGrammarType] = None,
+ max_new_tokens: Optional[int] = None,
+ repetition_penalty: Optional[float] = None,
+ return_full_text: Optional[bool] = False, # Manual default value
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_n_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ truncate: Optional[int] = None,
+ typical_p: Optional[float] = None,
+ watermark: Optional[bool] = None,
+ ) -> str: ...
+
+ @overload
+ async def text_generation( # type: ignore
+ self,
+ prompt: str,
+ *,
+ details: Literal[True] = ...,
+ stream: Literal[False] = ...,
+ model: Optional[str] = None,
+ # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
+ adapter_id: Optional[str] = None,
+ best_of: Optional[int] = None,
+ decoder_input_details: Optional[bool] = None,
+ do_sample: Optional[bool] = False, # Manual default value
+ frequency_penalty: Optional[float] = None,
+ grammar: Optional[TextGenerationInputGrammarType] = None,
+ max_new_tokens: Optional[int] = None,
+ repetition_penalty: Optional[float] = None,
+ return_full_text: Optional[bool] = False, # Manual default value
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_n_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ truncate: Optional[int] = None,
+ typical_p: Optional[float] = None,
+ watermark: Optional[bool] = None,
+ ) -> TextGenerationOutput: ...
+
+ @overload
+ async def text_generation( # type: ignore
+ self,
+ prompt: str,
+ *,
+ details: Literal[False] = ...,
+ stream: Literal[True] = ...,
+ model: Optional[str] = None,
+ # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
+ adapter_id: Optional[str] = None,
+ best_of: Optional[int] = None,
+ decoder_input_details: Optional[bool] = None,
+ do_sample: Optional[bool] = False, # Manual default value
+ frequency_penalty: Optional[float] = None,
+ grammar: Optional[TextGenerationInputGrammarType] = None,
+ max_new_tokens: Optional[int] = None,
+ repetition_penalty: Optional[float] = None,
+ return_full_text: Optional[bool] = False, # Manual default value
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_n_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ truncate: Optional[int] = None,
+ typical_p: Optional[float] = None,
+ watermark: Optional[bool] = None,
+ ) -> AsyncIterable[str]: ...
+
+ @overload
+ async def text_generation( # type: ignore
+ self,
+ prompt: str,
+ *,
+ details: Literal[True] = ...,
+ stream: Literal[True] = ...,
+ model: Optional[str] = None,
+ # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
+ adapter_id: Optional[str] = None,
+ best_of: Optional[int] = None,
+ decoder_input_details: Optional[bool] = None,
+ do_sample: Optional[bool] = False, # Manual default value
+ frequency_penalty: Optional[float] = None,
+ grammar: Optional[TextGenerationInputGrammarType] = None,
+ max_new_tokens: Optional[int] = None,
+ repetition_penalty: Optional[float] = None,
+ return_full_text: Optional[bool] = False, # Manual default value
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_n_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ truncate: Optional[int] = None,
+ typical_p: Optional[float] = None,
+ watermark: Optional[bool] = None,
+ ) -> AsyncIterable[TextGenerationStreamOutput]: ...
+
+ @overload
+ async def text_generation(
+ self,
+ prompt: str,
+ *,
+ details: Literal[True] = ...,
+ stream: bool = ...,
+ model: Optional[str] = None,
+ # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
+ adapter_id: Optional[str] = None,
+ best_of: Optional[int] = None,
+ decoder_input_details: Optional[bool] = None,
+ do_sample: Optional[bool] = False, # Manual default value
+ frequency_penalty: Optional[float] = None,
+ grammar: Optional[TextGenerationInputGrammarType] = None,
+ max_new_tokens: Optional[int] = None,
+ repetition_penalty: Optional[float] = None,
+ return_full_text: Optional[bool] = False, # Manual default value
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_n_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ truncate: Optional[int] = None,
+ typical_p: Optional[float] = None,
+ watermark: Optional[bool] = None,
+ ) -> Union[TextGenerationOutput, AsyncIterable[TextGenerationStreamOutput]]: ...
+
+ async def text_generation(
+ self,
+ prompt: str,
+ *,
+ details: bool = False,
+ stream: bool = False,
+ model: Optional[str] = None,
+ # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
+ adapter_id: Optional[str] = None,
+ best_of: Optional[int] = None,
+ decoder_input_details: Optional[bool] = None,
+ do_sample: Optional[bool] = False, # Manual default value
+ frequency_penalty: Optional[float] = None,
+ grammar: Optional[TextGenerationInputGrammarType] = None,
+ max_new_tokens: Optional[int] = None,
+ repetition_penalty: Optional[float] = None,
+ return_full_text: Optional[bool] = False, # Manual default value
+ seed: Optional[int] = None,
+ stop: Optional[List[str]] = None,
+ stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_n_tokens: Optional[int] = None,
+ top_p: Optional[float] = None,
+ truncate: Optional[int] = None,
+ typical_p: Optional[float] = None,
+ watermark: Optional[bool] = None,
+ ) -> Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]:
+ """
+ Given a prompt, generate the following text.
+
+ <Tip>
+
+ If you want to generate a response from chat messages, you should use the [`InferenceClient.chat_completion`] method.
+ It accepts a list of messages instead of a single text prompt and handles the chat templating for you.
+
+ </Tip>
+
+ Args:
+ prompt (`str`):
+ Input text.
+ details (`bool`, *optional*):
+ By default, text_generation returns a string. Pass `details=True` if you want a detailed output (tokens,
+ probabilities, seed, finish reason, etc.). Only available for models served with the
+ `text-generation-inference` backend.
+ stream (`bool`, *optional*):
+ By default, text_generation returns the full generated text. Pass `stream=True` if you want a stream of
+ tokens to be returned. Only available for models served with the `text-generation-inference`
+ backend.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+ adapter_id (`str`, *optional*):
+ LoRA adapter id.
+ best_of (`int`, *optional*):
+ Generate best_of sequences and return the one with the highest token logprobs.
+ decoder_input_details (`bool`, *optional*):
+ Return the decoder input token logprobs and ids. You must set `details=True` as well for it to be taken
+ into account. Defaults to `False`.
+ do_sample (`bool`, *optional*):
+ Activate logits sampling
+ frequency_penalty (`float`, *optional*):
+ Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in
+ the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+ grammar ([`TextGenerationInputGrammarType`], *optional*):
+ Grammar constraints. Can be either a JSONSchema or a regex.
+ max_new_tokens (`int`, *optional*):
+ Maximum number of generated tokens. Defaults to 100.
+ repetition_penalty (`float`, *optional*):
+ The parameter for repetition penalty. 1.0 means no penalty. See [this
+ paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+ return_full_text (`bool`, *optional*):
+ Whether to prepend the prompt to the generated text
+ seed (`int`, *optional*):
+ Random sampling seed
+ stop (`List[str]`, *optional*):
+ Stop generating tokens if a member of `stop` is generated.
+ stop_sequences (`List[str]`, *optional*):
+ Deprecated argument. Use `stop` instead.
+ temperature (`float`, *optional*):
+ The value used to modulate the logits distribution.
+ top_n_tokens (`int`, *optional*):
+ Return information about the `top_n_tokens` most likely tokens at each generation step, instead of
+ just the sampled token.
+ top_k (`int`, *optional*):
+ The number of highest probability vocabulary tokens to keep for top-k-filtering.
+ top_p (`float`, *optional*):
+ If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
+ higher are kept for generation.
+ truncate (`int`, *optional*):
+ Truncate input tokens to the given size.
+ typical_p (`float`, *optional*):
+ Typical Decoding mass.
+ See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
+ watermark (`bool`, *optional*):
+ Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
+
+ Returns:
+ `Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
+ Generated text returned from the server:
+ - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
+ - if `stream=True` and `details=False`, the generated text is returned token by token as an `AsyncIterable[str]`
+ - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
+ - if `details=True` and `stream=True`, the generated text is returned token by token as an async iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
+
+ Raises:
+ `ValidationError`:
+ If input values are not valid. No HTTP call is made to the server.
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+
+ # Case 1: generate text
+ >>> await client.text_generation("The huggingface_hub library is ", max_new_tokens=12)
+ '100% open source and built to be easy to use.'
+
+ # Case 2: iterate over the generated tokens. Useful for large generation.
+ >>> async for token in await client.text_generation("The huggingface_hub library is ", max_new_tokens=12, stream=True):
+ ... print(token)
+ 100
+ %
+ open
+ source
+ and
+ built
+ to
+ be
+ easy
+ to
+ use
+ .
+
+ # Case 3: get more details about the generation process.
+ >>> await client.text_generation("The huggingface_hub library is ", max_new_tokens=12, details=True)
+ TextGenerationOutput(
+ generated_text='100% open source and built to be easy to use.',
+ details=TextGenerationDetails(
+ finish_reason='length',
+ generated_tokens=12,
+ seed=None,
+ prefill=[
+ TextGenerationPrefillOutputToken(id=487, text='The', logprob=None),
+ TextGenerationPrefillOutputToken(id=53789, text=' hugging', logprob=-13.171875),
+ (...)
+ TextGenerationPrefillOutputToken(id=204, text=' ', logprob=-7.0390625)
+ ],
+ tokens=[
+ TokenElement(id=1425, text='100', logprob=-1.0175781, special=False),
+ TokenElement(id=16, text='%', logprob=-0.0463562, special=False),
+ (...)
+ TokenElement(id=25, text='.', logprob=-0.5703125, special=False)
+ ],
+ best_of_sequences=None
+ )
+ )
+
+ # Case 4: iterate over the generated tokens with more details.
+ # Last object is more complete, containing the full generated text and the finish reason.
+ >>> async for details in await client.text_generation("The huggingface_hub library is ", max_new_tokens=12, details=True, stream=True):
+ ... print(details)
+ ...
+ TextGenerationStreamOutput(token=TokenElement(id=1425, text='100', logprob=-1.0175781, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=16, text='%', logprob=-0.0463562, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=1314, text=' open', logprob=-1.3359375, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=3178, text=' source', logprob=-0.28100586, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=273, text=' and', logprob=-0.5961914, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=3426, text=' built', logprob=-1.9423828, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=271, text=' to', logprob=-1.4121094, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=314, text=' be', logprob=-1.5224609, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=1833, text=' easy', logprob=-2.1132812, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=271, text=' to', logprob=-0.08520508, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(id=745, text=' use', logprob=-0.39453125, special=False), generated_text=None, details=None)
+ TextGenerationStreamOutput(token=TokenElement(
+ id=25,
+ text='.',
+ logprob=-0.5703125,
+ special=False),
+ generated_text='100% open source and built to be easy to use.',
+ details=TextGenerationStreamOutputStreamDetails(finish_reason='length', generated_tokens=12, seed=None)
+ )
+
+ # Case 5: generate constrained output using grammar
+ >>> response = await client.text_generation(
+ ... prompt="I saw a puppy a cat and a raccoon during my bike ride in the park",
+ ... model="HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
+ ... max_new_tokens=100,
+ ... repetition_penalty=1.3,
+ ... grammar={
+ ... "type": "json",
+ ... "value": {
+ ... "properties": {
+ ... "location": {"type": "string"},
+ ... "activity": {"type": "string"},
+ ... "animals_seen": {"type": "integer", "minimum": 1, "maximum": 5},
+ ... "animals": {"type": "array", "items": {"type": "string"}},
+ ... },
+ ... "required": ["location", "activity", "animals_seen", "animals"],
+ ... },
+ ... },
+ ... )
+ >>> json.loads(response)
+ {
+ "activity": "bike riding",
+ "animals": ["puppy", "cat", "raccoon"],
+ "animals_seen": 3,
+ "location": "park"
+ }
+ ```
+ """
+ if decoder_input_details and not details:
+ warnings.warn(
+ "`decoder_input_details=True` has been passed to the server but `details=False` is set meaning that"
+ " the output from the server will be truncated."
+ )
+ decoder_input_details = False
+
+ if stop_sequences is not None:
+ warnings.warn(
+ "`stop_sequences` is a deprecated argument for `text_generation` task"
+ " and will be removed in version '0.28.0'. Use `stop` instead.",
+ FutureWarning,
+ )
+ if stop is None:
+ stop = stop_sequences # use deprecated arg if provided
+
+ # Build payload
+ parameters = {
+ "adapter_id": adapter_id,
+ "best_of": best_of,
+ "decoder_input_details": decoder_input_details,
+ "details": details,
+ "do_sample": do_sample,
+ "frequency_penalty": frequency_penalty,
+ "grammar": grammar,
+ "max_new_tokens": max_new_tokens,
+ "repetition_penalty": repetition_penalty,
+ "return_full_text": return_full_text,
+ "seed": seed,
+ "stop": stop if stop is not None else [],
+ "temperature": temperature,
+ "top_k": top_k,
+ "top_n_tokens": top_n_tokens,
+ "top_p": top_p,
+ "truncate": truncate,
+ "typical_p": typical_p,
+ "watermark": watermark,
+ }
+
+ # Remove some parameters if not a TGI server
+ unsupported_kwargs = _get_unsupported_text_generation_kwargs(model)
+ if len(unsupported_kwargs) > 0:
+ # The server does not support some parameters
+ # => means it is not a TGI server
+ # => remove unsupported parameters and warn the user
+
+ ignored_parameters = []
+ for key in unsupported_kwargs:
+ if parameters.get(key):
+ ignored_parameters.append(key)
+ parameters.pop(key, None)
+ if len(ignored_parameters) > 0:
+ warnings.warn(
+ "API endpoint/model for text-generation is not served via TGI. Ignoring following parameters:"
+ f" {', '.join(ignored_parameters)}.",
+ UserWarning,
+ )
+ if details:
+ warnings.warn(
+ "API endpoint/model for text-generation is not served via TGI. Parameter `details=True` will"
+ " be ignored meaning only the generated text will be returned.",
+ UserWarning,
+ )
+ details = False
+ if stream:
+ raise ValueError(
+ "API endpoint/model for text-generation is not served via TGI. Cannot return output as a stream."
+ " Please pass `stream=False` as input."
+ )
+
+ provider_helper = get_provider_helper(self.provider, task="text-generation")
+ request_parameters = provider_helper.prepare_request(
+ inputs=prompt,
+ parameters=parameters,
+ extra_payload={"stream": stream},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+
+ # Handle errors separately for more precise error messages
+ try:
+ bytes_output = await self._inner_post(request_parameters, stream=stream)
+ except _import_aiohttp().ClientResponseError as e:
+ match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
+ if e.status == 400 and match:
+ unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
+ _set_unsupported_text_generation_kwargs(model, unused_params)
+ return await self.text_generation( # type: ignore
+ prompt=prompt,
+ details=details,
+ stream=stream,
+ model=model or self.model,
+ adapter_id=adapter_id,
+ best_of=best_of,
+ decoder_input_details=decoder_input_details,
+ do_sample=do_sample,
+ frequency_penalty=frequency_penalty,
+ grammar=grammar,
+ max_new_tokens=max_new_tokens,
+ repetition_penalty=repetition_penalty,
+ return_full_text=return_full_text,
+ seed=seed,
+ stop=stop,
+ temperature=temperature,
+ top_k=top_k,
+ top_n_tokens=top_n_tokens,
+ top_p=top_p,
+ truncate=truncate,
+ typical_p=typical_p,
+ watermark=watermark,
+ )
+ raise_text_generation_error(e)
+
+ # Parse output
+ if stream:
+ return _async_stream_text_generation_response(bytes_output, details) # type: ignore
+
+ data = _bytes_to_dict(bytes_output) # type: ignore[arg-type]
+
+ # Data can be a single element (dict) or a list of dicts, in which case we select the first element.
+ if isinstance(data, list):
+ data = data[0]
+
+ return TextGenerationOutput.parse_obj_as_instance(data) if details else data["generated_text"]
+
+ async def text_to_image(
+ self,
+ prompt: str,
+ *,
+ negative_prompt: Optional[str] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: Optional[int] = None,
+ guidance_scale: Optional[float] = None,
+ model: Optional[str] = None,
+ scheduler: Optional[str] = None,
+ seed: Optional[int] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
+ ) -> "Image":
+ """
+ Generate an image based on a given text using a specified model.
+
+ <Tip warning={true}>
+
+ You must have `PIL` installed if you want to work with images (`pip install Pillow`).
+
+ </Tip>
+
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
+ Args:
+ prompt (`str`):
+ The prompt to generate an image from.
+ negative_prompt (`str`, *optional*):
+ One prompt to guide what NOT to include in image generation.
+ height (`int`, *optional*):
+ The height in pixels of the output image
+ width (`int`, *optional*):
+ The width in pixels of the output image
+ num_inference_steps (`int`, *optional*):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ guidance_scale (`float`, *optional*):
+ A higher guidance scale value encourages the model to generate images closely linked to the text
+ prompt, but values too high may cause saturation and other artifacts.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. If not provided, the default recommended text-to-image model will be used.
+ Defaults to None.
+ scheduler (`str`, *optional*):
+ Override the scheduler with a compatible one.
+ seed (`int`, *optional*):
+ Seed for the random number generator.
+ extra_body (`Dict[str, Any]`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
+
+ Returns:
+ `Image`: The generated image.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+
+ >>> image = await client.text_to_image("An astronaut riding a horse on the moon.")
+ >>> image.save("astronaut.png")
+
+ >>> image = await client.text_to_image(
+ ... "An astronaut riding a horse on the moon.",
+ ... negative_prompt="low resolution, blurry",
+ ... model="stabilityai/stable-diffusion-2-1",
+ ... )
+ >>> image.save("better_astronaut.png")
+ ```
+ Example using a third-party provider directly. Usage will be billed on your fal.ai account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="fal-ai", # Use fal.ai provider
+ ... api_key="fal-ai-api-key", # Pass your fal.ai API key
+ ... )
+ >>> image = client.text_to_image(
+ ... "A majestic lion in a fantasy forest",
+ ... model="black-forest-labs/FLUX.1-schnell",
+ ... )
+ >>> image.save("lion.png")
+ ```
+
+ Example using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate", # Use replicate provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> image = client.text_to_image(
+ ... "An astronaut riding a horse on the moon.",
+ ... model="black-forest-labs/FLUX.1-dev",
+ ... )
+ >>> image.save("astronaut.png")
+ ```
+
+ Example using Replicate provider with extra parameters
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate", # Use replicate provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> image = client.text_to_image(
+ ... "An astronaut riding a horse on the moon.",
+ ... model="black-forest-labs/FLUX.1-schnell",
+ ... extra_body={"output_quality": 100},
+ ... )
+ >>> image.save("astronaut.png")
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="text-to-image")
+ request_parameters = provider_helper.prepare_request(
+ inputs=prompt,
+ parameters={
+ "negative_prompt": negative_prompt,
+ "height": height,
+ "width": width,
+ "num_inference_steps": num_inference_steps,
+ "guidance_scale": guidance_scale,
+ "scheduler": scheduler,
+ "seed": seed,
+ **(extra_body or {}),
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ response = provider_helper.get_response(response)
+ return _bytes_to_image(response)
+
+ async def text_to_video(
+ self,
+ prompt: str,
+ *,
+ model: Optional[str] = None,
+ guidance_scale: Optional[float] = None,
+ negative_prompt: Optional[List[str]] = None,
+ num_frames: Optional[float] = None,
+ num_inference_steps: Optional[int] = None,
+ seed: Optional[int] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
+ ) -> bytes:
+ """
+ Generate a video based on a given text.
+
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
+ Args:
+ prompt (`str`):
+ The prompt to generate a video from.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. If not provided, the default recommended text-to-video model will be used.
+ Defaults to None.
+ guidance_scale (`float`, *optional*):
+ A higher guidance scale value encourages the model to generate videos closely linked to the text
+ prompt, but values too high may cause saturation and other artifacts.
+ negative_prompt (`List[str]`, *optional*):
+ One or several prompts to guide what NOT to include in video generation.
+ num_frames (`float`, *optional*):
+ The number of video frames to generate.
+ num_inference_steps (`int`, *optional*):
+ The number of denoising steps. More denoising steps usually lead to a higher quality video at the
+ expense of slower inference.
+ seed (`int`, *optional*):
+ Seed for the random number generator.
+ extra_body (`Dict[str, Any]`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
+
+ Returns:
+ `bytes`: The generated video.
+
+ Example:
+
+ Example using a third-party provider directly. Usage will be billed on your fal.ai account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="fal-ai", # Using fal.ai provider
+ ... api_key="fal-ai-api-key", # Pass your fal.ai API key
+ ... )
+ >>> video = client.text_to_video(
+ ... "A majestic lion running in a fantasy forest",
+ ... model="tencent/HunyuanVideo",
+ ... )
+ >>> with open("lion.mp4", "wb") as file:
+ ... file.write(video)
+ ```
+
+ Example using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate", # Using replicate provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> video = client.text_to_video(
+ ... "A cat running in a park",
+ ... model="genmo/mochi-1-preview",
+ ... )
+ >>> with open("cat.mp4", "wb") as file:
+ ... file.write(video)
+ ```
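+
+ Example using the async client with explicit generation parameters (illustrative; assumes "tencent/HunyuanVideo" is available for your provider, values are only a reasonable starting point):
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> video = await client.text_to_video(
+ ... "A young man walking on the street",
+ ... model="tencent/HunyuanVideo",
+ ... num_inference_steps=25,
+ ... seed=42,
+ ... )
+ >>> with open("video.mp4", "wb") as file:
+ ... file.write(video)
+ ```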
+ """
+ provider_helper = get_provider_helper(self.provider, task="text-to-video")
+ request_parameters = provider_helper.prepare_request(
+ inputs=prompt,
+ parameters={
+ "guidance_scale": guidance_scale,
+ "negative_prompt": negative_prompt,
+ "num_frames": num_frames,
+ "num_inference_steps": num_inference_steps,
+ "seed": seed,
+ **(extra_body or {}),
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ response = provider_helper.get_response(response)
+ return response
+
+ async def text_to_speech(
+ self,
+ text: str,
+ *,
+ model: Optional[str] = None,
+ do_sample: Optional[bool] = None,
+ early_stopping: Optional[Union[bool, "TextToSpeechEarlyStoppingEnum"]] = None,
+ epsilon_cutoff: Optional[float] = None,
+ eta_cutoff: Optional[float] = None,
+ max_length: Optional[int] = None,
+ max_new_tokens: Optional[int] = None,
+ min_length: Optional[int] = None,
+ min_new_tokens: Optional[int] = None,
+ num_beam_groups: Optional[int] = None,
+ num_beams: Optional[int] = None,
+ penalty_alpha: Optional[float] = None,
+ temperature: Optional[float] = None,
+ top_k: Optional[int] = None,
+ top_p: Optional[float] = None,
+ typical_p: Optional[float] = None,
+ use_cache: Optional[bool] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
+ ) -> bytes:
+ """
+ Synthesize an audio of a voice pronouncing a given text.
+
+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
+ Args:
+ text (`str`):
+ The text to synthesize.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. If not provided, the default recommended text-to-speech model will be used.
+ Defaults to None.
+ do_sample (`bool`, *optional*):
+ Whether to use sampling instead of greedy decoding when generating new tokens.
+ early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*):
+ Controls the stopping condition for beam-based methods.
+ epsilon_cutoff (`float`, *optional*):
+ If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
+ epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on
+ the size of the model. See [Truncation Sampling as Language Model
+ Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ eta_cutoff (`float`, *optional*):
+ Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly
+ between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff)
+ * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token
+ probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3,
+ depending on the size of the model. See [Truncation Sampling as Language Model
+ Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ max_length (`int`, *optional*):
+ The maximum length (in tokens) of the generated text, including the input.
+ max_new_tokens (`int`, *optional*):
+ The maximum number of tokens to generate. Takes precedence over max_length.
+ min_length (`int`, *optional*):
+ The minimum length (in tokens) of the generated text, including the input.
+ min_new_tokens (`int`, *optional*):
+ The minimum number of tokens to generate. Takes precedence over min_length.
+ num_beam_groups (`int`, *optional*):
+ Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
+ See [this paper](https://hf.co/papers/1610.02424) for more details.
+ num_beams (`int`, *optional*):
+ Number of beams to use for beam search.
+ penalty_alpha (`float`, *optional*):
+ The value balances the model confidence and the degeneration penalty in contrastive search decoding.
+ temperature (`float`, *optional*):
+ The value used to modulate the next token probabilities.
+ top_k (`int`, *optional*):
+ The number of highest probability vocabulary tokens to keep for top-k-filtering.
+ top_p (`float`, *optional*):
+ If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
+ top_p or higher are kept for generation.
+ typical_p (`float`, *optional*):
+ Local typicality measures how similar the conditional probability of predicting a target token next is
+ to the expected conditional probability of predicting a random token next, given the partial text
+ already generated. If set to float < 1, the smallest set of the most locally typical tokens with
+ probabilities that add up to typical_p or higher are kept for generation. See [this
+ paper](https://hf.co/papers/2202.00666) for more details.
+ use_cache (`bool`, *optional*):
+ Whether the model should use the past key/values attentions to speed up decoding.
+ extra_body (`Dict[str, Any]`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
+ Returns:
+ `bytes`: The generated audio.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from pathlib import Path
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+
+ >>> audio = await client.text_to_speech("Hello world")
+ >>> Path("hello_world.flac").write_bytes(audio)
+ ```
+
+ Example using a third-party provider directly. Usage will be billed on your Replicate account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate",
+ ... api_key="your-replicate-api-key", # Pass your Replicate API key directly
+ ... )
+ >>> audio = client.text_to_speech(
+ ... text="Hello world",
+ ... model="OuteAI/OuteTTS-0.3-500M",
+ ... )
+ >>> Path("hello_world.flac").write_bytes(audio)
+ ```
+
+ Example using a third-party provider through Hugging Face Routing. Usage will be billed on your Hugging Face account.
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate",
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> audio = client.text_to_speech(
+ ... text="Hello world",
+ ... model="OuteAI/OuteTTS-0.3-500M",
+ ... )
+ >>> Path("hello_world.flac").write_bytes(audio)
+ ```
+ Example using Replicate provider with extra parameters
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate", # Use replicate provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> audio = client.text_to_speech(
+ ... "Hello, my name is Kororo, an awesome text-to-speech model.",
+ ... model="hexgrad/Kokoro-82M",
+ ... extra_body={"voice": "af_nicole"},
+ ... )
+ >>> Path("hello.flac").write_bytes(audio)
+ ```
+
+ Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> lyrics = '''
+ ... [verse]
+ ... In the town where I was born
+ ... Lived a man who sailed to sea
+ ... And he told us of his life
+ ... In the land of submarines
+ ... So we sailed on to the sun
+ ... 'Til we found a sea of green
+ ... And we lived beneath the waves
+ ... In our yellow submarine
+
+ ... [chorus]
+ ... We all live in a yellow submarine
+ ... Yellow submarine, yellow submarine
+ ... We all live in a yellow submarine
+ ... Yellow submarine, yellow submarine
+ ... '''
+ >>> genres = "pavarotti-style tenor voice"
+ >>> client = InferenceClient(
+ ... provider="fal-ai",
+ ... model="m-a-p/YuE-s1-7B-anneal-en-cot",
+ ... api_key=...,
+ ... )
+ >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
+ >>> with open("output.mp3", "wb") as f:
+ ... f.write(audio)
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="text-to-speech")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={
+ "do_sample": do_sample,
+ "early_stopping": early_stopping,
+ "epsilon_cutoff": epsilon_cutoff,
+ "eta_cutoff": eta_cutoff,
+ "max_length": max_length,
+ "max_new_tokens": max_new_tokens,
+ "min_length": min_length,
+ "min_new_tokens": min_new_tokens,
+ "num_beam_groups": num_beam_groups,
+ "num_beams": num_beams,
+ "penalty_alpha": penalty_alpha,
+ "temperature": temperature,
+ "top_k": top_k,
+ "top_p": top_p,
+ "typical_p": typical_p,
+ "use_cache": use_cache,
+ **(extra_body or {}),
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ response = provider_helper.get_response(response)
+ return response
+
+ async def token_classification(
+ self,
+ text: str,
+ *,
+ model: Optional[str] = None,
+ aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
+ ignore_labels: Optional[List[str]] = None,
+ stride: Optional[int] = None,
+ ) -> List[TokenClassificationOutputElement]:
+ """
+ Perform token classification on the given text.
+ Usually used for sentence parsing, either grammatical or Named Entity Recognition (NER), to understand keywords contained within the text.
+
+ Args:
+ text (`str`):
+ A string to be classified.
+ model (`str`, *optional*):
+ The model to use for the token classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended token classification model will be used.
+ Defaults to None.
+ aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
+ The strategy used to fuse tokens based on model predictions
+ ignore_labels (`List[str]`, *optional*):
+ A list of labels to ignore
+ stride (`int`, *optional*):
+ The number of overlapping tokens between chunks when splitting the input text.
+
+ Returns:
+ `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.token_classification("My name is Sarah Jessica Parker but you can call me Jessica")
+ [
+ TokenClassificationOutputElement(
+ entity_group='PER',
+ score=0.9971321225166321,
+ word='Sarah Jessica Parker',
+ start=11,
+ end=31,
+ ),
+ TokenClassificationOutputElement(
+ entity_group='PER',
+ score=0.9773476123809814,
+ word='Jessica',
+ start=52,
+ end=59,
+ )
+ ]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="token-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={
+ "aggregation_strategy": aggregation_strategy,
+ "ignore_labels": ignore_labels,
+ "stride": stride,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return TokenClassificationOutputElement.parse_obj_as_list(response)
+
+ async def translation(
+ self,
+ text: str,
+ *,
+ model: Optional[str] = None,
+ src_lang: Optional[str] = None,
+ tgt_lang: Optional[str] = None,
+ clean_up_tokenization_spaces: Optional[bool] = None,
+ truncation: Optional["TranslationTruncationStrategy"] = None,
+ generate_parameters: Optional[Dict[str, Any]] = None,
+ ) -> TranslationOutput:
+ """
+ Convert text from one language to another.
+
+ Check out https://huggingface.co/tasks/translation for more information on how to choose the best model for
+ your specific use case. Source and target languages usually depend on the model.
+ However, it is possible to specify source and target languages for certain models. If you are working with one of these models,
+ you can use `src_lang` and `tgt_lang` arguments to pass the relevant information.
+
+ Args:
+ text (`str`):
+ A string to be translated.
+ model (`str`, *optional*):
+ The model to use for the translation task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended translation model will be used.
+ Defaults to None.
+ src_lang (`str`, *optional*):
+ The source language of the text. Required for models that can translate from multiple languages.
+ tgt_lang (`str`, *optional*):
+ Target language to translate to. Required for models that can translate to multiple languages.
+ clean_up_tokenization_spaces (`bool`, *optional*):
+ Whether to clean up the potential extra spaces in the text output.
+ truncation (`"TranslationTruncationStrategy"`, *optional*):
+ The truncation strategy to use.
+ generate_parameters (`Dict[str, Any]`, *optional*):
+ Additional parametrization of the text generation algorithm.
+
+ Returns:
+ [`TranslationOutput`]: The generated translated text.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+ `ValueError`:
+ If only one of the `src_lang` and `tgt_lang` arguments is provided.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.translation("My name is Wolfgang and I live in Berlin")
+ 'Mein Name ist Wolfgang und ich lebe in Berlin.'
+ >>> await client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
+ TranslationOutput(translation_text="Je m'appelle Wolfgang et je vis à Berlin.")
+ ```
+
+ Specifying languages:
+ ```py
+ >>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
+ "Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
+ ```
+ """
+ # Throw error if only one of `src_lang` and `tgt_lang` was given
+ if src_lang is not None and tgt_lang is None:
+ raise ValueError("You cannot specify `src_lang` without specifying `tgt_lang`.")
+
+ if src_lang is None and tgt_lang is not None:
+ raise ValueError("You cannot specify `tgt_lang` without specifying `src_lang`.")
+
+ provider_helper = get_provider_helper(self.provider, task="translation")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={
+ "src_lang": src_lang,
+ "tgt_lang": tgt_lang,
+ "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+ "truncation": truncation,
+ "generate_parameters": generate_parameters,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return TranslationOutput.parse_obj_as_list(response)[0]
+
+ async def visual_question_answering(
+ self,
+ image: ContentT,
+ question: str,
+ *,
+ model: Optional[str] = None,
+ top_k: Optional[int] = None,
+ ) -> List[VisualQuestionAnsweringOutputElement]:
+ """
+ Answering open-ended questions based on an image.
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The input image for the context. It can be raw bytes, an image file, or a URL to an online image.
+ question (`str`):
+ Question to be answered.
+ model (`str`, *optional*):
+ The model to use for the visual question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended visual question answering model will be used.
+ Defaults to None.
+ top_k (`int`, *optional*):
+ The number of answers to return (will be chosen by order of likelihood). Note that fewer than
+ top_k answers will be returned if there are not enough options available within the context.
+ Returns:
+ `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+
+ Raises:
+ `InferenceTimeoutError`:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.visual_question_answering(
+ ... image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
+ ... question="What is the animal doing?"
+ ... )
+ [
+ VisualQuestionAnsweringOutputElement(score=0.778609573841095, answer='laying down'),
+ VisualQuestionAnsweringOutputElement(score=0.6957435607910156, answer='sitting'),
+ ]
+ ```
+ """
+ provider_helper = get_provider_helper(self.provider, task="visual-question-answering")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={"top_k": top_k},
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ extra_payload={"question": question, "image": _b64_encode(image)},
+ )
+ response = await self._inner_post(request_parameters)
+ return VisualQuestionAnsweringOutputElement.parse_obj_as_list(response)
+
+ @_deprecate_arguments(
+ version="0.30.0",
+ deprecated_args=["labels"],
+ custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+ )
+ async def zero_shot_classification(
+ self,
+ text: str,
+ # temporarily keeping it optional for backward compatibility.
+ candidate_labels: List[str] = None, # type: ignore
+ *,
+ multi_label: Optional[bool] = False,
+ hypothesis_template: Optional[str] = None,
+ model: Optional[str] = None,
+ # deprecated argument
+ labels: List[str] = None, # type: ignore
+ ) -> List[ZeroShotClassificationOutputElement]:
+ """
+ Provide as input a text and a set of candidate labels to classify the input text.
+
+ Args:
+ text (`str`):
+ The input text to classify.
+ candidate_labels (`List[str]`):
+ The set of possible class labels to classify the text into.
+ labels (`List[str]`, *optional*):
+ (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
+ multi_label (`bool`, *optional*):
+ Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
+ the label likelihoods for each sequence is 1. If true, the labels are considered independent and
+ probabilities are normalized for each candidate.
+ hypothesis_template (`str`, *optional*):
+ The sentence used in conjunction with `candidate_labels` to attempt the text classification by
+ replacing the placeholder with the candidate labels.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
+
+
+ Returns:
+ `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example with `multi_label=False`:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> text = (
+ ... "A new model offers an explanation for how the Galilean satellites formed around the solar system's"
+ ... "largest world. Konstantin Batygin did not set out to solve one of the solar system's most puzzling"
+ ... " mysteries when he went for a run up a hill in Nice, France."
+ ... )
+ >>> labels = ["space & cosmos", "scientific discovery", "microbiology", "robots", "archeology"]
+ >>> await client.zero_shot_classification(text, labels)
+ [
+ ZeroShotClassificationOutputElement(label='scientific discovery', score=0.7961668968200684),
+ ZeroShotClassificationOutputElement(label='space & cosmos', score=0.18570658564567566),
+ ZeroShotClassificationOutputElement(label='microbiology', score=0.00730885099619627),
+ ZeroShotClassificationOutputElement(label='archeology', score=0.006258360575884581),
+ ZeroShotClassificationOutputElement(label='robots', score=0.004559356719255447),
+ ]
+ >>> await client.zero_shot_classification(text, labels, multi_label=True)
+ [
+ ZeroShotClassificationOutputElement(label='scientific discovery', score=0.9829297661781311),
+ ZeroShotClassificationOutputElement(label='space & cosmos', score=0.755190908908844),
+ ZeroShotClassificationOutputElement(label='microbiology', score=0.0005462635890580714),
+ ZeroShotClassificationOutputElement(label='archeology', score=0.00047131875180639327),
+ ZeroShotClassificationOutputElement(label='robots', score=0.00030448526376858354),
+ ]
+ ```
+
+ Example with `multi_label=True` and a custom `hypothesis_template`:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.zero_shot_classification(
+ ... text="I really like our dinner and I'm very happy. I don't like the weather though.",
+ ... labels=["positive", "negative", "pessimistic", "optimistic"],
+ ... multi_label=True,
+ ... hypothesis_template="This text is {} towards the weather"
+ ... )
+ [
+ ZeroShotClassificationOutputElement(label='negative', score=0.9231801629066467),
+ ZeroShotClassificationOutputElement(label='pessimistic', score=0.8760990500450134),
+ ZeroShotClassificationOutputElement(label='optimistic', score=0.0008674879791215062),
+ ZeroShotClassificationOutputElement(label='positive', score=0.0005250611575320363)
+ ]
+ ```
+ """
+ # handle deprecation
+ if labels is not None:
+ if candidate_labels is not None:
+ raise ValueError(
+ "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+ )
+ candidate_labels = labels
+ elif candidate_labels is None:
+ raise ValueError("Must specify `candidate_labels`")
+
+ provider_helper = get_provider_helper(self.provider, task="zero-shot-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=text,
+ parameters={
+ "candidate_labels": candidate_labels,
+ "multi_label": multi_label,
+ "hypothesis_template": hypothesis_template,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ output = _bytes_to_dict(response)
+ return [
+ ZeroShotClassificationOutputElement.parse_obj_as_instance({"label": label, "score": score})
+ for label, score in zip(output["labels"], output["scores"])
+ ]
+
+ @_deprecate_arguments(
+ version="0.30.0",
+ deprecated_args=["labels"],
+ custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+ )
+ async def zero_shot_image_classification(
+ self,
+ image: ContentT,
+ # temporarily keeping it optional for backward compatibility.
+ candidate_labels: List[str] = None, # type: ignore
+ *,
+ model: Optional[str] = None,
+ hypothesis_template: Optional[str] = None,
+ # deprecated argument
+ labels: List[str] = None, # type: ignore
+ ) -> List[ZeroShotImageClassificationOutputElement]:
+ """
+ Provide input image and text labels to predict text labels for the image.
+
+ Args:
+ image (`Union[str, Path, bytes, BinaryIO]`):
+ The input image to classify. It can be raw bytes, an image file, or a URL to an online image.
+ candidate_labels (`List[str]`):
+ The candidate labels for this image
+ labels (`List[str]`, *optional*):
+ (deprecated) List of possible labels (strings). There must be at least 2 labels.
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
+ hypothesis_template (`str`, *optional*):
+ The sentence used in conjunction with `candidate_labels` to attempt the image classification by
+ replacing the placeholder with the candidate labels.
+
+ Returns:
+ `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+
+ Raises:
+ [`InferenceTimeoutError`]:
+ If the model is unavailable or the request times out.
+ `aiohttp.ClientResponseError`:
+ If the request fails with an HTTP error status code other than HTTP 503.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+
+ >>> await client.zero_shot_image_classification(
+ ... "https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg",
+ ... labels=["dog", "cat", "horse"],
+ ... )
+ [ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...]
+ ```
+ """
+ # handle deprecation
+ if labels is not None:
+ if candidate_labels is not None:
+ raise ValueError(
+ "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+ )
+ candidate_labels = labels
+ elif candidate_labels is None:
+ raise ValueError("Must specify `candidate_labels`")
+ # Raise ValueError if input is less than 2 labels
+ if len(candidate_labels) < 2:
+ raise ValueError("You must specify at least 2 classes to compare.")
+
+ provider_helper = get_provider_helper(self.provider, task="zero-shot-image-classification")
+ request_parameters = provider_helper.prepare_request(
+ inputs=image,
+ parameters={
+ "candidate_labels": candidate_labels,
+ "hypothesis_template": hypothesis_template,
+ },
+ headers=self.headers,
+ model=model or self.model,
+ api_key=self.token,
+ )
+ response = await self._inner_post(request_parameters)
+ return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
+
+ @_deprecate_method(
+ version="0.33.0",
+ message=(
+ "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+ " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
+ ),
+ )
+ async def list_deployed_models(
+ self, frameworks: Union[None, str, Literal["all"], List[str]] = None
+ ) -> Dict[str, List[str]]:
+ """
+ List models deployed on the HF Serverless Inference API service.
+
+ This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
+ are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
+ specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
+ in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
+ frameworks are checked, the more time it will take.
+
+ <Tip warning={true}>
+
+ This endpoint method does not return a live list of all models available for the HF Inference API service.
+ It searches over a cached list of models that were recently available and the list may not be up to date.
+ If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
+
+ </Tip>
+
+ <Tip>
+
+ This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
+ check its availability, you can directly use [`~InferenceClient.get_model_status`].
+
+ </Tip>
+
+ Args:
+ frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
+ The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
+ "all", all available frameworks will be tested. It is also possible to provide a single framework or a
+ custom set of frameworks to check.
+
+ Returns:
+ `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+
+ # Discover zero-shot-classification models currently deployed
+ >>> models = await client.list_deployed_models()
+ >>> models["zero-shot-classification"]
+ ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
+
+ # List from only 1 framework
+ >>> await client.list_deployed_models("text-generation-inference")
+ {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
+ ```
+ """
+ if self.provider != "hf-inference":
+ raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
+
+ # Resolve which frameworks to check
+ if frameworks is None:
+ frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
+ elif frameworks == "all":
+ frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
+ elif isinstance(frameworks, str):
+ frameworks = [frameworks]
+ frameworks = list(set(frameworks))
+
+ # Fetch them iteratively
+ models_by_task: Dict[str, List[str]] = {}
+
+ def _unpack_response(framework: str, items: List[Dict]) -> None:
+ for model in items:
+ if framework == "sentence-transformers":
+ # Models running with the `sentence-transformers` framework can work with both tasks even if not
+ # branded as such in the API response
+ models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
+ models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
+ else:
+ models_by_task.setdefault(model["task"], []).append(model["model_id"])
+
+ for framework in frameworks:
+ response = get_session().get(
+ f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
+ )
+ hf_raise_for_status(response)
+ _unpack_response(framework, response.json())
+
+ # Sort alphabetically for discoverability and return
+ for task, models in models_by_task.items():
+ models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
+ return models_by_task
+
+ def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
+ aiohttp = _import_aiohttp()
+ client_headers = self.headers.copy()
+ if headers is not None:
+ client_headers.update(headers)
+
+ # Return a new aiohttp ClientSession with correct settings.
+ session = aiohttp.ClientSession(
+ headers=client_headers,
+ cookies=self.cookies,
+ timeout=aiohttp.ClientTimeout(self.timeout),
+ trust_env=self.trust_env,
+ )
+
+ # Keep track of sessions to close them later
+ self._sessions[session] = set()
+
+ # Override the `._request` method to register responses to be closed
+ session._wrapped_request = session._request
+
+ async def _request(method, url, **kwargs):
+ response = await session._wrapped_request(method, url, **kwargs)
+ self._sessions[session].add(response)
+ return response
+
+ session._request = _request
+
+ # Override the 'close' method to
+ # 1. close ongoing responses
+ # 2. deregister the session when closed
+ session._close = session.close
+
+ async def close_session():
+ for response in self._sessions[session]:
+ response.close()
+ await session._close()
+ self._sessions.pop(session, None)
+
+ session.close = close_session
+ return session
+
+ async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ """
+ Get information about the deployed endpoint.
+
+ This endpoint is only available on endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
+ Endpoints powered by `transformers` return an empty payload.
+
+ Args:
+ model (`str`, *optional*):
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+
+ Returns:
+ `Dict[str, Any]`: Information about the endpoint.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
+ >>> await client.get_endpoint_info()
+ {
+ 'model_id': 'meta-llama/Meta-Llama-3-70B-Instruct',
+ 'model_sha': None,
+ 'model_dtype': 'torch.float16',
+ 'model_device_type': 'cuda',
+ 'model_pipeline_tag': None,
+ 'max_concurrent_requests': 128,
+ 'max_best_of': 2,
+ 'max_stop_sequences': 4,
+ 'max_input_length': 8191,
+ 'max_total_tokens': 8192,
+ 'waiting_served_ratio': 0.3,
+ 'max_batch_total_tokens': 1259392,
+ 'max_waiting_tokens': 20,
+ 'max_batch_size': None,
+ 'validation_workers': 32,
+ 'max_client_batch_size': 4,
+ 'version': '2.0.2',
+ 'sha': 'dccab72549635c7eb5ddb17f43f0b7cdff07c214',
+ 'docker_label': 'sha-dccab72'
+ }
+ ```
+ """
+ if self.provider != "hf-inference":
+ raise ValueError(f"Getting endpoint info is not supported on '{self.provider}'.")
+
+ model = model or self.model
+ if model is None:
+ raise ValueError("Model id not provided.")
+ if model.startswith(("http://", "https://")):
+ url = model.rstrip("/") + "/info"
+ else:
+ url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"
+
+ async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
+ response = await client.get(url, proxy=self.proxies)
+ response.raise_for_status()
+ return await response.json()
+
+ async def health_check(self, model: Optional[str] = None) -> bool:
+ """
+ Check the health of the deployed endpoint.
+
+ Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
+ For Inference API, please use [`InferenceClient.get_model_status`] instead.
+
+ Args:
+ model (`str`, *optional*):
+ URL of the Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+
+ Returns:
+ `bool`: True if everything is working fine.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient("https://jzgu0buei5.us-east-1.aws.endpoints.huggingface.cloud")
+ >>> await client.health_check()
+ True
+ ```
+ """
+ if self.provider != "hf-inference":
+ raise ValueError(f"Health check is not supported on '{self.provider}'.")
+
+ model = model or self.model
+ if model is None:
+ raise ValueError("Model id not provided.")
+ if not model.startswith(("http://", "https://")):
+ raise ValueError(
+ "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
+ )
+ url = model.rstrip("/") + "/health"
+
+ async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
+ response = await client.get(url, proxy=self.proxies)
+ return response.status == 200
+
+ @_deprecate_method(
+ version="0.33.0",
+ message=(
+ "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+ " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
+ ),
+ )
+ async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
+ """
+ Get the status of a model hosted on the HF Inference API.
+
+ <Tip>
+
+ This endpoint is mostly useful when you already know which model you want to use and want to check its
+ availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
+
+ </Tip>
+
+ Args:
+ model (`str`, *optional*):
+ Identifier of the model for which the status will be checked. If model is not provided,
+ the model associated with this instance of [`InferenceClient`] will be used. Only the HF Inference API service can be checked, so the
+ identifier cannot be a URL.
+
+
+ Returns:
+ [`ModelStatus`]: An instance of the ModelStatus dataclass, containing information
+ about the state of the model: load, state, compute type and framework.
+
+ Example:
+ ```py
+ # Must be run in an async context
+ >>> from huggingface_hub import AsyncInferenceClient
+ >>> client = AsyncInferenceClient()
+ >>> await client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
+ ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
+ ```
+ """
+ if self.provider != "hf-inference":
+ raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
+
+ model = model or self.model
+ if model is None:
+ raise ValueError("Model id not provided.")
+ if model.startswith("https://"):
+ raise NotImplementedError("Model status is only available for Inference API endpoints.")
+ url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
+
+ async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
+ response = await client.get(url, proxy=self.proxies)
+ response.raise_for_status()
+ response_data = await response.json()
+
+ if "error" in response_data:
+ raise ValueError(response_data["error"])
+
+ return ModelStatus(
+ loaded=response_data["loaded"],
+ state=response_data["state"],
+ compute_type=response_data["compute_type"],
+ framework=response_data["framework"],
+ )
+
+ @property
+ def chat(self) -> "ProxyClientChat":
+ return ProxyClientChat(self)
+
+
+class _ProxyClient:
+ """Proxy class to be able to call `client.chat.completion.create(...)` as OpenAI client."""
+
+ def __init__(self, client: AsyncInferenceClient):
+ self._client = client
+
+
+class ProxyClientChat(_ProxyClient):
+ """Proxy class to be able to call `client.chat.completion.create(...)` as OpenAI client."""
+
+ @property
+ def completions(self) -> "ProxyClientChatCompletions":
+ return ProxyClientChatCompletions(self._client)
+
+
+class ProxyClientChatCompletions(_ProxyClient):
+ """Proxy class to be able to call `client.chat.completion.create(...)` as OpenAI client."""
+
+ @property
+ def create(self):
+ return self._client.chat_completion
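
The three proxy classes above are what make the OpenAI-style syntax `client.chat.completions.create(...)` resolve to [`AsyncInferenceClient.chat_completion`]. A minimal illustrative sketch (the model ID and token below are placeholders, not part of this file):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", api_key="hf_...")
    # `client.chat.completions.create` forwards to `client.chat_completion` through the proxies
    output = await client.chat.completions.create(
        messages=[{"role": "user", "content": "Say hello in one word."}],
        max_tokens=8,
    )
    print(output.choices[0].message.content)


asyncio.run(main())
```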
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__init__.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__init__.py
new file mode 100644
index 00000000..edbc967c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/__init__.py
@@ -0,0 +1,187 @@
+# This file is auto-generated by `utils/generate_inference_types.py`.
+# Do not modify it manually.
+#
+# ruff: noqa: F401
+
+from .audio_classification import (
+ AudioClassificationInput,
+ AudioClassificationOutputElement,
+ AudioClassificationOutputTransform,
+ AudioClassificationParameters,
+)
+from .audio_to_audio import AudioToAudioInput, AudioToAudioOutputElement
+from .automatic_speech_recognition import (
+ AutomaticSpeechRecognitionEarlyStoppingEnum,
+ AutomaticSpeechRecognitionGenerationParameters,
+ AutomaticSpeechRecognitionInput,
+ AutomaticSpeechRecognitionOutput,
+ AutomaticSpeechRecognitionOutputChunk,
+ AutomaticSpeechRecognitionParameters,
+)
+from .base import BaseInferenceType
+from .chat_completion import (
+ ChatCompletionInput,
+ ChatCompletionInputFunctionDefinition,
+ ChatCompletionInputFunctionName,
+ ChatCompletionInputGrammarType,
+ ChatCompletionInputGrammarTypeType,
+ ChatCompletionInputMessage,
+ ChatCompletionInputMessageChunk,
+ ChatCompletionInputMessageChunkType,
+ ChatCompletionInputStreamOptions,
+ ChatCompletionInputTool,
+ ChatCompletionInputToolChoiceClass,
+ ChatCompletionInputToolChoiceEnum,
+ ChatCompletionInputURL,
+ ChatCompletionOutput,
+ ChatCompletionOutputComplete,
+ ChatCompletionOutputFunctionDefinition,
+ ChatCompletionOutputLogprob,
+ ChatCompletionOutputLogprobs,
+ ChatCompletionOutputMessage,
+ ChatCompletionOutputToolCall,
+ ChatCompletionOutputTopLogprob,
+ ChatCompletionOutputUsage,
+ ChatCompletionStreamOutput,
+ ChatCompletionStreamOutputChoice,
+ ChatCompletionStreamOutputDelta,
+ ChatCompletionStreamOutputDeltaToolCall,
+ ChatCompletionStreamOutputFunction,
+ ChatCompletionStreamOutputLogprob,
+ ChatCompletionStreamOutputLogprobs,
+ ChatCompletionStreamOutputTopLogprob,
+ ChatCompletionStreamOutputUsage,
+)
+from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
+from .document_question_answering import (
+ DocumentQuestionAnsweringInput,
+ DocumentQuestionAnsweringInputData,
+ DocumentQuestionAnsweringOutputElement,
+ DocumentQuestionAnsweringParameters,
+)
+from .feature_extraction import FeatureExtractionInput, FeatureExtractionInputTruncationDirection
+from .fill_mask import FillMaskInput, FillMaskOutputElement, FillMaskParameters
+from .image_classification import (
+ ImageClassificationInput,
+ ImageClassificationOutputElement,
+ ImageClassificationOutputTransform,
+ ImageClassificationParameters,
+)
+from .image_segmentation import (
+ ImageSegmentationInput,
+ ImageSegmentationOutputElement,
+ ImageSegmentationParameters,
+ ImageSegmentationSubtask,
+)
+from .image_to_image import ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToImageTargetSize
+from .image_to_text import (
+ ImageToTextEarlyStoppingEnum,
+ ImageToTextGenerationParameters,
+ ImageToTextInput,
+ ImageToTextOutput,
+ ImageToTextParameters,
+)
+from .object_detection import (
+ ObjectDetectionBoundingBox,
+ ObjectDetectionInput,
+ ObjectDetectionOutputElement,
+ ObjectDetectionParameters,
+)
+from .question_answering import (
+ QuestionAnsweringInput,
+ QuestionAnsweringInputData,
+ QuestionAnsweringOutputElement,
+ QuestionAnsweringParameters,
+)
+from .sentence_similarity import SentenceSimilarityInput, SentenceSimilarityInputData
+from .summarization import (
+ SummarizationInput,
+ SummarizationOutput,
+ SummarizationParameters,
+ SummarizationTruncationStrategy,
+)
+from .table_question_answering import (
+ Padding,
+ TableQuestionAnsweringInput,
+ TableQuestionAnsweringInputData,
+ TableQuestionAnsweringOutputElement,
+ TableQuestionAnsweringParameters,
+)
+from .text2text_generation import (
+ Text2TextGenerationInput,
+ Text2TextGenerationOutput,
+ Text2TextGenerationParameters,
+ Text2TextGenerationTruncationStrategy,
+)
+from .text_classification import (
+ TextClassificationInput,
+ TextClassificationOutputElement,
+ TextClassificationOutputTransform,
+ TextClassificationParameters,
+)
+from .text_generation import (
+ TextGenerationInput,
+ TextGenerationInputGenerateParameters,
+ TextGenerationInputGrammarType,
+ TextGenerationOutput,
+ TextGenerationOutputBestOfSequence,
+ TextGenerationOutputDetails,
+ TextGenerationOutputFinishReason,
+ TextGenerationOutputPrefillToken,
+ TextGenerationOutputToken,
+ TextGenerationStreamOutput,
+ TextGenerationStreamOutputStreamDetails,
+ TextGenerationStreamOutputToken,
+ TypeEnum,
+)
+from .text_to_audio import (
+ TextToAudioEarlyStoppingEnum,
+ TextToAudioGenerationParameters,
+ TextToAudioInput,
+ TextToAudioOutput,
+ TextToAudioParameters,
+)
+from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters
+from .text_to_speech import (
+ TextToSpeechEarlyStoppingEnum,
+ TextToSpeechGenerationParameters,
+ TextToSpeechInput,
+ TextToSpeechOutput,
+ TextToSpeechParameters,
+)
+from .text_to_video import TextToVideoInput, TextToVideoOutput, TextToVideoParameters
+from .token_classification import (
+ TokenClassificationAggregationStrategy,
+ TokenClassificationInput,
+ TokenClassificationOutputElement,
+ TokenClassificationParameters,
+)
+from .translation import TranslationInput, TranslationOutput, TranslationParameters, TranslationTruncationStrategy
+from .video_classification import (
+ VideoClassificationInput,
+ VideoClassificationOutputElement,
+ VideoClassificationOutputTransform,
+ VideoClassificationParameters,
+)
+from .visual_question_answering import (
+ VisualQuestionAnsweringInput,
+ VisualQuestionAnsweringInputData,
+ VisualQuestionAnsweringOutputElement,
+ VisualQuestionAnsweringParameters,
+)
+from .zero_shot_classification import (
+ ZeroShotClassificationInput,
+ ZeroShotClassificationOutputElement,
+ ZeroShotClassificationParameters,
+)
+from .zero_shot_image_classification import (
+ ZeroShotImageClassificationInput,
+ ZeroShotImageClassificationOutputElement,
+ ZeroShotImageClassificationParameters,
+)
+from .zero_shot_object_detection import (
+ ZeroShotObjectDetectionBoundingBox,
+ ZeroShotObjectDetectionInput,
+ ZeroShotObjectDetectionOutputElement,
+ ZeroShotObjectDetectionParameters,
+)
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_classification.py
new file mode 100644
index 00000000..05305578
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_classification.py
@@ -0,0 +1,43 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+AudioClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
+
+
+@dataclass_with_extra
+class AudioClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Audio Classification"""
+
+ function_to_apply: Optional["AudioClassificationOutputTransform"] = None
+ """The function to apply to the model outputs in order to retrieve the scores."""
+ top_k: Optional[int] = None
+ """When specified, limits the output to the top K most probable classes."""
+
+
+@dataclass_with_extra
+class AudioClassificationInput(BaseInferenceType):
+ """Inputs for Audio Classification inference"""
+
+ inputs: str
+ """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the audio data as a raw bytes payload.
+ """
+ parameters: Optional[AudioClassificationParameters] = None
+ """Additional inference parameters for Audio Classification"""
+
+
+@dataclass_with_extra
+class AudioClassificationOutputElement(BaseInferenceType):
+ """Outputs for Audio Classification inference"""
+
+ label: str
+ """The predicted class label."""
+ score: float
+ """The corresponding probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_to_audio.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_to_audio.py
new file mode 100644
index 00000000..43f376b5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/audio_to_audio.py
@@ -0,0 +1,30 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class AudioToAudioInput(BaseInferenceType):
+ """Inputs for Audio to Audio inference"""
+
+ inputs: Any
+ """The input audio data"""
+
+
+@dataclass_with_extra
+class AudioToAudioOutputElement(BaseInferenceType):
+ """Outputs of inference for the Audio To Audio task
+ A generated audio file with its label.
+ """
+
+ blob: Any
+ """The generated audio file."""
+ content_type: str
+ """The content type of audio file."""
+ label: str
+ """The label of the audio file."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
new file mode 100644
index 00000000..1e885b6c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -0,0 +1,114 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import List, Literal, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+AutomaticSpeechRecognitionEarlyStoppingEnum = Literal["never"]
+
+
+@dataclass_with_extra
+class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
+ """Parametrization of the text generation process"""
+
+ do_sample: Optional[bool] = None
+ """Whether to use sampling instead of greedy decoding when generating new tokens."""
+ early_stopping: Optional[Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"]] = None
+ """Controls the stopping condition for beam-based methods."""
+ epsilon_cutoff: Optional[float] = None
+ """If set to float strictly between 0 and 1, only tokens with a conditional probability
+ greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+ 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+ Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ """
+ eta_cutoff: Optional[float] = None
+ """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+ float strictly between 0 and 1, a token is only considered if it is greater than either
+ eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+ term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+ the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+ See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+ for more details.
+ """
+ max_length: Optional[int] = None
+ """The maximum length (in tokens) of the generated text, including the input."""
+ max_new_tokens: Optional[int] = None
+ """The maximum number of tokens to generate. Takes precedence over max_length."""
+ min_length: Optional[int] = None
+ """The minimum length (in tokens) of the generated text, including the input."""
+ min_new_tokens: Optional[int] = None
+ """The minimum number of tokens to generate. Takes precedence over min_length."""
+ num_beam_groups: Optional[int] = None
+ """Number of groups to divide num_beams into in order to ensure diversity among different
+ groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+ """
+ num_beams: Optional[int] = None
+ """Number of beams to use for beam search."""
+ penalty_alpha: Optional[float] = None
+ """The value balances the model confidence and the degeneration penalty in contrastive
+ search decoding.
+ """
+ temperature: Optional[float] = None
+ """The value used to modulate the next token probabilities."""
+ top_k: Optional[int] = None
+ """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+ top_p: Optional[float] = None
+ """If set to float < 1, only the smallest set of most probable tokens with probabilities
+ that add up to top_p or higher are kept for generation.
+ """
+ typical_p: Optional[float] = None
+ """Local typicality measures how similar the conditional probability of predicting a target
+ token next is to the expected conditional probability of predicting a random token next,
+ given the partial text already generated. If set to float < 1, the smallest set of the
+ most locally typical tokens with probabilities that add up to typical_p or higher are
+ kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+ """
+ use_cache: Optional[bool] = None
+ """Whether the model should use the past last key/values attentions to speed up decoding"""
+
+
+@dataclass_with_extra
+class AutomaticSpeechRecognitionParameters(BaseInferenceType):
+ """Additional inference parameters for Automatic Speech Recognition"""
+
+ return_timestamps: Optional[bool] = None
+ """Whether to output corresponding timestamps with the generated text"""
+ # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+ generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+ """Parametrization of the text generation process"""
+
+
+@dataclass_with_extra
+class AutomaticSpeechRecognitionInput(BaseInferenceType):
+ """Inputs for Automatic Speech Recognition inference"""
+
+ inputs: str
+ """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the audio data as a raw bytes payload.
+ """
+ parameters: Optional[AutomaticSpeechRecognitionParameters] = None
+ """Additional inference parameters for Automatic Speech Recognition"""
+
+
+@dataclass_with_extra
+class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
+ text: str
+ """A chunk of text identified by the model"""
+ timestamp: List[float]
+ """The start and end timestamps corresponding with the text"""
+
+
+@dataclass_with_extra
+class AutomaticSpeechRecognitionOutput(BaseInferenceType):
+ """Outputs of inference for the Automatic Speech Recognition task"""
+
+ text: str
+ """The recognized text."""
+ chunks: Optional[List[AutomaticSpeechRecognitionOutputChunk]] = None
+ """When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
+ the model.
+ """
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/base.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/base.py
new file mode 100644
index 00000000..1f0c4687
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/base.py
@@ -0,0 +1,161 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a base class for all inference types."""
+
+import inspect
+import json
+from dataclasses import asdict, dataclass
+from typing import Any, Dict, List, Type, TypeVar, Union, get_args
+
+
+T = TypeVar("T", bound="BaseInferenceType")
+
+
+def _repr_with_extra(self):
+ fields = list(self.__dataclass_fields__.keys())
+ other_fields = list(k for k in self.__dict__ if k not in fields)
+ return f"{self.__class__.__name__}({', '.join(f'{k}={self.__dict__[k]!r}' for k in fields + other_fields)})"
+
+
+def dataclass_with_extra(cls: Type[T]) -> Type[T]:
+ """Decorator to add a custom __repr__ method to a dataclass, showing all fields, including extra ones.
+
+ This decorator only works with dataclasses that inherit from `BaseInferenceType`.
+ """
+ cls = dataclass(cls)
+ cls.__repr__ = _repr_with_extra # type: ignore[method-assign]
+ return cls
+
+
+@dataclass
+class BaseInferenceType(dict):
+ """Base class for all inference types.
+
+ The object is both a dataclass and a dict for backward compatibility, but the plan is to remove the dict part in the future.
+
+ Handle parsing from dict, list and json strings in a permissive way to ensure future-compatibility (e.g. all fields
+ are made optional, and non-expected fields are added as dict attributes).
+ """
+
+ @classmethod
+ def parse_obj_as_list(cls: Type[T], data: Union[bytes, str, List, Dict]) -> List[T]:
+ """Alias to parse server response and return a single instance.
+
+ See `parse_obj` for more details.
+ """
+ output = cls.parse_obj(data)
+ if not isinstance(output, list):
+ raise ValueError(f"Invalid input data for {cls}. Expected a list, but got {type(output)}.")
+ return output
+
+ @classmethod
+ def parse_obj_as_instance(cls: Type[T], data: Union[bytes, str, List, Dict]) -> T:
+ """Alias to parse server response and return a single instance.
+
+ See `parse_obj` for more details.
+ """
+ output = cls.parse_obj(data)
+ if isinstance(output, list):
+ raise ValueError(f"Invalid input data for {cls}. Expected a single instance, but got a list.")
+ return output
+
+ @classmethod
+ def parse_obj(cls: Type[T], data: Union[bytes, str, List, Dict]) -> Union[List[T], T]:
+ """Parse server response as a dataclass or list of dataclasses.
+
+ To enable future-compatibility, we want to handle cases where the server returns more fields than expected.
+ In such cases, we don't want to raise an error but still create the dataclass object. Remaining fields are
+ added as dict attributes.
+ """
+ # Parse server response (from bytes)
+ if isinstance(data, bytes):
+ data = data.decode()
+ if isinstance(data, str):
+ data = json.loads(data)
+
+ # If a list, parse each item individually
+ if isinstance(data, List):
+ return [cls.parse_obj(d) for d in data] # type: ignore [misc]
+
+ # At this point, we expect a dict
+ if not isinstance(data, dict):
+ raise ValueError(f"Invalid data type: {type(data)}")
+
+ init_values = {}
+ other_values = {}
+ for key, value in data.items():
+ key = normalize_key(key)
+ if key in cls.__dataclass_fields__ and cls.__dataclass_fields__[key].init:
+ if isinstance(value, dict) or isinstance(value, list):
+ field_type = cls.__dataclass_fields__[key].type
+
+ # if `field_type` is a `BaseInferenceType`, parse it
+ if inspect.isclass(field_type) and issubclass(field_type, BaseInferenceType):
+ value = field_type.parse_obj(value)
+
+ # otherwise, recursively parse nested dataclasses (if possible)
+ # `get_args` handles Union and Optional for us
+ else:
+ expected_types = get_args(field_type)
+ for expected_type in expected_types:
+ if getattr(expected_type, "_name", None) == "List":
+ expected_type = get_args(expected_type)[
+ 0
+ ] # assume same type for all items in the list
+ if inspect.isclass(expected_type) and issubclass(expected_type, BaseInferenceType):
+ value = expected_type.parse_obj(value)
+ break
+ init_values[key] = value
+ else:
+ other_values[key] = value
+
+ # Make all missing fields default to None
+ # => ensure that dataclass initialization will never fail even if the server does not return all fields.
+ for key in cls.__dataclass_fields__:
+ if key not in init_values:
+ init_values[key] = None
+
+ # Initialize dataclass with expected values
+ item = cls(**init_values)
+
+ # Add remaining fields as dict attributes
+ item.update(other_values)
+
+ # Add remaining fields as extra dataclass fields.
+ # They won't be part of the dataclass fields but will be accessible as attributes.
+ # Use @dataclass_with_extra to show them in __repr__.
+ item.__dict__.update(other_values)
+ return item
+
+ def __post_init__(self):
+ self.update(asdict(self))
+
+ def __setitem__(self, __key: Any, __value: Any) -> None:
+ # Hacky way to keep dataclass values in sync when dict is updated
+ super().__setitem__(__key, __value)
+ if __key in self.__dataclass_fields__ and getattr(self, __key, None) != __value:
+ self.__setattr__(__key, __value)
+ return
+
+ def __setattr__(self, __name: str, __value: Any) -> None:
+ # Hacky way to keep dict values in sync when dataclass is updated
+ super().__setattr__(__name, __value)
+ if self.get(__name) != __value:
+ self[__name] = __value
+ return
+
+
+def normalize_key(key: str) -> str:
+ # e.g "content-type" -> "content_type", "Accept" -> "accept"
+ return key.replace("-", "_").replace(" ", "_").lower()
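
To make the permissive-parsing contract above concrete, here is an illustrative sketch with a made-up `DemoOutput` dataclass: unknown fields are kept as extra attributes (and appear in the repr thanks to `dataclass_with_extra`), while missing declared fields default to `None`:

```py
from typing import Optional

from huggingface_hub.inference._generated.types.base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class DemoOutput(BaseInferenceType):
    label: Optional[str] = None
    score: Optional[float] = None


item = DemoOutput.parse_obj_as_instance(b'{"label": "positive", "latency_ms": 12}')
print(item.label)       # 'positive'
print(item.score)       # None (field missing from the payload, defaulted)
print(item.latency_ms)  # 12 (unexpected field, kept as an extra attribute)
print(item)             # DemoOutput(label='positive', score=None, latency_ms=12)
```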
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/chat_completion.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/chat_completion.py
new file mode 100644
index 00000000..a817d3f8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -0,0 +1,301 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, List, Literal, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ChatCompletionInputURL(BaseInferenceType):
+ url: str
+
+
+ChatCompletionInputMessageChunkType = Literal["text", "image_url"]
+
+
+@dataclass_with_extra
+class ChatCompletionInputMessageChunk(BaseInferenceType):
+ type: "ChatCompletionInputMessageChunkType"
+ image_url: Optional[ChatCompletionInputURL] = None
+ text: Optional[str] = None
+
+
+@dataclass_with_extra
+class ChatCompletionInputMessage(BaseInferenceType):
+ content: Union[List[ChatCompletionInputMessageChunk], str]
+ role: str
+ name: Optional[str] = None
+
+
+ChatCompletionInputGrammarTypeType = Literal["json", "regex"]
+
+
+@dataclass_with_extra
+class ChatCompletionInputGrammarType(BaseInferenceType):
+ type: "ChatCompletionInputGrammarTypeType"
+ value: Any
+ """A string that represents a [JSON Schema](https://json-schema.org/).
+ JSON Schema is a declarative language that allows to annotate JSON documents
+ with types and descriptions.
+ """
+
+
+@dataclass_with_extra
+class ChatCompletionInputStreamOptions(BaseInferenceType):
+ include_usage: bool
+ """If set, an additional chunk will be streamed before the data: [DONE] message. The usage
+ field on this chunk shows the token usage statistics for the entire request, and the
+ choices field will always be an empty array. All other chunks will also include a usage
+ field, but with a null value.
+ """
+
+
+@dataclass_with_extra
+class ChatCompletionInputFunctionName(BaseInferenceType):
+ name: str
+
+
+@dataclass_with_extra
+class ChatCompletionInputToolChoiceClass(BaseInferenceType):
+ function: ChatCompletionInputFunctionName
+
+
+ChatCompletionInputToolChoiceEnum = Literal["auto", "none", "required"]
+
+
+@dataclass_with_extra
+class ChatCompletionInputFunctionDefinition(BaseInferenceType):
+ arguments: Any
+ name: str
+ description: Optional[str] = None
+
+
+@dataclass_with_extra
+class ChatCompletionInputTool(BaseInferenceType):
+ function: ChatCompletionInputFunctionDefinition
+ type: str
+
+
+@dataclass_with_extra
+class ChatCompletionInput(BaseInferenceType):
+ """Chat Completion Input.
+ Auto-generated from TGI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+ """
+
+ messages: List[ChatCompletionInputMessage]
+ """A list of messages comprising the conversation so far."""
+ frequency_penalty: Optional[float] = None
+ """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+ frequency in the text so far,
+ decreasing the model's likelihood to repeat the same line verbatim.
+ """
+ logit_bias: Optional[List[float]] = None
+ """UNUSED
+ Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
+ object that maps tokens
+ (specified by their token ID in the tokenizer) to an associated bias value from -100 to
+ 100. Mathematically,
+ the bias is added to the logits generated by the model prior to sampling. The exact
+ effect will vary per model,
+ but values between -1 and 1 should decrease or increase likelihood of selection; values
+ like -100 or 100 should
+ result in a ban or exclusive selection of the relevant token.
+ """
+ logprobs: Optional[bool] = None
+ """Whether to return log probabilities of the output tokens or not. If true, returns the log
+ probabilities of each
+ output token returned in the content of message.
+ """
+ max_tokens: Optional[int] = None
+ """The maximum number of tokens that can be generated in the chat completion."""
+ model: Optional[str] = None
+ """[UNUSED] ID of the model to use. See the model endpoint compatibility table for details
+ on which models work with the Chat API.
+ """
+ n: Optional[int] = None
+ """UNUSED
+ How many chat completion choices to generate for each input message. Note that you will
+ be charged based on the
+ number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
+ """
+ presence_penalty: Optional[float] = None
+ """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
+ appear in the text so far,
+ increasing the model's likelihood to talk about new topics.
+ """
+ response_format: Optional[ChatCompletionInputGrammarType] = None
+ seed: Optional[int] = None
+ stop: Optional[List[str]] = None
+ """Up to 4 sequences where the API will stop generating further tokens."""
+ stream: Optional[bool] = None
+ stream_options: Optional[ChatCompletionInputStreamOptions] = None
+ temperature: Optional[float] = None
+ """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
+ output more random, while
+ lower values like 0.2 will make it more focused and deterministic.
+ We generally recommend altering this or `top_p` but not both.
+ """
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None
+ tool_prompt: Optional[str] = None
+ """A prompt to be appended before the tools"""
+ tools: Optional[List[ChatCompletionInputTool]] = None
+ """A list of tools the model may call. Currently, only functions are supported as a tool.
+ Use this to provide a list of
+ functions the model may generate JSON inputs for.
+ """
+ top_logprobs: Optional[int] = None
+ """An integer between 0 and 5 specifying the number of most likely tokens to return at each
+ token position, each with
+ an associated log probability. logprobs must be set to true if this parameter is used.
+ """
+ top_p: Optional[float] = None
+ """An alternative to sampling with temperature, called nucleus sampling, where the model
+ considers the results of the
+ tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+ probability mass are considered.
+ """
+
+
+@dataclass_with_extra
+class ChatCompletionOutputTopLogprob(BaseInferenceType):
+ logprob: float
+ token: str
+
+
+@dataclass_with_extra
+class ChatCompletionOutputLogprob(BaseInferenceType):
+ logprob: float
+ token: str
+ top_logprobs: List[ChatCompletionOutputTopLogprob]
+
+
+@dataclass_with_extra
+class ChatCompletionOutputLogprobs(BaseInferenceType):
+ content: List[ChatCompletionOutputLogprob]
+
+
+@dataclass_with_extra
+class ChatCompletionOutputFunctionDefinition(BaseInferenceType):
+ arguments: Any
+ name: str
+ description: Optional[str] = None
+
+
+@dataclass_with_extra
+class ChatCompletionOutputToolCall(BaseInferenceType):
+ function: ChatCompletionOutputFunctionDefinition
+ id: str
+ type: str
+
+
+@dataclass_with_extra
+class ChatCompletionOutputMessage(BaseInferenceType):
+ role: str
+ content: Optional[str] = None
+ tool_calls: Optional[List[ChatCompletionOutputToolCall]] = None
+
+
+@dataclass_with_extra
+class ChatCompletionOutputComplete(BaseInferenceType):
+ finish_reason: str
+ index: int
+ message: ChatCompletionOutputMessage
+ logprobs: Optional[ChatCompletionOutputLogprobs] = None
+
+
+@dataclass_with_extra
+class ChatCompletionOutputUsage(BaseInferenceType):
+ completion_tokens: int
+ prompt_tokens: int
+ total_tokens: int
+
+
+@dataclass_with_extra
+class ChatCompletionOutput(BaseInferenceType):
+ """Chat Completion Output.
+ Auto-generated from TGI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+ """
+
+ choices: List[ChatCompletionOutputComplete]
+ created: int
+ id: str
+ model: str
+ system_fingerprint: str
+ usage: ChatCompletionOutputUsage
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputFunction(BaseInferenceType):
+ arguments: str
+ name: Optional[str] = None
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputDeltaToolCall(BaseInferenceType):
+ function: ChatCompletionStreamOutputFunction
+ id: str
+ index: int
+ type: str
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputDelta(BaseInferenceType):
+ role: str
+ content: Optional[str] = None
+ tool_calls: Optional[ChatCompletionStreamOutputDeltaToolCall] = None
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputTopLogprob(BaseInferenceType):
+ logprob: float
+ token: str
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputLogprob(BaseInferenceType):
+ logprob: float
+ token: str
+ top_logprobs: List[ChatCompletionStreamOutputTopLogprob]
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputLogprobs(BaseInferenceType):
+ content: List[ChatCompletionStreamOutputLogprob]
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputChoice(BaseInferenceType):
+ delta: ChatCompletionStreamOutputDelta
+ index: int
+ finish_reason: Optional[str] = None
+ logprobs: Optional[ChatCompletionStreamOutputLogprobs] = None
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutputUsage(BaseInferenceType):
+ completion_tokens: int
+ prompt_tokens: int
+ total_tokens: int
+
+
+@dataclass_with_extra
+class ChatCompletionStreamOutput(BaseInferenceType):
+ """Chat Completion Stream Output.
+ Auto-generated from TGI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+ """
+
+ choices: List[ChatCompletionStreamOutputChoice]
+ created: int
+ id: str
+ model: str
+ system_fingerprint: str
+ usage: Optional[ChatCompletionStreamOutputUsage] = None
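
A minimal usage sketch for the chat completion input types above, assuming they behave as ordinary keyword-constructible dataclasses; the message text and image URL are purely illustrative:

from huggingface_hub.inference._generated.types.chat_completion import (
    ChatCompletionInput,
    ChatCompletionInputMessage,
    ChatCompletionInputMessageChunk,
    ChatCompletionInputURL,
)

# One text-only system turn plus a multimodal user turn mixing text and an image URL.
messages = [
    ChatCompletionInputMessage(role="system", content="You are a concise assistant."),
    ChatCompletionInputMessage(
        role="user",
        content=[
            ChatCompletionInputMessageChunk(type="text", text="What is in this picture?"),
            ChatCompletionInputMessageChunk(
                type="image_url",
                image_url=ChatCompletionInputURL(url="https://example.com/cat.png"),  # illustrative URL
            ),
        ],
    ),
]

request = ChatCompletionInput(messages=messages, max_tokens=128, temperature=0.7, stream=False)
print(request.messages[1].content[0].text)
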
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/depth_estimation.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/depth_estimation.py
new file mode 100644
index 00000000..1e09bdff
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/depth_estimation.py
@@ -0,0 +1,28 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Dict, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class DepthEstimationInput(BaseInferenceType):
+ """Inputs for Depth Estimation inference"""
+
+ inputs: Any
+ """The input image data"""
+ parameters: Optional[Dict[str, Any]] = None
+ """Additional inference parameters for Depth Estimation"""
+
+
+@dataclass_with_extra
+class DepthEstimationOutput(BaseInferenceType):
+ """Outputs of inference for the Depth Estimation task"""
+
+ depth: Any
+ """The predicted depth as an image"""
+ predicted_depth: Any
+ """The predicted depth as a tensor"""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/document_question_answering.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/document_question_answering.py
new file mode 100644
index 00000000..2457d2c8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/document_question_answering.py
@@ -0,0 +1,80 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, List, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class DocumentQuestionAnsweringInputData(BaseInferenceType):
+ """One (document, question) pair to answer"""
+
+ image: Any
+ """The image on which the question is asked"""
+ question: str
+ """A question to ask of the document"""
+
+
+@dataclass_with_extra
+class DocumentQuestionAnsweringParameters(BaseInferenceType):
+ """Additional inference parameters for Document Question Answering"""
+
+ doc_stride: Optional[int] = None
+ """If the words in the document are too long to fit with the question for the model, it will
+ be split in several chunks with some overlap. This argument controls the size of that
+ overlap.
+ """
+ handle_impossible_answer: Optional[bool] = None
+ """Whether to accept impossible as an answer"""
+ lang: Optional[str] = None
+ """Language to use while running OCR. Defaults to english."""
+ max_answer_len: Optional[int] = None
+ """The maximum length of predicted answers (e.g., only answers with a shorter length are
+ considered).
+ """
+ max_question_len: Optional[int] = None
+ """The maximum length of the question after tokenization. It will be truncated if needed."""
+ max_seq_len: Optional[int] = None
+ """The maximum length of the total sentence (context + question) in tokens of each chunk
+ passed to the model. The context will be split in several chunks (using doc_stride as
+ overlap) if needed.
+ """
+ top_k: Optional[int] = None
+ """The number of answers to return (will be chosen by order of likelihood). Can return less
+ than top_k answers if there are not enough options available within the context.
+ """
+ word_boxes: Optional[List[Union[List[float], str]]] = None
+ """A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
+ skip the OCR step and use the provided bounding boxes instead.
+ """
+
+
+@dataclass_with_extra
+class DocumentQuestionAnsweringInput(BaseInferenceType):
+ """Inputs for Document Question Answering inference"""
+
+ inputs: DocumentQuestionAnsweringInputData
+ """One (document, question) pair to answer"""
+ parameters: Optional[DocumentQuestionAnsweringParameters] = None
+ """Additional inference parameters for Document Question Answering"""
+
+
+@dataclass_with_extra
+class DocumentQuestionAnsweringOutputElement(BaseInferenceType):
+ """Outputs of inference for the Document Question Answering task"""
+
+ answer: str
+ """The answer to the question."""
+ end: int
+ """The end word index of the answer (in the OCR’d version of the input or provided word
+ boxes).
+ """
+ score: float
+ """The probability associated to the answer."""
+ start: int
+ """The start word index of the answer (in the OCR’d version of the input or provided word
+ boxes).
+ """
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/feature_extraction.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/feature_extraction.py
new file mode 100644
index 00000000..e965ddba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/feature_extraction.py
@@ -0,0 +1,36 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import List, Literal, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+FeatureExtractionInputTruncationDirection = Literal["Left", "Right"]
+
+
+@dataclass_with_extra
+class FeatureExtractionInput(BaseInferenceType):
+ """Feature Extraction Input.
+ Auto-generated from TEI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.
+ """
+
+ inputs: Union[List[str], str]
+ """The text or list of texts to embed."""
+ normalize: Optional[bool] = None
+ prompt_name: Optional[str] = None
+ """The name of the prompt that should be used by for encoding. If not set, no prompt
+ will be applied.
+ Must be a key in the `sentence-transformers` configuration `prompts` dictionary.
+ For example, if ``prompt_name`` is "query" and ``prompts`` is
+ {"query": "query: ", ...},
+ then the sentence "What is the capital of France?" will be encoded as
+ "query: What is the capital of France?" because the prompt text will be prepended before
+ any text to encode.
+ """
+ truncate: Optional[bool] = None
+ truncation_direction: Optional["FeatureExtractionInputTruncationDirection"] = None
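
A brief sketch of a feature extraction request using the TEI-style fields above; the "query" prompt name only applies if the model's sentence-transformers configuration defines it:

from huggingface_hub.inference._generated.types.feature_extraction import FeatureExtractionInput

# Batch of two sentences, embedded with the model's "query" prompt and L2 normalization.
request = FeatureExtractionInput(
    inputs=["What is the capital of France?", "Paris is the capital of France."],
    normalize=True,
    prompt_name="query",  # must be a key in the model's `prompts` dictionary
    truncate=True,
    truncation_direction="Right",
)
print(len(request.inputs))
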
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/fill_mask.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/fill_mask.py
new file mode 100644
index 00000000..dfcdc56b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/fill_mask.py
@@ -0,0 +1,47 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, List, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class FillMaskParameters(BaseInferenceType):
+ """Additional inference parameters for Fill Mask"""
+
+ targets: Optional[List[str]] = None
+ """When passed, the model will limit the scores to the passed targets instead of looking up
+ in the whole vocabulary. If the provided targets are not in the model vocab, they will be
+ tokenized and the first resulting token will be used (with a warning, and that might be
+ slower).
+ """
+ top_k: Optional[int] = None
+ """When passed, overrides the number of predictions to return."""
+
+
+@dataclass_with_extra
+class FillMaskInput(BaseInferenceType):
+ """Inputs for Fill Mask inference"""
+
+ inputs: str
+ """The text with masked tokens"""
+ parameters: Optional[FillMaskParameters] = None
+ """Additional inference parameters for Fill Mask"""
+
+
+@dataclass_with_extra
+class FillMaskOutputElement(BaseInferenceType):
+ """Outputs of inference for the Fill Mask task"""
+
+ score: float
+ """The corresponding probability"""
+ sequence: str
+ """The corresponding input with the mask token prediction."""
+ token: int
+ """The predicted token id (to replace the masked one)."""
+ token_str: Any
+ fill_mask_output_token_str: Optional[str] = None
+ """The predicted token (to replace the masked one)."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_classification.py
new file mode 100644
index 00000000..0fdda6c8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_classification.py
@@ -0,0 +1,43 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+ImageClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
+
+
+@dataclass_with_extra
+class ImageClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Image Classification"""
+
+ function_to_apply: Optional["ImageClassificationOutputTransform"] = None
+ """The function to apply to the model outputs in order to retrieve the scores."""
+ top_k: Optional[int] = None
+ """When specified, limits the output to the top K most probable classes."""
+
+
+@dataclass_with_extra
+class ImageClassificationInput(BaseInferenceType):
+ """Inputs for Image Classification inference"""
+
+ inputs: str
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the image data as a raw bytes payload.
+ """
+ parameters: Optional[ImageClassificationParameters] = None
+ """Additional inference parameters for Image Classification"""
+
+
+@dataclass_with_extra
+class ImageClassificationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Image Classification task"""
+
+ label: str
+ """The predicted class label."""
+ score: float
+ """The corresponding probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_segmentation.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_segmentation.py
new file mode 100644
index 00000000..3dbf61db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_segmentation.py
@@ -0,0 +1,51 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+ImageSegmentationSubtask = Literal["instance", "panoptic", "semantic"]
+
+
+@dataclass_with_extra
+class ImageSegmentationParameters(BaseInferenceType):
+ """Additional inference parameters for Image Segmentation"""
+
+ mask_threshold: Optional[float] = None
+ """Threshold to use when turning the predicted masks into binary values."""
+ overlap_mask_area_threshold: Optional[float] = None
+ """Mask overlap threshold to eliminate small, disconnected segments."""
+ subtask: Optional["ImageSegmentationSubtask"] = None
+ """Segmentation task to be performed, depending on model capabilities."""
+ threshold: Optional[float] = None
+ """Probability threshold to filter out predicted masks."""
+
+
+@dataclass_with_extra
+class ImageSegmentationInput(BaseInferenceType):
+ """Inputs for Image Segmentation inference"""
+
+ inputs: str
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the image data as a raw bytes payload.
+ """
+ parameters: Optional[ImageSegmentationParameters] = None
+ """Additional inference parameters for Image Segmentation"""
+
+
+@dataclass_with_extra
+class ImageSegmentationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Image Segmentation task
+ A predicted mask / segment
+ """
+
+ label: str
+ """The label of the predicted segment."""
+ mask: str
+ """The corresponding mask as a black-and-white image (base64-encoded)."""
+ score: Optional[float] = None
+ """The score or confidence degree the model has."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_image.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_image.py
new file mode 100644
index 00000000..0deb0374
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_image.py
@@ -0,0 +1,54 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ImageToImageTargetSize(BaseInferenceType):
+ """The size in pixel of the output image."""
+
+ height: int
+ width: int
+
+
+@dataclass_with_extra
+class ImageToImageParameters(BaseInferenceType):
+ """Additional inference parameters for Image To Image"""
+
+ guidance_scale: Optional[float] = None
+ """For diffusion models. A higher guidance scale value encourages the model to generate
+ images closely linked to the text prompt at the expense of lower image quality.
+ """
+ negative_prompt: Optional[str] = None
+ """One prompt to guide what NOT to include in image generation."""
+ num_inference_steps: Optional[int] = None
+ """For diffusion models. The number of denoising steps. More denoising steps usually lead to
+ a higher quality image at the expense of slower inference.
+ """
+ target_size: Optional[ImageToImageTargetSize] = None
+ """The size in pixel of the output image."""
+
+
+@dataclass_with_extra
+class ImageToImageInput(BaseInferenceType):
+ """Inputs for Image To Image inference"""
+
+ inputs: str
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the image data as a raw bytes payload.
+ """
+ parameters: Optional[ImageToImageParameters] = None
+ """Additional inference parameters for Image To Image"""
+
+
+@dataclass_with_extra
+class ImageToImageOutput(BaseInferenceType):
+ """Outputs of inference for the Image To Image task"""
+
+ image: Any
+ """The output image returned as raw bytes in the payload."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_text.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_text.py
new file mode 100644
index 00000000..550d95e1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/image_to_text.py
@@ -0,0 +1,101 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Literal, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+ImageToTextEarlyStoppingEnum = Literal["never"]
+
+
+@dataclass_with_extra
+class ImageToTextGenerationParameters(BaseInferenceType):
+ """Parametrization of the text generation process"""
+
+ do_sample: Optional[bool] = None
+ """Whether to use sampling instead of greedy decoding when generating new tokens."""
+ early_stopping: Optional[Union[bool, "ImageToTextEarlyStoppingEnum"]] = None
+ """Controls the stopping condition for beam-based methods."""
+ epsilon_cutoff: Optional[float] = None
+ """If set to float strictly between 0 and 1, only tokens with a conditional probability
+ greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+ 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+ Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ """
+ eta_cutoff: Optional[float] = None
+ """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+ float strictly between 0 and 1, a token is only considered if it is greater than either
+ eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+ term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+ the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+ See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+ for more details.
+ """
+ max_length: Optional[int] = None
+ """The maximum length (in tokens) of the generated text, including the input."""
+ max_new_tokens: Optional[int] = None
+ """The maximum number of tokens to generate. Takes precedence over max_length."""
+ min_length: Optional[int] = None
+ """The minimum length (in tokens) of the generated text, including the input."""
+ min_new_tokens: Optional[int] = None
+ """The minimum number of tokens to generate. Takes precedence over min_length."""
+ num_beam_groups: Optional[int] = None
+ """Number of groups to divide num_beams into in order to ensure diversity among different
+ groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+ """
+ num_beams: Optional[int] = None
+ """Number of beams to use for beam search."""
+ penalty_alpha: Optional[float] = None
+ """The value balances the model confidence and the degeneration penalty in contrastive
+ search decoding.
+ """
+ temperature: Optional[float] = None
+ """The value used to modulate the next token probabilities."""
+ top_k: Optional[int] = None
+ """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+ top_p: Optional[float] = None
+ """If set to float < 1, only the smallest set of most probable tokens with probabilities
+ that add up to top_p or higher are kept for generation.
+ """
+ typical_p: Optional[float] = None
+ """Local typicality measures how similar the conditional probability of predicting a target
+ token next is to the expected conditional probability of predicting a random token next,
+ given the partial text already generated. If set to float < 1, the smallest set of the
+ most locally typical tokens with probabilities that add up to typical_p or higher are
+ kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+ """
+ use_cache: Optional[bool] = None
+ """Whether the model should use the past last key/values attentions to speed up decoding"""
+
+
+@dataclass_with_extra
+class ImageToTextParameters(BaseInferenceType):
+ """Additional inference parameters for Image To Text"""
+
+ max_new_tokens: Optional[int] = None
+ """The amount of maximum tokens to generate."""
+ # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+ generate_kwargs: Optional[ImageToTextGenerationParameters] = None
+ """Parametrization of the text generation process"""
+
+
+@dataclass_with_extra
+class ImageToTextInput(BaseInferenceType):
+ """Inputs for Image To Text inference"""
+
+ inputs: Any
+ """The input image data"""
+ parameters: Optional[ImageToTextParameters] = None
+ """Additional inference parameters for Image To Text"""
+
+
+@dataclass_with_extra
+class ImageToTextOutput(BaseInferenceType):
+ """Outputs of inference for the Image To Text task"""
+
+ generated_text: Any
+ image_to_text_output_generated_text: Optional[str] = None
+ """The generated text."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/object_detection.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/object_detection.py
new file mode 100644
index 00000000..75f3ebcf
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/object_detection.py
@@ -0,0 +1,58 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ObjectDetectionParameters(BaseInferenceType):
+ """Additional inference parameters for Object Detection"""
+
+ threshold: Optional[float] = None
+ """The probability necessary to make a prediction."""
+
+
+@dataclass_with_extra
+class ObjectDetectionInput(BaseInferenceType):
+ """Inputs for Object Detection inference"""
+
+ inputs: str
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the image data as a raw bytes payload.
+ """
+ parameters: Optional[ObjectDetectionParameters] = None
+ """Additional inference parameters for Object Detection"""
+
+
+@dataclass_with_extra
+class ObjectDetectionBoundingBox(BaseInferenceType):
+ """The predicted bounding box. Coordinates are relative to the top left corner of the input
+ image.
+ """
+
+ xmax: int
+ """The x-coordinate of the bottom-right corner of the bounding box."""
+ xmin: int
+ """The x-coordinate of the top-left corner of the bounding box."""
+ ymax: int
+ """The y-coordinate of the bottom-right corner of the bounding box."""
+ ymin: int
+ """The y-coordinate of the top-left corner of the bounding box."""
+
+
+@dataclass_with_extra
+class ObjectDetectionOutputElement(BaseInferenceType):
+ """Outputs of inference for the Object Detection task"""
+
+ box: ObjectDetectionBoundingBox
+ """The predicted bounding box. Coordinates are relative to the top left corner of the input
+ image.
+ """
+ label: str
+ """The predicted label for the bounding box."""
+ score: float
+ """The associated score / probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/question_answering.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/question_answering.py
new file mode 100644
index 00000000..014ab418
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/question_answering.py
@@ -0,0 +1,74 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class QuestionAnsweringInputData(BaseInferenceType):
+ """One (context, question) pair to answer"""
+
+ context: str
+ """The context to be used for answering the question"""
+ question: str
+ """The question to be answered"""
+
+
+@dataclass_with_extra
+class QuestionAnsweringParameters(BaseInferenceType):
+ """Additional inference parameters for Question Answering"""
+
+ align_to_words: Optional[bool] = None
+ """Attempts to align the answer to real words. Improves quality on space separated
+ languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
+ """
+ doc_stride: Optional[int] = None
+ """If the context is too long to fit with the question for the model, it will be split in
+ several chunks with some overlap. This argument controls the size of that overlap.
+ """
+ handle_impossible_answer: Optional[bool] = None
+ """Whether to accept impossible as an answer."""
+ max_answer_len: Optional[int] = None
+ """The maximum length of predicted answers (e.g., only answers with a shorter length are
+ considered).
+ """
+ max_question_len: Optional[int] = None
+ """The maximum length of the question after tokenization. It will be truncated if needed."""
+ max_seq_len: Optional[int] = None
+ """The maximum length of the total sentence (context + question) in tokens of each chunk
+ passed to the model. The context will be split in several chunks (using docStride as
+ overlap) if needed.
+ """
+ top_k: Optional[int] = None
+ """The number of answers to return (will be chosen by order of likelihood). Note that we
+ return less than topk answers if there are not enough options available within the
+ context.
+ """
+
+
+@dataclass_with_extra
+class QuestionAnsweringInput(BaseInferenceType):
+ """Inputs for Question Answering inference"""
+
+ inputs: QuestionAnsweringInputData
+ """One (context, question) pair to answer"""
+ parameters: Optional[QuestionAnsweringParameters] = None
+ """Additional inference parameters for Question Answering"""
+
+
+@dataclass_with_extra
+class QuestionAnsweringOutputElement(BaseInferenceType):
+ """Outputs of inference for the Question Answering task"""
+
+ answer: str
+ """The answer to the question."""
+ end: int
+ """The character position in the input where the answer ends."""
+ score: float
+ """The probability associated to the answer."""
+ start: int
+ """The character position in the input where the answer begins."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/sentence_similarity.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/sentence_similarity.py
new file mode 100644
index 00000000..66e8bb4d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/sentence_similarity.py
@@ -0,0 +1,27 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Dict, List, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class SentenceSimilarityInputData(BaseInferenceType):
+ sentences: List[str]
+ """A list of strings which will be compared against the source_sentence."""
+ source_sentence: str
+ """The string that you wish to compare the other strings with. This can be a phrase,
+ sentence, or longer passage, depending on the model being used.
+ """
+
+
+@dataclass_with_extra
+class SentenceSimilarityInput(BaseInferenceType):
+ """Inputs for Sentence similarity inference"""
+
+ inputs: SentenceSimilarityInputData
+ parameters: Optional[Dict[str, Any]] = None
+ """Additional inference parameters for Sentence Similarity"""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/summarization.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/summarization.py
new file mode 100644
index 00000000..33eae6fc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/summarization.py
@@ -0,0 +1,41 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Dict, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+SummarizationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]
+
+
+@dataclass_with_extra
+class SummarizationParameters(BaseInferenceType):
+ """Additional inference parameters for summarization."""
+
+ clean_up_tokenization_spaces: Optional[bool] = None
+ """Whether to clean up the potential extra spaces in the text output."""
+ generate_parameters: Optional[Dict[str, Any]] = None
+ """Additional parametrization of the text generation algorithm."""
+ truncation: Optional["SummarizationTruncationStrategy"] = None
+ """The truncation strategy to use."""
+
+
+@dataclass_with_extra
+class SummarizationInput(BaseInferenceType):
+ """Inputs for Summarization inference"""
+
+ inputs: str
+ """The input text to summarize."""
+ parameters: Optional[SummarizationParameters] = None
+ """Additional inference parameters for summarization."""
+
+
+@dataclass_with_extra
+class SummarizationOutput(BaseInferenceType):
+ """Outputs of inference for the Summarization task"""
+
+ summary_text: str
+ """The summarized text."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/table_question_answering.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/table_question_answering.py
new file mode 100644
index 00000000..10e208ee
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/table_question_answering.py
@@ -0,0 +1,62 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Dict, List, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class TableQuestionAnsweringInputData(BaseInferenceType):
+ """One (table, question) pair to answer"""
+
+ question: str
+ """The question to be answered about the table"""
+ table: Dict[str, List[str]]
+ """The table to serve as context for the questions"""
+
+
+Padding = Literal["do_not_pad", "longest", "max_length"]
+
+
+@dataclass_with_extra
+class TableQuestionAnsweringParameters(BaseInferenceType):
+ """Additional inference parameters for Table Question Answering"""
+
+ padding: Optional["Padding"] = None
+ """Activates and controls padding."""
+ sequential: Optional[bool] = None
+ """Whether to do inference sequentially or as a batch. Batching is faster, but models like
+ SQA require the inference to be done sequentially to extract relations within sequences,
+ given their conversational nature.
+ """
+ truncation: Optional[bool] = None
+ """Activates and controls truncation."""
+
+
+@dataclass_with_extra
+class TableQuestionAnsweringInput(BaseInferenceType):
+ """Inputs for Table Question Answering inference"""
+
+ inputs: TableQuestionAnsweringInputData
+ """One (table, question) pair to answer"""
+ parameters: Optional[TableQuestionAnsweringParameters] = None
+ """Additional inference parameters for Table Question Answering"""
+
+
+@dataclass_with_extra
+class TableQuestionAnsweringOutputElement(BaseInferenceType):
+ """Outputs of inference for the Table Question Answering task"""
+
+ answer: str
+ """The answer of the question given the table. If there is an aggregator, the answer will be
+ preceded by `AGGREGATOR >`.
+ """
+ cells: List[str]
+ """List of strings made up of the answer cell values."""
+ coordinates: List[List[int]]
+ """Coordinates of the cells of the answers."""
+ aggregator: Optional[str] = None
+ """If the model has an aggregator, this returns the aggregator."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text2text_generation.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text2text_generation.py
new file mode 100644
index 00000000..34ac74e2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text2text_generation.py
@@ -0,0 +1,42 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Dict, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+Text2TextGenerationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]
+
+
+@dataclass_with_extra
+class Text2TextGenerationParameters(BaseInferenceType):
+ """Additional inference parameters for Text2text Generation"""
+
+ clean_up_tokenization_spaces: Optional[bool] = None
+ """Whether to clean up the potential extra spaces in the text output."""
+ generate_parameters: Optional[Dict[str, Any]] = None
+ """Additional parametrization of the text generation algorithm"""
+ truncation: Optional["Text2TextGenerationTruncationStrategy"] = None
+ """The truncation strategy to use"""
+
+
+@dataclass_with_extra
+class Text2TextGenerationInput(BaseInferenceType):
+ """Inputs for Text2text Generation inference"""
+
+ inputs: str
+ """The input text data"""
+ parameters: Optional[Text2TextGenerationParameters] = None
+ """Additional inference parameters for Text2text Generation"""
+
+
+@dataclass_with_extra
+class Text2TextGenerationOutput(BaseInferenceType):
+ """Outputs of inference for the Text2text Generation task"""
+
+ generated_text: Any
+ text2_text_generation_output_generated_text: Optional[str] = None
+ """The generated text."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_classification.py
new file mode 100644
index 00000000..9a172b23
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_classification.py
@@ -0,0 +1,41 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+TextClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
+
+
+@dataclass_with_extra
+class TextClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Text Classification"""
+
+ function_to_apply: Optional["TextClassificationOutputTransform"] = None
+ """The function to apply to the model outputs in order to retrieve the scores."""
+ top_k: Optional[int] = None
+ """When specified, limits the output to the top K most probable classes."""
+
+
+@dataclass_with_extra
+class TextClassificationInput(BaseInferenceType):
+ """Inputs for Text Classification inference"""
+
+ inputs: str
+ """The text to classify"""
+ parameters: Optional[TextClassificationParameters] = None
+ """Additional inference parameters for Text Classification"""
+
+
+@dataclass_with_extra
+class TextClassificationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Text Classification task"""
+
+ label: str
+ """The predicted class label."""
+ score: float
+ """The corresponding probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_generation.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_generation.py
new file mode 100644
index 00000000..7ad5c039
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_generation.py
@@ -0,0 +1,168 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, List, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+TypeEnum = Literal["json", "regex"]
+
+
+@dataclass_with_extra
+class TextGenerationInputGrammarType(BaseInferenceType):
+ type: "TypeEnum"
+ value: Any
+ """A string that represents a [JSON Schema](https://json-schema.org/).
+ JSON Schema is a declarative language that allows you to annotate JSON documents
+ with types and descriptions.
+ """
+
+
+@dataclass_with_extra
+class TextGenerationInputGenerateParameters(BaseInferenceType):
+ adapter_id: Optional[str] = None
+ """Lora adapter id"""
+ best_of: Optional[int] = None
+ """Generate best_of sequences and return the one if the highest token logprobs."""
+ decoder_input_details: Optional[bool] = None
+ """Whether to return decoder input token logprobs and ids."""
+ details: Optional[bool] = None
+ """Whether to return generation details."""
+ do_sample: Optional[bool] = None
+ """Activate logits sampling."""
+ frequency_penalty: Optional[float] = None
+ """The parameter for frequency penalty. 1.0 means no penalty
+ Penalize new tokens based on their existing frequency in the text so far,
+ decreasing the model's likelihood to repeat the same line verbatim.
+ """
+ grammar: Optional[TextGenerationInputGrammarType] = None
+ max_new_tokens: Optional[int] = None
+ """Maximum number of tokens to generate."""
+ repetition_penalty: Optional[float] = None
+ """The parameter for repetition penalty. 1.0 means no penalty.
+ See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+ """
+ return_full_text: Optional[bool] = None
+ """Whether to prepend the prompt to the generated text"""
+ seed: Optional[int] = None
+ """Random sampling seed."""
+ stop: Optional[List[str]] = None
+ """Stop generating tokens if a member of `stop` is generated."""
+ temperature: Optional[float] = None
+ """The value used to module the logits distribution."""
+ top_k: Optional[int] = None
+ """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+ top_n_tokens: Optional[int] = None
+ """The number of highest probability vocabulary tokens to keep for top-n-filtering."""
+ top_p: Optional[float] = None
+ """Top-p value for nucleus sampling."""
+ truncate: Optional[int] = None
+ """Truncate inputs tokens to the given size."""
+ typical_p: Optional[float] = None
+ """Typical Decoding mass
+ See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
+ for more information.
+ """
+ watermark: Optional[bool] = None
+ """Watermarking with [A Watermark for Large Language
+ Models](https://arxiv.org/abs/2301.10226).
+ """
+
+
+@dataclass_with_extra
+class TextGenerationInput(BaseInferenceType):
+ """Text Generation Input.
+ Auto-generated from TGI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+ """
+
+ inputs: str
+ parameters: Optional[TextGenerationInputGenerateParameters] = None
+ stream: Optional[bool] = None
+
+
+TextGenerationOutputFinishReason = Literal["length", "eos_token", "stop_sequence"]
+
+
+@dataclass_with_extra
+class TextGenerationOutputPrefillToken(BaseInferenceType):
+ id: int
+ logprob: float
+ text: str
+
+
+@dataclass_with_extra
+class TextGenerationOutputToken(BaseInferenceType):
+ id: int
+ logprob: float
+ special: bool
+ text: str
+
+
+@dataclass_with_extra
+class TextGenerationOutputBestOfSequence(BaseInferenceType):
+ finish_reason: "TextGenerationOutputFinishReason"
+ generated_text: str
+ generated_tokens: int
+ prefill: List[TextGenerationOutputPrefillToken]
+ tokens: List[TextGenerationOutputToken]
+ seed: Optional[int] = None
+ top_tokens: Optional[List[List[TextGenerationOutputToken]]] = None
+
+
+@dataclass_with_extra
+class TextGenerationOutputDetails(BaseInferenceType):
+ finish_reason: "TextGenerationOutputFinishReason"
+ generated_tokens: int
+ prefill: List[TextGenerationOutputPrefillToken]
+ tokens: List[TextGenerationOutputToken]
+ best_of_sequences: Optional[List[TextGenerationOutputBestOfSequence]] = None
+ seed: Optional[int] = None
+ top_tokens: Optional[List[List[TextGenerationOutputToken]]] = None
+
+
+@dataclass_with_extra
+class TextGenerationOutput(BaseInferenceType):
+ """Text Generation Output.
+ Auto-generated from TGI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+ """
+
+ generated_text: str
+ details: Optional[TextGenerationOutputDetails] = None
+
+
+@dataclass_with_extra
+class TextGenerationStreamOutputStreamDetails(BaseInferenceType):
+ finish_reason: "TextGenerationOutputFinishReason"
+ generated_tokens: int
+ input_length: int
+ seed: Optional[int] = None
+
+
+@dataclass_with_extra
+class TextGenerationStreamOutputToken(BaseInferenceType):
+ id: int
+ logprob: float
+ special: bool
+ text: str
+
+
+@dataclass_with_extra
+class TextGenerationStreamOutput(BaseInferenceType):
+ """Text Generation Stream Output.
+ Auto-generated from TGI specs.
+ For more details, check out
+ https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+ """
+
+ index: int
+ token: TextGenerationStreamOutputToken
+ details: Optional[TextGenerationStreamOutputStreamDetails] = None
+ generated_text: Optional[str] = None
+ top_tokens: Optional[List[TextGenerationStreamOutputToken]] = None
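
A sketch of a grammar-constrained text generation request, plus one way to turn a raw response dict back into a typed object, assuming the parse_obj_as_instance helper defined on BaseInferenceType in the accompanying base.py behaves as in current huggingface_hub releases; the prompt, schema, and response are illustrative:

from huggingface_hub.inference._generated.types.text_generation import (
    TextGenerationInput,
    TextGenerationInputGenerateParameters,
    TextGenerationInputGrammarType,
    TextGenerationOutput,
)

# JSON-constrained generation: `value` holds a JSON Schema describing the expected answer.
request = TextGenerationInput(
    inputs="Extract the city mentioned in: 'I flew to Tokyo last week.'",
    parameters=TextGenerationInputGenerateParameters(
        max_new_tokens=50,
        temperature=0.2,
        grammar=TextGenerationInputGrammarType(
            type="json",
            value={"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]},
        ),
    ),
    stream=False,
)

# Rebuilding a typed output from a raw (illustrative) server response.
raw = {"generated_text": '{"city": "Tokyo"}', "details": None}
output = TextGenerationOutput.parse_obj_as_instance(raw)
print(output.generated_text)
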
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_audio.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_audio.py
new file mode 100644
index 00000000..963b4406
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_audio.py
@@ -0,0 +1,100 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Literal, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+TextToAudioEarlyStoppingEnum = Literal["never"]
+
+
+@dataclass_with_extra
+class TextToAudioGenerationParameters(BaseInferenceType):
+ """Parametrization of the text generation process"""
+
+ do_sample: Optional[bool] = None
+ """Whether to use sampling instead of greedy decoding when generating new tokens."""
+ early_stopping: Optional[Union[bool, "TextToAudioEarlyStoppingEnum"]] = None
+ """Controls the stopping condition for beam-based methods."""
+ epsilon_cutoff: Optional[float] = None
+ """If set to float strictly between 0 and 1, only tokens with a conditional probability
+ greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+ 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+ Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ """
+ eta_cutoff: Optional[float] = None
+ """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+ float strictly between 0 and 1, a token is only considered if it is greater than either
+ eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+ term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+ the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+ See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+ for more details.
+ """
+ max_length: Optional[int] = None
+ """The maximum length (in tokens) of the generated text, including the input."""
+ max_new_tokens: Optional[int] = None
+ """The maximum number of tokens to generate. Takes precedence over max_length."""
+ min_length: Optional[int] = None
+ """The minimum length (in tokens) of the generated text, including the input."""
+ min_new_tokens: Optional[int] = None
+ """The minimum number of tokens to generate. Takes precedence over min_length."""
+ num_beam_groups: Optional[int] = None
+ """Number of groups to divide num_beams into in order to ensure diversity among different
+ groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+ """
+ num_beams: Optional[int] = None
+ """Number of beams to use for beam search."""
+ penalty_alpha: Optional[float] = None
+ """The value balances the model confidence and the degeneration penalty in contrastive
+ search decoding.
+ """
+ temperature: Optional[float] = None
+ """The value used to modulate the next token probabilities."""
+ top_k: Optional[int] = None
+ """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+ top_p: Optional[float] = None
+ """If set to float < 1, only the smallest set of most probable tokens with probabilities
+ that add up to top_p or higher are kept for generation.
+ """
+ typical_p: Optional[float] = None
+ """Local typicality measures how similar the conditional probability of predicting a target
+ token next is to the expected conditional probability of predicting a random token next,
+ given the partial text already generated. If set to float < 1, the smallest set of the
+ most locally typical tokens with probabilities that add up to typical_p or higher are
+ kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+ """
+ use_cache: Optional[bool] = None
+ """Whether the model should use the past last key/values attentions to speed up decoding"""
+
+
+@dataclass_with_extra
+class TextToAudioParameters(BaseInferenceType):
+ """Additional inference parameters for Text To Audio"""
+
+ # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+ generate_kwargs: Optional[TextToAudioGenerationParameters] = None
+ """Parametrization of the text generation process"""
+
+
+@dataclass_with_extra
+class TextToAudioInput(BaseInferenceType):
+ """Inputs for Text To Audio inference"""
+
+ inputs: str
+ """The input text data"""
+ parameters: Optional[TextToAudioParameters] = None
+ """Additional inference parameters for Text To Audio"""
+
+
+@dataclass_with_extra
+class TextToAudioOutput(BaseInferenceType):
+ """Outputs of inference for the Text To Audio task"""
+
+ audio: Any
+ """The generated audio waveform."""
+ sampling_rate: float
+ """The sampling rate of the generated audio waveform."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_image.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_image.py
new file mode 100644
index 00000000..20c96373
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_image.py
@@ -0,0 +1,50 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class TextToImageParameters(BaseInferenceType):
+ """Additional inference parameters for Text To Image"""
+
+ guidance_scale: Optional[float] = None
+ """A higher guidance scale value encourages the model to generate images closely linked to
+ the text prompt, but values too high may cause saturation and other artifacts.
+ """
+ height: Optional[int] = None
+ """The height in pixels of the output image"""
+ negative_prompt: Optional[str] = None
+ """One prompt to guide what NOT to include in image generation."""
+ num_inference_steps: Optional[int] = None
+ """The number of denoising steps. More denoising steps usually lead to a higher quality
+ image at the expense of slower inference.
+ """
+ scheduler: Optional[str] = None
+ """Override the scheduler with a compatible one."""
+ seed: Optional[int] = None
+ """Seed for the random number generator."""
+ width: Optional[int] = None
+ """The width in pixels of the output image"""
+
+
+@dataclass_with_extra
+class TextToImageInput(BaseInferenceType):
+ """Inputs for Text To Image inference"""
+
+ inputs: str
+ """The input text data (sometimes called "prompt")"""
+ parameters: Optional[TextToImageParameters] = None
+ """Additional inference parameters for Text To Image"""
+
+
+@dataclass_with_extra
+class TextToImageOutput(BaseInferenceType):
+ """Outputs of inference for the Text To Image task"""
+
+ image: Any
+ """The generated image returned as raw bytes in the payload."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_speech.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_speech.py
new file mode 100644
index 00000000..4399e2ae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_speech.py
@@ -0,0 +1,100 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Literal, Optional, Union
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+TextToSpeechEarlyStoppingEnum = Literal["never"]
+
+
+@dataclass_with_extra
+class TextToSpeechGenerationParameters(BaseInferenceType):
+ """Parametrization of the text generation process"""
+
+ do_sample: Optional[bool] = None
+ """Whether to use sampling instead of greedy decoding when generating new tokens."""
+ early_stopping: Optional[Union[bool, "TextToSpeechEarlyStoppingEnum"]] = None
+ """Controls the stopping condition for beam-based methods."""
+ epsilon_cutoff: Optional[float] = None
+ """If set to float strictly between 0 and 1, only tokens with a conditional probability
+ greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+ 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+ Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ """
+ eta_cutoff: Optional[float] = None
+ """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+ float strictly between 0 and 1, a token is only considered if it is greater than either
+ eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+ term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+ the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+ See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+ for more details.
+ """
+ max_length: Optional[int] = None
+ """The maximum length (in tokens) of the generated text, including the input."""
+ max_new_tokens: Optional[int] = None
+ """The maximum number of tokens to generate. Takes precedence over max_length."""
+ min_length: Optional[int] = None
+ """The minimum length (in tokens) of the generated text, including the input."""
+ min_new_tokens: Optional[int] = None
+ """The minimum number of tokens to generate. Takes precedence over min_length."""
+ num_beam_groups: Optional[int] = None
+ """Number of groups to divide num_beams into in order to ensure diversity among different
+ groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+ """
+ num_beams: Optional[int] = None
+ """Number of beams to use for beam search."""
+ penalty_alpha: Optional[float] = None
+ """The value balances the model confidence and the degeneration penalty in contrastive
+ search decoding.
+ """
+ temperature: Optional[float] = None
+ """The value used to modulate the next token probabilities."""
+ top_k: Optional[int] = None
+ """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+ top_p: Optional[float] = None
+ """If set to float < 1, only the smallest set of most probable tokens with probabilities
+ that add up to top_p or higher are kept for generation.
+ """
+ typical_p: Optional[float] = None
+ """Local typicality measures how similar the conditional probability of predicting a target
+ token next is to the expected conditional probability of predicting a random token next,
+ given the partial text already generated. If set to float < 1, the smallest set of the
+ most locally typical tokens with probabilities that add up to typical_p or higher are
+ kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+ """
+ use_cache: Optional[bool] = None
+ """Whether the model should use the past last key/values attentions to speed up decoding"""
+
+
+@dataclass_with_extra
+class TextToSpeechParameters(BaseInferenceType):
+ """Additional inference parameters for Text To Speech"""
+
+ # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+ generate_kwargs: Optional[TextToSpeechGenerationParameters] = None
+ """Parametrization of the text generation process"""
+
+
+@dataclass_with_extra
+class TextToSpeechInput(BaseInferenceType):
+ """Inputs for Text To Speech inference"""
+
+ inputs: str
+ """The input text data"""
+ parameters: Optional[TextToSpeechParameters] = None
+ """Additional inference parameters for Text To Speech"""
+
+
+@dataclass_with_extra
+class TextToSpeechOutput(BaseInferenceType):
+ """Outputs of inference for the Text To Speech task"""
+
+ audio: Any
+ """The generated audio"""
+ sampling_rate: Optional[float] = None
+ """The sampling rate of the generated audio waveform."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_video.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_video.py
new file mode 100644
index 00000000..e54a1bc0
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/text_to_video.py
@@ -0,0 +1,46 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, List, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class TextToVideoParameters(BaseInferenceType):
+ """Additional inference parameters for Text To Video"""
+
+ guidance_scale: Optional[float] = None
+ """A higher guidance scale value encourages the model to generate videos closely linked to
+ the text prompt, but values too high may cause saturation and other artifacts.
+ """
+ negative_prompt: Optional[List[str]] = None
+ """One or several prompt to guide what NOT to include in video generation."""
+ num_frames: Optional[float] = None
+ """The num_frames parameter determines how many video frames are generated."""
+ num_inference_steps: Optional[int] = None
+ """The number of denoising steps. More denoising steps usually lead to a higher quality
+ video at the expense of slower inference.
+ """
+ seed: Optional[int] = None
+ """Seed for the random number generator."""
+
+
+@dataclass_with_extra
+class TextToVideoInput(BaseInferenceType):
+ """Inputs for Text To Video inference"""
+
+ inputs: str
+ """The input text data (sometimes called "prompt")"""
+ parameters: Optional[TextToVideoParameters] = None
+ """Additional inference parameters for Text To Video"""
+
+
+@dataclass_with_extra
+class TextToVideoOutput(BaseInferenceType):
+ """Outputs of inference for the Text To Video task"""
+
+ video: Any
+ """The generated video returned as raw bytes in the payload."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/token_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/token_classification.py
new file mode 100644
index 00000000..e039b6a1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/token_classification.py
@@ -0,0 +1,51 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import List, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+TokenClassificationAggregationStrategy = Literal["none", "simple", "first", "average", "max"]
+
+
+@dataclass_with_extra
+class TokenClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Token Classification"""
+
+ aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None
+ """The strategy used to fuse tokens based on model predictions"""
+ ignore_labels: Optional[List[str]] = None
+ """A list of labels to ignore"""
+ stride: Optional[int] = None
+ """The number of overlapping tokens between chunks when splitting the input text."""
+
+
+@dataclass_with_extra
+class TokenClassificationInput(BaseInferenceType):
+ """Inputs for Token Classification inference"""
+
+ inputs: str
+ """The input text data"""
+ parameters: Optional[TokenClassificationParameters] = None
+ """Additional inference parameters for Token Classification"""
+
+
+@dataclass_with_extra
+class TokenClassificationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Token Classification task"""
+
+ end: int
+ """The character position in the input where this group ends."""
+ score: float
+ """The associated score / probability"""
+ start: int
+ """The character position in the input where this group begins."""
+ word: str
+ """The corresponding text"""
+ entity: Optional[str] = None
+ """The predicted label for a single token"""
+ entity_group: Optional[str] = None
+ """The predicted label for a group of one or more tokens"""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/translation.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/translation.py
new file mode 100644
index 00000000..df95b7db
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/translation.py
@@ -0,0 +1,49 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Dict, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+TranslationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]
+
+
+@dataclass_with_extra
+class TranslationParameters(BaseInferenceType):
+ """Additional inference parameters for Translation"""
+
+ clean_up_tokenization_spaces: Optional[bool] = None
+ """Whether to clean up the potential extra spaces in the text output."""
+ generate_parameters: Optional[Dict[str, Any]] = None
+ """Additional parametrization of the text generation algorithm."""
+ src_lang: Optional[str] = None
+ """The source language of the text. Required for models that can translate from multiple
+ languages.
+ """
+ tgt_lang: Optional[str] = None
+ """Target language to translate to. Required for models that can translate to multiple
+ languages.
+ """
+ truncation: Optional["TranslationTruncationStrategy"] = None
+ """The truncation strategy to use."""
+
+
+@dataclass_with_extra
+class TranslationInput(BaseInferenceType):
+ """Inputs for Translation inference"""
+
+ inputs: str
+ """The text to translate."""
+ parameters: Optional[TranslationParameters] = None
+ """Additional inference parameters for Translation"""
+
+
+@dataclass_with_extra
+class TranslationOutput(BaseInferenceType):
+ """Outputs of inference for the Translation task"""
+
+ translation_text: str
+ """The translated text."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/video_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/video_classification.py
new file mode 100644
index 00000000..e1d7a15b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/video_classification.py
@@ -0,0 +1,45 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Literal, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+VideoClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
+
+
+@dataclass_with_extra
+class VideoClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Video Classification"""
+
+ frame_sampling_rate: Optional[int] = None
+ """The sampling rate used to select frames from the video."""
+ function_to_apply: Optional["VideoClassificationOutputTransform"] = None
+ """The function to apply to the model outputs in order to retrieve the scores."""
+ num_frames: Optional[int] = None
+ """The number of sampled frames to consider for classification."""
+ top_k: Optional[int] = None
+ """When specified, limits the output to the top K most probable classes."""
+
+
+@dataclass_with_extra
+class VideoClassificationInput(BaseInferenceType):
+ """Inputs for Video Classification inference"""
+
+ inputs: Any
+ """The input video data"""
+ parameters: Optional[VideoClassificationParameters] = None
+ """Additional inference parameters for Video Classification"""
+
+
+@dataclass_with_extra
+class VideoClassificationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Video Classification task"""
+
+ label: str
+ """The predicted class label."""
+ score: float
+ """The corresponding probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/visual_question_answering.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/visual_question_answering.py
new file mode 100644
index 00000000..d368f162
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/visual_question_answering.py
@@ -0,0 +1,49 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class VisualQuestionAnsweringInputData(BaseInferenceType):
+ """One (image, question) pair to answer"""
+
+ image: Any
+ """The image."""
+ question: str
+ """The question to answer based on the image."""
+
+
+@dataclass_with_extra
+class VisualQuestionAnsweringParameters(BaseInferenceType):
+ """Additional inference parameters for Visual Question Answering"""
+
+ top_k: Optional[int] = None
+ """The number of answers to return (will be chosen by order of likelihood). Note that we
+ return less than topk answers if there are not enough options available within the
+ context.
+ """
+
+
+@dataclass_with_extra
+class VisualQuestionAnsweringInput(BaseInferenceType):
+ """Inputs for Visual Question Answering inference"""
+
+ inputs: VisualQuestionAnsweringInputData
+ """One (image, question) pair to answer"""
+ parameters: Optional[VisualQuestionAnsweringParameters] = None
+ """Additional inference parameters for Visual Question Answering"""
+
+
+@dataclass_with_extra
+class VisualQuestionAnsweringOutputElement(BaseInferenceType):
+ """Outputs of inference for the Visual Question Answering task"""
+
+ score: float
+ """The associated score / probability"""
+ answer: Optional[str] = None
+ """The answer to the question"""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_classification.py
new file mode 100644
index 00000000..47b32492
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_classification.py
@@ -0,0 +1,45 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import List, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ZeroShotClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Zero Shot Classification"""
+
+ candidate_labels: List[str]
+ """The set of possible class labels to classify the text into."""
+ hypothesis_template: Optional[str] = None
+ """The sentence used in conjunction with `candidate_labels` to attempt the text
+ classification by replacing the placeholder with the candidate labels.
+ """
+ multi_label: Optional[bool] = None
+ """Whether multiple candidate labels can be true. If false, the scores are normalized such
+ that the sum of the label likelihoods for each sequence is 1. If true, the labels are
+ considered independent and probabilities are normalized for each candidate.
+ """
+
+
+@dataclass_with_extra
+class ZeroShotClassificationInput(BaseInferenceType):
+ """Inputs for Zero Shot Classification inference"""
+
+ inputs: str
+ """The text to classify"""
+ parameters: ZeroShotClassificationParameters
+ """Additional inference parameters for Zero Shot Classification"""
+
+
+@dataclass_with_extra
+class ZeroShotClassificationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Zero Shot Classification task"""
+
+ label: str
+ """The predicted class label."""
+ score: float
+ """The corresponding probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py
new file mode 100644
index 00000000..998d66b6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py
@@ -0,0 +1,40 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import List, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ZeroShotImageClassificationParameters(BaseInferenceType):
+ """Additional inference parameters for Zero Shot Image Classification"""
+
+ candidate_labels: List[str]
+ """The candidate labels for this image"""
+ hypothesis_template: Optional[str] = None
+ """The sentence used in conjunction with `candidate_labels` to attempt the image
+ classification by replacing the placeholder with the candidate labels.
+ """
+
+
+@dataclass_with_extra
+class ZeroShotImageClassificationInput(BaseInferenceType):
+ """Inputs for Zero Shot Image Classification inference"""
+
+ inputs: str
+ """The input image data to classify as a base64-encoded string."""
+ parameters: ZeroShotImageClassificationParameters
+ """Additional inference parameters for Zero Shot Image Classification"""
+
+
+@dataclass_with_extra
+class ZeroShotImageClassificationOutputElement(BaseInferenceType):
+ """Outputs of inference for the Zero Shot Image Classification task"""
+
+ label: str
+ """The predicted class label."""
+ score: float
+ """The corresponding probability."""
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py
new file mode 100644
index 00000000..8ef76b5f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py
@@ -0,0 +1,52 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import List
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ZeroShotObjectDetectionParameters(BaseInferenceType):
+ """Additional inference parameters for Zero Shot Object Detection"""
+
+ candidate_labels: List[str]
+ """The candidate labels for this image"""
+
+
+@dataclass_with_extra
+class ZeroShotObjectDetectionInput(BaseInferenceType):
+ """Inputs for Zero Shot Object Detection inference"""
+
+ inputs: str
+ """The input image data as a base64-encoded string."""
+ parameters: ZeroShotObjectDetectionParameters
+ """Additional inference parameters for Zero Shot Object Detection"""
+
+
+@dataclass_with_extra
+class ZeroShotObjectDetectionBoundingBox(BaseInferenceType):
+ """The predicted bounding box. Coordinates are relative to the top left corner of the input
+ image.
+ """
+
+ xmax: int
+ xmin: int
+ ymax: int
+ ymin: int
+
+
+@dataclass_with_extra
+class ZeroShotObjectDetectionOutputElement(BaseInferenceType):
+ """Outputs of inference for the Zero Shot Object Detection task"""
+
+ box: ZeroShotObjectDetectionBoundingBox
+ """The predicted bounding box. Coordinates are relative to the top left corner of the input
+ image.
+ """
+ label: str
+ """A candidate label"""
+ score: float
+ """The associated score / probability"""