about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py')
-rw-r--r--.venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py226
1 files changed, 226 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py
new file mode 100644
index 00000000..27833f28
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py
@@ -0,0 +1,226 @@
+# coding=utf-8
+# Copyright 2022-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains utilities to validate argument values in `huggingface_hub`."""
+
+import inspect
+import re
+import warnings
+from functools import wraps
+from itertools import chain
+from typing import Any, Dict
+
+from huggingface_hub.errors import HFValidationError
+
+from ._typing import CallableT
+
+
+REPO_ID_REGEX = re.compile(
+    r"""
+    ^
+    (\b[\w\-.]+\b/)? # optional namespace (username or organization)
+    \b               # starts with a word boundary
+    [\w\-.]{1,96}    # repo_name: alphanumeric + . _ -
+    \b               # ends with a word boundary
+    $
+    """,
+    flags=re.VERBOSE,
+)
+
+
+def validate_hf_hub_args(fn: CallableT) -> CallableT:
+    """Validate values received as argument for any public method of `huggingface_hub`.
+
+    The goal of this decorator is to harmonize validation of arguments reused
+    everywhere. By default, all defined validators are tested.
+
+    Validators:
+        - [`~utils.validate_repo_id`]: `repo_id` must be `"repo_name"`
+          or `"namespace/repo_name"`. Namespace is a username or an organization.
+        - [`~utils.smoothly_deprecate_use_auth_token`]: Use `token` instead of
+          `use_auth_token` (only if `use_auth_token` is not expected by the decorated
+          function - in practice, always the case in `huggingface_hub`).
+
+    Example:
+    ```py
+    >>> from huggingface_hub.utils import validate_hf_hub_args
+
+    >>> @validate_hf_hub_args
+    ... def my_cool_method(repo_id: str):
+    ...     print(repo_id)
+
+    >>> my_cool_method(repo_id="valid_repo_id")
+    valid_repo_id
+
+    >>> my_cool_method("other..repo..id")
+    huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'.
+
+    >>> my_cool_method(repo_id="other..repo..id")
+    huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'.
+
+    >>> @validate_hf_hub_args
+    ... def my_cool_auth_method(token: str):
+    ...     print(token)
+
+    >>> my_cool_auth_method(token="a token")
+    "a token"
+
+    >>> my_cool_auth_method(use_auth_token="a use_auth_token")
+    "a use_auth_token"
+
+    >>> my_cool_auth_method(token="a token", use_auth_token="a use_auth_token")
+    UserWarning: Both `token` and `use_auth_token` are passed (...)
+    "a token"
+    ```
+
+    Raises:
+        [`~utils.HFValidationError`]:
+            If an input is not valid.
+    """
+    # TODO: add an argument to opt-out validation for specific argument?
+    signature = inspect.signature(fn)
+
+    # Should the validator switch `use_auth_token` values to `token`? In practice, always
+    # True in `huggingface_hub`. Might not be the case in a downstream library.
+    check_use_auth_token = "use_auth_token" not in signature.parameters and "token" in signature.parameters
+
+    @wraps(fn)
+    def _inner_fn(*args, **kwargs):
+        has_token = False
+        for arg_name, arg_value in chain(
+            zip(signature.parameters, args),  # Args values
+            kwargs.items(),  # Kwargs values
+        ):
+            if arg_name in ["repo_id", "from_id", "to_id"]:
+                validate_repo_id(arg_value)
+
+            elif arg_name == "token" and arg_value is not None:
+                has_token = True
+
+        if check_use_auth_token:
+            kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
+
+        return fn(*args, **kwargs)
+
+    return _inner_fn  # type: ignore
+
+
+def validate_repo_id(repo_id: str) -> None:
+    """Validate `repo_id` is valid.
+
+    This is not meant to replace the proper validation made on the Hub but rather to
+    avoid local inconsistencies whenever possible (example: passing `repo_type` in the
+    `repo_id` is forbidden).
+
+    Rules:
+    - Between 1 and 96 characters.
+    - Either "repo_name" or "namespace/repo_name"
+    - [a-zA-Z0-9] or "-", "_", "."
+    - "--" and ".." are forbidden
+
+    Valid: `"foo"`, `"foo/bar"`, `"123"`, `"Foo-BAR_foo.bar123"`
+
+    Not valid: `"datasets/foo/bar"`, `".repo_id"`, `"foo--bar"`, `"foo.git"`
+
+    Example:
+    ```py
+    >>> from huggingface_hub.utils import validate_repo_id
+    >>> validate_repo_id(repo_id="valid_repo_id")
+    >>> validate_repo_id(repo_id="other..repo..id")
+    huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'.
+    ```
+
+    Discussed in https://github.com/huggingface/huggingface_hub/issues/1008.
+    In moon-landing (internal repository):
+    - https://github.com/huggingface/moon-landing/blob/main/server/lib/Names.ts#L27
+    - https://github.com/huggingface/moon-landing/blob/main/server/views/components/NewRepoForm/NewRepoForm.svelte#L138
+    """
+    if not isinstance(repo_id, str):
+        # Typically, a Path is not a repo_id
+        raise HFValidationError(f"Repo id must be a string, not {type(repo_id)}: '{repo_id}'.")
+
+    if repo_id.count("/") > 1:
+        raise HFValidationError(
+            "Repo id must be in the form 'repo_name' or 'namespace/repo_name':"
+            f" '{repo_id}'. Use `repo_type` argument if needed."
+        )
+
+    if not REPO_ID_REGEX.match(repo_id):
+        raise HFValidationError(
+            "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are"
+            " forbidden, '-' and '.' cannot start or end the name, max length is 96:"
+            f" '{repo_id}'."
+        )
+
+    if "--" in repo_id or ".." in repo_id:
+        raise HFValidationError(f"Cannot have -- or .. in repo_id: '{repo_id}'.")
+
+    if repo_id.endswith(".git"):
+        raise HFValidationError(f"Repo_id cannot end by '.git': '{repo_id}'.")
+
+
+def smoothly_deprecate_use_auth_token(fn_name: str, has_token: bool, kwargs: Dict[str, Any]) -> Dict[str, Any]:
+    """Smoothly deprecate `use_auth_token` in the `huggingface_hub` codebase.
+
+    The long-term goal is to remove any mention of `use_auth_token` in the codebase in
+    favor of a unique and less verbose `token` argument. This will be done a few steps:
+
+    0. Step 0: methods that require a read-access to the Hub use the `use_auth_token`
+       argument (`str`, `bool` or `None`). Methods requiring write-access have a `token`
+       argument (`str`, `None`). This implicit rule exists to be able to not send the
+       token when not necessary (`use_auth_token=False`) even if logged in.
+
+    1. Step 1: we want to harmonize everything and use `token` everywhere (supporting
+       `token=False` for read-only methods). In order not to break existing code, if
+       `use_auth_token` is passed to a function, the `use_auth_token` value is passed
+       as `token` instead, without any warning.
+       a. Corner case: if both `use_auth_token` and `token` values are passed, a warning
+          is thrown and the `use_auth_token` value is ignored.
+
+    2. Step 2: Once it is release, we should push downstream libraries to switch from
+       `use_auth_token` to `token` as much as possible, but without throwing a warning
+       (e.g. manually create issues on the corresponding repos).
+
+    3. Step 3: After a transitional period (6 months e.g. until April 2023?), we update
+       `huggingface_hub` to throw a warning on `use_auth_token`. Hopefully, very few
+       users will be impacted as it would have already been fixed.
+       In addition, unit tests in `huggingface_hub` must be adapted to expect warnings
+       to be thrown (but still use `use_auth_token` as before).
+
+    4. Step 4: After a normal deprecation cycle (3 releases ?), remove this validator.
+       `use_auth_token` will definitely not be supported.
+       In addition, we update unit tests in `huggingface_hub` to use `token` everywhere.
+
+    This has been discussed in:
+    - https://github.com/huggingface/huggingface_hub/issues/1094.
+    - https://github.com/huggingface/huggingface_hub/pull/928
+    - (related) https://github.com/huggingface/huggingface_hub/pull/1064
+    """
+    new_kwargs = kwargs.copy()  # do not mutate input !
+
+    use_auth_token = new_kwargs.pop("use_auth_token", None)  # remove from kwargs
+    if use_auth_token is not None:
+        if has_token:
+            warnings.warn(
+                "Both `token` and `use_auth_token` are passed to"
+                f" `{fn_name}` with non-None values. `token` is now the"
+                " preferred argument to pass a User Access Token."
+                " `use_auth_token` value will be ignored."
+            )
+        else:
+            # `token` argument is not passed and a non-None value is passed in
+            # `use_auth_token` => use `use_auth_token` value as `token` kwarg.
+            new_kwargs["token"] = use_auth_token
+
+    return new_kwargs