aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py')
-rw-r--r--.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py135
1 files changed, 135 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py
new file mode 100644
index 00000000..e5d435df
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py
@@ -0,0 +1,135 @@
+# coding=utf-8
+# Copyright 2019-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pathlib import Path
+from typing import Union
+
+from ..constants import HF_ASSETS_CACHE
+
+
def cached_assets_path(
    library_name: str,
    namespace: str = "default",
    subfolder: str = "default",
    *,
    assets_dir: Union[str, Path, None] = None,
) -> Path:
    """Return a folder path to cache arbitrary files.

    `huggingface_hub` provides a canonical folder path to store assets. This is the
    recommended way to integrate cache in a downstream library as it will benefit from
    the built-in tools to scan and delete the cache properly.

    The distinction is made between files cached from the Hub and assets. Files from the
    Hub are cached in a git-aware manner and entirely managed by `huggingface_hub`. See
    [related documentation](https://huggingface.co/docs/huggingface_hub/how-to-cache).
    All other files that a downstream library caches are considered to be "assets"
    (files downloaded from external sources, extracted from a .tar archive, preprocessed
    for training,...).

    Once the folder path is generated, it is guaranteed to exist and to be a directory.
    The path is based on 3 levels of depth: the library name, a namespace and a
    subfolder. Those 3 levels grant flexibility while allowing `huggingface_hub` to
    expect folders when scanning/deleting parts of the assets cache. Within a library,
    it is expected that all namespaces share the same subset of subfolder names but this
    is not a mandatory rule. The downstream library has then full control on which file
    structure to adopt within its cache. Namespace and subfolder are optional (would
    default to a `"default/"` subfolder) but library name is mandatory as we want every
    downstream library to manage its own cache.

    Spaces, forward slashes and backslashes found in `library_name`, `namespace` or
    `subfolder` are each replaced by `"--"` so that every value maps to exactly one
    folder name.

    Expected tree:
    ```text
        assets/
        ├── datasets/
        │   ├── SQuAD/
        │   │   ├── downloaded/
        │   │   ├── extracted/
        │   │   └── processed/
        │   └── Helsinki-NLP--tatoeba_mt/
        │       ├── downloaded/
        │       ├── extracted/
        │       └── processed/
        └── transformers/
            ├── default/
            │   └── something/
            └── bert-base-cased/
                ├── default/
                └── training/
        hub/
        └── models--julien-c--EsperBERTo-small/
            ├── blobs/
            │   ├── (...)
            │   └── (...)
            ├── refs/
            │   └── (...)
            └── snapshots/
                ├── 2439f60ef33a0d46d85da5001d52aeda5b00ce9f/
                │   └── (...)
                └── bbc77c8132af1cc5cf678da3f1ddf2de43606d48/
                    └── (...)
    ```


    Args:
        library_name (`str`):
            Name of the library that will manage the cache folder. Example: `"datasets"`.
        namespace (`str`, *optional*, defaults to "default"):
            Namespace to which the data belongs. Example: `"SQuAD"`.
        subfolder (`str`, *optional*, defaults to "default"):
            Subfolder in which the data will be stored. Example: `extracted`.
        assets_dir (`str`, `Path`, *optional*):
            Path to the folder where assets are cached. This must not be the same folder
            where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided.
            Can also be set with `HF_ASSETS_CACHE` environment variable.

    Returns:
        Path to the cache folder (`Path`).

    Raises:
        `ValueError`:
            If the folder cannot be created because a regular file already exists at
            the target path or at one of its parents. The underlying `OSError` is
            attached as `__cause__`.

    Example:
    ```py
    >>> from huggingface_hub import cached_assets_path

    >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="download")
    PosixPath('/home/wauplin/.cache/huggingface/assets/datasets/SQuAD/download')

    >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="extracted")
    PosixPath('/home/wauplin/.cache/huggingface/assets/datasets/SQuAD/extracted')

    >>> cached_assets_path(library_name="datasets", namespace="Helsinki-NLP/tatoeba_mt")
    PosixPath('/home/wauplin/.cache/huggingface/assets/datasets/Helsinki-NLP--tatoeba_mt/default')

    >>> cached_assets_path(library_name="datasets", assets_dir="/tmp/tmp123456")
    PosixPath('/tmp/tmp123456/datasets/default/default')
    ```
    """
    # Resolve assets_dir: fall back to the package-wide default, then normalize to an
    # absolute path with `~` expanded.
    if assets_dir is None:
        assets_dir = HF_ASSETS_CACHE
    assets_dir = Path(assets_dir).expanduser().resolve()

    # Avoid names that could create path issues: a separator inside a name would add
    # extra directory levels. Each offending character becomes "--" in a single pass.
    unsafe_chars = str.maketrans({" ": "--", "/": "--", "\\": "--"})
    library_name = library_name.translate(unsafe_chars)
    namespace = namespace.translate(unsafe_chars)
    subfolder = subfolder.translate(unsafe_chars)

    # Path to subfolder is created (no-op if it already exists as a directory)
    path = assets_dir / library_name / namespace / subfolder
    try:
        path.mkdir(exist_ok=True, parents=True)
    except (FileExistsError, NotADirectoryError) as err:
        # Chain the original OSError so callers can see which filesystem entry is
        # actually in the way.
        raise ValueError(
            f"Corrupted assets folder: cannot create directory because of an existing file ({path})."
        ) from err

    return path