about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py')
-rw-r--r--.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py64
1 files changed, 64 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py
new file mode 100644
index 00000000..001c3fe8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py
@@ -0,0 +1,64 @@
+"""Utilities to efficiently compute the SHA 256 hash of a bunch of bytes."""
+
+from typing import BinaryIO, Optional
+
+from .insecure_hashlib import sha1, sha256
+
+
+def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
+    """
+    Computes the sha256 hash of the given file object, by chunks of size `chunk_size`.
+
+    Args:
+        fileobj (file-like object):
+            The File object to compute sha256 for, typically obtained with `open(path, "rb")`
+        chunk_size (`int`, *optional*):
+            The number of bytes to read from `fileobj` at once, defaults to 1MB.
+
+    Returns:
+        `bytes`: `fileobj`'s sha256 hash as bytes
+    """
+    chunk_size = chunk_size if chunk_size is not None else 1024 * 1024
+
+    sha = sha256()
+    while True:
+        chunk = fileobj.read(chunk_size)
+        sha.update(chunk)
+        if not chunk:
+            break
+    return sha.digest()
+
+
+def git_hash(data: bytes) -> str:
+    """
+    Computes the git-sha1 hash of the given bytes, using the same algorithm as git.
+
+    This is equivalent to running `git hash-object`. See https://git-scm.com/docs/git-hash-object
+    for more details.
+
+    Note: this method is valid for regular files. For LFS files, the proper git hash is supposed to be computed on the
+          pointer file content, not the actual file content. However, for simplicity, we directly compare the sha256 of
+          the LFS file content when we want to compare LFS files.
+
+    Args:
+        data (`bytes`):
+            The data to compute the git-hash for.
+
+    Returns:
+        `str`: the git-hash of `data` as an hexadecimal string.
+
+    Example:
+    ```python
+    >>> from huggingface_hub.utils.sha import git_hash
+    >>> git_hash(b"Hello, World!")
+    'b45ef6fec89518d314f546fd6c3025367b721684'
+    ```
+    """
+    # Taken from https://gist.github.com/msabramo/763200
+    # Note: no need to optimize by reading the file in chunks as we're not supposed to hash huge files (5MB maximum).
+    sha = sha1()
+    sha.update(b"blob ")
+    sha.update(str(len(data)).encode())
+    sha.update(b"\0")
+    sha.update(data)
+    return sha.hexdigest()