aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py')
-rw-r--r--.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py110
1 files changed, 110 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py
new file mode 100644
index 00000000..307f371f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py
@@ -0,0 +1,110 @@
+# coding=utf-8
+# Copyright 2019-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Git LFS related utilities"""
+
+import io
+import os
+from contextlib import AbstractContextManager
+from typing import BinaryIO
+
+
class SliceFileObj(AbstractContextManager):
    """
    Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object.

    All positions exposed by the slice (`tell()`, `seek()`) are relative to the start of the slice, i.e.
    position 0 of the slice is byte `seek_from` of the underlying file.

    This is NOT thread safe: the slice shares (and moves) the cursor of the underlying `fileobj`.

    Inspired by stackoverflow.com/a/29838711/593036

    Credits to @julien-c

    Args:
        fileobj (`BinaryIO`):
            A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course).
            `fileobj` will be reset to its original position when exiting the context manager.
        seek_from (`int`):
            The start of the slice (offset from position 0 in bytes).
        read_limit (`int`):
            The maximum number of bytes to read from the slice.

    Attributes:
        ITER_CHUNK_SIZE (`int`):
            Maximum size in bytes of each chunk produced when iterating over the slice.
        _previous_position (`int`):
            Position of `fileobj` when the context was entered; restored on exit.
        _len (`int`):
            Number of bytes actually readable from the slice (never negative). Set on `__enter__`.

    Examples:

    Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327):
    ```python
    >>> with open("path/to/file", "rb") as file:
    ...     with SliceFileObj(file, seek_from=128, read_limit=200) as fslice:
    ...         fslice.read(...)
    ```

    Reading a file in chunks of 512 bytes
    ```python
    >>> import os
    >>> from math import ceil
    >>> chunk_size = 512
    >>> file_size = os.path.getsize("path/to/file")
    >>> with open("path/to/file", "rb") as file:
    ...     for chunk_idx in range(ceil(file_size / chunk_size)):
    ...         with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice:
    ...             chunk = fslice.read(...)

    ```
    """

    # Size of the chunks produced by `__iter__` (4 MiB).
    ITER_CHUNK_SIZE = 4 * 1024 * 1024

    def __init__(self, fileobj: BinaryIO, seek_from: int, read_limit: int):
        self.fileobj = fileobj
        self.seek_from = seek_from
        self.read_limit = read_limit

    def __enter__(self):
        """Remember the current position of `fileobj`, compute the slice length and move to the slice start."""
        self._previous_position = self.fileobj.tell()
        end_of_stream = self.fileobj.seek(0, os.SEEK_END)
        # The actual number of bytes that can be read from the slice. Clamped to 0 so that a slice
        # starting past the end of the stream (or a negative `read_limit`) is simply an empty slice
        # rather than one with a negative length.
        self._len = max(0, min(self.read_limit, end_of_stream - self.seek_from))
        self.fileobj.seek(self.seek_from, os.SEEK_SET)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore `fileobj` to the position it had when the context was entered."""
        self.fileobj.seek(self._previous_position, os.SEEK_SET)

    def read(self, n: int = -1):
        """
        Read up to `n` bytes from the slice, never going past the end of the slice.

        Args:
            n (`int`, defaults to `-1`):
                Maximum number of bytes to read. A negative value or `None` reads everything that
                remains in the slice.

        Returns:
            `bytes`: the data read, or `b""` once the end of the slice has been reached.
        """
        pos = self.tell()
        if pos >= self._len:
            return b""
        remaining_amount = self._len - pos
        if n is None or n < 0:
            to_read = remaining_amount
        else:
            to_read = min(n, remaining_amount)
        return self.fileobj.read(to_read)

    def tell(self) -> int:
        """Return the current position relative to the start of the slice."""
        return self.fileobj.tell() - self.seek_from

    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
        """
        Move the cursor inside the slice. The resulting position is clamped to the slice boundaries.

        Args:
            offset (`int`):
                Offset in bytes, interpreted relative to `whence`.
            whence (`int`, defaults to `os.SEEK_SET`):
                `os.SEEK_SET` (start of the slice), `os.SEEK_CUR` (current position) or
                `os.SEEK_END` (end of the slice).

        Returns:
            `int`: the new position, relative to the start of the slice.

        Raises:
            `ValueError`: if `whence` is not one of the three supported values.
        """
        start = self.seek_from
        end = start + self._len
        # Translate the request into an absolute position in the underlying file...
        if whence == os.SEEK_SET:
            target = start + offset
        elif whence == os.SEEK_END:
            target = end + offset
        elif whence == os.SEEK_CUR:
            target = self.fileobj.tell() + offset
        else:
            raise ValueError(f"whence value {whence} is not supported")
        # ...and keep it inside [start, end] so the cursor never leaves the slice.
        target = max(start, min(target, end))
        return self.fileobj.seek(target, os.SEEK_SET) - self.seek_from

    def __iter__(self):
        """Yield the remaining content of the slice in chunks of at most `ITER_CHUNK_SIZE` bytes."""
        # Keep reading until the slice is exhausted: yielding a single chunk would silently
        # truncate any slice larger than the chunk size.
        while True:
            chunk = self.read(n=self.ITER_CHUNK_SIZE)
            if not chunk:
                return
            yield chunk