From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../site-packages/huggingface_hub/utils/_lfs.py | 110 +++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py') diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py new file mode 100644 index 00000000..307f371f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_lfs.py @@ -0,0 +1,110 @@ +# coding=utf-8 +# Copyright 2019-present, the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Git LFS related utilities""" + +import io +import os +from contextlib import AbstractContextManager +from typing import BinaryIO + + +class SliceFileObj(AbstractContextManager): + """ + Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object. + + This is NOT thread safe + + Inspired by stackoverflow.com/a/29838711/593036 + + Credits to @julien-c + + Args: + fileobj (`BinaryIO`): + A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course). + `fileobj` will be reset to its original position when exiting the context manager. + seek_from (`int`): + The start of the slice (offset from position 0 in bytes). + read_limit (`int`): + The maximum number of bytes to read from the slice. + + Attributes: + previous_position (`int`): + The previous position + + Examples: + + Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327): + ```python + >>> with open("path/to/file", "rb") as file: + ... with SliceFileObj(file, seek_from=128, read_limit=200) as fslice: + ... fslice.read(...) + ``` + + Reading a file in chunks of 512 bytes + ```python + >>> import os + >>> chunk_size = 512 + >>> file_size = os.getsize("path/to/file") + >>> with open("path/to/file", "rb") as file: + ... for chunk_idx in range(ceil(file_size / chunk_size)): + ... with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice: + ... chunk = fslice.read(...) + + ``` + """ + + def __init__(self, fileobj: BinaryIO, seek_from: int, read_limit: int): + self.fileobj = fileobj + self.seek_from = seek_from + self.read_limit = read_limit + + def __enter__(self): + self._previous_position = self.fileobj.tell() + end_of_stream = self.fileobj.seek(0, os.SEEK_END) + self._len = min(self.read_limit, end_of_stream - self.seek_from) + # ^^ The actual number of bytes that can be read from the slice + self.fileobj.seek(self.seek_from, io.SEEK_SET) + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.fileobj.seek(self._previous_position, io.SEEK_SET) + + def read(self, n: int = -1): + pos = self.tell() + if pos >= self._len: + return b"" + remaining_amount = self._len - pos + data = self.fileobj.read(remaining_amount if n < 0 else min(n, remaining_amount)) + return data + + def tell(self) -> int: + return self.fileobj.tell() - self.seek_from + + def seek(self, offset: int, whence: int = os.SEEK_SET) -> int: + start = self.seek_from + end = start + self._len + if whence in (os.SEEK_SET, os.SEEK_END): + offset = start + offset if whence == os.SEEK_SET else end + offset + offset = max(start, min(offset, end)) + whence = os.SEEK_SET + elif whence == os.SEEK_CUR: + cur_pos = self.fileobj.tell() + offset = max(start - cur_pos, min(offset, end - cur_pos)) + else: + raise ValueError(f"whence value {whence} is not supported") + return self.fileobj.seek(offset, whence) - self.seek_from + + def __iter__(self): + yield self.read(n=4 * 1024 * 1024) -- cgit v1.2.3