diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/pip/_internal/network/lazy_wheel.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/pip/_internal/network/lazy_wheel.py | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pip/_internal/network/lazy_wheel.py b/.venv/lib/python3.12/site-packages/pip/_internal/network/lazy_wheel.py new file mode 100644 index 00000000..03f883c1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pip/_internal/network/lazy_wheel.py @@ -0,0 +1,210 @@ +"""Lazy ZIP over HTTP""" + +__all__ = ["HTTPRangeRequestUnsupported", "dist_from_wheel_url"] + +from bisect import bisect_left, bisect_right +from contextlib import contextmanager +from tempfile import NamedTemporaryFile +from typing import Any, Dict, Generator, List, Optional, Tuple +from zipfile import BadZipFile, ZipFile + +from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response + +from pip._internal.metadata import BaseDistribution, MemoryWheel, get_wheel_distribution +from pip._internal.network.session import PipSession +from pip._internal.network.utils import HEADERS, raise_for_status, response_chunks + + +class HTTPRangeRequestUnsupported(Exception): + pass + + +def dist_from_wheel_url(name: str, url: str, session: PipSession) -> BaseDistribution: + """Return a distribution object from the given wheel URL. + + This uses HTTP range requests to only fetch the portion of the wheel + containing metadata, just enough for the object to be constructed. + If such requests are not supported, HTTPRangeRequestUnsupported + is raised. + """ + with LazyZipOverHTTP(url, session) as zf: + # For read-only ZIP files, ZipFile only needs methods read, + # seek, seekable and tell, not the whole IO protocol. + wheel = MemoryWheel(zf.name, zf) # type: ignore + # After context manager exit, wheel.name + # is an invalid file by intention. + return get_wheel_distribution(wheel, canonicalize_name(name)) + + +class LazyZipOverHTTP: + """File-like object mapped to a ZIP file over HTTP. + + This uses HTTP range requests to lazily fetch the file's content, + which is supposed to be fed to ZipFile. If such requests are not + supported by the server, raise HTTPRangeRequestUnsupported + during initialization. + """ + + def __init__( + self, url: str, session: PipSession, chunk_size: int = CONTENT_CHUNK_SIZE + ) -> None: + head = session.head(url, headers=HEADERS) + raise_for_status(head) + assert head.status_code == 200 + self._session, self._url, self._chunk_size = session, url, chunk_size + self._length = int(head.headers["Content-Length"]) + self._file = NamedTemporaryFile() + self.truncate(self._length) + self._left: List[int] = [] + self._right: List[int] = [] + if "bytes" not in head.headers.get("Accept-Ranges", "none"): + raise HTTPRangeRequestUnsupported("range request is not supported") + self._check_zip() + + @property + def mode(self) -> str: + """Opening mode, which is always rb.""" + return "rb" + + @property + def name(self) -> str: + """Path to the underlying file.""" + return self._file.name + + def seekable(self) -> bool: + """Return whether random access is supported, which is True.""" + return True + + def close(self) -> None: + """Close the file.""" + self._file.close() + + @property + def closed(self) -> bool: + """Whether the file is closed.""" + return self._file.closed + + def read(self, size: int = -1) -> bytes: + """Read up to size bytes from the object and return them. + + As a convenience, if size is unspecified or -1, + all bytes until EOF are returned. Fewer than + size bytes may be returned if EOF is reached. + """ + download_size = max(size, self._chunk_size) + start, length = self.tell(), self._length + stop = length if size < 0 else min(start + download_size, length) + start = max(0, stop - download_size) + self._download(start, stop - 1) + return self._file.read(size) + + def readable(self) -> bool: + """Return whether the file is readable, which is True.""" + return True + + def seek(self, offset: int, whence: int = 0) -> int: + """Change stream position and return the new absolute position. + + Seek to offset relative position indicated by whence: + * 0: Start of stream (the default). pos should be >= 0; + * 1: Current position - pos may be negative; + * 2: End of stream - pos usually negative. + """ + return self._file.seek(offset, whence) + + def tell(self) -> int: + """Return the current position.""" + return self._file.tell() + + def truncate(self, size: Optional[int] = None) -> int: + """Resize the stream to the given size in bytes. + + If size is unspecified resize to the current position. + The current stream position isn't changed. + + Return the new file size. + """ + return self._file.truncate(size) + + def writable(self) -> bool: + """Return False.""" + return False + + def __enter__(self) -> "LazyZipOverHTTP": + self._file.__enter__() + return self + + def __exit__(self, *exc: Any) -> None: + self._file.__exit__(*exc) + + @contextmanager + def _stay(self) -> Generator[None, None, None]: + """Return a context manager keeping the position. + + At the end of the block, seek back to original position. + """ + pos = self.tell() + try: + yield + finally: + self.seek(pos) + + def _check_zip(self) -> None: + """Check and download until the file is a valid ZIP.""" + end = self._length - 1 + for start in reversed(range(0, end, self._chunk_size)): + self._download(start, end) + with self._stay(): + try: + # For read-only ZIP files, ZipFile only needs + # methods read, seek, seekable and tell. + ZipFile(self) + except BadZipFile: + pass + else: + break + + def _stream_response( + self, start: int, end: int, base_headers: Dict[str, str] = HEADERS + ) -> Response: + """Return HTTP response to a range request from start to end.""" + headers = base_headers.copy() + headers["Range"] = f"bytes={start}-{end}" + # TODO: Get range requests to be correctly cached + headers["Cache-Control"] = "no-cache" + return self._session.get(self._url, headers=headers, stream=True) + + def _merge( + self, start: int, end: int, left: int, right: int + ) -> Generator[Tuple[int, int], None, None]: + """Return a generator of intervals to be fetched. + + Args: + start (int): Start of needed interval + end (int): End of needed interval + left (int): Index of first overlapping downloaded data + right (int): Index after last overlapping downloaded data + """ + lslice, rslice = self._left[left:right], self._right[left:right] + i = start = min([start] + lslice[:1]) + end = max([end] + rslice[-1:]) + for j, k in zip(lslice, rslice): + if j > i: + yield i, j - 1 + i = k + 1 + if i <= end: + yield i, end + self._left[left:right], self._right[left:right] = [start], [end] + + def _download(self, start: int, end: int) -> None: + """Download bytes from start to end inclusively.""" + with self._stay(): + left = bisect_left(self._right, start) + right = bisect_right(self._left, end) + for start, end in self._merge(start, end, left, right): + response = self._stream_response(start, end) + response.raise_for_status() + self.seek(start) + for chunk in response_chunks(response, self._chunk_size): + self._file.write(chunk) |