diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/yarl | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/yarl')
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/__init__.py | 14 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_parse.py | 189 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_path.py | 41 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_query.py | 118 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_quoters.py | 32 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_quoting.py | 18 | ||||
-rwxr-xr-x | .venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so | bin | 0 -> 985824 bytes | |||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi | 16 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx | 423 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_quoting_py.py | 197 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/_url.py | 1584 | ||||
-rw-r--r-- | .venv/lib/python3.12/site-packages/yarl/py.typed | 1 |
12 files changed, 2633 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/yarl/__init__.py b/.venv/lib/python3.12/site-packages/yarl/__init__.py new file mode 100644 index 00000000..36404071 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/__init__.py @@ -0,0 +1,14 @@ +from ._query import Query, QueryVariable, SimpleQuery +from ._url import URL, cache_clear, cache_configure, cache_info + +__version__ = "1.18.3" + +__all__ = ( + "URL", + "SimpleQuery", + "QueryVariable", + "Query", + "cache_clear", + "cache_configure", + "cache_info", +) diff --git a/.venv/lib/python3.12/site-packages/yarl/_parse.py b/.venv/lib/python3.12/site-packages/yarl/_parse.py new file mode 100644 index 00000000..cc259ea8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_parse.py @@ -0,0 +1,189 @@ +"""URL parsing utilities.""" + +import re +import unicodedata +from functools import lru_cache +from typing import Union +from urllib.parse import scheme_chars, uses_netloc + +from ._quoters import QUOTER + +# Leading and trailing C0 control and space to be stripped per WHATWG spec. +# == "".join([chr(i) for i in range(0, 0x20 + 1)]) +WHATWG_C0_CONTROL_OR_SPACE = ( + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f " +) + +# Unsafe bytes to be removed per WHATWG spec +UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"] +USES_AUTHORITY = frozenset(uses_netloc) + +SplitURLType = tuple[str, str, str, str, str] + + +def split_url(url: str) -> SplitURLType: + """Split URL into parts.""" + # Adapted from urllib.parse.urlsplit + # Only lstrip url as some applications rely on preserving trailing space. 
+ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both) + url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE) + for b in UNSAFE_URL_BYTES_TO_REMOVE: + if b in url: + url = url.replace(b, "") + + scheme = netloc = query = fragment = "" + i = url.find(":") + if i > 0 and url[0] in scheme_chars: + for c in url[1:i]: + if c not in scheme_chars: + break + else: + scheme, url = url[:i].lower(), url[i + 1 :] + has_hash = "#" in url + has_question_mark = "?" in url + if url[:2] == "//": + delim = len(url) # position of end of domain part of url, default is end + if has_hash and has_question_mark: + delim_chars = "/?#" + elif has_question_mark: + delim_chars = "/?" + elif has_hash: + delim_chars = "/#" + else: + delim_chars = "/" + for c in delim_chars: # look for delimiters; the order is NOT important + wdelim = url.find(c, 2) # find first of this delim + if wdelim >= 0 and wdelim < delim: # if found + delim = wdelim # use earliest delim position + netloc = url[2:delim] + url = url[delim:] + has_left_bracket = "[" in netloc + has_right_bracket = "]" in netloc + if (has_left_bracket and not has_right_bracket) or ( + has_right_bracket and not has_left_bracket + ): + raise ValueError("Invalid IPv6 URL") + if has_left_bracket: + bracketed_host = netloc.partition("[")[2].partition("]")[0] + # Valid bracketed hosts are defined in + # https://www.rfc-editor.org/rfc/rfc3986#page-49 + # https://url.spec.whatwg.org/ + if bracketed_host[0] == "v": + if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host): + raise ValueError("IPvFuture address is invalid") + elif ":" not in bracketed_host: + raise ValueError("An IPv4 address cannot be in brackets") + if has_hash: + url, _, fragment = url.partition("#") + if has_question_mark: + url, _, query = url.partition("?") + if netloc and not netloc.isascii(): + _check_netloc(netloc) + return scheme, netloc, url, query, fragment + + +def _check_netloc(netloc: str) -> None: + # Adapted from urllib.parse._checknetloc + # looking 
for characters like \u2100 that expand to 'a/c' + # IDNA uses NFKC equivalence, so normalize for this check + + # ignore characters already included + # but not the surrounding text + n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "") + normalized_netloc = unicodedata.normalize("NFKC", n) + if n == normalized_netloc: + return + # Note that there are no unicode decompositions for the character '@' so + # its currently impossible to have test coverage for this branch, however if the + # one should be added in the future we want to make sure its still checked. + for c in "/?#@:": # pragma: no branch + if c in normalized_netloc: + raise ValueError( + f"netloc '{netloc}' contains invalid " + "characters under NFKC normalization" + ) + + +@lru_cache # match the same size as urlsplit +def split_netloc( + netloc: str, +) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]: + """Split netloc into username, password, host and port.""" + if "@" not in netloc: + username: Union[str, None] = None + password: Union[str, None] = None + hostinfo = netloc + else: + userinfo, _, hostinfo = netloc.rpartition("@") + username, have_password, password = userinfo.partition(":") + if not have_password: + password = None + + if "[" in hostinfo: + _, _, bracketed = hostinfo.partition("[") + hostname, _, port_str = bracketed.partition("]") + _, _, port_str = port_str.partition(":") + else: + hostname, _, port_str = hostinfo.partition(":") + + if not port_str: + return username or None, password, hostname or None, None + + try: + port = int(port_str) + except ValueError: + raise ValueError("Invalid URL: port can't be converted to integer") + if not (0 <= port <= 65535): + raise ValueError("Port out of range 0-65535") + return username or None, password, hostname or None, port + + +def unsplit_result( + scheme: str, netloc: str, url: str, query: str, fragment: str +) -> str: + """Unsplit a URL without any normalization.""" + if netloc or 
(scheme and scheme in USES_AUTHORITY) or url[:2] == "//": + if url and url[:1] != "/": + url = f"{scheme}://{netloc}/{url}" if scheme else f"{scheme}:{url}" + else: + url = f"{scheme}://{netloc}{url}" if scheme else f"//{netloc}{url}" + elif scheme: + url = f"{scheme}:{url}" + if query: + url = f"{url}?{query}" + return f"{url}#{fragment}" if fragment else url + + +@lru_cache # match the same size as urlsplit +def make_netloc( + user: Union[str, None], + password: Union[str, None], + host: Union[str, None], + port: Union[int, None], + encode: bool = False, +) -> str: + """Make netloc from parts. + + The user and password are encoded if encode is True. + + The host must already be encoded with _encode_host. + """ + if host is None: + return "" + ret = host + if port is not None: + ret = f"{ret}:{port}" + if user is None and password is None: + return ret + if password is not None: + if not user: + user = "" + elif encode: + user = QUOTER(user) + if encode: + password = QUOTER(password) + user = f"{user}:{password}" + elif user and encode: + user = QUOTER(user) + return f"{user}@{ret}" if user else ret diff --git a/.venv/lib/python3.12/site-packages/yarl/_path.py b/.venv/lib/python3.12/site-packages/yarl/_path.py new file mode 100644 index 00000000..c22f0b4b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_path.py @@ -0,0 +1,41 @@ +"""Utilities for working with paths.""" + +from collections.abc import Sequence +from contextlib import suppress + + +def normalize_path_segments(segments: Sequence[str]) -> list[str]: + """Drop '.' and '..' from a sequence of str segments""" + + resolved_path: list[str] = [] + + for seg in segments: + if seg == "..": + # ignore any .. segments that would otherwise cause an + # IndexError when popped from resolved_path if + # resolving for rfc3986 + with suppress(IndexError): + resolved_path.pop() + elif seg != ".": + resolved_path.append(seg) + + if segments and segments[-1] in (".", ".."): + # do some post-processing here. 
+ # if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append("") + + return resolved_path + + +def normalize_path(path: str) -> str: + # Drop '.' and '..' from str path + prefix = "" + if path and path[0] == "/": + # preserve the "/" root element of absolute paths, copying it to the + # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. + prefix = "/" + path = path[1:] + + segments = path.split("/") + return prefix + "/".join(normalize_path_segments(segments)) diff --git a/.venv/lib/python3.12/site-packages/yarl/_query.py b/.venv/lib/python3.12/site-packages/yarl/_query.py new file mode 100644 index 00000000..6a663fc9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_query.py @@ -0,0 +1,118 @@ +"""Query string handling.""" + +import math +from collections.abc import Iterable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, SupportsInt, Union + +from multidict import istr + +from ._quoters import QUERY_PART_QUOTER, QUERY_QUOTER + +SimpleQuery = Union[str, int, float] +QueryVariable = Union[SimpleQuery, Sequence[SimpleQuery]] +Query = Union[ + None, str, Mapping[str, QueryVariable], Sequence[tuple[str, QueryVariable]] +] + + +def query_var(v: QueryVariable) -> str: + """Convert a query variable to a string.""" + cls = type(v) + if cls is int: # Fast path for non-subclassed int + return str(v) + if issubclass(cls, str): + if TYPE_CHECKING: + assert isinstance(v, str) + return v + if cls is float or issubclass(cls, float): + if TYPE_CHECKING: + assert isinstance(v, float) + if math.isinf(v): + raise ValueError("float('inf') is not supported") + if math.isnan(v): + raise ValueError("float('nan') is not supported") + return str(float(v)) + if cls is not bool and isinstance(cls, SupportsInt): + return str(int(v)) + raise TypeError( + "Invalid variable type: value " + "should be str, int or float, got {!r} " + "of type {}".format(v, cls) + ) + + +def get_str_query_from_sequence_iterable( + 
items: Iterable[tuple[Union[str, istr], QueryVariable]], +) -> str: + """Return a query string from a sequence of (key, value) pairs. + + value is a single value or a sequence of values for the key + + The sequence of values must be a list or tuple. + """ + quoter = QUERY_PART_QUOTER + pairs = [ + f"{quoter(k)}={quoter(v if type(v) is str else query_var(v))}" + for k, val in items + for v in ( + val if type(val) is not str and isinstance(val, (list, tuple)) else (val,) + ) + ] + return "&".join(pairs) + + +def get_str_query_from_iterable( + items: Iterable[tuple[Union[str, istr], SimpleQuery]] +) -> str: + """Return a query string from an iterable. + + The iterable must contain (key, value) pairs. + + The values are not allowed to be sequences, only single values are + allowed. For sequences, use `_get_str_query_from_sequence_iterable`. + """ + quoter = QUERY_PART_QUOTER + # A listcomp is used since listcomps are inlined on CPython 3.12+ and + # they are a bit faster than a generator expression. 
+ pairs = [ + f"{quoter(k)}={quoter(v if type(v) is str else query_var(v))}" for k, v in items + ] + return "&".join(pairs) + + +def get_str_query(*args: Any, **kwargs: Any) -> Union[str, None]: + """Return a query string from supported args.""" + query: Union[str, Mapping[str, QueryVariable], None] + if kwargs: + if args: + msg = "Either kwargs or single query parameter must be present" + raise ValueError(msg) + query = kwargs + elif len(args) == 1: + query = args[0] + else: + raise ValueError("Either kwargs or single query parameter must be present") + + if query is None: + return None + if not query: + return "" + if type(query) is dict: + return get_str_query_from_sequence_iterable(query.items()) + if type(query) is str or isinstance(query, str): + return QUERY_QUOTER(query) + if isinstance(query, Mapping): + return get_str_query_from_sequence_iterable(query.items()) + if isinstance(query, (bytes, bytearray, memoryview)): + msg = "Invalid query type: bytes, bytearray and memoryview are forbidden" + raise TypeError(msg) + if isinstance(query, Sequence): + # We don't expect sequence values if we're given a list of pairs + # already; only mappings like builtin `dict` which can't have the + # same key pointing to multiple values are allowed to use + # `_query_seq_pairs`. 
+ return get_str_query_from_iterable(query) + raise TypeError( + "Invalid query type: only str, mapping or " + "sequence of (key, value) pairs is allowed" + ) diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoters.py b/.venv/lib/python3.12/site-packages/yarl/_quoters.py new file mode 100644 index 00000000..c1d2d7f8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_quoters.py @@ -0,0 +1,32 @@ +"""Quoting and unquoting utilities for URL parts.""" + +from typing import Union +from urllib.parse import quote + +from ._quoting import _Quoter, _Unquoter + +QUOTER = _Quoter(requote=False) +REQUOTER = _Quoter() +PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False) +PATH_REQUOTER = _Quoter(safe="@:", protected="/+") +QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False) +QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True) +QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False) +FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False) +FRAGMENT_REQUOTER = _Quoter(safe="?/:@") + +UNQUOTER = _Unquoter() +PATH_UNQUOTER = _Unquoter(unsafe="+") +PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") +QS_UNQUOTER = _Unquoter(qs=True) + + +def human_quote(s: Union[str, None], unsafe: str) -> Union[str, None]: + if not s: + return s + for c in "%" + unsafe: + if c in s: + s = s.replace(c, f"%{ord(c):02X}") + if s.isprintable(): + return s + return "".join(c if c.isprintable() else quote(c) for c in s) diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting.py b/.venv/lib/python3.12/site-packages/yarl/_quoting.py new file mode 100644 index 00000000..95e86095 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_quoting.py @@ -0,0 +1,18 @@ +import os +import sys + +__all__ = ("_Quoter", "_Unquoter") + + +NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS")) # type: bool +if sys.implementation.name != "cpython": + NO_EXTENSIONS = True + + +if not NO_EXTENSIONS: # pragma: no branch + try: + from 
._quoting_c import _Quoter, _Unquoter + except ImportError: # pragma: no cover + from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment] +else: + from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment] diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..0cdf3988 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi new file mode 100644 index 00000000..9a6b79ad --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi @@ -0,0 +1,16 @@ +class _Quoter: + def __init__( + self, + *, + safe: str = ..., + protected: str = ..., + qs: bool = ..., + requote: bool = ... + ) -> None: ... + def __call__(self, val: str = ...) -> str: ... + +class _Unquoter: + def __init__( + self, *, ignore: str = ..., unsafe: str = ..., qs: bool = ... + ) -> None: ... + def __call__(self, val: str = ...) -> str: ... 
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx new file mode 100644 index 00000000..067ba96e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx @@ -0,0 +1,423 @@ +# cython: language_level=3 + +from cpython.exc cimport PyErr_NoMemory +from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc +from cpython.unicode cimport ( + PyUnicode_DATA, + PyUnicode_DecodeASCII, + PyUnicode_DecodeUTF8Stateful, + PyUnicode_GET_LENGTH, + PyUnicode_KIND, + PyUnicode_READ, +) +from libc.stdint cimport uint8_t, uint64_t +from libc.string cimport memcpy, memset + +from string import ascii_letters, digits + + +cdef str GEN_DELIMS = ":/?#[]@" +cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*," +cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+?=;' +cdef str RESERVED = GEN_DELIMS + SUB_DELIMS +cdef str UNRESERVED = ascii_letters + digits + '-._~' +cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS +cdef str QS = '+&=;' + +DEF BUF_SIZE = 8 * 1024 # 8KiB +cdef char BUFFER[BUF_SIZE] + +cdef inline Py_UCS4 _to_hex(uint8_t v) noexcept: + if v < 10: + return <Py_UCS4>(v+0x30) # ord('0') == 0x30 + else: + return <Py_UCS4>(v+0x41-10) # ord('A') == 0x41 + + +cdef inline int _from_hex(Py_UCS4 v) noexcept: + if '0' <= v <= '9': + return <int>(v) - 0x30 # ord('0') == 0x30 + elif 'A' <= v <= 'F': + return <int>(v) - 0x41 + 10 # ord('A') == 0x41 + elif 'a' <= v <= 'f': + return <int>(v) - 0x61 + 10 # ord('a') == 0x61 + else: + return -1 + + +cdef inline int _is_lower_hex(Py_UCS4 v) noexcept: + return 'a' <= v <= 'f' + + +cdef inline Py_UCS4 _restore_ch(Py_UCS4 d1, Py_UCS4 d2): + cdef int digit1 = _from_hex(d1) + if digit1 < 0: + return <Py_UCS4>-1 + cdef int digit2 = _from_hex(d2) + if digit2 < 0: + return <Py_UCS4>-1 + return <Py_UCS4>(digit1 << 4 | digit2) + + +cdef uint8_t ALLOWED_TABLE[16] +cdef uint8_t ALLOWED_NOTQS_TABLE[16] + + +cdef inline bint bit_at(uint8_t array[], uint64_t ch) noexcept: + 
return array[ch >> 3] & (1 << (ch & 7)) + + +cdef inline void set_bit(uint8_t array[], uint64_t ch) noexcept: + array[ch >> 3] |= (1 << (ch & 7)) + + +memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE)) +memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE)) + +for i in range(128): + if chr(i) in ALLOWED: + set_bit(ALLOWED_TABLE, i) + set_bit(ALLOWED_NOTQS_TABLE, i) + if chr(i) in QS: + set_bit(ALLOWED_NOTQS_TABLE, i) + +# ----------------- writer --------------------------- + +cdef struct Writer: + char *buf + Py_ssize_t size + Py_ssize_t pos + bint changed + + +cdef inline void _init_writer(Writer* writer): + writer.buf = &BUFFER[0] + writer.size = BUF_SIZE + writer.pos = 0 + writer.changed = 0 + + +cdef inline void _release_writer(Writer* writer): + if writer.buf != BUFFER: + PyMem_Free(writer.buf) + + +cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed): + cdef char * buf + cdef Py_ssize_t size + + if writer.pos == writer.size: + # reallocate + size = writer.size + BUF_SIZE + if writer.buf == BUFFER: + buf = <char*>PyMem_Malloc(size) + if buf == NULL: + PyErr_NoMemory() + return -1 + memcpy(buf, writer.buf, writer.size) + else: + buf = <char*>PyMem_Realloc(writer.buf, size) + if buf == NULL: + PyErr_NoMemory() + return -1 + writer.buf = buf + writer.size = size + writer.buf[writer.pos] = <char>ch + writer.pos += 1 + writer.changed |= changed + return 0 + + +cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed): + if _write_char(writer, '%', changed) < 0: + return -1 + if _write_char(writer, _to_hex(<uint8_t>ch >> 4), changed) < 0: + return -1 + return _write_char(writer, _to_hex(<uint8_t>ch & 0x0f), changed) + + +cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol): + cdef uint64_t utf = <uint64_t> symbol + + if utf < 0x80: + return _write_pct(writer, <uint8_t>utf, True) + elif utf < 0x800: + if _write_pct(writer, <uint8_t>(0xc0 | (utf >> 6)), True) < 0: + return -1 + return _write_pct(writer, <uint8_t>(0x80 | (utf & 
0x3f)), True) + elif 0xD800 <= utf <= 0xDFFF: + # surogate pair, ignored + return 0 + elif utf < 0x10000: + if _write_pct(writer, <uint8_t>(0xe0 | (utf >> 12)), True) < 0: + return -1 + if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)), + True) < 0: + return -1 + return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True) + elif utf > 0x10FFFF: + # symbol is too large + return 0 + else: + if _write_pct(writer, <uint8_t>(0xf0 | (utf >> 18)), True) < 0: + return -1 + if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 12) & 0x3f)), + True) < 0: + return -1 + if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)), + True) < 0: + return -1 + return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True) + + +# --------------------- end writer -------------------------- + + +cdef class _Quoter: + cdef bint _qs + cdef bint _requote + + cdef uint8_t _safe_table[16] + cdef uint8_t _protected_table[16] + + def __init__( + self, *, str safe='', str protected='', bint qs=False, bint requote=True, + ): + cdef Py_UCS4 ch + + self._qs = qs + self._requote = requote + + if not self._qs: + memcpy(self._safe_table, + ALLOWED_NOTQS_TABLE, + sizeof(self._safe_table)) + else: + memcpy(self._safe_table, + ALLOWED_TABLE, + sizeof(self._safe_table)) + for ch in safe: + if ord(ch) > 127: + raise ValueError("Only safe symbols with ORD < 128 are allowed") + set_bit(self._safe_table, ch) + + memset(self._protected_table, 0, sizeof(self._protected_table)) + for ch in protected: + if ord(ch) > 127: + raise ValueError("Only safe symbols with ORD < 128 are allowed") + set_bit(self._safe_table, ch) + set_bit(self._protected_table, ch) + + def __call__(self, val): + if val is None: + return None + if type(val) is not str: + if isinstance(val, str): + # derived from str + val = str(val) + else: + raise TypeError("Argument should be str") + return self._do_quote_or_skip(<str>val) + + cdef str _do_quote_or_skip(self, str val): + cdef Py_UCS4 ch + cdef Py_ssize_t length = 
PyUnicode_GET_LENGTH(val) + cdef Py_ssize_t idx = length + cdef bint must_quote = 0 + cdef Writer writer + cdef int kind = PyUnicode_KIND(val) + cdef const void *data = PyUnicode_DATA(val) + + # If everything in the string is in the safe + # table and all ASCII, we can skip quoting + while idx: + idx -= 1 + ch = PyUnicode_READ(kind, data, idx) + if ch >= 128 or not bit_at(self._safe_table, ch): + must_quote = 1 + break + + if not must_quote: + return val + + _init_writer(&writer) + try: + return self._do_quote(<str>val, length, kind, data, &writer) + finally: + _release_writer(&writer) + + cdef str _do_quote( + self, + str val, + Py_ssize_t length, + int kind, + const void *data, + Writer *writer + ): + cdef Py_UCS4 ch + cdef int changed + cdef Py_ssize_t idx = 0 + + while idx < length: + ch = PyUnicode_READ(kind, data, idx) + idx += 1 + if ch == '%' and self._requote and idx <= length - 2: + ch = _restore_ch( + PyUnicode_READ(kind, data, idx), + PyUnicode_READ(kind, data, idx + 1) + ) + if ch != <Py_UCS4>-1: + idx += 2 + if ch < 128: + if bit_at(self._protected_table, ch): + if _write_pct(writer, ch, True) < 0: + raise + continue + + if bit_at(self._safe_table, ch): + if _write_char(writer, ch, True) < 0: + raise + continue + + changed = (_is_lower_hex(PyUnicode_READ(kind, data, idx - 2)) or + _is_lower_hex(PyUnicode_READ(kind, data, idx - 1))) + if _write_pct(writer, ch, changed) < 0: + raise + continue + else: + ch = '%' + + if self._write(writer, ch) < 0: + raise + + if not writer.changed: + return val + else: + return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict") + + cdef inline int _write(self, Writer *writer, Py_UCS4 ch): + if self._qs: + if ch == ' ': + return _write_char(writer, '+', True) + + if ch < 128 and bit_at(self._safe_table, ch): + return _write_char(writer, ch, False) + + return _write_utf8(writer, ch) + + +cdef class _Unquoter: + cdef str _ignore + cdef str _unsafe + cdef bint _qs + cdef _Quoter _quoter + cdef _Quoter _qs_quoter + + 
def __init__(self, *, ignore="", unsafe="", qs=False): + self._ignore = ignore + self._unsafe = unsafe + self._qs = qs + self._quoter = _Quoter() + self._qs_quoter = _Quoter(qs=True) + + def __call__(self, val): + if val is None: + return None + if type(val) is not str: + if isinstance(val, str): + # derived from str + val = str(val) + else: + raise TypeError("Argument should be str") + return self._do_unquote(<str>val) + + cdef str _do_unquote(self, str val): + cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val) + if length == 0: + return val + + cdef list ret = [] + cdef char buffer[4] + cdef Py_ssize_t buflen = 0 + cdef Py_ssize_t consumed + cdef str unquoted + cdef Py_UCS4 ch = 0 + cdef Py_ssize_t idx = 0 + cdef Py_ssize_t start_pct + cdef int kind = PyUnicode_KIND(val) + cdef const void *data = PyUnicode_DATA(val) + cdef bint changed = 0 + while idx < length: + ch = PyUnicode_READ(kind, data, idx) + idx += 1 + if ch == '%' and idx <= length - 2: + changed = 1 + ch = _restore_ch( + PyUnicode_READ(kind, data, idx), + PyUnicode_READ(kind, data, idx + 1) + ) + if ch != <Py_UCS4>-1: + idx += 2 + assert buflen < 4 + buffer[buflen] = ch + buflen += 1 + try: + unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen, + NULL, &consumed) + except UnicodeDecodeError: + start_pct = idx - buflen * 3 + buffer[0] = ch + buflen = 1 + ret.append(val[start_pct : idx - 3]) + try: + unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen, + NULL, &consumed) + except UnicodeDecodeError: + buflen = 0 + ret.append(val[idx - 3 : idx]) + continue + if not unquoted: + assert consumed == 0 + continue + assert consumed == buflen + buflen = 0 + if self._qs and unquoted in '+=&;': + ret.append(self._qs_quoter(unquoted)) + elif unquoted in self._unsafe or unquoted in self._ignore: + ret.append(self._quoter(unquoted)) + else: + ret.append(unquoted) + continue + else: + ch = '%' + + if buflen: + start_pct = idx - 1 - buflen * 3 + ret.append(val[start_pct : idx - 1]) + buflen = 0 + + if ch == '+': 
"""Pure-Python percent-quoting and unquoting helpers."""

import codecs
import re
from string import ascii_letters, ascii_lowercase, digits
from typing import cast

# The two callables this module provides to the rest of the package.
__all__ = ("_Quoter", "_Unquoter")

BASCII_LOWERCASE = ascii_lowercase.encode("ascii")
BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)}
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS_WITHOUT_QS = "!$'()*,"
SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;"
RESERVED = GEN_DELIMS + SUB_DELIMS
UNRESERVED = ascii_letters + digits + "-._~"
ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS


# Two upper-case hexadecimal digits.  Candidate bytes are upper-cased before
# they are matched, so only A-F and 0-9 have to be accepted here.
_IS_HEX = re.compile(b"[A-F0-9][A-F0-9]")
_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")

utf8_decoder = codecs.getincrementaldecoder("utf-8")


class _Quoter:
    """Callable that percent-encodes a string.

    Args:
        safe: Extra ASCII characters that are emitted unescaped.
        protected: ASCII characters that are emitted unescaped when they
            appear literally, while an existing ``%XX`` escape of one of
            them is kept as an escape.
        qs: Query-string mode. A space becomes ``+`` and ``+&=;`` are not
            treated as safe.
        requote: When true, existing ``%XX`` sequences are recognised and
            normalised; when false, every ``%`` is escaped as ``%25``.

    Raises:
        UnicodeEncodeError: (a ``ValueError``) at construction time if
            ``safe`` or ``protected`` contains a non-ASCII character.
    """

    def __init__(
        self,
        *,
        safe: str = "",
        protected: str = "",
        qs: bool = False,
        requote: bool = True,
    ) -> None:
        self._safe = safe
        self._protected = protected
        self._qs = qs
        self._requote = requote
        # The set of unescaped characters depends only on the constructor
        # arguments, so it is computed once here, not on every call.
        allowed = safe + ALLOWED
        if not qs:
            allowed += "+&=;"
        allowed += protected
        self._allowed = allowed
        self._allowed_bytes = frozenset(allowed.encode("ascii"))

    def __call__(self, val: str) -> str:
        """Return ``val`` percent-encoded; ``None`` is passed through.

        Raises:
            TypeError: If ``val`` is neither ``None`` nor a ``str``.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        # Code points that cannot be encoded (lone surrogates) are dropped.
        bval = val.encode("utf8", errors="ignore")
        ret = bytearray()
        pct = bytearray()  # bytes of a "%XX" candidate collected so far
        allowed = self._allowed
        allowed_bytes = self._allowed_bytes
        end = len(bval)
        idx = 0
        while idx < end:
            ch = bval[idx]
            idx += 1

            if pct:
                if ch in BASCII_LOWERCASE:
                    ch = ch - 32  # convert to uppercase
                pct.append(ch)
                if len(pct) == 3:  # pragma: no branch # peephole optimizer
                    buf = pct[1:]
                    if not _IS_HEX.match(buf):
                        # Not an escape: quote the "%" itself and re-read
                        # the two bytes after it as ordinary input.
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue

                    unquoted = chr(int(buf.decode("ascii"), base=16))
                    if unquoted in self._protected:
                        ret.extend(pct)
                    elif unquoted in allowed:
                        ret.append(ord(unquoted))
                    else:
                        ret.extend(pct)
                    pct.clear()

                # special case, if we have only one char after "%"
                elif len(pct) == 2 and idx == end:
                    ret.extend(b"%25")
                    pct.clear()
                    idx -= 1

                continue

            elif ch == ord("%") and self._requote:
                pct.clear()
                pct.append(ch)

                # special case if "%" is last char
                if idx == end:
                    ret.extend(b"%25")

                continue

            if self._qs and ch == ord(" "):
                ret.append(ord("+"))
                continue
            if ch in allowed_bytes:
                ret.append(ch)
                continue

            ret.extend((f"%{ch:02X}").encode("ascii"))

        ret2 = ret.decode("ascii")
        if ret2 == val:
            # Hand back the caller's own object when nothing changed.
            return val
        return ret2


class _Unquoter:
    """Callable that decodes percent-encoded text.

    Args:
        ignore: Characters that stay percent-encoded when they are the
            result of decoding an escape.
        unsafe: Characters that stay percent-encoded when decoded from an
            escape, and that are percent-encoded when they appear
            literally in the input.
        qs: Query-string mode. ``+`` decodes to a space (unless ``+`` is
            in ``unsafe``) and escapes of ``+=&;`` are kept encoded.
    """

    def __init__(self, *, ignore: str = "", unsafe: str = "", qs: bool = False) -> None:
        self._ignore = ignore
        self._unsafe = unsafe
        self._qs = qs
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val: str) -> str:
        """Return ``val`` with percent-escapes decoded; ``None`` passes through.

        Escapes that do not form valid UTF-8 are copied to the output
        unchanged.

        Raises:
            TypeError: If ``val`` is neither ``None`` nor a ``str``.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        decoder = cast(codecs.BufferedIncrementalDecoder, utf8_decoder())
        ret = []
        idx = 0
        while idx < len(val):
            ch = val[idx]
            idx += 1
            if ch == "%" and idx <= len(val) - 2:
                pct = val[idx : idx + 2]
                if _IS_HEX_STR.fullmatch(pct):
                    b = bytes([int(pct, base=16)])
                    idx += 2
                    try:
                        unquoted = decoder.decode(b)
                    except UnicodeDecodeError:
                        # The buffered bytes plus this one are not valid
                        # UTF-8: emit the buffered escapes verbatim and
                        # retry with this byte on its own.
                        start_pct = idx - 3 - len(decoder.buffer) * 3
                        ret.append(val[start_pct : idx - 3])
                        decoder.reset()
                        try:
                            unquoted = decoder.decode(b)
                        except UnicodeDecodeError:
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Incomplete multi-byte sequence; wait for more.
                        continue
                    if self._qs and unquoted in "+=&;":
                        to_add = self._qs_quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    elif unquoted in self._unsafe or unquoted in self._ignore:
                        to_add = self._quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    else:
                        ret.append(unquoted)
                    continue

            if decoder.buffer:
                # A multi-byte sequence was cut short by an ordinary
                # character: emit its escapes unchanged.
                start_pct = idx - 1 - len(decoder.buffer) * 3
                ret.append(val[start_pct : idx - 1])
                decoder.reset()

            if ch == "+":
                if not self._qs or ch in self._unsafe:
                    ret.append("+")
                else:
                    ret.append(" ")
                continue

            if ch in self._unsafe:
                # Zero-pad to two hex digits so that code points below
                # 0x10 produce a well-formed escape ("%09", not "%9").
                ret.append(f"%{ord(ch):02X}")
                continue

            ret.append(ch)

        if decoder.buffer:
            ret.append(val[-len(decoder.buffer) * 3 :])

        ret2 = "".join(ret)
        if ret2 == val:
            return val
        return ret2
+ get_str_query_from_iterable, + get_str_query_from_sequence_iterable, +) +from ._quoters import ( + FRAGMENT_QUOTER, + FRAGMENT_REQUOTER, + PATH_QUOTER, + PATH_REQUOTER, + PATH_SAFE_UNQUOTER, + PATH_UNQUOTER, + QS_UNQUOTER, + QUERY_QUOTER, + QUERY_REQUOTER, + QUOTER, + REQUOTER, + UNQUOTER, + human_quote, +) + +DEFAULT_PORTS = {"http": 80, "https": 443, "ws": 80, "wss": 443, "ftp": 21} +USES_RELATIVE = frozenset(uses_relative) + +# Special schemes https://url.spec.whatwg.org/#special-scheme +# are not allowed to have an empty host https://url.spec.whatwg.org/#url-representation +SCHEME_REQUIRES_HOST = frozenset(("http", "https", "ws", "wss", "ftp")) + + +# reg-name: unreserved / pct-encoded / sub-delims +# this pattern matches anything that is *not* in those classes. and is only used +# on lower-cased ASCII values. +NOT_REG_NAME = re.compile( + r""" + # any character not in the unreserved or sub-delims sets, plus % + # (validated with the additional check for pct-encoded sequences below) + [^a-z0-9\-._~!$&'()*+,;=%] + | + # % only allowed if it is part of a pct-encoded + # sequence of 2 hex digits. 
+ %(?![0-9a-f]{2}) + """, + re.VERBOSE, +) + +_T = TypeVar("_T") + +if sys.version_info >= (3, 11): + from typing import Self +else: + Self = Any + + +class UndefinedType(Enum): + """Singleton type for use with not set sentinel values.""" + + _singleton = 0 + + +UNDEFINED = UndefinedType._singleton + + +class CacheInfo(TypedDict): + """Host encoding cache.""" + + idna_encode: _CacheInfo + idna_decode: _CacheInfo + ip_address: _CacheInfo + host_validate: _CacheInfo + encode_host: _CacheInfo + + +class _InternalURLCache(TypedDict, total=False): + _val: SplitURLType + _origin: "URL" + absolute: bool + scheme: str + raw_authority: str + authority: str + raw_user: Union[str, None] + user: Union[str, None] + raw_password: Union[str, None] + password: Union[str, None] + raw_host: Union[str, None] + host: Union[str, None] + host_subcomponent: Union[str, None] + host_port_subcomponent: Union[str, None] + port: Union[int, None] + explicit_port: Union[int, None] + raw_path: str + path: str + _parsed_query: list[tuple[str, str]] + query: "MultiDictProxy[str]" + raw_query_string: str + query_string: str + path_qs: str + raw_path_qs: str + raw_fragment: str + fragment: str + raw_parts: tuple[str, ...] + parts: tuple[str, ...] + parent: "URL" + raw_name: str + name: str + raw_suffix: str + suffix: str + raw_suffixes: tuple[str, ...] + suffixes: tuple[str, ...] 
def rewrite_module(obj: _T) -> _T:
    """Set ``obj.__module__`` to ``"yarl"`` and return the same object.

    Usable as a decorator: the object is modified in place, not wrapped.
    """
    setattr(obj, "__module__", "yarl")
    return obj
@lru_cache
def build_pre_encoded_url(
    scheme: str,
    authority: str,
    user: Union[str, None],
    password: Union[str, None],
    host: str,
    port: Union[int, None],
    path: str,
    query_string: str,
    fragment: str,
) -> "URL":
    """Assemble a URL from parts that are already encoded.

    No quoting is applied to any component. The network location is taken
    from ``authority`` when it is non-empty; otherwise it is composed from
    ``user``, ``password``, ``host`` and ``port``, leaving out a port equal
    to the default for ``scheme``. Without ``authority`` and ``host`` the
    network location is empty.
    """
    if authority:
        netloc = authority
    elif not host:
        netloc = ""
    else:
        # A port equal to the scheme default is not rendered.
        if port is not None and port == DEFAULT_PORTS.get(scheme):
            port = None
        if user is None and password is None:
            netloc = host if port is None else f"{host}:{port}"
        else:
            netloc = make_netloc(user, password, host, port)

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query_string
    url._fragment = fragment
    url._cache = {}
    return url
will not suffer from pathlib problems: + # it's intended for libraries like aiohttp, + # not to be passed into standard library functions like os.open etc. + + # URL grammar (RFC 3986) + # pct-encoded = "%" HEXDIG HEXDIG + # reserved = gen-delims / sub-delims + # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + # / "*" / "+" / "," / ";" / "=" + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + # hier-part = "//" authority path-abempty + # / path-absolute + # / path-rootless + # / path-empty + # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + # authority = [ userinfo "@" ] host [ ":" port ] + # userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + # host = IP-literal / IPv4address / reg-name + # IP-literal = "[" ( IPv6address / IPvFuture ) "]" + # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + # IPv6address = 6( h16 ":" ) ls32 + # / "::" 5( h16 ":" ) ls32 + # / [ h16 ] "::" 4( h16 ":" ) ls32 + # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + # / [ *4( h16 ":" ) h16 ] "::" ls32 + # / [ *5( h16 ":" ) h16 ] "::" h16 + # / [ *6( h16 ":" ) h16 ] "::" + # ls32 = ( h16 ":" h16 ) / IPv4address + # ; least-significant 32 bits of address + # h16 = 1*4HEXDIG + # ; 16 bits of address represented in hexadecimal + # IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet + # dec-octet = DIGIT ; 0-9 + # / %x31-39 DIGIT ; 10-99 + # / "1" 2DIGIT ; 100-199 + # / "2" %x30-34 DIGIT ; 200-249 + # / "25" %x30-35 ; 250-255 + # reg-name = *( unreserved / pct-encoded / sub-delims ) + # port = *DIGIT + # path = path-abempty ; begins with "/" or is empty + # / path-absolute ; begins with "/" but not "//" + # / path-noscheme ; begins with a non-colon segment + # / path-rootless ; begins with a segment + # / path-empty ; zero characters + # path-abempty = *( "/" segment ) + # path-absolute = "/" [ segment-nz *( "/" segment ) ] + # path-noscheme = segment-nz-nc *( "/" segment ) + # path-rootless = segment-nz *( "/" segment ) + # path-empty = 0<pchar> + # segment = *pchar + # segment-nz = 1*pchar + # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + # ; non-zero-length segment without any colon ":" + # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + # query = *( pchar / "/" / "?" ) + # fragment = *( pchar / "/" / "?" ) + # URI-reference = URI / relative-ref + # relative-ref = relative-part [ "?" query ] [ "#" fragment ] + # relative-part = "//" authority path-abempty + # / path-absolute + # / path-noscheme + # / path-empty + # absolute-URI = scheme ":" hier-part [ "?" 
query ] + __slots__ = ("_cache", "_scheme", "_netloc", "_path", "_query", "_fragment") + + _scheme: str + _netloc: str + _path: str + _query: str + _fragment: str + + def __new__( + cls, + val: Union[str, SplitResult, "URL", UndefinedType] = UNDEFINED, + *, + encoded: bool = False, + strict: Union[bool, None] = None, + ) -> "URL": + if strict is not None: # pragma: no cover + warnings.warn("strict parameter is ignored") + if type(val) is str: + return pre_encoded_url(val) if encoded else encode_url(val) + if type(val) is cls: + return val + if type(val) is SplitResult: + if not encoded: + raise ValueError("Cannot apply decoding to SplitResult") + return from_parts(*val) + if isinstance(val, str): + return pre_encoded_url(str(val)) if encoded else encode_url(str(val)) + if val is UNDEFINED: + # Special case for UNDEFINED since it might be unpickling and we do + # not want to cache as the `__set_state__` call would mutate the URL + # object in the `pre_encoded_url` or `encoded_url` caches. + self = object.__new__(URL) + self._scheme = self._netloc = self._path = self._query = self._fragment = "" + self._cache = {} + return self + raise TypeError("Constructor parameter should be str") + + @classmethod + def build( + cls, + *, + scheme: str = "", + authority: str = "", + user: Union[str, None] = None, + password: Union[str, None] = None, + host: str = "", + port: Union[int, None] = None, + path: str = "", + query: Union[Query, None] = None, + query_string: str = "", + fragment: str = "", + encoded: bool = False, + ) -> "URL": + """Creates and returns a new URL""" + + if authority and (user or password or host or port): + raise ValueError( + 'Can\'t mix "authority" with "user", "password", "host" or "port".' 
+ ) + if port is not None and not isinstance(port, int): + raise TypeError(f"The port is required to be int, got {type(port)!r}.") + if port and not host: + raise ValueError('Can\'t build URL with "port" but without "host".') + if query and query_string: + raise ValueError('Only one of "query" or "query_string" should be passed') + if ( + scheme is None + or authority is None + or host is None + or path is None + or query_string is None + or fragment is None + ): + raise TypeError( + 'NoneType is illegal for "scheme", "authority", "host", "path", ' + '"query_string", and "fragment" args, use empty string instead.' + ) + + if query: + query_string = get_str_query(query) or "" + + if encoded: + return build_pre_encoded_url( + scheme, + authority, + user, + password, + host, + port, + path, + query_string, + fragment, + ) + + self = object.__new__(URL) + self._scheme = scheme + _host: Union[str, None] = None + if authority: + user, password, _host, port = split_netloc(authority) + _host = _encode_host(_host, validate_host=False) if _host else "" + elif host: + _host = _encode_host(host, validate_host=True) + else: + self._netloc = "" + + if _host is not None: + if port is not None: + port = None if port == DEFAULT_PORTS.get(scheme) else port + if user is None and password is None: + self._netloc = _host if port is None else f"{_host}:{port}" + else: + self._netloc = make_netloc(user, password, _host, port, True) + + path = PATH_QUOTER(path) if path else path + if path and self._netloc: + if "." 
in path: + path = normalize_path(path) + if path[0] != "/": + msg = ( + "Path in a URL with authority should " + "start with a slash ('/') if set" + ) + raise ValueError(msg) + + self._path = path + if not query and query_string: + query_string = QUERY_QUOTER(query_string) + self._query = query_string + self._fragment = FRAGMENT_QUOTER(fragment) if fragment else fragment + self._cache = {} + return self + + def __init_subclass__(cls): + raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") + + def __str__(self) -> str: + if not self._path and self._netloc and (self._query or self._fragment): + path = "/" + else: + path = self._path + if (port := self.explicit_port) is not None and port == DEFAULT_PORTS.get( + self._scheme + ): + # port normalization - using None for default ports to remove from rendering + # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3 + host = self.host_subcomponent + netloc = make_netloc(self.raw_user, self.raw_password, host, None) + else: + netloc = self._netloc + return unsplit_result(self._scheme, netloc, path, self._query, self._fragment) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}('{str(self)}')" + + def __bytes__(self) -> bytes: + return str(self).encode("ascii") + + def __eq__(self, other: object) -> bool: + if type(other) is not URL: + return NotImplemented + + path1 = "/" if not self._path and self._netloc else self._path + path2 = "/" if not other._path and other._netloc else other._path + return ( + self._scheme == other._scheme + and self._netloc == other._netloc + and path1 == path2 + and self._query == other._query + and self._fragment == other._fragment + ) + + def __hash__(self) -> int: + if (ret := self._cache.get("hash")) is None: + path = "/" if not self._path and self._netloc else self._path + ret = self._cache["hash"] = hash( + (self._scheme, self._netloc, path, self._query, self._fragment) + ) + return ret + + def __le__(self, other: object) -> bool: + if 
    def is_absolute(self) -> bool:
        """A check for absolute URLs.

        Return the value of the ``absolute`` property: True when the URL
        has a non-empty network location, False otherwise.

        It is preferred to use the ``.absolute`` property directly,
        as it is cached.
        """
        return self.absolute
+ + """ + if (explicit := self.explicit_port) is None: + # If the explicit port is None, then the URL must be + # using the default port unless its a relative URL + # which does not have an implicit port / default port + return self._netloc != "" + return explicit == DEFAULT_PORTS.get(self._scheme) + + def origin(self) -> "URL": + """Return an URL with scheme, host and port parts only. + + user, password, path, query and fragment are removed. + + """ + # TODO: add a keyword-only option for keeping user/pass maybe? + return self._origin + + @cached_property + def _val(self) -> SplitURLType: + return (self._scheme, self._netloc, self._path, self._query, self._fragment) + + @cached_property + def _origin(self) -> "URL": + """Return an URL with scheme, host and port parts only. + + user, password, path, query and fragment are removed. + """ + if not (netloc := self._netloc): + raise ValueError("URL should be absolute") + if not (scheme := self._scheme): + raise ValueError("URL should have scheme") + if "@" in netloc: + encoded_host = self.host_subcomponent + netloc = make_netloc(None, None, encoded_host, self.explicit_port) + elif not self._path and not self._query and not self._fragment: + return self + return from_parts(scheme, netloc, "", "", "") + + def relative(self) -> "URL": + """Return a relative part of the URL. + + scheme, user, password, host and port are removed. + + """ + if not self._netloc: + raise ValueError("URL should be absolute") + return from_parts("", "", self._path, self._query, self._fragment) + + @cached_property + def absolute(self) -> bool: + """A check for absolute URLs. + + Return True for absolute ones (having scheme or starting + with //), False otherwise. 
+ + """ + # `netloc`` is an empty string for relative URLs + # Checking `netloc` is faster than checking `hostname` + # because `hostname` is a property that does some extra work + # to parse the host from the `netloc` + return self._netloc != "" + + @cached_property + def scheme(self) -> str: + """Scheme for absolute URLs. + + Empty string for relative URLs or URLs starting with // + + """ + return self._scheme + + @cached_property + def raw_authority(self) -> str: + """Encoded authority part of URL. + + Empty string for relative URLs. + + """ + return self._netloc + + @cached_property + def authority(self) -> str: + """Decoded authority part of URL. + + Empty string for relative URLs. + + """ + return make_netloc(self.user, self.password, self.host, self.port) + + @cached_property + def raw_user(self) -> Union[str, None]: + """Encoded user part of URL. + + None if user is missing. + + """ + # not .username + self._cache_netloc() + return self._cache["raw_user"] + + @cached_property + def user(self) -> Union[str, None]: + """Decoded user part of URL. + + None if user is missing. + + """ + if (raw_user := self.raw_user) is None: + return None + return UNQUOTER(raw_user) + + @cached_property + def raw_password(self) -> Union[str, None]: + """Encoded password part of URL. + + None if password is missing. + + """ + self._cache_netloc() + return self._cache["raw_password"] + + @cached_property + def password(self) -> Union[str, None]: + """Decoded password part of URL. + + None if password is missing. + + """ + if (raw_password := self.raw_password) is None: + return None + return UNQUOTER(raw_password) + + @cached_property + def raw_host(self) -> Union[str, None]: + """Encoded host part of URL. + + None for relative URLs. + + When working with IPv6 addresses, use the `host_subcomponent` property instead + as it will return the host subcomponent with brackets. 
+ """ + # Use host instead of hostname for sake of shortness + # May add .hostname prop later + self._cache_netloc() + return self._cache["raw_host"] + + @cached_property + def host(self) -> Union[str, None]: + """Decoded host part of URL. + + None for relative URLs. + + """ + if (raw := self.raw_host) is None: + return None + if raw and raw[-1].isdigit() or ":" in raw: + # IP addresses are never IDNA encoded + return raw + return _idna_decode(raw) + + @cached_property + def host_subcomponent(self) -> Union[str, None]: + """Return the host subcomponent part of URL. + + None for relative URLs. + + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + + `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` + + Examples: + - `http://example.com:8080` -> `example.com` + - `http://example.com:80` -> `example.com` + - `https://127.0.0.1:8443` -> `127.0.0.1` + - `https://[::1]:8443` -> `[::1]` + - `http://[::1]` -> `[::1]` + + """ + if (raw := self.raw_host) is None: + return None + return f"[{raw}]" if ":" in raw else raw + + @cached_property + def host_port_subcomponent(self) -> Union[str, None]: + """Return the host and port subcomponent part of URL. + + Trailing dots are removed from the host part. + + This value is suitable for use in the Host header of an HTTP request. + + None for relative URLs. + + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + `IP-literal = "[" ( IPv6address / IPvFuture ) "]"` + https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3 + port = *DIGIT + + Examples: + - `http://example.com:8080` -> `example.com:8080` + - `http://example.com:80` -> `example.com` + - `http://example.com.:80` -> `example.com` + - `https://127.0.0.1:8443` -> `127.0.0.1:8443` + - `https://[::1]:8443` -> `[::1]:8443` + - `http://[::1]` -> `[::1]` + + """ + if (raw := self.raw_host) is None: + return None + if raw[-1] == ".": + # Remove all trailing dots from the netloc as while + # they are valid FQDNs in DNS, TLS validation fails. 
# NOTE: this span opens inside a method whose `def` line precedes the visible
# chunk (presumably together with the `if` that guards the rstrip below --
# confirm against the full file). That tail is reproduced unchanged.
            # See https://github.com/aio-libs/aiohttp/issues/3636.
            # To avoid string manipulation we only call rstrip if
            # the last character is a dot.
            raw = raw.rstrip(".")
        port = self.explicit_port
        # Omit the port when there is none or it equals the scheme default;
        # hosts containing ":" are wrapped in brackets.
        if port is None or port == DEFAULT_PORTS.get(self._scheme):
            return f"[{raw}]" if ":" in raw else raw
        return f"[{raw}]:{port}" if ":" in raw else f"{raw}:{port}"

    @cached_property
    def port(self) -> Union[int, None]:
        """Port part of URL, with scheme-based fallback.

        Returns the explicit port when there is one, otherwise the
        DEFAULT_PORTS entry for the scheme.

        None for relative URLs or URLs without explicit port and
        scheme without default port substitution.

        """
        if (explicit_port := self.explicit_port) is not None:
            return explicit_port
        return DEFAULT_PORTS.get(self._scheme)

    @cached_property
    def explicit_port(self) -> Union[int, None]:
        """Port part of URL, without scheme-based fallback.

        None for relative URLs or URLs without explicit port.

        """
        # The value is read from self._cache right after _cache_netloc() runs;
        # presumably that helper (defined outside this chunk) fills it in.
        self._cache_netloc()
        return self._cache["explicit_port"]

    @cached_property
    def raw_path(self) -> str:
        """Encoded path of URL.

        / for absolute URLs without path part.

        """
        return self._path if self._path or not self._netloc else "/"

    @cached_property
    def path(self) -> str:
        """Decoded path of URL.

        / for absolute URLs without path part, empty string when there is
        neither a path nor a netloc.

        """
        return PATH_UNQUOTER(self._path) if self._path else "/" if self._netloc else ""

    @cached_property
    def path_safe(self) -> str:
        """Decoded path of URL.

        / for absolute URLs without path part, empty string when there is
        neither a path nor a netloc.

        / (%2F) and % (%25) are not decoded

        """
        if self._path:
            return PATH_SAFE_UNQUOTER(self._path)
        return "/" if self._netloc else ""

    @cached_property
    def _parsed_query(self) -> list[tuple[str, str]]:
        """Parse query part of URL into (key, value) pairs, keeping blank values."""
        return parse_qsl(self._query, keep_blank_values=True)

    @cached_property
    def query(self) -> "MultiDictProxy[str]":
        """A MultiDictProxy representing parsed query parameters in decoded
        representation.

        Empty value if URL has no query part.

        """
        return MultiDictProxy(MultiDict(self._parsed_query))

    @cached_property
    def raw_query_string(self) -> str:
        """Encoded query part of URL.

        Empty string if query is missing.

        """
        return self._query

    @cached_property
    def query_string(self) -> str:
        """Decoded query part of URL.

        Empty string if query is missing.

        """
        return QS_UNQUOTER(self._query) if self._query else ""

    @cached_property
    def path_qs(self) -> str:
        """Decoded path of URL with query.

        The "?" separator is only added when the decoded query is non-empty.
        """
        return self.path if not (q := self.query_string) else f"{self.path}?{q}"

    @cached_property
    def raw_path_qs(self) -> str:
        """Encoded path of URL with query.

        As with raw_path, an empty path is rendered as "/" when a netloc is
        present.
        """
        if q := self._query:
            return f"{self._path}?{q}" if self._path or not self._netloc else f"/?{q}"
        return self._path if self._path or not self._netloc else "/"

    @cached_property
    def raw_fragment(self) -> str:
        """Encoded fragment part of URL.

        Empty string if fragment is missing.

        """
        return self._fragment

    @cached_property
    def fragment(self) -> str:
        """Decoded fragment part of URL.

        Empty string if fragment is missing.

        """
        return UNQUOTER(self._fragment) if self._fragment else ""

    @cached_property
    def raw_parts(self) -> tuple[str, ...]:
        """A tuple containing encoded *path* parts.

        ('/',) for absolute URLs if *path* is missing.

        """
        path = self._path
        if self._netloc:
            # With a netloc the first part is always "/"; path[1:] drops the
            # path's first character before splitting.
            return ("/", *path[1:].split("/")) if path else ("/",)
        if path and path[0] == "/":
            return ("/", *path[1:].split("/"))
        return tuple(path.split("/"))

    @cached_property
    def parts(self) -> tuple[str, ...]:
        """A tuple containing decoded *path* parts.

        ('/',) for absolute URLs if *path* is missing.

        """
        return tuple(UNQUOTER(part) for part in self.raw_parts)

    @cached_property
    def parent(self) -> "URL":
        """A new URL with last part of path removed and cleaned up query and
        fragment.

        When the path is empty or "/", the same object is returned unless a
        query or fragment has to be dropped.

        """
        path = self._path
        if not path or path == "/":
            if self._fragment or self._query:
                return from_parts(self._scheme, self._netloc, path, "", "")
            return self
        parts = path.split("/")
        return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "")

    @cached_property
    def raw_name(self) -> str:
        """The last part of raw_parts.

        When a netloc is present the leading "/" part is skipped, so the
        result is an empty string if nothing follows it.
        """
        parts = self.raw_parts
        if not self._netloc:
            return parts[-1]
        parts = parts[1:]
        return parts[-1] if parts else ""

    @cached_property
    def name(self) -> str:
        """The last part of parts."""
        return UNQUOTER(self.raw_name)

    @cached_property
    def raw_suffix(self) -> str:
        """Encoded suffix of raw_name: the text from its last "." onwards.

        Empty string when the name has no ".", or when the last "." is the
        first or the final character of the name.
        """
        name = self.raw_name
        i = name.rfind(".")
        return name[i:] if 0 < i < len(name) - 1 else ""

    @cached_property
    def suffix(self) -> str:
        """Decoded form of raw_suffix."""
        return UNQUOTER(self.raw_suffix)

    @cached_property
    def raw_suffixes(self) -> tuple[str, ...]:
        """Encoded suffixes of raw_name, each with its leading ".".

        Empty tuple when the name ends with "."; leading dots of the name
        are stripped before splitting.
        """
        name = self.raw_name
        if name.endswith("."):
            return ()
        name = name.lstrip(".")
        return tuple("." + suffix for suffix in name.split(".")[1:])

    @cached_property
    def suffixes(self) -> tuple[str, ...]:
        """Decoded form of raw_suffixes."""
        return tuple(UNQUOTER(suffix) for suffix in self.raw_suffixes)

    def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":
        """
        add paths to self._path, accounting for absolute vs relative paths,
        keep existing, but do not create new, empty segments

        Segments are collected in reverse order (new paths first, then the
        existing path) and reversed once at the end. The returned URL has an
        empty query and fragment.

        Raises ValueError if any of the given paths starts with a slash.
        """
        parsed: list[str] = []
        needs_normalize: bool = False
        for idx, path in enumerate(reversed(paths)):
            # empty segment of last is not removed
            last = idx == 0
            if path and path[0] == "/":
                raise ValueError(
                    f"Appending path {path!r} starting from slash is forbidden"
                )
            # We need to quote the path if it is not already encoded
            # This cannot be done at the end because the existing
            # path is already quoted and we do not want to double quote
            # the existing path.
            path = path if encoded else PATH_QUOTER(path)
            needs_normalize |= "." in path
            segments = path.split("/")
            segments.reverse()
            # remove trailing empty segment for all but the last path
            parsed += segments[1:] if not last and segments[0] == "" else segments

        if (path := self._path) and (old_segments := path.split("/")):
            # If the old path ends with a slash, the last segment is an empty string
            # and should be removed before adding the new path segments.
            old = old_segments[:-1] if old_segments[-1] == "" else old_segments
            old.reverse()
            parsed += old

        # If the netloc is present, inject a leading slash when adding a
        # path to an absolute URL where there was none before.
        if (netloc := self._netloc) and parsed and parsed[-1] != "":
            parsed.append("")

        parsed.reverse()
        # Normalization is only applied when a netloc is present and a "."
        # was seen in one of the appended paths.
        if not netloc or not needs_normalize:
            return from_parts(self._scheme, netloc, "/".join(parsed), "", "")

        path = "/".join(normalize_path_segments(parsed))
        # If normalizing the path segments removed the leading slash, add it back.
        if path and path[0] != "/":
            path = f"/{path}"
        return from_parts(self._scheme, netloc, path, "", "")

    def with_scheme(self, scheme: str) -> "URL":
        """Return a new URL with scheme replaced.

        The scheme is lower-cased. Raises TypeError for a non-str scheme and
        ValueError when the URL has no netloc and the new scheme is listed in
        SCHEME_REQUIRES_HOST.
        """
        # N.B. doesn't cleanup query/fragment
        if not isinstance(scheme, str):
            raise TypeError("Invalid scheme type")
        lower_scheme = scheme.lower()
        netloc = self._netloc
        if not netloc and lower_scheme in SCHEME_REQUIRES_HOST:
            msg = (
                "scheme replacement is not allowed for "
                f"relative URLs for the {lower_scheme} scheme"
            )
            raise ValueError(msg)
        return from_parts(lower_scheme, netloc, self._path, self._query, self._fragment)

    def with_user(self, user: Union[str, None]) -> "URL":
        """Return a new URL with user replaced.

        Autoencode user if needed.

        Clear user/password if user is None.

        Raises TypeError for a user that is neither str nor None and
        ValueError when the URL has no netloc.

        """
        # N.B. doesn't cleanup query/fragment
        if user is None:
            password = None
        elif isinstance(user, str):
            user = QUOTER(user)
            password = self.raw_password
        else:
            raise TypeError("Invalid user type")
        if not (netloc := self._netloc):
            raise ValueError("user replacement is not allowed for relative URLs")
        encoded_host = self.host_subcomponent or ""
        netloc = make_netloc(user, password, encoded_host, self.explicit_port)
        return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

    def with_password(self, password: Union[str, None]) -> "URL":
        """Return a new URL with password replaced.

        Autoencode password if needed.

        Clear password if argument is None.

        Raises TypeError for a password that is neither str nor None and
        ValueError when the URL has no netloc.

        """
        # N.B. doesn't cleanup query/fragment
        if password is None:
            pass
        elif isinstance(password, str):
            password = QUOTER(password)
        else:
            raise TypeError("Invalid password type")
        if not (netloc := self._netloc):
            raise ValueError("password replacement is not allowed for relative URLs")
        encoded_host = self.host_subcomponent or ""
        port = self.explicit_port
        netloc = make_netloc(self.raw_user, password, encoded_host, port)
        return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

    def with_host(self, host: str) -> "URL":
        """Return a new URL with host replaced.

        Autoencode host if needed.

        Changing host for relative URLs is not allowed, use .join()
        instead.

        Raises TypeError for a non-str host and ValueError when the URL has
        no netloc or the new host is empty.

        """
        # N.B. doesn't cleanup query/fragment
        if not isinstance(host, str):
            raise TypeError("Invalid host type")
        if not (netloc := self._netloc):
            raise ValueError("host replacement is not allowed for relative URLs")
        if not host:
            raise ValueError("host removing is not allowed")
        # NOTE(review): host is non-empty here because of the check above, so
        # the `else ""` arm of this conditional cannot be taken.
        encoded_host = _encode_host(host, validate_host=True) if host else ""
        port = self.explicit_port
        netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
        return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

    def with_port(self, port: Union[int, None]) -> "URL":
        """Return a new URL with port replaced.

        Clear port to default if None is passed.

        Raises TypeError when port is a bool or not an int, and ValueError
        when it is outside 0..65535 or the URL has no netloc.

        """
        # N.B. doesn't cleanup query/fragment
        if port is not None:
            # bool is a subclass of int, so it has to be rejected explicitly.
            if isinstance(port, bool) or not isinstance(port, int):
                raise TypeError(f"port should be int or None, got {type(port)}")
            if not (0 <= port <= 65535):
                raise ValueError(f"port must be between 0 and 65535, got {port}")
        if not (netloc := self._netloc):
            raise ValueError("port replacement is not allowed for relative URLs")
        encoded_host = self.host_subcomponent or ""
        netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
        return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

    def with_path(
        self,
        path: str,
        *,
        encoded: bool = False,
        keep_query: bool = False,
        keep_fragment: bool = False,
    ) -> "URL":
        """Return a new URL with path replaced.

        Unless *encoded* is true the path is quoted and, when a netloc is
        present and the path contains ".", normalized. A non-empty path
        always gets a leading slash. Query and fragment are dropped unless
        *keep_query* / *keep_fragment* are set.
        """
        netloc = self._netloc
        if not encoded:
            path = PATH_QUOTER(path)
            if netloc:
                path = normalize_path(path) if "." in path else path
        if path and path[0] != "/":
            path = f"/{path}"
        query = self._query if keep_query else ""
        fragment = self._fragment if keep_fragment else ""
        return from_parts(self._scheme, netloc, path, query, fragment)

    @overload
    def with_query(self, query: Query) -> "URL": ...

    @overload
    def with_query(self, **kwargs: QueryVariable) -> "URL": ...
+ + def with_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part replaced. + + Accepts any Mapping (e.g. dict, multidict.MultiDict instances) + or str, autoencode the argument if needed. + + A sequence of (key, value) pairs is supported as well. + + It also can take an arbitrary number of keyword arguments. + + Clear query if None is passed. + + """ + # N.B. doesn't cleanup query/fragment + query = get_str_query(*args, **kwargs) or "" + return from_parts_uncached( + self._scheme, self._netloc, self._path, query, self._fragment + ) + + @overload + def extend_query(self, query: Query) -> "URL": ... + + @overload + def extend_query(self, **kwargs: QueryVariable) -> "URL": ... + + def extend_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part combined with the existing. + + This method will not remove existing query parameters. + + Example: + >>> url = URL('http://example.com/?a=1&b=2') + >>> url.extend_query(a=3, c=4) + URL('http://example.com/?a=1&b=2&a=3&c=4') + """ + if not (new_query := get_str_query(*args, **kwargs)): + return self + if query := self._query: + # both strings are already encoded so we can use a simple + # string join + query += new_query if query[-1] == "&" else f"&{new_query}" + else: + query = new_query + return from_parts_uncached( + self._scheme, self._netloc, self._path, query, self._fragment + ) + + @overload + def update_query(self, query: Query) -> "URL": ... + + @overload + def update_query(self, **kwargs: QueryVariable) -> "URL": ... + + def update_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part updated. + + This method will overwrite existing query parameters. 
+ + Example: + >>> url = URL('http://example.com/?a=1&b=2') + >>> url.update_query(a=3, c=4) + URL('http://example.com/?a=3&b=2&c=4') + """ + in_query: Union[str, Mapping[str, QueryVariable], None] + if kwargs: + if args: + msg = "Either kwargs or single query parameter must be present" + raise ValueError(msg) + in_query = kwargs + elif len(args) == 1: + in_query = args[0] + else: + raise ValueError("Either kwargs or single query parameter must be present") + + if in_query is None: + query = "" + elif not in_query: + query = self._query + elif isinstance(in_query, Mapping): + qm: MultiDict[QueryVariable] = MultiDict(self._parsed_query) + qm.update(in_query) + query = get_str_query_from_sequence_iterable(qm.items()) + elif isinstance(in_query, str): + qstr: MultiDict[str] = MultiDict(self._parsed_query) + qstr.update(parse_qsl(in_query, keep_blank_values=True)) + query = get_str_query_from_iterable(qstr.items()) + elif isinstance(in_query, (bytes, bytearray, memoryview)): + msg = "Invalid query type: bytes, bytearray and memoryview are forbidden" + raise TypeError(msg) + elif isinstance(in_query, Sequence): + # We don't expect sequence values if we're given a list of pairs + # already; only mappings like builtin `dict` which can't have the + # same key pointing to multiple values are allowed to use + # `_query_seq_pairs`. 
+ qs: MultiDict[SimpleQuery] = MultiDict(self._parsed_query) + qs.update(in_query) + query = get_str_query_from_iterable(qs.items()) + else: + raise TypeError( + "Invalid query type: only str, mapping or " + "sequence of (key, value) pairs is allowed" + ) + return from_parts_uncached( + self._scheme, self._netloc, self._path, query, self._fragment + ) + + def without_query_params(self, *query_params: str) -> "URL": + """Remove some keys from query part and return new URL.""" + params_to_remove = set(query_params) & self.query.keys() + if not params_to_remove: + return self + return self.with_query( + tuple( + (name, value) + for name, value in self.query.items() + if name not in params_to_remove + ) + ) + + def with_fragment(self, fragment: Union[str, None]) -> "URL": + """Return a new URL with fragment replaced. + + Autoencode fragment if needed. + + Clear fragment to default if None is passed. + + """ + # N.B. doesn't cleanup query/fragment + if fragment is None: + raw_fragment = "" + elif not isinstance(fragment, str): + raise TypeError("Invalid fragment type") + else: + raw_fragment = FRAGMENT_QUOTER(fragment) + if self._fragment == raw_fragment: + return self + return from_parts( + self._scheme, self._netloc, self._path, self._query, raw_fragment + ) + + def with_name( + self, + name: str, + *, + keep_query: bool = False, + keep_fragment: bool = False, + ) -> "URL": + """Return a new URL with name (last part of path) replaced. + + Query and fragment parts are cleaned up. + + Name is encoded if needed. + + """ + # N.B. DOES cleanup query/fragment + if not isinstance(name, str): + raise TypeError("Invalid name type") + if "/" in name: + raise ValueError("Slash in name is not allowed") + name = PATH_QUOTER(name) + if name in (".", ".."): + raise ValueError(". and .. 
values are forbidden") + parts = list(self.raw_parts) + if netloc := self._netloc: + if len(parts) == 1: + parts.append(name) + else: + parts[-1] = name + parts[0] = "" # replace leading '/' + else: + parts[-1] = name + if parts[0] == "/": + parts[0] = "" # replace leading '/' + + query = self._query if keep_query else "" + fragment = self._fragment if keep_fragment else "" + return from_parts(self._scheme, netloc, "/".join(parts), query, fragment) + + def with_suffix( + self, + suffix: str, + *, + keep_query: bool = False, + keep_fragment: bool = False, + ) -> "URL": + """Return a new URL with suffix (file extension of name) replaced. + + Query and fragment parts are cleaned up. + + suffix is encoded if needed. + """ + if not isinstance(suffix, str): + raise TypeError("Invalid suffix type") + if suffix and not suffix[0] == "." or suffix == ".": + raise ValueError(f"Invalid suffix {suffix!r}") + name = self.raw_name + if not name: + raise ValueError(f"{self!r} has an empty name") + old_suffix = self.raw_suffix + name = name + suffix if not old_suffix else name[: -len(old_suffix)] + suffix + + return self.with_name(name, keep_query=keep_query, keep_fragment=keep_fragment) + + def join(self, url: "URL") -> "URL": + """Join URLs + + Construct a full (“absolute”) URL by combining a “base URL” + (self) with another URL (url). + + Informally, this uses components of the base URL, in + particular the addressing scheme, the network location and + (part of) the path, to provide missing components in the + relative URL. 
+ + """ + if type(url) is not URL: + raise TypeError("url should be URL") + + scheme = url._scheme or self._scheme + if scheme != self._scheme or scheme not in USES_RELATIVE: + return url + + # scheme is in uses_authority as uses_authority is a superset of uses_relative + if (join_netloc := url._netloc) and scheme in USES_AUTHORITY: + return from_parts(scheme, join_netloc, url._path, url._query, url._fragment) + + orig_path = self._path + if join_path := url._path: + if join_path[0] == "/": + path = join_path + elif not orig_path: + path = f"/{join_path}" + elif orig_path[-1] == "/": + path = f"{orig_path}{join_path}" + else: + # … + # and relativizing ".." + # parts[0] is / for absolute urls, + # this join will add a double slash there + path = "/".join([*self.parts[:-1], ""]) + join_path + # which has to be removed + if orig_path[0] == "/": + path = path[1:] + path = normalize_path(path) if "." in path else path + else: + path = orig_path + + return from_parts( + scheme, + self._netloc, + path, + url._query if join_path or url._query else self._query, + url._fragment if join_path or url._fragment else self._fragment, + ) + + def joinpath(self, *other: str, encoded: bool = False) -> "URL": + """Return a new URL with the elements in other appended to the path.""" + return self._make_child(other, encoded=encoded) + + def human_repr(self) -> str: + """Return decoded human readable string for URL representation.""" + user = human_quote(self.user, "#/:?@[]") + password = human_quote(self.password, "#/:?@[]") + if (host := self.host) and ":" in host: + host = f"[{host}]" + path = human_quote(self.path, "#?") + if TYPE_CHECKING: + assert path is not None + query_string = "&".join( + "{}={}".format(human_quote(k, "#&+;="), human_quote(v, "#&+;=")) + for k, v in self.query.items() + ) + fragment = human_quote(self.fragment, "") + if TYPE_CHECKING: + assert fragment is not None + netloc = make_netloc(user, password, host, self.explicit_port) + return 
unsplit_result(self._scheme, netloc, path, query_string, fragment) + + +_DEFAULT_IDNA_SIZE = 256 +_DEFAULT_ENCODE_SIZE = 512 + + +@lru_cache(_DEFAULT_IDNA_SIZE) +def _idna_decode(raw: str) -> str: + try: + return idna.decode(raw.encode("ascii")) + except UnicodeError: # e.g. '::1' + return raw.encode("ascii").decode("idna") + + +@lru_cache(_DEFAULT_IDNA_SIZE) +def _idna_encode(host: str) -> str: + try: + return idna.encode(host, uts46=True).decode("ascii") + except UnicodeError: + return host.encode("idna").decode("ascii") + + +@lru_cache(_DEFAULT_ENCODE_SIZE) +def _encode_host(host: str, validate_host: bool) -> str: + """Encode host part of URL.""" + # If the host ends with a digit or contains a colon, its likely + # an IP address. + if host and (host[-1].isdigit() or ":" in host): + raw_ip, sep, zone = host.partition("%") + # If it looks like an IP, we check with _ip_compressed_version + # and fall-through if its not an IP address. This is a performance + # optimization to avoid parsing IP addresses as much as possible + # because it is orders of magnitude slower than almost any other + # operation this library does. 
+ # Might be an IP address, check it + # + # IP Addresses can look like: + # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + # - 127.0.0.1 (last character is a digit) + # - 2001:db8::ff00:42:8329 (contains a colon) + # - 2001:db8::ff00:42:8329%eth0 (contains a colon) + # - [2001:db8::ff00:42:8329] (contains a colon -- brackets should + # have been removed before it gets here) + # Rare IP Address formats are not supported per: + # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4 + # + # IP parsing is slow, so its wrapped in an LRU + try: + ip = ip_address(raw_ip) + except ValueError: + pass + else: + # These checks should not happen in the + # LRU to keep the cache size small + host = ip.compressed + if ip.version == 6: + return f"[{host}%{zone}]" if sep else f"[{host}]" + return f"{host}%{zone}" if sep else host + + # IDNA encoding is slow, skip it for ASCII-only strings + if host.isascii(): + # Check for invalid characters explicitly; _idna_encode() does this + # for non-ascii host names. 
+ host = host.lower() + if validate_host and (invalid := NOT_REG_NAME.search(host)): + value, pos, extra = invalid.group(), invalid.start(), "" + if value == "@" or (value == ":" and "@" in host[pos:]): + # this looks like an authority string + extra = ( + ", if the value includes a username or password, " + "use 'authority' instead of 'host'" + ) + raise ValueError( + f"Host {host!r} cannot contain {value!r} (at position {pos}){extra}" + ) from None + return host + + return _idna_encode(host) + + +@rewrite_module +def cache_clear() -> None: + """Clear all LRU caches.""" + _idna_encode.cache_clear() + _idna_decode.cache_clear() + _encode_host.cache_clear() + + +@rewrite_module +def cache_info() -> CacheInfo: + """Report cache statistics.""" + return { + "idna_encode": _idna_encode.cache_info(), + "idna_decode": _idna_decode.cache_info(), + "ip_address": _encode_host.cache_info(), + "host_validate": _encode_host.cache_info(), + "encode_host": _encode_host.cache_info(), + } + + +@rewrite_module +def cache_configure( + *, + idna_encode_size: Union[int, None] = _DEFAULT_IDNA_SIZE, + idna_decode_size: Union[int, None] = _DEFAULT_IDNA_SIZE, + ip_address_size: Union[int, None, UndefinedType] = UNDEFINED, + host_validate_size: Union[int, None, UndefinedType] = UNDEFINED, + encode_host_size: Union[int, None, UndefinedType] = UNDEFINED, +) -> None: + """Configure LRU cache sizes.""" + global _idna_decode, _idna_encode, _encode_host + # ip_address_size, host_validate_size are no longer + # used, but are kept for backwards compatibility. 
+ if ip_address_size is not UNDEFINED or host_validate_size is not UNDEFINED: + warnings.warn( + "cache_configure() no longer accepts the " + "ip_address_size or host_validate_size arguments, " + "they are used to set the encode_host_size instead " + "and will be removed in the future", + DeprecationWarning, + stacklevel=2, + ) + + if encode_host_size is not None: + for size in (ip_address_size, host_validate_size): + if size is None: + encode_host_size = None + elif encode_host_size is UNDEFINED: + if size is not UNDEFINED: + encode_host_size = size + elif size is not UNDEFINED: + if TYPE_CHECKING: + assert isinstance(size, int) + assert isinstance(encode_host_size, int) + encode_host_size = max(size, encode_host_size) + if encode_host_size is UNDEFINED: + encode_host_size = _DEFAULT_ENCODE_SIZE + + if TYPE_CHECKING: + assert not isinstance(encode_host_size, object) + _encode_host = lru_cache(encode_host_size)(_encode_host.__wrapped__) + _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__) + _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__) diff --git a/.venv/lib/python3.12/site-packages/yarl/py.typed b/.venv/lib/python3.12/site-packages/yarl/py.typed new file mode 100644 index 00000000..dcf2c804 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/yarl/py.typed @@ -0,0 +1 @@ +# Placeholder |