aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/yarl
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/yarl')
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/__init__.py14
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_parse.py189
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_path.py41
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_query.py118
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_quoters.py32
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_quoting.py18
-rwxr-xr-x.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.sobin0 -> 985824 bytes
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi16
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx423
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_quoting_py.py197
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_url.py1584
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/py.typed1
12 files changed, 2633 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/yarl/__init__.py b/.venv/lib/python3.12/site-packages/yarl/__init__.py
new file mode 100644
index 00000000..36404071
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/__init__.py
@@ -0,0 +1,14 @@
+from ._query import Query, QueryVariable, SimpleQuery
+from ._url import URL, cache_clear, cache_configure, cache_info
+
+__version__ = "1.18.3"
+
+__all__ = (
+ "URL",
+ "SimpleQuery",
+ "QueryVariable",
+ "Query",
+ "cache_clear",
+ "cache_configure",
+ "cache_info",
+)
diff --git a/.venv/lib/python3.12/site-packages/yarl/_parse.py b/.venv/lib/python3.12/site-packages/yarl/_parse.py
new file mode 100644
index 00000000..cc259ea8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_parse.py
@@ -0,0 +1,189 @@
+"""URL parsing utilities."""
+
+import re
+import unicodedata
+from functools import lru_cache
+from typing import Union
+from urllib.parse import scheme_chars, uses_netloc
+
+from ._quoters import QUOTER
+
# Leading and trailing C0 control and space to be stripped per WHATWG spec.
# == "".join([chr(i) for i in range(0, 0x20 + 1)])
WHATWG_C0_CONTROL_OR_SPACE = (
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10"
    "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f "
)

# Unsafe bytes to be removed per WHATWG spec
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
USES_AUTHORITY = frozenset(uses_netloc)

SplitURLType = tuple[str, str, str, str, str]


def split_url(url: str) -> SplitURLType:
    """Split URL into parts.

    Adapted from ``urllib.parse.urlsplit``.

    Returns a ``(scheme, netloc, path, query, fragment)`` tuple. Parts that
    are absent come back as empty strings; the scheme is lower-cased.

    Raises ``ValueError`` when the netloc has unbalanced or empty brackets,
    when a bracketed host is neither a well-formed IPvFuture literal nor
    contains ``:``, or when a non-ASCII netloc is rejected by
    ``_check_netloc``.
    """
    # Only lstrip url as some applications rely on preserving trailing space.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE)
    for b in UNSAFE_URL_BYTES_TO_REMOVE:
        if b in url:
            url = url.replace(b, "")

    scheme = netloc = query = fragment = ""
    i = url.find(":")
    if i > 0 and url[0] in scheme_chars:
        for c in url[1:i]:
            if c not in scheme_chars:
                break
        else:
            # Every character before the first ":" is a scheme character.
            scheme, url = url[:i].lower(), url[i + 1 :]
    has_hash = "#" in url
    has_question_mark = "?" in url
    if url[:2] == "//":
        delim = len(url)  # position of end of domain part of url, default is end
        if has_hash and has_question_mark:
            delim_chars = "/?#"
        elif has_question_mark:
            delim_chars = "/?"
        elif has_hash:
            delim_chars = "/#"
        else:
            delim_chars = "/"
        for c in delim_chars:  # look for delimiters; the order is NOT important
            wdelim = url.find(c, 2)  # find first of this delim
            if 0 <= wdelim < delim:  # if found
                delim = wdelim  # use earliest delim position
        netloc = url[2:delim]
        url = url[delim:]
        has_left_bracket = "[" in netloc
        has_right_bracket = "]" in netloc
        if has_left_bracket != has_right_bracket:
            raise ValueError("Invalid IPv6 URL")
        if has_left_bracket:
            bracketed_host = netloc.partition("[")[2].partition("]")[0]
            if not bracketed_host:
                # "[]" (or "][") would otherwise hit an IndexError on the
                # first-character test below; report it as a bad URL instead.
                raise ValueError("Invalid IPv6 URL")
            # Valid bracketed hosts are defined in
            # https://www.rfc-editor.org/rfc/rfc3986#page-49
            # https://url.spec.whatwg.org/
            if bracketed_host[0] == "v":
                if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host):
                    raise ValueError("IPvFuture address is invalid")
            elif ":" not in bracketed_host:
                raise ValueError("An IPv4 address cannot be in brackets")
    if has_hash:
        url, _, fragment = url.partition("#")
    if has_question_mark:
        url, _, query = url.partition("?")
    if netloc and not netloc.isascii():
        _check_netloc(netloc)
    return scheme, netloc, url, query, fragment
+
+
+def _check_netloc(netloc: str) -> None:
+ # Adapted from urllib.parse._checknetloc
+ # looking for characters like \u2100 that expand to 'a/c'
+ # IDNA uses NFKC equivalence, so normalize for this check
+
+ # ignore characters already included
+ # but not the surrounding text
+ n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "")
+ normalized_netloc = unicodedata.normalize("NFKC", n)
+ if n == normalized_netloc:
+ return
+ # Note that there are no unicode decompositions for the character '@' so
+ # its currently impossible to have test coverage for this branch, however if the
+ # one should be added in the future we want to make sure its still checked.
+ for c in "/?#@:": # pragma: no branch
+ if c in normalized_netloc:
+ raise ValueError(
+ f"netloc '{netloc}' contains invalid "
+ "characters under NFKC normalization"
+ )
+
+
@lru_cache  # match the same size as urlsplit
def split_netloc(
    netloc: str,
) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]:
    """Split netloc into username, password, host and port.

    Empty username and host are reported as ``None``. The password is
    ``None`` only when the userinfo has no ``:``. A bracketed host is
    returned without its brackets.

    Raises ``ValueError`` if the port is not an integer or is outside
    0-65535.
    """
    username: Union[str, None] = None
    password: Union[str, None] = None
    hostinfo = netloc
    if "@" in netloc:
        # The last "@" separates userinfo from host information.
        userinfo, _, hostinfo = netloc.rpartition("@")
        username, colon, password = userinfo.partition(":")
        if not colon:
            password = None

    if "[" not in hostinfo:
        hostname, _, port_str = hostinfo.partition(":")
    else:
        inside = hostinfo.partition("[")[2]
        hostname, _, after_bracket = inside.partition("]")
        port_str = after_bracket.partition(":")[2]

    user_out = username or None
    host_out = hostname or None
    if not port_str:
        return user_out, password, host_out, None

    try:
        port = int(port_str)
    except ValueError:
        raise ValueError("Invalid URL: port can't be converted to integer")
    if port < 0 or port > 65535:
        raise ValueError("Port out of range 0-65535")
    return user_out, password, host_out, port
+
+
def unsplit_result(
    scheme: str, netloc: str, url: str, query: str, fragment: str
) -> str:
    """Unsplit a URL without any normalization.

    Joins the five components back into one string. An authority section
    (``//netloc``) is emitted when there is a netloc, when the scheme is one
    that uses an authority, or when the path itself starts with ``//``.
    A relative path that follows an authority gets a ``/`` separator.
    """
    if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//":
        if url and url[:1] != "/":
            # Previously the scheme-less case produced ":<path>" and lost the
            # netloc; the path must be separated from the authority instead.
            url = f"/{url}"
        url = f"{scheme}://{netloc}{url}" if scheme else f"//{netloc}{url}"
    elif scheme:
        url = f"{scheme}:{url}"
    if query:
        url = f"{url}?{query}"
    return f"{url}#{fragment}" if fragment else url
+
+
@lru_cache  # match the same size as urlsplit
def make_netloc(
    user: Union[str, None],
    password: Union[str, None],
    host: Union[str, None],
    port: Union[int, None],
    encode: bool = False,
) -> str:
    """Make netloc from parts.

    The user and password are encoded if encode is True.

    The host must already be encoded with _encode_host.

    Returns an empty string when host is None; the userinfo prefix is
    left out when it would be empty.
    """
    if host is None:
        return ""

    hostport = host if port is None else f"{host}:{port}"
    if user is None and password is None:
        return hostport

    if password is None:
        # Only a user name was supplied (it may still be an empty string).
        if user and encode:
            user = QUOTER(user)
        credentials = user
    else:
        if not user:
            login = ""
        else:
            login = QUOTER(user) if encode else user
        secret = QUOTER(password) if encode else password
        credentials = f"{login}:{secret}"

    return f"{credentials}@{hostport}" if credentials else hostport
diff --git a/.venv/lib/python3.12/site-packages/yarl/_path.py b/.venv/lib/python3.12/site-packages/yarl/_path.py
new file mode 100644
index 00000000..c22f0b4b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_path.py
@@ -0,0 +1,41 @@
+"""Utilities for working with paths."""
+
+from collections.abc import Sequence
+from contextlib import suppress
+
+
def normalize_path_segments(segments: Sequence[str]) -> list[str]:
    """Drop '.' and '..' from a sequence of str segments.

    A '..' removes the previously kept segment; when nothing is left to
    remove it is ignored. If the input ends in '.' or '..', an empty
    segment is appended so that the joined path keeps its trailing '/'.
    """
    kept: list[str] = []

    for part in segments:
        if part == ".":
            continue
        if part != "..":
            kept.append(part)
        elif kept:
            # A ".." with nothing above it is dropped (rfc3986 resolution).
            kept.pop()

    if segments and segments[-1] in (".", ".."):
        # The last segment was a relative dir: keep the trailing '/'.
        kept.append("")

    return kept
+
+
def normalize_path(path: str) -> str:
    """Drop '.' and '..' segments from a str path."""
    # The "/" root element of an absolute path is preserved and copied to the
    # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
    is_absolute = path[:1] == "/"
    remainder = path[1:] if is_absolute else path
    joined = "/".join(normalize_path_segments(remainder.split("/")))
    return f"/{joined}" if is_absolute else joined
diff --git a/.venv/lib/python3.12/site-packages/yarl/_query.py b/.venv/lib/python3.12/site-packages/yarl/_query.py
new file mode 100644
index 00000000..6a663fc9
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_query.py
@@ -0,0 +1,118 @@
+"""Query string handling."""
+
+import math
+from collections.abc import Iterable, Mapping, Sequence
+from typing import TYPE_CHECKING, Any, SupportsInt, Union
+
+from multidict import istr
+
+from ._quoters import QUERY_PART_QUOTER, QUERY_QUOTER
+
+SimpleQuery = Union[str, int, float] # a single scalar query value
+QueryVariable = Union[SimpleQuery, Sequence[SimpleQuery]] # one scalar, or a sequence of scalars
+Query = Union[ # every form accepted for a whole query
+ None, str, Mapping[str, QueryVariable], Sequence[tuple[str, QueryVariable]]
+]
+
+
def query_var(v: QueryVariable) -> str:
    """Convert a query variable to a string.

    ``str`` values (including subclasses) are returned unchanged, ``int``
    and ``float`` values are rendered with ``str()``, and any other
    non-``bool`` value that supports ``int()`` is rendered as that integer.

    Raises ``ValueError`` for infinite or NaN floats and ``TypeError`` for
    any other type, including ``bool``.
    """
    cls = type(v)
    if cls is int:  # Fast path for non-subclassed int
        return str(v)
    if issubclass(cls, str):
        if TYPE_CHECKING:
            assert isinstance(v, str)
        return v
    if issubclass(cls, float):
        if TYPE_CHECKING:
            assert isinstance(v, float)
        if math.isinf(v):
            raise ValueError("float('inf') is not supported")
        if math.isnan(v):
            raise ValueError("float('nan') is not supported")
        return str(float(v))
    # The protocol check belongs on the value; checking the class object only
    # worked because a class exposes the methods it defines as attributes.
    if cls is not bool and isinstance(v, SupportsInt):
        return str(int(v))
    raise TypeError(
        "Invalid variable type: value "
        f"should be str, int or float, got {v!r} "
        f"of type {cls}"
    )
+
+
def get_str_query_from_sequence_iterable(
    items: Iterable[tuple[Union[str, istr], QueryVariable]],
) -> str:
    """Return a query string from a sequence of (key, value) pairs.

    value is a single value or a sequence of values for the key

    The sequence of values must be a list or tuple; each element then
    produces its own ``key=value`` pair.
    """
    quote_part = QUERY_PART_QUOTER
    rendered: list[str] = []
    for key, raw in items:
        if type(raw) is not str and isinstance(raw, (list, tuple)):
            values = raw
        else:
            values = (raw,)
        for item in values:
            # Key first, then value: same evaluation order as a single f-string.
            rendered.append(
                f"{quote_part(key)}="
                f"{quote_part(item if type(item) is str else query_var(item))}"
            )
    return "&".join(rendered)
+
+
def get_str_query_from_iterable(
    items: Iterable[tuple[Union[str, istr], SimpleQuery]]
) -> str:
    """Return a query string from an iterable.

    The iterable must contain (key, value) pairs.

    The values are not allowed to be sequences, only single values are
    allowed. For sequences, use `_get_str_query_from_sequence_iterable`.
    """
    quote_part = QUERY_PART_QUOTER
    rendered: list[str] = []
    for key, value in items:
        # Key first, then value: same evaluation order as a single f-string.
        rendered.append(
            f"{quote_part(key)}="
            f"{quote_part(value if type(value) is str else query_var(value))}"
        )
    return "&".join(rendered)
+
+
def get_str_query(*args: Any, **kwargs: Any) -> Union[str, None]:
    """Return a query string from supported args.

    Accepts either keyword arguments or exactly one positional query.
    ``None`` yields ``None`` and any other falsy query yields ``""``.

    Raises ``ValueError`` for a wrong argument combination and
    ``TypeError`` for an unsupported query type.
    """
    query: Union[str, Mapping[str, QueryVariable], None]
    if kwargs:
        if args:
            msg = "Either kwargs or single query parameter must be present"
            raise ValueError(msg)
        query = kwargs
    else:
        if len(args) != 1:
            raise ValueError("Either kwargs or single query parameter must be present")
        query = args[0]

    if query is None:
        return None
    if not query:
        return ""
    if isinstance(query, str):
        return QUERY_QUOTER(query)
    if isinstance(query, Mapping):
        # Mapping values may themselves be lists/tuples of values.
        return get_str_query_from_sequence_iterable(query.items())
    if isinstance(query, (bytes, bytearray, memoryview)):
        msg = "Invalid query type: bytes, bytearray and memoryview are forbidden"
        raise TypeError(msg)
    if isinstance(query, Sequence):
        # We don't expect sequence values if we're given a list of pairs
        # already; only mappings like builtin `dict` which can't have the
        # same key pointing to multiple values are allowed to use
        # `_query_seq_pairs`.
        return get_str_query_from_iterable(query)
    raise TypeError(
        "Invalid query type: only str, mapping or "
        "sequence of (key, value) pairs is allowed"
    )
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoters.py b/.venv/lib/python3.12/site-packages/yarl/_quoters.py
new file mode 100644
index 00000000..c1d2d7f8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_quoters.py
@@ -0,0 +1,32 @@
+"""Quoting and unquoting utilities for URL parts."""
+
+from typing import Union
+from urllib.parse import quote
+
+from ._quoting import _Quoter, _Unquoter
+
+QUOTER = _Quoter(requote=False) # requote=False: "%" in the input is not treated as the start of an existing escape
+REQUOTER = _Quoter() # default requote=True: existing "%XX" sequences are recognised
+PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False)
+PATH_REQUOTER = _Quoter(safe="@:", protected="/+")
+QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False) # qs=True: a space is written as "+"
+QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True)
+QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False) # for a single key or value: no protected "=+&;" set
+FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False)
+FRAGMENT_REQUOTER = _Quoter(safe="?/:@")
+
+UNQUOTER = _Unquoter()
+PATH_UNQUOTER = _Unquoter(unsafe="+") # "+" stays percent-encoded when unquoting
+PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") # additionally keeps encoded "/" and "%" encoded
+QS_UNQUOTER = _Unquoter(qs=True) # qs=True: "+" is decoded to a space
+
+
def human_quote(s: Union[str, None], unsafe: str) -> Union[str, None]:
    """Quote *s* for human-readable display.

    ``%`` and every character of *unsafe* are percent-encoded (``%`` first),
    then any non-printable character is quoted with ``urllib.parse.quote``.
    Falsy input (``None`` or ``""``) is returned as is.
    """
    if not s:
        return s
    text = s
    for ch in f"%{unsafe}":
        # Replacing an absent character is a no-op, so no membership test.
        text = text.replace(ch, f"%{ord(ch):02X}")
    if text.isprintable():
        return text
    pieces = [ch if ch.isprintable() else quote(ch) for ch in text]
    return "".join(pieces)
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting.py b/.venv/lib/python3.12/site-packages/yarl/_quoting.py
new file mode 100644
index 00000000..95e86095
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_quoting.py
@@ -0,0 +1,18 @@
+import os
+import sys
+
+__all__ = ("_Quoter", "_Unquoter")
+
+
+NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS")) # type: bool
+if sys.implementation.name != "cpython": # the compiled quoter is only attempted on CPython
+ NO_EXTENSIONS = True
+
+
+if not NO_EXTENSIONS: # pragma: no branch
+ try:
+ from ._quoting_c import _Quoter, _Unquoter
+ except ImportError: # pragma: no cover
+ from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment]
+else:
+ from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment]
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so
new file mode 100755
index 00000000..0cdf3988
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.cpython-312-x86_64-linux-gnu.so
Binary files differ
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi
new file mode 100644
index 00000000..9a6b79ad
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyi
@@ -0,0 +1,16 @@
+class _Quoter: # type stub for the compiled quoter in _quoting_c
+ def __init__(
+ self,
+ *,
+ safe: str = ...,
+ protected: str = ...,
+ qs: bool = ...,
+ requote: bool = ...
+ ) -> None: ...
+ def __call__(self, val: str = ...) -> str: ...
+
+class _Unquoter: # type stub for the compiled unquoter in _quoting_c
+ def __init__(
+ self, *, ignore: str = ..., unsafe: str = ..., qs: bool = ...
+ ) -> None: ...
+ def __call__(self, val: str = ...) -> str: ... # NOTE(review): the .pyx __call__ requires val and returns None for None - confirm this signature
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx
new file mode 100644
index 00000000..067ba96e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_c.pyx
@@ -0,0 +1,423 @@
+# cython: language_level=3
+
+from cpython.exc cimport PyErr_NoMemory
+from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc
+from cpython.unicode cimport (
+ PyUnicode_DATA,
+ PyUnicode_DecodeASCII,
+ PyUnicode_DecodeUTF8Stateful,
+ PyUnicode_GET_LENGTH,
+ PyUnicode_KIND,
+ PyUnicode_READ,
+)
+from libc.stdint cimport uint8_t, uint64_t
+from libc.string cimport memcpy, memset
+
+from string import ascii_letters, digits
+
+
# Character classes from RFC 3986, section 2.2 (reserved) and 2.3 (unreserved).
cdef str GEN_DELIMS = ":/?#[]@"
cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*,"
# sub-delims are "!$&'()*+,;=": the four characters added here are the ones
# with query-string meaning, i.e. '+', '&', '=' and ';' ('?' is a gen-delim).
cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+&=;'
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
cdef str UNRESERVED = ascii_letters + digits + '-._~'
# Characters that never need quoting, whatever the URL part.
cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS
# Characters that are only left unquoted outside of query strings.
cdef str QS = '+&=;'
+
+DEF BUF_SIZE = 8 * 1024 # 8KiB
+cdef char BUFFER[BUF_SIZE]
+
+cdef inline Py_UCS4 _to_hex(uint8_t v) noexcept: # uppercase hex digit for a nibble; callers in this file pass 0-15
+ if v < 10:
+ return <Py_UCS4>(v+0x30) # ord('0') == 0x30
+ else:
+ return <Py_UCS4>(v+0x41-10) # ord('A') == 0x41
+
+
+cdef inline int _from_hex(Py_UCS4 v) noexcept: # value 0-15 of a hex digit (either case), or -1 if v is not one
+ if '0' <= v <= '9':
+ return <int>(v) - 0x30 # ord('0') == 0x30
+ elif 'A' <= v <= 'F':
+ return <int>(v) - 0x41 + 10 # ord('A') == 0x41
+ elif 'a' <= v <= 'f':
+ return <int>(v) - 0x61 + 10 # ord('a') == 0x61
+ else:
+ return -1 # sentinel: not a hex digit
+
+
+cdef inline int _is_lower_hex(Py_UCS4 v) noexcept: # true only for the lowercase hex letters a-f
+ return 'a' <= v <= 'f'
+
+
+cdef inline Py_UCS4 _restore_ch(Py_UCS4 d1, Py_UCS4 d2): # decode the two hex digits of a %XX escape into one value
+ cdef int digit1 = _from_hex(d1)
+ if digit1 < 0:
+ return <Py_UCS4>-1 # sentinel: d1 is not a hex digit
+ cdef int digit2 = _from_hex(d2)
+ if digit2 < 0:
+ return <Py_UCS4>-1 # sentinel: d2 is not a hex digit
+ return <Py_UCS4>(digit1 << 4 | digit2) # d1 is the high nibble, d2 the low nibble
+
+
+cdef uint8_t ALLOWED_TABLE[16]
+cdef uint8_t ALLOWED_NOTQS_TABLE[16]
+
+
+cdef inline bint bit_at(uint8_t array[], uint64_t ch) noexcept: # test bit number ch in a byte-array bitmap
+ return array[ch >> 3] & (1 << (ch & 7)) # byte index is ch // 8, bit inside that byte is ch % 8
+
+
+cdef inline void set_bit(uint8_t array[], uint64_t ch) noexcept: # set bit number ch in a byte-array bitmap
+ array[ch >> 3] |= (1 << (ch & 7)) # byte index is ch // 8, bit inside that byte is ch % 8
+
+
+memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE)) # start with no bit set
+memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE))
+
+for i in range(128): # one bit per ASCII code point: 16 bytes * 8 bits == 128
+ if chr(i) in ALLOWED:
+ set_bit(ALLOWED_TABLE, i)
+ set_bit(ALLOWED_NOTQS_TABLE, i)
+ if chr(i) in QS: # the QS characters are allowed only in the non-query-string table
+ set_bit(ALLOWED_NOTQS_TABLE, i)
+
+# ----------------- writer ---------------------------
+
+cdef struct Writer: # growable output buffer used while quoting
+ char *buf # current storage: the static BUFFER or a PyMem allocation
+ Py_ssize_t size # capacity of buf
+ Py_ssize_t pos # number of chars written so far
+ bint changed # becomes true once any write was flagged as changed
+
+
+cdef inline void _init_writer(Writer* writer): # reset the writer onto the module-level static BUFFER
+ writer.buf = &BUFFER[0]
+ writer.size = BUF_SIZE
+ writer.pos = 0
+ writer.changed = 0
+
+
+cdef inline void _release_writer(Writer* writer): # free the buffer only if the writer outgrew the static BUFFER
+ if writer.buf != BUFFER:
+ PyMem_Free(writer.buf)
+
+
+cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed): # append one char; returns 0, or -1 after PyErr_NoMemory()
+ cdef char * buf
+ cdef Py_ssize_t size
+
+ if writer.pos == writer.size:
+ # buffer is full: grow it by one BUF_SIZE step
+ size = writer.size + BUF_SIZE
+ if writer.buf == BUFFER: # first growth: move off the static BUFFER to an allocation
+ buf = <char*>PyMem_Malloc(size)
+ if buf == NULL:
+ PyErr_NoMemory()
+ return -1
+ memcpy(buf, writer.buf, writer.size)
+ else:
+ buf = <char*>PyMem_Realloc(writer.buf, size)
+ if buf == NULL:
+ PyErr_NoMemory()
+ return -1
+ writer.buf = buf
+ writer.size = size
+ writer.buf[writer.pos] = <char>ch # ch is narrowed to one char; callers in this file pass ASCII only
+ writer.pos += 1
+ writer.changed |= changed
+ return 0
+
+
+cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed): # write ch as "%XX" with uppercase hex; -1 on allocation failure
+ if _write_char(writer, '%', changed) < 0:
+ return -1
+ if _write_char(writer, _to_hex(<uint8_t>ch >> 4), changed) < 0: # high nibble
+ return -1
+ return _write_char(writer, _to_hex(<uint8_t>ch & 0x0f), changed) # low nibble
+
+
+cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol): # percent-encode each UTF-8 byte of one code point; always flags the output as changed
+ cdef uint64_t utf = <uint64_t> symbol
+
+ if utf < 0x80: # one byte
+ return _write_pct(writer, <uint8_t>utf, True)
+ elif utf < 0x800: # two bytes
+ if _write_pct(writer, <uint8_t>(0xc0 | (utf >> 6)), True) < 0:
+ return -1
+ return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
+ elif 0xD800 <= utf <= 0xDFFF:
+ # surrogate code point: ignored, nothing is written
+ return 0
+ elif utf < 0x10000: # three bytes
+ if _write_pct(writer, <uint8_t>(0xe0 | (utf >> 12)), True) < 0:
+ return -1
+ if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
+ True) < 0:
+ return -1
+ return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
+ elif utf > 0x10FFFF:
+ # symbol is too large: ignored, nothing is written
+ return 0
+ else: # four bytes
+ if _write_pct(writer, <uint8_t>(0xf0 | (utf >> 18)), True) < 0:
+ return -1
+ if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 12) & 0x3f)),
+ True) < 0:
+ return -1
+ if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
+ True) < 0:
+ return -1
+ return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
+
+
+# --------------------- end writer --------------------------
+
+
+cdef class _Quoter: # percent-encoder configured once with its safe and protected ASCII sets
+ cdef bint _qs
+ cdef bint _requote
+
+ cdef uint8_t _safe_table[16] # bitmap of ASCII characters written without quoting
+ cdef uint8_t _protected_table[16] # bitmap of ASCII characters whose %XX form is kept when requoting
+
+ def __init__(
+ self, *, str safe='', str protected='', bint qs=False, bint requote=True,
+ ):
+ cdef Py_UCS4 ch
+
+ self._qs = qs
+ self._requote = requote
+
+ if not self._qs: # outside query strings the QS characters are safe as well
+ memcpy(self._safe_table,
+ ALLOWED_NOTQS_TABLE,
+ sizeof(self._safe_table))
+ else:
+ memcpy(self._safe_table,
+ ALLOWED_TABLE,
+ sizeof(self._safe_table))
+ for ch in safe:
+ if ord(ch) > 127:
+ raise ValueError("Only safe symbols with ORD < 128 are allowed")
+ set_bit(self._safe_table, ch)
+
+ memset(self._protected_table, 0, sizeof(self._protected_table))
+ for ch in protected: # protected characters are also marked safe
+ if ord(ch) > 127:
+ raise ValueError("Only safe symbols with ORD < 128 are allowed")
+ set_bit(self._safe_table, ch)
+ set_bit(self._protected_table, ch)
+
+ def __call__(self, val): # returns None for None, the quoted str otherwise; TypeError for non-str
+ if val is None:
+ return None
+ if type(val) is not str:
+ if isinstance(val, str):
+ # derived from str
+ val = str(val)
+ else:
+ raise TypeError("Argument should be str")
+ return self._do_quote_or_skip(<str>val)
+
+ cdef str _do_quote_or_skip(self, str val): # return val itself when nothing in it needs quoting
+ cdef Py_UCS4 ch
+ cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
+ cdef Py_ssize_t idx = length
+ cdef bint must_quote = 0
+ cdef Writer writer
+ cdef int kind = PyUnicode_KIND(val)
+ cdef const void *data = PyUnicode_DATA(val)
+
+ # If everything in the string is in the safe
+ # table and all ASCII, we can skip quoting
+ while idx: # scans from the last character towards the first
+ idx -= 1
+ ch = PyUnicode_READ(kind, data, idx)
+ if ch >= 128 or not bit_at(self._safe_table, ch):
+ must_quote = 1
+ break
+
+ if not must_quote:
+ return val
+
+ _init_writer(&writer)
+ try:
+ return self._do_quote(<str>val, length, kind, data, &writer)
+ finally:
+ _release_writer(&writer) # frees the buffer if it was moved off the static BUFFER
+
+ cdef str _do_quote(
+ self,
+ str val,
+ Py_ssize_t length,
+ int kind,
+ const void *data,
+ Writer *writer
+ ):
+ cdef Py_UCS4 ch
+ cdef int changed
+ cdef Py_ssize_t idx = 0
+
+ while idx < length:
+ ch = PyUnicode_READ(kind, data, idx)
+ idx += 1
+ if ch == '%' and self._requote and idx <= length - 2: # a "%" with at least two characters after it
+ ch = _restore_ch(
+ PyUnicode_READ(kind, data, idx),
+ PyUnicode_READ(kind, data, idx + 1)
+ )
+ if ch != <Py_UCS4>-1: # both characters were hex digits
+ idx += 2
+ if ch < 128:
+ if bit_at(self._protected_table, ch): # protected: stays percent-encoded
+ if _write_pct(writer, ch, True) < 0:
+ raise
+ continue
+
+ if bit_at(self._safe_table, ch): # safe: written as the literal character
+ if _write_char(writer, ch, True) < 0:
+ raise
+ continue
+
+ changed = (_is_lower_hex(PyUnicode_READ(kind, data, idx - 2)) or
+ _is_lower_hex(PyUnicode_READ(kind, data, idx - 1)))
+ if _write_pct(writer, ch, changed) < 0: # re-emitted with uppercase hex; a change only if the input had lowercase hex
+ raise
+ continue
+ else:
+ ch = '%' # not a valid escape: handle the "%" like any other character
+
+ if self._write(writer, ch) < 0:
+ raise
+
+ if not writer.changed: # nothing was rewritten: hand back the original object
+ return val
+ else:
+ return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict")
+
+ cdef inline int _write(self, Writer *writer, Py_UCS4 ch): # write one input character, quoting it if needed
+ if self._qs:
+ if ch == ' ':
+ return _write_char(writer, '+', True) # in query-string mode a space becomes "+"
+
+ if ch < 128 and bit_at(self._safe_table, ch):
+ return _write_char(writer, ch, False)
+
+ return _write_utf8(writer, ch)
+
+
+cdef class _Unquoter: # percent-decoder; decoded characters listed in unsafe or ignore are quoted again
+ cdef str _ignore
+ cdef str _unsafe
+ cdef bint _qs
+ cdef _Quoter _quoter
+ cdef _Quoter _qs_quoter
+
+ def __init__(self, *, ignore="", unsafe="", qs=False):
+ self._ignore = ignore
+ self._unsafe = unsafe
+ self._qs = qs
+ self._quoter = _Quoter()
+ self._qs_quoter = _Quoter(qs=True)
+
+ def __call__(self, val): # returns None for None, the unquoted str otherwise; TypeError for non-str
+ if val is None:
+ return None
+ if type(val) is not str:
+ if isinstance(val, str):
+ # derived from str
+ val = str(val)
+ else:
+ raise TypeError("Argument should be str")
+ return self._do_unquote(<str>val)
+
+ cdef str _do_unquote(self, str val):
+ cdef Py_ssize_t length = PyUnicode_GET_LENGTH(val)
+ if length == 0:
+ return val
+
+ cdef list ret = []
+ cdef char buffer[4] # pending bytes of one UTF-8 sequence
+ cdef Py_ssize_t buflen = 0
+ cdef Py_ssize_t consumed
+ cdef str unquoted
+ cdef Py_UCS4 ch = 0
+ cdef Py_ssize_t idx = 0
+ cdef Py_ssize_t start_pct
+ cdef int kind = PyUnicode_KIND(val)
+ cdef const void *data = PyUnicode_DATA(val)
+ cdef bint changed = 0
+ while idx < length:
+ ch = PyUnicode_READ(kind, data, idx)
+ idx += 1
+ if ch == '%' and idx <= length - 2: # a "%" with at least two characters after it
+ changed = 1
+ ch = _restore_ch(
+ PyUnicode_READ(kind, data, idx),
+ PyUnicode_READ(kind, data, idx + 1)
+ )
+ if ch != <Py_UCS4>-1: # both characters were hex digits
+ idx += 2
+ assert buflen < 4
+ buffer[buflen] = ch
+ buflen += 1
+ try:
+ unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
+ NULL, &consumed)
+ except UnicodeDecodeError:
+ start_pct = idx - buflen * 3 # each pending byte came from three input characters ("%XX")
+ buffer[0] = ch
+ buflen = 1
+ ret.append(val[start_pct : idx - 3]) # emit the earlier escapes verbatim
+ try:
+ unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
+ NULL, &consumed)
+ except UnicodeDecodeError:
+ buflen = 0
+ ret.append(val[idx - 3 : idx]) # the current escape is emitted verbatim too
+ continue
+ if not unquoted: # incomplete sequence so far: wait for more bytes
+ assert consumed == 0
+ continue
+ assert consumed == buflen
+ buflen = 0
+ if self._qs and unquoted in '+=&;':
+ ret.append(self._qs_quoter(unquoted))
+ elif unquoted in self._unsafe or unquoted in self._ignore:
+ ret.append(self._quoter(unquoted))
+ else:
+ ret.append(unquoted)
+ continue
+ else:
+ ch = '%' # not a valid escape: handle the "%" like any other character
+
+ if buflen: # an unfinished sequence is interrupted: emit its escapes verbatim
+ start_pct = idx - 1 - buflen * 3
+ ret.append(val[start_pct : idx - 1])
+ buflen = 0
+
+ if ch == '+':
+ if not self._qs or ch in self._unsafe:
+ ret.append('+')
+ else:
+ changed = 1
+ ret.append(' ') # in query-string mode "+" decodes to a space
+ continue
+
+ if ch in self._unsafe: # a literal unsafe character is percent-encoded in the output
+ changed = 1
+ ret.append('%')
+ h = hex(ord(ch)).upper()[2:]
+ for ch in h:
+ ret.append(ch)
+ continue
+
+ ret.append(ch)
+
+ if not changed: # nothing was decoded or rewritten: hand back the original object
+ return val
+
+ if buflen: # input ended inside an unfinished sequence: keep its escapes verbatim
+ ret.append(val[length - buflen * 3 : length])
+
+ return ''.join(ret)
diff --git a/.venv/lib/python3.12/site-packages/yarl/_quoting_py.py b/.venv/lib/python3.12/site-packages/yarl/_quoting_py.py
new file mode 100644
index 00000000..7256acd8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_quoting_py.py
@@ -0,0 +1,197 @@
+import codecs
+import re
+from string import ascii_letters, ascii_lowercase, digits
+from typing import cast
+
+BASCII_LOWERCASE = ascii_lowercase.encode("ascii") # bytes a-z, used to upper-case the digits of an escape
+BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)} # NOTE(review): not referenced in the visible part of this module - confirm it is still needed
+GEN_DELIMS = ":/?#[]@"
+SUB_DELIMS_WITHOUT_QS = "!$'()*,"
+SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;"
+RESERVED = GEN_DELIMS + SUB_DELIMS
+UNRESERVED = ascii_letters + digits + "-._~"
+ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS # never quoted, whatever the URL part
+
+
+_IS_HEX = re.compile(b"[A-Z0-9][A-Z0-9]") # deliberately loose: also matches G-Z, which the caller rejects via int(..., base=16)
+_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")
+
+utf8_decoder = codecs.getincrementaldecoder("utf-8") # decoder class; a new instance is created per unquote call
+
+
+class _Quoter: # pure-Python percent-encoder, used when the compiled one is unavailable
+ def __init__(
+ self,
+ *,
+ safe: str = "",
+ protected: str = "",
+ qs: bool = False,
+ requote: bool = True,
+ ) -> None:
+ self._safe = safe
+ self._protected = protected
+ self._qs = qs
+ self._requote = requote
+
+ def __call__(self, val: str) -> str: # returns None for None, "" for "", TypeError for non-str
+ if val is None:
+ return None
+ if not isinstance(val, str):
+ raise TypeError("Argument should be str")
+ if not val:
+ return ""
+ bval = val.encode("utf8", errors="ignore") # characters that cannot be encoded are dropped
+ ret = bytearray()
+ pct = bytearray() # holds "%" plus the digits collected so far while inside an escape
+ safe = self._safe
+ safe += ALLOWED
+ if not self._qs: # outside query strings "+&=;" are safe as well
+ safe += "+&=;"
+ safe += self._protected
+ bsafe = safe.encode("ascii")
+ idx = 0
+ while idx < len(bval):
+ ch = bval[idx]
+ idx += 1
+
+ if pct:
+ if ch in BASCII_LOWERCASE:
+ ch = ch - 32 # convert to uppercase
+ pct.append(ch)
+ if len(pct) == 3: # pragma: no branch # peephole optimizer
+ buf = pct[1:]
+ if not _IS_HEX.match(buf):
+ ret.extend(b"%25") # not an escape: quote the "%" itself
+ pct.clear()
+ idx -= 2 # rewind so the two bytes after "%" are processed as ordinary input
+ continue
+ try:
+ unquoted = chr(int(pct[1:].decode("ascii"), base=16))
+ except ValueError:
+ ret.extend(b"%25")
+ pct.clear()
+ idx -= 2
+ continue
+
+ if unquoted in self._protected: # protected: stays percent-encoded
+ ret.extend(pct)
+ elif unquoted in safe: # safe: written as the literal character
+ ret.append(ord(unquoted))
+ else:
+ ret.extend(pct) # kept as an escape, now with uppercase hex
+ pct.clear()
+
+ # special case, if we have only one char after "%"
+ elif len(pct) == 2 and idx == len(bval):
+ ret.extend(b"%25")
+ pct.clear()
+ idx -= 1
+
+ continue
+
+ elif ch == ord("%") and self._requote:
+ pct.clear()
+ pct.append(ch)
+
+ # special case if "%" is last char
+ if idx == len(bval):
+ ret.extend(b"%25")
+
+ continue
+
+ if self._qs and ch == ord(" "): # in query-string mode a space becomes "+"
+ ret.append(ord("+"))
+ continue
+ if ch in bsafe:
+ ret.append(ch)
+ continue
+
+ ret.extend((f"%{ch:02X}").encode("ascii"))
+
+ ret2 = ret.decode("ascii")
+ if ret2 == val: # unchanged: hand back the original object
+ return val
+ return ret2
+
+
+class _Unquoter: # pure-Python percent-decoder, used when the compiled one is unavailable
+ def __init__(self, *, ignore: str = "", unsafe: str = "", qs: bool = False) -> None:
+ self._ignore = ignore
+ self._unsafe = unsafe
+ self._qs = qs
+ self._quoter = _Quoter()
+ self._qs_quoter = _Quoter(qs=True)
+
+ def __call__(self, val: str) -> str: # returns None for None, "" for "", TypeError for non-str
+ if val is None:
+ return None
+ if not isinstance(val, str):
+ raise TypeError("Argument should be str")
+ if not val:
+ return ""
+ decoder = cast(codecs.BufferedIncrementalDecoder, utf8_decoder())
+ ret = []
+ idx = 0
+ while idx < len(val):
+ ch = val[idx]
+ idx += 1
+ if ch == "%" and idx <= len(val) - 2: # a "%" with at least two characters after it
+ pct = val[idx : idx + 2]
+ if _IS_HEX_STR.fullmatch(pct):
+ b = bytes([int(pct, base=16)])
+ idx += 2
+ try:
+ unquoted = decoder.decode(b)
+ except UnicodeDecodeError:
+ start_pct = idx - 3 - len(decoder.buffer) * 3 # each buffered byte came from three input characters
+ ret.append(val[start_pct : idx - 3]) # emit the earlier escapes verbatim
+ decoder.reset()
+ try:
+ unquoted = decoder.decode(b)
+ except UnicodeDecodeError:
+ ret.append(val[idx - 3 : idx]) # the current escape is emitted verbatim too
+ continue
+ if not unquoted: # incomplete sequence so far: wait for more bytes
+ continue
+ if self._qs and unquoted in "+=&;":
+ to_add = self._qs_quoter(unquoted)
+ if to_add is None: # pragma: no cover
+ raise RuntimeError("Cannot quote None")
+ ret.append(to_add)
+ elif unquoted in self._unsafe or unquoted in self._ignore:
+ to_add = self._quoter(unquoted)
+ if to_add is None: # pragma: no cover
+ raise RuntimeError("Cannot quote None")
+ ret.append(to_add)
+ else:
+ ret.append(unquoted)
+ continue
+
+ if decoder.buffer: # an unfinished sequence is interrupted: emit its escapes verbatim
+ start_pct = idx - 1 - len(decoder.buffer) * 3
+ ret.append(val[start_pct : idx - 1])
+ decoder.reset()
+
+ if ch == "+":
+ if not self._qs or ch in self._unsafe:
+ ret.append("+")
+ else:
+ ret.append(" ") # in query-string mode "+" decodes to a space
+ continue
+
+ if ch in self._unsafe: # a literal unsafe character is percent-encoded in the output
+ ret.append("%")
+ h = hex(ord(ch)).upper()[2:]
+ for ch in h:
+ ret.append(ch)
+ continue
+
+ ret.append(ch)
+
+ if decoder.buffer: # input ended inside an unfinished sequence: keep its escapes verbatim
+ ret.append(val[-len(decoder.buffer) * 3 :])
+
+ ret2 = "".join(ret)
+ if ret2 == val: # unchanged: hand back the original object
+ return val
+ return ret2
diff --git a/.venv/lib/python3.12/site-packages/yarl/_url.py b/.venv/lib/python3.12/site-packages/yarl/_url.py
new file mode 100644
index 00000000..4e4b8a37
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_url.py
@@ -0,0 +1,1584 @@
+import re
+import sys
+import warnings
+from collections.abc import Mapping, Sequence
+from enum import Enum
+from functools import _CacheInfo, lru_cache
+from ipaddress import ip_address
+from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload
+from urllib.parse import SplitResult, parse_qsl, uses_relative
+
+import idna
+from multidict import MultiDict, MultiDictProxy
+from propcache.api import under_cached_property as cached_property
+
+from ._parse import (
+ USES_AUTHORITY,
+ SplitURLType,
+ make_netloc,
+ split_netloc,
+ split_url,
+ unsplit_result,
+)
+from ._path import normalize_path, normalize_path_segments
+from ._query import (
+ Query,
+ QueryVariable,
+ SimpleQuery,
+ get_str_query,
+ get_str_query_from_iterable,
+ get_str_query_from_sequence_iterable,
+)
+from ._quoters import (
+ FRAGMENT_QUOTER,
+ FRAGMENT_REQUOTER,
+ PATH_QUOTER,
+ PATH_REQUOTER,
+ PATH_SAFE_UNQUOTER,
+ PATH_UNQUOTER,
+ QS_UNQUOTER,
+ QUERY_QUOTER,
+ QUERY_REQUOTER,
+ QUOTER,
+ REQUOTER,
+ UNQUOTER,
+ human_quote,
+)
+
# Port implied by each scheme when the URL does not spell one out.
DEFAULT_PORTS = {
    "http": 80,
    "https": 443,
    "ws": 80,
    "wss": 443,
    "ftp": 21,
}
USES_RELATIVE = frozenset(uses_relative)

# Special schemes https://url.spec.whatwg.org/#special-scheme
# are not allowed to have an empty host https://url.spec.whatwg.org/#url-representation
SCHEME_REQUIRES_HOST = frozenset({"http", "https", "ws", "wss", "ftp"})
+
+
# RFC 3986: reg-name = *( unreserved / pct-encoded / sub-delims )
# The pattern below finds the first thing that does *not* belong to any of
# those classes. It is only applied to lower-cased ASCII values, which is why
# the classes list lower-case letters and hex digits only.
NOT_REG_NAME = re.compile(
    r"""
        # any character not in the unreserved or sub-delims sets, plus %
        # (validated with the additional check for pct-encoded sequences below)
        [^a-z0-9\-._~!$&'()*+,;=%]
    |
        # % only allowed if it is part of a pct-encoded
        # sequence of 2 hex digits.
        %(?![0-9a-f]{2})
    """,
    flags=re.VERBOSE,
)
+
+_T = TypeVar("_T")
+
+if sys.version_info >= (3, 11):
+ from typing import Self
+else:
+ Self = Any
+
+
class UndefinedType(Enum):
    """One-member enum that gives the "not set" sentinel its own type."""

    _singleton = 0


# The sentinel value itself; it is compared by identity (``is UNDEFINED``).
UNDEFINED = UndefinedType._singleton
+
+
+class CacheInfo(TypedDict):
+ """Host encoding cache."""
+
+ idna_encode: _CacheInfo
+ idna_decode: _CacheInfo
+ ip_address: _CacheInfo
+ host_validate: _CacheInfo
+ encode_host: _CacheInfo
+
+
class _InternalURLCache(TypedDict, total=False):
    """Shape of the per-URL ``_cache`` dict.

    Every key is optional (``total=False``): an entry appears only once
    the corresponding value has been computed or pre-filled by a parser.
    """

    _val: SplitURLType
    _origin: "URL"
    absolute: bool
    # Written by ``URL.__hash__``.
    hash: int
    scheme: str
    raw_authority: str
    authority: str
    raw_user: Union[str, None]
    user: Union[str, None]
    raw_password: Union[str, None]
    password: Union[str, None]
    raw_host: Union[str, None]
    host: Union[str, None]
    host_subcomponent: Union[str, None]
    host_port_subcomponent: Union[str, None]
    port: Union[int, None]
    explicit_port: Union[int, None]
    raw_path: str
    path: str
    # Cached property on URL that was missing from this declaration.
    path_safe: str
    _parsed_query: list[tuple[str, str]]
    query: "MultiDictProxy[str]"
    raw_query_string: str
    query_string: str
    path_qs: str
    raw_path_qs: str
    raw_fragment: str
    fragment: str
    raw_parts: tuple[str, ...]
    parts: tuple[str, ...]
    parent: "URL"
    raw_name: str
    name: str
    raw_suffix: str
    suffix: str
    raw_suffixes: tuple[str, ...]
    suffixes: tuple[str, ...]
+
+
def rewrite_module(obj: _T) -> _T:
    """Set ``obj.__module__`` to ``"yarl"`` and hand the same object back."""
    setattr(obj, "__module__", "yarl")
    return obj
+
+
@lru_cache
def encode_url(url_str: str) -> "URL":
    """Parse an unencoded URL string and requote each of its components.

    The netloc parts computed along the way (raw host, explicit port, raw
    user and password) are stored in the new URL's cache together with the
    scheme, raw path, raw query string and raw fragment.

    Raises ValueError when the netloc has no host and the scheme is one of
    SCHEME_REQUIRES_HOST.
    """
    scheme, netloc, path, query, fragment = split_url(url_str)
    prefilled: _InternalURLCache = {}

    if netloc:
        host: Union[str, None]
        if ":" in netloc or "@" in netloc or "[" in netloc:
            # Complex netloc: needs a real split.
            username, password, host, port = split_netloc(netloc)
        else:
            username, password, host, port = None, None, netloc, None

        if host is None:
            if scheme in SCHEME_REQUIRES_HOST:
                raise ValueError(
                    "Invalid URL: host is required for "
                    f"absolute urls with the {scheme} scheme"
                )
            host = ""
        host = _encode_host(host, validate_host=False)

        # Remove brackets as host encoder adds back brackets for IPv6 addresses
        prefilled["raw_host"] = host[1:-1] if "[" in host else host
        prefilled["explicit_port"] = port

        if username is None and password is None:
            # Fast path: no userinfo, so no need to go through make_netloc.
            raw_user = raw_password = None
            netloc = f"{host}:{port}" if port is not None else host
        else:
            raw_user = REQUOTER(username) if username else username
            raw_password = REQUOTER(password) if password else password
            netloc = make_netloc(raw_user, raw_password, host, port)
        prefilled["raw_user"] = raw_user
        prefilled["raw_password"] = raw_password

    if path:
        path = PATH_REQUOTER(path)
        if netloc and "." in path:
            path = normalize_path(path)
    if query:
        query = QUERY_REQUOTER(query)
    if fragment:
        fragment = FRAGMENT_REQUOTER(fragment)

    prefilled["scheme"] = scheme
    # A URL with a netloc but no path reports "/" as its raw path.
    prefilled["raw_path"] = path if path or not netloc else "/"
    prefilled["raw_query_string"] = query
    prefilled["raw_fragment"] = fragment

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query
    url._fragment = fragment
    url._cache = prefilled
    return url
+
+
@lru_cache
def pre_encoded_url(url_str: str) -> "URL":
    """Split an already-encoded URL string and store the parts unchanged."""
    parts = split_url(url_str)
    url = object.__new__(URL)
    url._scheme, url._netloc, url._path, url._query, url._fragment = parts
    url._cache = {}
    return url
+
+
@lru_cache
def build_pre_encoded_url(
    scheme: str,
    authority: str,
    user: Union[str, None],
    password: Union[str, None],
    host: str,
    port: Union[int, None],
    path: str,
    query_string: str,
    fragment: str,
) -> "URL":
    """Assemble a URL from parts that are already encoded.

    ``authority`` wins over ``user``/``password``/``host``/``port`` when it
    is non-empty. Otherwise the netloc is built from ``host``, dropping a
    port that equals the scheme's default. No quoting is applied.
    """
    if authority:
        netloc = authority
    elif not host:
        netloc = ""
    else:
        if port is not None and port == DEFAULT_PORTS.get(scheme):
            port = None
        if user is None and password is None:
            netloc = host if port is None else f"{host}:{port}"
        else:
            netloc = make_netloc(user, password, host, port)

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query_string
    url._fragment = fragment
    url._cache = {}
    return url
+
+
def from_parts_uncached(
    scheme: str, netloc: str, path: str, query: str, fragment: str
) -> "URL":
    """Create a URL directly from its five components, with an empty cache.

    The values are stored as given; nothing is quoted or validated here.
    """
    url = object.__new__(URL)
    url._scheme, url._netloc = scheme, netloc
    url._path, url._query, url._fragment = path, query, fragment
    url._cache = {}
    return url


# Memoized variant of from_parts_uncached.
from_parts = lru_cache(from_parts_uncached)
+
+
+@rewrite_module
+class URL:
+ # Don't derive from str
+ # follow pathlib.Path design
+ # probably URL will not suffer from pathlib problems:
+ # it's intended for libraries like aiohttp,
+ # not to be passed into standard library functions like os.open etc.
+
+ # URL grammar (RFC 3986)
+ # pct-encoded = "%" HEXDIG HEXDIG
+ # reserved = gen-delims / sub-delims
+ # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ # / "*" / "+" / "," / ";" / "="
+ # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ # hier-part = "//" authority path-abempty
+ # / path-absolute
+ # / path-rootless
+ # / path-empty
+ # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ # authority = [ userinfo "@" ] host [ ":" port ]
+ # userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+ # host = IP-literal / IPv4address / reg-name
+ # IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+ # IPv6address = 6( h16 ":" ) ls32
+ # / "::" 5( h16 ":" ) ls32
+ # / [ h16 ] "::" 4( h16 ":" ) ls32
+ # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ # / [ *4( h16 ":" ) h16 ] "::" ls32
+ # / [ *5( h16 ":" ) h16 ] "::" h16
+ # / [ *6( h16 ":" ) h16 ] "::"
+ # ls32 = ( h16 ":" h16 ) / IPv4address
+ # ; least-significant 32 bits of address
+ # h16 = 1*4HEXDIG
+ # ; 16 bits of address represented in hexadecimal
+ # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ # dec-octet = DIGIT ; 0-9
+ # / %x31-39 DIGIT ; 10-99
+ # / "1" 2DIGIT ; 100-199
+ # / "2" %x30-34 DIGIT ; 200-249
+ # / "25" %x30-35 ; 250-255
+ # reg-name = *( unreserved / pct-encoded / sub-delims )
+ # port = *DIGIT
+ # path = path-abempty ; begins with "/" or is empty
+ # / path-absolute ; begins with "/" but not "//"
+ # / path-noscheme ; begins with a non-colon segment
+ # / path-rootless ; begins with a segment
+ # / path-empty ; zero characters
+ # path-abempty = *( "/" segment )
+ # path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ # path-noscheme = segment-nz-nc *( "/" segment )
+ # path-rootless = segment-nz *( "/" segment )
+ # path-empty = 0<pchar>
+ # segment = *pchar
+ # segment-nz = 1*pchar
+ # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ # ; non-zero-length segment without any colon ":"
+ # pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ # query = *( pchar / "/" / "?" )
+ # fragment = *( pchar / "/" / "?" )
+ # URI-reference = URI / relative-ref
+ # relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+ # relative-part = "//" authority path-abempty
+ # / path-absolute
+ # / path-noscheme
+ # / path-empty
+ # absolute-URI = scheme ":" hier-part [ "?" query ]
+ __slots__ = ("_cache", "_scheme", "_netloc", "_path", "_query", "_fragment")
+
+ _scheme: str
+ _netloc: str
+ _path: str
+ _query: str
+ _fragment: str
+
def __new__(
    cls,
    val: Union[str, SplitResult, "URL", UndefinedType] = UNDEFINED,
    *,
    encoded: bool = False,
    strict: Union[bool, None] = None,
) -> "URL":
    """Create a URL from a string, a SplitResult or another URL.

    ``encoded=True`` stores the string as is; otherwise it is requoted.
    A SplitResult is accepted only with ``encoded=True``. A URL instance
    is returned unchanged. ``strict`` is ignored (a warning is issued).
    """
    if strict is not None:  # pragma: no cover
        warnings.warn("strict parameter is ignored")

    parse = pre_encoded_url if encoded else encode_url
    val_type = type(val)
    if val_type is str:
        return parse(val)
    if val_type is cls:
        return val
    if val_type is SplitResult:
        if not encoded:
            raise ValueError("Cannot apply decoding to SplitResult")
        return from_parts(*val)
    if isinstance(val, str):
        # str subclass: convert to a plain str before parsing.
        return parse(str(val))
    if val is UNDEFINED:
        # Special case for UNDEFINED since it might be unpickling and we do
        # not want to cache as the `__setstate__` call would mutate the URL
        # object in the `pre_encoded_url` or `encode_url` caches.
        blank = object.__new__(URL)
        blank._scheme = blank._netloc = blank._path = ""
        blank._query = blank._fragment = ""
        blank._cache = {}
        return blank
    raise TypeError("Constructor parameter should be str")
+
@classmethod
def build(
    cls,
    *,
    scheme: str = "",
    authority: str = "",
    user: Union[str, None] = None,
    password: Union[str, None] = None,
    host: str = "",
    port: Union[int, None] = None,
    path: str = "",
    query: Union[Query, None] = None,
    query_string: str = "",
    fragment: str = "",
    encoded: bool = False,
) -> "URL":
    """Creates and returns a new URL

    ``authority`` is mutually exclusive with ``user``/``password``/
    ``host``/``port``; ``query`` is mutually exclusive with
    ``query_string``. With ``encoded=True`` the parts are used as given;
    otherwise host, path, query string and fragment are quoted.

    Raises ValueError for conflicting arguments, for a port without a
    host, or for a path lacking a leading slash when a netloc is present.
    Raises TypeError for a non-int port or a None string argument.
    """
    if authority and (user or password or host or port):
        raise ValueError(
            'Can\'t mix "authority" with "user", "password", "host" or "port".'
        )
    if port is not None and not isinstance(port, int):
        raise TypeError(f"The port is required to be int, got {type(port)!r}.")
    if port and not host:
        raise ValueError('Can\'t build URL with "port" but without "host".')
    if query and query_string:
        raise ValueError('Only one of "query" or "query_string" should be passed')
    string_args = (scheme, authority, host, path, query_string, fragment)
    if any(arg is None for arg in string_args):
        raise TypeError(
            'NoneType is illegal for "scheme", "authority", "host", "path", '
            '"query_string", and "fragment" args, use empty string instead.'
        )

    if query:
        query_string = get_str_query(query) or ""

    if encoded:
        return build_pre_encoded_url(
            scheme,
            authority,
            user,
            password,
            host,
            port,
            path,
            query_string,
            fragment,
        )

    # None means "no netloc at all"; "" is a netloc with an empty host.
    encoded_host: Union[str, None] = None
    if authority:
        user, password, split_host, port = split_netloc(authority)
        if split_host:
            encoded_host = _encode_host(split_host, validate_host=False)
        else:
            encoded_host = ""
    elif host:
        encoded_host = _encode_host(host, validate_host=True)

    netloc = ""
    if encoded_host is not None:
        if port is not None and port == DEFAULT_PORTS.get(scheme):
            port = None
        if user is None and password is None:
            netloc = encoded_host if port is None else f"{encoded_host}:{port}"
        else:
            netloc = make_netloc(user, password, encoded_host, port, True)

    if path:
        path = PATH_QUOTER(path)
    if path and netloc:
        if "." in path:
            path = normalize_path(path)
        if path[0] != "/":
            raise ValueError(
                "Path in a URL with authority should "
                "start with a slash ('/') if set"
            )

    # A string produced from `query` above must not be quoted again.
    if not query and query_string:
        query_string = QUERY_QUOTER(query_string)
    if fragment:
        fragment = FRAGMENT_QUOTER(fragment)

    url = object.__new__(URL)
    url._scheme = scheme
    url._netloc = netloc
    url._path = path
    url._query = query_string
    url._fragment = fragment
    url._cache = {}
    return url
+
def __init_subclass__(cls):
    """Reject every attempt to subclass URL."""
    message = f"Inheriting a class {cls!r} from URL is forbidden"
    raise TypeError(message)
+
def __str__(self) -> str:
    """Render the URL as an encoded string.

    An empty path is shown as "/" when there is a netloc and a query or
    fragment follows. An explicit port equal to the scheme's default is
    left out of the rendered netloc.
    """
    path = self._path
    if not path and self._netloc and (self._query or self._fragment):
        path = "/"

    explicit = self.explicit_port
    if explicit is not None and explicit == DEFAULT_PORTS.get(self._scheme):
        # port normalization - using None for default ports to remove from rendering
        # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3
        host = self.host_subcomponent
        netloc = make_netloc(self.raw_user, self.raw_password, host, None)
    else:
        netloc = self._netloc
    return unsplit_result(self._scheme, netloc, path, self._query, self._fragment)
+
def __repr__(self) -> str:
    """Return ``ClassName('<str(self)>')``."""
    class_name = self.__class__.__name__
    return f"{class_name}('{self!s}')"
+
def __bytes__(self) -> bytes:
    """Return the string form of the URL encoded as ASCII bytes."""
    text = str(self)
    return text.encode("ascii")
+
def __eq__(self, other: object) -> bool:
    """Compare two URLs component by component.

    An empty path next to a non-empty netloc is treated as "/" on both
    sides. Anything that is not exactly a URL yields NotImplemented.
    """
    if type(other) is not URL:
        return NotImplemented

    own_path = self._path if self._path or not self._netloc else "/"
    other_path = other._path if other._path or not other._netloc else "/"
    own = (self._scheme, self._netloc, own_path, self._query, self._fragment)
    theirs = (
        other._scheme,
        other._netloc,
        other_path,
        other._query,
        other._fragment,
    )
    return own == theirs
+
def __hash__(self) -> int:
    """Hash the five URL components, memoizing the result in the cache.

    The path is normalized the same way as in ``__eq__``: empty path with
    a netloc hashes as "/".
    """
    cached = self._cache.get("hash")
    if cached is not None:
        return cached
    path = self._path if self._path or not self._netloc else "/"
    key = (self._scheme, self._netloc, path, self._query, self._fragment)
    cached = hash(key)
    self._cache["hash"] = cached
    return cached
+
def __le__(self, other: object) -> bool:
    """Order URLs by their component tuples; non-URLs are not comparable."""
    if type(other) is URL:
        return self._val <= other._val
    return NotImplemented
+
def __lt__(self, other: object) -> bool:
    """Order URLs by their component tuples; non-URLs are not comparable."""
    if type(other) is URL:
        return self._val < other._val
    return NotImplemented
+
def __ge__(self, other: object) -> bool:
    """Order URLs by their component tuples; non-URLs are not comparable."""
    if type(other) is URL:
        return self._val >= other._val
    return NotImplemented
+
def __gt__(self, other: object) -> bool:
    """Order URLs by their component tuples; non-URLs are not comparable."""
    if type(other) is URL:
        return self._val > other._val
    return NotImplemented
+
def __truediv__(self, name: str) -> "URL":
    """Append *name* to the path (``url / "segment"``).

    Returns NotImplemented for a non-str operand.
    """
    if isinstance(name, str):
        segment = str(name)
        return self._make_child((segment,))
    return NotImplemented
+
def __mod__(self, query: Query) -> "URL":
    """``url % query`` is shorthand for ``url.update_query(query)``."""
    updated = self.update_query(query)
    return updated
+
def __bool__(self) -> bool:
    """True when netloc, path, query or fragment is non-empty.

    The scheme is not considered.
    """
    components = (self._netloc, self._path, self._query, self._fragment)
    return any(components)
+
def __getstate__(self) -> tuple[SplitResult]:
    """Return the pickle state: a 1-tuple holding a SplitResult."""
    split = tuple.__new__(SplitResult, self._val)
    return (split,)
+
def __setstate__(self, state):
    """Restore the five components from pickle state and reset the cache.

    Two layouts are accepted: ``(None, {"_val": parts})`` (default style
    pickle) and a sequence whose first item is the parts.
    """
    default_style = state[0] is None and isinstance(state[1], dict)
    if default_style:
        parts = state[1]["_val"]
    else:
        parts, *_ = state
    self._scheme, self._netloc, self._path, self._query, self._fragment = parts
    self._cache = {}
+
def _cache_netloc(self) -> None:
    """Split the netloc once and store all four parts in the cache."""
    user, password, host, port = split_netloc(self._netloc)
    cache = self._cache
    cache["raw_user"] = user
    cache["raw_password"] = password
    cache["raw_host"] = host
    cache["explicit_port"] = port
+
def is_absolute(self) -> bool:
    """A check for absolute URLs.

    Return True for absolute ones (having scheme or starting
    with //), False otherwise.

    It is preferred to call the .absolute property instead
    as it is cached.
    """
    result = self.absolute
    return result
+
def is_default_port(self) -> bool:
    """A check for default port.

    Return True if port is default for specified scheme,
    e.g. 'http://python.org' or 'http://python.org:80', False
    otherwise.

    Return False for relative URLs.

    """
    explicit = self.explicit_port
    if explicit is not None:
        return explicit == DEFAULT_PORTS.get(self._scheme)
    # No explicit port: the default is in use unless the URL is relative,
    # in which case there is no implicit port at all.
    return self._netloc != ""
+
def origin(self) -> "URL":
    """Return an URL with scheme, host and port parts only.

    user, password, path, query and fragment are removed.

    """
    # TODO: add a keyword-only option for keeping user/pass maybe?
    cached_origin = self._origin
    return cached_origin
+
+ @cached_property
+ def _val(self) -> SplitURLType:
+ return (self._scheme, self._netloc, self._path, self._query, self._fragment)
+
@cached_property
def _origin(self) -> "URL":
    """Return an URL with scheme, host and port parts only.

    user, password, path, query and fragment are removed.
    Raises ValueError when the netloc or the scheme is empty.
    """
    netloc = self._netloc
    if not netloc:
        raise ValueError("URL should be absolute")
    scheme = self._scheme
    if not scheme:
        raise ValueError("URL should have scheme")

    if "@" in netloc:
        # Userinfo present: rebuild the netloc from host and port only.
        encoded_host = self.host_subcomponent
        netloc = make_netloc(None, None, encoded_host, self.explicit_port)
    elif not (self._path or self._query or self._fragment):
        # Nothing to strip; this URL already is its own origin.
        return self
    return from_parts(scheme, netloc, "", "", "")
+
def relative(self) -> "URL":
    """Return a relative part of the URL.

    scheme, user, password, host and port are removed.
    Raises ValueError when the URL has no netloc.

    """
    if self._netloc:
        return from_parts("", "", self._path, self._query, self._fragment)
    raise ValueError("URL should be absolute")
+
@cached_property
def absolute(self) -> bool:
    """A check for absolute URLs.

    Return True for absolute ones (having scheme or starting
    with //), False otherwise.

    """
    # `netloc` is an empty string for relative URLs.
    # Testing it is cheaper than going through the host property, which
    # has to parse the host out of the netloc first.
    netloc = self._netloc
    return netloc != ""
+
+ @cached_property
+ def scheme(self) -> str:
+ """Scheme for absolute URLs.
+
+ Empty string for relative URLs or URLs starting with //
+
+ """
+ return self._scheme
+
+ @cached_property
+ def raw_authority(self) -> str:
+ """Encoded authority part of URL.
+
+ Empty string for relative URLs.
+
+ """
+ return self._netloc
+
@cached_property
def authority(self) -> str:
    """Decoded authority part of URL.

    Built from the decoded user, password and host plus ``self.port``.

    """
    user, password = self.user, self.password
    host, port = self.host, self.port
    return make_netloc(user, password, host, port)
+
@cached_property
def raw_user(self) -> Union[str, None]:
    """Encoded user part of URL.

    None if user is missing.

    """
    # not .username
    self._cache_netloc()
    cache = self._cache
    return cache["raw_user"]
+
@cached_property
def user(self) -> Union[str, None]:
    """Decoded user part of URL.

    None if user is missing.

    """
    encoded = self.raw_user
    return None if encoded is None else UNQUOTER(encoded)
+
@cached_property
def raw_password(self) -> Union[str, None]:
    """Encoded password part of URL.

    None if password is missing.

    """
    self._cache_netloc()
    cache = self._cache
    return cache["raw_password"]
+
@cached_property
def password(self) -> Union[str, None]:
    """Decoded password part of URL.

    None if password is missing.

    """
    encoded = self.raw_password
    return None if encoded is None else UNQUOTER(encoded)
+
@cached_property
def raw_host(self) -> Union[str, None]:
    """Encoded host part of URL.

    None for relative URLs.

    When working with IPv6 addresses, use the `host_subcomponent` property instead
    as it will return the host subcomponent with brackets.
    """
    # Use host instead of hostname for sake of shortness
    # May add .hostname prop later
    self._cache_netloc()
    cache = self._cache
    return cache["raw_host"]
+
@cached_property
def host(self) -> Union[str, None]:
    """Decoded host part of URL.

    None for relative URLs.

    """
    raw = self.raw_host
    if raw is None:
        return None
    ends_with_digit = bool(raw) and raw[-1].isdigit()
    if ends_with_digit or ":" in raw:
        # IP addresses are never IDNA encoded
        return raw
    return _idna_decode(raw)
+
@cached_property
def host_subcomponent(self) -> Union[str, None]:
    """Return the host subcomponent part of URL.

    None for relative URLs.

    https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2

    `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`

    Examples:
    - `http://example.com:8080` -> `example.com`
    - `http://example.com:80` -> `example.com`
    - `https://127.0.0.1:8443` -> `127.0.0.1`
    - `https://[::1]:8443` -> `[::1]`
    - `http://[::1]` -> `[::1]`

    """
    raw = self.raw_host
    if raw is None:
        return None
    # A colon in the raw host marks an IP literal, which needs brackets.
    if ":" in raw:
        return f"[{raw}]"
    return raw
+
@cached_property
def host_port_subcomponent(self) -> Union[str, None]:
    """Return the host and port subcomponent part of URL.

    Trailing dots are removed from the host part.

    This value is suitable for use in the Host header of an HTTP request.

    None for relative URLs.

    https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
    `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
    https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
    port = *DIGIT

    Examples:
    - `http://example.com:8080` -> `example.com:8080`
    - `http://example.com:80` -> `example.com`
    - `http://example.com.:80` -> `example.com`
    - `https://127.0.0.1:8443` -> `127.0.0.1:8443`
    - `https://[::1]:8443` -> `[::1]:8443`
    - `http://[::1]` -> `[::1]`

    """
    if (raw := self.raw_host) is None:
        return None
    # endswith() rather than raw[-1]: the raw host may be an empty string
    # (a netloc without a host under a scheme that does not require one),
    # and indexing it would raise IndexError.
    if raw.endswith("."):
        # Remove all trailing dots from the netloc as while
        # they are valid FQDNs in DNS, TLS validation fails.
        # See https://github.com/aio-libs/aiohttp/issues/3636.
        # To avoid string manipulation we only call rstrip if
        # the last character is a dot.
        raw = raw.rstrip(".")
    port = self.explicit_port
    if port is None or port == DEFAULT_PORTS.get(self._scheme):
        return f"[{raw}]" if ":" in raw else raw
    return f"[{raw}]:{port}" if ":" in raw else f"{raw}:{port}"
+
@cached_property
def port(self) -> Union[int, None]:
    """Port part of URL, with scheme-based fallback.

    None for relative URLs or URLs without explicit port and
    scheme without default port substitution.

    """
    explicit = self.explicit_port
    if explicit is None:
        return DEFAULT_PORTS.get(self._scheme)
    return explicit
+
@cached_property
def explicit_port(self) -> Union[int, None]:
    """Port part of URL, without scheme-based fallback.

    None for relative URLs or URLs without explicit port.

    """
    self._cache_netloc()
    cache = self._cache
    return cache["explicit_port"]
+
@cached_property
def raw_path(self) -> str:
    """Encoded path of URL.

    / for absolute URLs without path part.

    """
    path = self._path
    if not path and self._netloc:
        return "/"
    return path
+
@cached_property
def path(self) -> str:
    """Decoded path of URL.

    / for absolute URLs without path part.

    """
    raw = self._path
    if raw:
        return PATH_UNQUOTER(raw)
    if self._netloc:
        return "/"
    return ""
+
@cached_property
def path_safe(self) -> str:
    """Decoded path of URL.

    / for absolute URLs without path part.

    / (%2F) and % (%25) are not decoded

    """
    raw = self._path
    if not raw:
        return "/" if self._netloc else ""
    return PATH_SAFE_UNQUOTER(raw)
+
@cached_property
def _parsed_query(self) -> list[tuple[str, str]]:
    """Parse the query into (key, value) pairs, keeping blank values."""
    pairs = parse_qsl(self._query, keep_blank_values=True)
    return pairs
+
@cached_property
def query(self) -> "MultiDictProxy[str]":
    """A MultiDictProxy representing parsed query parameters in decoded
    representation.

    Empty value if URL has no query part.

    """
    parsed = MultiDict(self._parsed_query)
    return MultiDictProxy(parsed)
+
+ @cached_property
+ def raw_query_string(self) -> str:
+ """Encoded query part of URL.
+
+ Empty string if query is missing.
+
+ """
+ return self._query
+
@cached_property
def query_string(self) -> str:
    """Decoded query part of URL.

    Empty string if query is missing.

    """
    raw = self._query
    if not raw:
        return ""
    return QS_UNQUOTER(raw)
+
@cached_property
def path_qs(self) -> str:
    """Decoded path of URL with query."""
    decoded_query = self.query_string
    if decoded_query:
        return f"{self.path}?{decoded_query}"
    return self.path
+
@cached_property
def raw_path_qs(self) -> str:
    """Encoded path of URL with query."""
    # An empty path next to a netloc is rendered as "/".
    path = self._path if self._path or not self._netloc else "/"
    raw_query = self._query
    if raw_query:
        return f"{path}?{raw_query}"
    return path
+
+ @cached_property
+ def raw_fragment(self) -> str:
+ """Encoded fragment part of URL.
+
+ Empty string if fragment is missing.
+
+ """
+ return self._fragment
+
@cached_property
def fragment(self) -> str:
    """Decoded fragment part of URL.

    Empty string if fragment is missing.

    """
    raw = self._fragment
    if not raw:
        return ""
    return UNQUOTER(raw)
+
@cached_property
def raw_parts(self) -> tuple[str, ...]:
    """A tuple containing encoded *path* parts.

    ('/',) for absolute URLs if *path* is missing.

    """
    path = self._path
    if self._netloc:
        if not path:
            return ("/",)
        # The first character is dropped and reported as the "/" part.
        return ("/", *path[1:].split("/"))
    if path.startswith("/"):
        return ("/", *path[1:].split("/"))
    return tuple(path.split("/"))
+
@cached_property
def parts(self) -> tuple[str, ...]:
    """A tuple containing decoded *path* parts.

    ('/',) for absolute URLs if *path* is missing.

    """
    decoded = [UNQUOTER(raw_part) for raw_part in self.raw_parts]
    return tuple(decoded)
+
@cached_property
def parent(self) -> "URL":
    """A new URL with last part of path removed and cleaned up query and
    fragment.

    For an empty or "/" path the URL itself is returned, unless it has a
    query or fragment to strip.

    """
    path = self._path
    if path and path != "/":
        # Everything before the last "/" ("" when there is no slash).
        head = path.rpartition("/")[0]
        return from_parts(self._scheme, self._netloc, head, "", "")
    if self._fragment or self._query:
        return from_parts(self._scheme, self._netloc, path, "", "")
    return self
+
@cached_property
def raw_name(self) -> str:
    """The last part of raw_parts."""
    parts = self.raw_parts
    if self._netloc:
        # Skip the leading "/" part; what is left may be empty.
        tail = parts[1:]
        return tail[-1] if tail else ""
    return parts[-1]
+
@cached_property
def name(self) -> str:
    """The last part of parts."""
    encoded_name = self.raw_name
    return UNQUOTER(encoded_name)
+
@cached_property
def raw_suffix(self) -> str:
    """Encoded final suffix of the name, including the dot.

    Empty when the name has no dot, or when its only dot is the first or
    the last character.
    """
    name = self.raw_name
    dot = name.rfind(".")
    if 0 < dot < len(name) - 1:
        return name[dot:]
    return ""
+
@cached_property
def suffix(self) -> str:
    """Decoded form of ``raw_suffix``."""
    encoded_suffix = self.raw_suffix
    return UNQUOTER(encoded_suffix)
+
@cached_property
def raw_suffixes(self) -> tuple[str, ...]:
    """Encoded suffixes of the name, each with its leading dot.

    Empty when the name ends with a dot. Leading dots of the name are
    ignored.
    """
    name = self.raw_name
    if name.endswith("."):
        return ()
    _stem, *extensions = name.lstrip(".").split(".")
    return tuple(f".{extension}" for extension in extensions)
+
@cached_property
def suffixes(self) -> tuple[str, ...]:
    """Decoded form of every entry in ``raw_suffixes``."""
    decoded = [UNQUOTER(raw_suffix) for raw_suffix in self.raw_suffixes]
    return tuple(decoded)
+
def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":
    """
    add paths to self._path, accounting for absolute vs relative paths,
    keep existing, but do not create new, empty segments

    Segments are collected in reverse order and flipped at the end.
    The result has no query and no fragment. Raises ValueError when any
    of *paths* starts with a slash.
    """
    collected: list[str] = []
    needs_normalize = False
    for position, piece in enumerate(reversed(paths)):
        if piece and piece[0] == "/":
            raise ValueError(
                f"Appending path {piece!r} starting from slash is forbidden"
            )
        # Quote each new piece on its own: the existing path is already
        # quoted and must not be quoted a second time.
        if not encoded:
            piece = PATH_QUOTER(piece)
        if "." in piece:
            needs_normalize = True
        chunk = piece.split("/")
        chunk.reverse()
        # Only the last of `paths` (position 0 here, since we iterate in
        # reverse) keeps a trailing empty segment.
        if position != 0 and chunk[0] == "":
            chunk = chunk[1:]
        collected += chunk

    current = self._path
    if current:
        old_segments = current.split("/")
        # If the old path ends with a slash, the last segment is an empty
        # string and is dropped before the new segments are attached.
        if old_segments[-1] == "":
            old_segments = old_segments[:-1]
        collected.extend(reversed(old_segments))

    # If the netloc is present, inject a leading slash when adding a
    # path to an absolute URL where there was none before.
    netloc = self._netloc
    if netloc and collected and collected[-1] != "":
        collected.append("")

    collected.reverse()
    if not netloc or not needs_normalize:
        return from_parts(self._scheme, netloc, "/".join(collected), "", "")

    path = "/".join(normalize_path_segments(collected))
    # If normalizing the path segments removed the leading slash, add it back.
    if path and path[0] != "/":
        path = f"/{path}"
    return from_parts(self._scheme, netloc, path, "", "")
+
def with_scheme(self, scheme: str) -> "URL":
    """Return a new URL with scheme replaced.

    The scheme is lower-cased. Raises TypeError for a non-str scheme and
    ValueError when a host-requiring scheme is set on a URL without a
    netloc.
    """
    # N.B. doesn't cleanup query/fragment
    if not isinstance(scheme, str):
        raise TypeError("Invalid scheme type")
    new_scheme = scheme.lower()
    netloc = self._netloc
    if new_scheme in SCHEME_REQUIRES_HOST and not netloc:
        raise ValueError(
            "scheme replacement is not allowed for "
            f"relative URLs for the {new_scheme} scheme"
        )
    return from_parts(new_scheme, netloc, self._path, self._query, self._fragment)
+
def with_user(self, user: Union[str, None]) -> "URL":
    """Return a new URL with user replaced.

    Autoencode user if needed.

    Clear user/password if user is None.

    """
    # N.B. doesn't cleanup query/fragment
    if user is None:
        password = None
    elif not isinstance(user, str):
        raise TypeError("Invalid user type")
    else:
        user = QUOTER(user)
        password = self.raw_password
    if not self._netloc:
        raise ValueError("user replacement is not allowed for relative URLs")
    host = self.host_subcomponent or ""
    new_netloc = make_netloc(user, password, host, self.explicit_port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
+
def with_password(self, password: Union[str, None]) -> "URL":
    """Return a new URL with password replaced.

    Autoencode password if needed.

    Clear password if argument is None.

    """
    # N.B. doesn't cleanup query/fragment
    if password is not None:
        if not isinstance(password, str):
            raise TypeError("Invalid password type")
        password = QUOTER(password)
    if not self._netloc:
        raise ValueError("password replacement is not allowed for relative URLs")
    host = self.host_subcomponent or ""
    port = self.explicit_port
    new_netloc = make_netloc(self.raw_user, password, host, port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
+
def with_host(self, host: str) -> "URL":
    """Return a new URL with host replaced.

    Autoencode host if needed.

    Changing host for relative URLs is not allowed, use .join()
    instead.

    Raises TypeError for a non-str host and ValueError for a relative
    URL or an empty host.

    """
    # N.B. doesn't cleanup query/fragment
    if not isinstance(host, str):
        raise TypeError("Invalid host type")
    if not self._netloc:
        raise ValueError("host replacement is not allowed for relative URLs")
    if not host:
        raise ValueError("host removing is not allowed")
    # host is known to be non-empty here, so it is always encoded; the
    # former `if host else ""` fallback could never be taken.
    encoded_host = _encode_host(host, validate_host=True)
    port = self.explicit_port
    netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
    return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)
+
def with_port(self, port: Union[int, None]) -> "URL":
    """Return a new URL with port replaced.

    Clear port to default if None is passed.

    Raises TypeError for a bool or non-int port, ValueError for a port
    outside 0..65535 or for a URL without a netloc.

    """
    # N.B. doesn't cleanup query/fragment
    if port is not None:
        is_plain_int = isinstance(port, int) and not isinstance(port, bool)
        if not is_plain_int:
            raise TypeError(f"port should be int or None, got {type(port)}")
        if port < 0 or port > 65535:
            raise ValueError(f"port must be between 0 and 65535, got {port}")
    if not self._netloc:
        raise ValueError("port replacement is not allowed for relative URLs")
    host = self.host_subcomponent or ""
    new_netloc = make_netloc(self.raw_user, self.raw_password, host, port)
    return from_parts(
        self._scheme, new_netloc, self._path, self._query, self._fragment
    )
+
def with_path(
    self,
    path: str,
    *,
    encoded: bool = False,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a copy of this URL with the path swapped out.

    Unless *encoded* is true the path is quoted with ``PATH_QUOTER`` and,
    when the URL has a netloc and the path contains a dot, normalized.
    A non-empty path always gets a leading slash.  Query and fragment
    are dropped unless *keep_query* / *keep_fragment* are set.
    """
    netloc = self._netloc
    if not encoded:
        path = PATH_QUOTER(path)
        if netloc and "." in path:
            path = normalize_path(path)
    if path and not path.startswith("/"):
        path = f"/{path}"
    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(self._scheme, netloc, path, new_query, new_fragment)
+
@overload
def with_query(self, query: Query) -> "URL": ...

@overload
def with_query(self, **kwargs: QueryVariable) -> "URL": ...

def with_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a copy of this URL with the query string swapped out.

    The arguments are handed to ``get_str_query``: a Mapping
    (e.g. dict, multidict.MultiDict instances), a str, a sequence of
    (key, value) pairs, or arbitrary keyword arguments.

    Passing None clears the query.

    """
    # N.B. scheme, netloc, path and fragment are carried over untouched
    query_string = get_str_query(*args, **kwargs)
    if not query_string:
        query_string = ""
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, query_string, self._fragment
    )
+
@overload
def extend_query(self, query: Query) -> "URL": ...

@overload
def extend_query(self, **kwargs: QueryVariable) -> "URL": ...

def extend_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a copy of this URL with extra parameters appended to the query.

    Existing query parameters are never removed; if the arguments
    produce an empty query string, ``self`` is returned as is.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.extend_query(a=3, c=4)
    URL('http://example.com/?a=1&b=2&a=3&c=4')
    """
    addition = get_str_query(*args, **kwargs)
    if not addition:
        return self
    existing = self._query
    if not existing:
        combined = addition
    elif existing[-1] == "&":
        # the current query already ends with a separator
        combined = existing + addition
    else:
        # both halves are already encoded, so plain concatenation is enough
        combined = f"{existing}&{addition}"
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, combined, self._fragment
    )
+
@overload
def update_query(self, query: Query) -> "URL": ...

@overload
def update_query(self, **kwargs: QueryVariable) -> "URL": ...

def update_query(self, *args: Any, **kwargs: Any) -> "URL":
    """Return a copy of this URL with the query merged with new values.

    Parameters that already exist are overwritten by the new ones.
    ``None`` clears the query; an empty argument keeps it as is.

    Raises ``ValueError`` unless exactly one positional argument or only
    keyword arguments are given, and ``TypeError`` for unsupported types.

    Example:
    >>> url = URL('http://example.com/?a=1&b=2')
    >>> url.update_query(a=3, c=4)
    URL('http://example.com/?a=3&b=2&c=4')
    """
    if (kwargs and args) or (not kwargs and len(args) != 1):
        raise ValueError("Either kwargs or single query parameter must be present")
    in_query: Union[str, Mapping[str, QueryVariable], None]
    in_query = kwargs if kwargs else args[0]

    # The order of the checks below matters: falsy values short-circuit
    # first, and str/bytes must be handled before the generic Sequence.
    if in_query is None:
        new_query = ""
    elif not in_query:
        new_query = self._query
    elif isinstance(in_query, Mapping):
        from_mapping: MultiDict[QueryVariable] = MultiDict(self._parsed_query)
        from_mapping.update(in_query)
        new_query = get_str_query_from_sequence_iterable(from_mapping.items())
    elif isinstance(in_query, str):
        from_string: MultiDict[str] = MultiDict(self._parsed_query)
        from_string.update(parse_qsl(in_query, keep_blank_values=True))
        new_query = get_str_query_from_iterable(from_string.items())
    elif isinstance(in_query, (bytes, bytearray, memoryview)):
        raise TypeError(
            "Invalid query type: bytes, bytearray and memoryview are forbidden"
        )
    elif isinstance(in_query, Sequence):
        # A list of pairs is not expected to carry sequence values;
        # only mappings like builtin `dict`, which can't have the same
        # key pointing to multiple values, are allowed to use
        # `_query_seq_pairs`.
        from_pairs: MultiDict[SimpleQuery] = MultiDict(self._parsed_query)
        from_pairs.update(in_query)
        new_query = get_str_query_from_iterable(from_pairs.items())
    else:
        raise TypeError(
            "Invalid query type: only str, mapping or "
            "sequence of (key, value) pairs is allowed"
        )
    return from_parts_uncached(
        self._scheme, self._netloc, self._path, new_query, self._fragment
    )
+
def without_query_params(self, *query_params: str) -> "URL":
    """Return a URL whose query no longer contains the given keys.

    ``self`` is returned as is when none of the keys are present.
    """
    current = self.query
    doomed = set(query_params) & current.keys()
    if not doomed:
        return self
    survivors = tuple(
        (key, val) for key, val in current.items() if key not in doomed
    )
    return self.with_query(survivors)
+
def with_fragment(self, fragment: Union[str, None]) -> "URL":
    """Return a copy of this URL with the fragment swapped out.

    A ``str`` fragment is run through ``FRAGMENT_QUOTER``; ``None``
    clears the fragment.  ``self`` is returned when the resulting
    fragment equals the current one.

    Raises ``TypeError`` for any other argument type.

    """
    # N.B. the query is carried over untouched
    if fragment is None:
        quoted = ""
    elif isinstance(fragment, str):
        quoted = FRAGMENT_QUOTER(fragment)
    else:
        raise TypeError("Invalid fragment type")
    if quoted == self._fragment:
        return self
    return from_parts(self._scheme, self._netloc, self._path, self._query, quoted)
+
def with_name(
    self,
    name: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a copy of this URL with the last path segment swapped out.

    The name is quoted with ``PATH_QUOTER``.  Query and fragment are
    dropped unless *keep_query* / *keep_fragment* are set.

    Raises ``TypeError`` if *name* is not a ``str`` and ``ValueError``
    if it contains a slash or quotes to ``.`` or ``..``.

    """
    # N.B. this one DOES drop query/fragment by default
    if not isinstance(name, str):
        raise TypeError("Invalid name type")
    if "/" in name:
        raise ValueError("Slash in name is not allowed")
    name = PATH_QUOTER(name)
    if name == "." or name == "..":
        raise ValueError(". and .. values are forbidden")

    segments = list(self.raw_parts)
    netloc = self._netloc
    if netloc:
        if len(segments) == 1:
            segments.append(name)
        else:
            segments[-1] = name
        segments[0] = ""  # the join below re-creates the leading '/'
    else:
        segments[-1] = name
        if segments[0] == "/":
            segments[0] = ""  # the join below re-creates the leading '/'

    new_path = "/".join(segments)
    new_query = self._query if keep_query else ""
    new_fragment = self._fragment if keep_fragment else ""
    return from_parts(self._scheme, netloc, new_path, new_query, new_fragment)
+
def with_suffix(
    self,
    suffix: str,
    *,
    keep_query: bool = False,
    keep_fragment: bool = False,
) -> "URL":
    """Return a copy of this URL with the file extension of the name swapped.

    An empty *suffix* removes the current extension.  The work is
    delegated to ``with_name``, so query and fragment are dropped unless
    *keep_query* / *keep_fragment* are set.

    Raises ``TypeError`` if *suffix* is not a ``str`` and ``ValueError``
    if it is a lone dot, does not start with a dot, or the URL has an
    empty name.
    """
    if not isinstance(suffix, str):
        raise TypeError("Invalid suffix type")
    if suffix == "." or (suffix and not suffix.startswith(".")):
        raise ValueError(f"Invalid suffix {suffix!r}")
    current_name = self.raw_name
    if not current_name:
        raise ValueError(f"{self!r} has an empty name")
    current_suffix = self.raw_suffix
    if current_suffix:
        stem = current_name[: -len(current_suffix)]
    else:
        stem = current_name
    return self.with_name(
        stem + suffix, keep_query=keep_query, keep_fragment=keep_fragment
    )
+
def join(self, url: "URL") -> "URL":
    """Join URLs

    Construct a full ("absolute") URL by combining a "base URL"
    (self) with another URL (url).

    Informally, this uses components of the base URL, in
    particular the addressing scheme, the network location and
    (part of) the path, to provide missing components in the
    relative URL.

    *url* is returned unchanged when its scheme differs from the base
    scheme or when the scheme is not in ``USES_RELATIVE``.

    Raises ``TypeError`` if *url* is not exactly a ``URL`` instance.

    """
    # Exact type check: subclasses of URL are rejected as well.
    if type(url) is not URL:
        raise TypeError("url should be URL")

    # A URL without a scheme inherits the scheme of the base.
    scheme = url._scheme or self._scheme
    if scheme != self._scheme or scheme not in USES_RELATIVE:
        return url

    # scheme is in uses_authority as uses_authority is a superset of uses_relative
    if (join_netloc := url._netloc) and scheme in USES_AUTHORITY:
        # url brings its own netloc: only the scheme comes from the base.
        return from_parts(scheme, join_netloc, url._path, url._query, url._fragment)

    orig_path = self._path
    if join_path := url._path:
        if join_path[0] == "/":
            # A rooted path replaces the base path entirely.
            path = join_path
        elif not orig_path:
            # Empty base path: root the joined path.
            path = f"/{join_path}"
        elif orig_path[-1] == "/":
            # Base path ends with a slash: append directly.
            path = f"{orig_path}{join_path}"
        else:
            # Drop the last segment of the base path and append
            # join_path in its place.
            # parts[0] is / for absolute urls,
            # this join will add a double slash there
            path = "/".join([*self.parts[:-1], ""]) + join_path
            # which has to be removed
            if orig_path[0] == "/":
                path = path[1:]
        # Normalization is only attempted when the path contains a dot.
        path = normalize_path(path) if "." in path else path
    else:
        path = orig_path

    # Query and fragment come from url when it has a path or its own
    # query/fragment; otherwise the base values are kept.
    return from_parts(
        scheme,
        self._netloc,
        path,
        url._query if join_path or url._query else self._query,
        url._fragment if join_path or url._fragment else self._fragment,
    )
+
def joinpath(self, *other: str, encoded: bool = False) -> "URL":
    """Return a copy of this URL with the *other* segments added to its path.

    The segments and the *encoded* flag are forwarded to ``_make_child``.
    """
    child = self._make_child(other, encoded=encoded)
    return child
+
def human_repr(self) -> str:
    """Return the URL as a decoded, human readable string."""
    # Characters passed to human_quote differ per URL component.
    userinfo_unsafe = "#/:?@[]"
    query_unsafe = "#&+;="

    user = human_quote(self.user, userinfo_unsafe)
    password = human_quote(self.password, userinfo_unsafe)
    host = self.host
    if host and ":" in host:
        # a colon in the host gets the bracketed form
        host = f"[{host}]"
    path = human_quote(self.path, "#?")
    if TYPE_CHECKING:
        assert path is not None
    pairs = (
        f"{human_quote(key, query_unsafe)}={human_quote(val, query_unsafe)}"
        for key, val in self.query.items()
    )
    query_string = "&".join(pairs)
    fragment = human_quote(self.fragment, "")
    if TYPE_CHECKING:
        assert fragment is not None
    netloc = make_netloc(user, password, host, self.explicit_port)
    return unsplit_result(self._scheme, netloc, path, query_string, fragment)
+
+
# Default lru_cache size for _idna_decode() and _idna_encode();
# also the default for the idna_*_size arguments of cache_configure().
_DEFAULT_IDNA_SIZE = 256
# Default lru_cache size for _encode_host(); cache_configure() falls
# back to it when no host cache size is given.
_DEFAULT_ENCODE_SIZE = 512
+
+
@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_decode(raw: str) -> str:
    """Decode an ASCII host with ``idna``, falling back to the ``idna`` codec."""
    try:
        decoded = idna.decode(raw.encode("ascii"))
    except UnicodeError:  # e.g. '::1'
        decoded = raw.encode("ascii").decode("idna")
    return decoded
+
+
@lru_cache(_DEFAULT_IDNA_SIZE)
def _idna_encode(host: str) -> str:
    """Encode a host with ``idna`` (UTS46), falling back to the ``idna`` codec."""
    try:
        encoded = idna.encode(host, uts46=True).decode("ascii")
    except UnicodeError:
        encoded = host.encode("idna").decode("ascii")
    return encoded
+
+
@lru_cache(_DEFAULT_ENCODE_SIZE)
def _encode_host(host: str, validate_host: bool) -> str:
    """Encode host part of URL.

    IP addresses are returned in compressed form (IPv6 in brackets, with
    any ``%zone`` suffix kept).  ASCII hosts are lowercased and, when
    *validate_host* is true, checked against ``NOT_REG_NAME``.  All other
    hosts go through ``_idna_encode``.

    Raises ``ValueError`` when validation finds a forbidden character.
    """
    # If the host ends with a digit or contains a colon, it's likely
    # an IP address.
    if host and (host[-1].isdigit() or ":" in host):
        # Split off an optional zone identifier (the part after '%').
        raw_ip, sep, zone = host.partition("%")
        # The cheap pre-check above is a performance optimization: it
        # avoids parsing IP addresses as much as possible, because that
        # is orders of magnitude slower than almost any other operation
        # this library does. If parsing fails we fall through and treat
        # the host as a regular name.
        # NOTE(review): the earlier wording referred to a helper named
        # _ip_compressed_version, which is not visible in this part of
        # the file -- confirm whether it still exists.
        #
        # IP Addresses can look like:
        # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
        # - 127.0.0.1 (last character is a digit)
        # - 2001:db8::ff00:42:8329 (contains a colon)
        # - 2001:db8::ff00:42:8329%eth0 (contains a colon)
        # - [2001:db8::ff00:42:8329] (contains a colon -- brackets should
        # have been removed before it gets here)
        # Rare IP Address formats are not supported per:
        # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
        #
        # IP parsing is slow; this whole function is wrapped in an LRU cache
        try:
            ip = ip_address(raw_ip)
        except ValueError:
            # Not an IP address: continue with the host name handling below.
            pass
        else:
            # Re-attach the zone (if any); IPv6 is wrapped in brackets.
            host = ip.compressed
            if ip.version == 6:
                return f"[{host}%{zone}]" if sep else f"[{host}]"
            return f"{host}%{zone}" if sep else host

    # IDNA encoding is slow, skip it for ASCII-only strings
    if host.isascii():
        # Check for invalid characters explicitly; _idna_encode() does this
        # for non-ascii host names.
        host = host.lower()
        if validate_host and (invalid := NOT_REG_NAME.search(host)):
            value, pos, extra = invalid.group(), invalid.start(), ""
            if value == "@" or (value == ":" and "@" in host[pos:]):
                # this looks like an authority string
                extra = (
                    ", if the value includes a username or password, "
                    "use 'authority' instead of 'host'"
                )
            raise ValueError(
                f"Host {host!r} cannot contain {value!r} (at position {pos}){extra}"
            ) from None
        return host

    return _idna_encode(host)
+
+
@rewrite_module
def cache_clear() -> None:
    """Empty every LRU cache kept by this module."""
    for cached_func in (_idna_encode, _idna_decode, _encode_host):
        cached_func.cache_clear()
+
+
@rewrite_module
def cache_info() -> CacheInfo:
    """Return the statistics of every LRU cache kept by this module.

    The "ip_address", "host_validate" and "encode_host" entries all
    report the statistics of the ``_encode_host`` cache.
    """
    idna_encode_stats = _idna_encode.cache_info()
    idna_decode_stats = _idna_decode.cache_info()
    encode_host_stats = _encode_host.cache_info()
    return {
        "idna_encode": idna_encode_stats,
        "idna_decode": idna_decode_stats,
        "ip_address": encode_host_stats,
        "host_validate": encode_host_stats,
        "encode_host": encode_host_stats,
    }
+
+
@rewrite_module
def cache_configure(
    *,
    idna_encode_size: Union[int, None] = _DEFAULT_IDNA_SIZE,
    idna_decode_size: Union[int, None] = _DEFAULT_IDNA_SIZE,
    ip_address_size: Union[int, None, UndefinedType] = UNDEFINED,
    host_validate_size: Union[int, None, UndefinedType] = UNDEFINED,
    encode_host_size: Union[int, None, UndefinedType] = UNDEFINED,
) -> None:
    """Configure LRU cache sizes.

    Each size is passed to ``functools.lru_cache``, where ``None`` means
    an unbounded cache. The three caches are rebuilt, so previously
    cached entries are dropped.

    *ip_address_size* and *host_validate_size* are deprecated and emit a
    ``DeprecationWarning`` when given. They are folded into
    *encode_host_size*: ``None`` from any of the three wins, otherwise
    the largest integer given is used, and ``_DEFAULT_ENCODE_SIZE`` is
    used when none of them is given.
    """
    global _idna_decode, _idna_encode, _encode_host
    # ip_address_size and host_validate_size no longer have caches of
    # their own; they are kept for backwards compatibility and only
    # influence encode_host_size.
    if ip_address_size is not UNDEFINED or host_validate_size is not UNDEFINED:
        warnings.warn(
            "cache_configure() no longer accepts the "
            "ip_address_size or host_validate_size arguments, "
            "they are used to set the encode_host_size instead "
            "and will be removed in the future",
            DeprecationWarning,
            stacklevel=2,
        )

    if encode_host_size is not None:
        for size in (ip_address_size, host_validate_size):
            if size is None:
                # Unbounded beats every finite size. Stop right away so
                # that a later integer is never fed to max() together
                # with None (which would raise TypeError).
                encode_host_size = None
                break
            if size is UNDEFINED:
                continue
            if encode_host_size is UNDEFINED:
                encode_host_size = size
            else:
                if TYPE_CHECKING:
                    assert isinstance(size, int)
                    assert isinstance(encode_host_size, int)
                encode_host_size = max(size, encode_host_size)
        if encode_host_size is UNDEFINED:
            encode_host_size = _DEFAULT_ENCODE_SIZE

    if TYPE_CHECKING:
        assert not isinstance(encode_host_size, object)
    # Re-wrap the undecorated functions so the new sizes take effect.
    _encode_host = lru_cache(encode_host_size)(_encode_host.__wrapped__)
    _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__)
    _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__)
diff --git a/.venv/lib/python3.12/site-packages/yarl/py.typed b/.venv/lib/python3.12/site-packages/yarl/py.typed
new file mode 100644
index 00000000..dcf2c804
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/py.typed
@@ -0,0 +1 @@
+# Placeholder