aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/yarl/_parse.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/yarl/_parse.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/yarl/_parse.py')
-rw-r--r--.venv/lib/python3.12/site-packages/yarl/_parse.py189
1 files changed, 189 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/yarl/_parse.py b/.venv/lib/python3.12/site-packages/yarl/_parse.py
new file mode 100644
index 00000000..cc259ea8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/yarl/_parse.py
@@ -0,0 +1,189 @@
+"""URL parsing utilities."""
+
+import re
+import unicodedata
+from functools import lru_cache
+from typing import Union
+from urllib.parse import scheme_chars, uses_netloc
+
+from ._quoters import QUOTER
+
# Leading and trailing C0 control and space to be stripped per WHATWG spec.
# == "".join([chr(i) for i in range(0, 0x20 + 1)])
WHATWG_C0_CONTROL_OR_SPACE = (
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10"
    "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f "
)

# Unsafe bytes to be removed per WHATWG spec
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
# Frozen copy of urllib.parse.uses_netloc, for fast membership tests.
USES_AUTHORITY = frozenset(uses_netloc)

# (scheme, netloc, path, query, fragment)
SplitURLType = tuple[str, str, str, str, str]


def split_url(url: str) -> SplitURLType:
    """Split URL into parts.

    Returns a ``(scheme, netloc, path, query, fragment)`` tuple. The scheme
    is lower-cased; every other component is returned as found in *url*
    after leading C0-control/space characters have been stripped and all
    tab, CR and LF characters have been removed.

    Raises:
        ValueError: if the netloc has unbalanced or empty ``[...]``
            brackets, contains an invalid IPvFuture literal, contains a
            bracketed host without a colon, or is a non-ASCII netloc that
            fails the NFKC check performed by ``_check_netloc``.
    """
    # Adapted from urllib.parse.urlsplit
    # Only lstrip url as some applications rely on preserving trailing space.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE)
    for b in UNSAFE_URL_BYTES_TO_REMOVE:
        if b in url:
            url = url.replace(b, "")

    scheme = netloc = query = fragment = ""
    i = url.find(":")
    if i > 0 and url[0] in scheme_chars:
        # Everything before the first ":" is a scheme only if it consists
        # entirely of scheme characters.
        if all(c in scheme_chars for c in url[1:i]):
            scheme, url = url[:i].lower(), url[i + 1 :]
    has_hash = "#" in url
    has_question_mark = "?" in url
    if url[:2] == "//":
        delim = len(url)  # position of end of domain part of url, default is end
        # Only search for delimiters that are actually present.
        if has_hash and has_question_mark:
            delim_chars = "/?#"
        elif has_question_mark:
            delim_chars = "/?"
        elif has_hash:
            delim_chars = "/#"
        else:
            delim_chars = "/"
        for c in delim_chars:  # look for delimiters; the order is NOT important
            wdelim = url.find(c, 2)  # find first of this delim
            if 0 <= wdelim < delim:  # if found
                delim = wdelim  # use earliest delim position
        netloc = url[2:delim]
        url = url[delim:]
        has_left_bracket = "[" in netloc
        has_right_bracket = "]" in netloc
        if has_left_bracket != has_right_bracket:
            raise ValueError("Invalid IPv6 URL")
        if has_left_bracket:
            bracketed_host = netloc.partition("[")[2].partition("]")[0]
            # An empty bracketed host ("[]" or "]...[") used to hit
            # bracketed_host[0] and leak an IndexError; report it as an
            # invalid URL like every other malformed host.
            if not bracketed_host:
                raise ValueError("Invalid IPv6 URL")
            # Valid bracketed hosts are defined in
            # https://www.rfc-editor.org/rfc/rfc3986#page-49
            # https://url.spec.whatwg.org/
            if bracketed_host[0] == "v":
                if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host):
                    raise ValueError("IPvFuture address is invalid")
            elif ":" not in bracketed_host:
                raise ValueError("An IPv4 address cannot be in brackets")
    if has_hash:
        url, _, fragment = url.partition("#")
    if has_question_mark:
        url, _, query = url.partition("?")
    if netloc and not netloc.isascii():
        _check_netloc(netloc)
    return scheme, netloc, url, query, fragment
+
+
+def _check_netloc(netloc: str) -> None:
+ # Adapted from urllib.parse._checknetloc
+ # looking for characters like \u2100 that expand to 'a/c'
+ # IDNA uses NFKC equivalence, so normalize for this check
+
+ # ignore characters already included
+ # but not the surrounding text
+ n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "")
+ normalized_netloc = unicodedata.normalize("NFKC", n)
+ if n == normalized_netloc:
+ return
+ # Note that there are no unicode decompositions for the character '@' so
+ # its currently impossible to have test coverage for this branch, however if the
+ # one should be added in the future we want to make sure its still checked.
+ for c in "/?#@:": # pragma: no branch
+ if c in normalized_netloc:
+ raise ValueError(
+ f"netloc '{netloc}' contains invalid "
+ "characters under NFKC normalization"
+ )
+
+
@lru_cache  # match the same size as urlsplit
def split_netloc(
    netloc: str,
) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]:
    """Split netloc into username, password, host and port.

    Returns a ``(username, password, hostname, port)`` tuple. An empty
    username or hostname is reported as ``None``; the password is ``None``
    only when the userinfo has no ``:`` separator; the port is ``None``
    when no port text is present.

    Raises:
        ValueError: if the port is not an integer or lies outside 0-65535.
    """
    username: Union[str, None] = None
    password: Union[str, None] = None
    hostinfo = netloc
    if "@" in netloc:
        # The last "@" separates userinfo from the host part.
        userinfo, _, hostinfo = netloc.rpartition("@")
        username, colon, secret = userinfo.partition(":")
        password = secret if colon else None

    if "[" in hostinfo:
        # Bracketed host: the hostname is the text between "[" and "]",
        # and the port follows the first ":" after the closing bracket.
        inside = hostinfo.partition("[")[2]
        hostname, _, trailer = inside.partition("]")
        port_str = trailer.partition(":")[2]
    else:
        hostname, _, port_str = hostinfo.partition(":")

    port: Union[int, None] = None
    if port_str:
        try:
            port = int(port_str)
        except ValueError:
            raise ValueError("Invalid URL: port can't be converted to integer")
        if port < 0 or port > 65535:
            raise ValueError("Port out of range 0-65535")
    return username or None, password, hostname or None, port
+
+
def unsplit_result(
    scheme: str, netloc: str, url: str, query: str, fragment: str
) -> str:
    """Unsplit a URL without any normalization.

    Joins the five components back into one string. The ``//netloc``
    authority is written when *netloc* is non-empty, when *scheme* is in
    ``USES_AUTHORITY``, or when the path itself starts with ``//``. The
    query and fragment are appended only when non-empty.
    """
    if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//":
        # A non-empty path placed after an authority must be separated from
        # it by "/".
        separator = "/" if url and url[:1] != "/" else ""
        # Without a scheme the result is a network-path reference
        # ("//netloc/path"). The previous fallback produced ":path" here,
        # dropping the netloc and emitting a stray colon.
        prefix = f"{scheme}:" if scheme else ""
        url = f"{prefix}//{netloc}{separator}{url}"
    elif scheme:
        url = f"{scheme}:{url}"
    if query:
        url = f"{url}?{query}"
    return f"{url}#{fragment}" if fragment else url
+
+
@lru_cache  # match the same size as urlsplit
def make_netloc(
    user: Union[str, None],
    password: Union[str, None],
    host: Union[str, None],
    port: Union[int, None],
    encode: bool = False,
) -> str:
    """Make netloc from parts.

    The result has the form ``user:password@host:port`` with absent pieces
    omitted. An empty string is returned when *host* is ``None``.

    The user and password are passed through ``QUOTER`` if encode is True.

    The host is inserted verbatim, so it must already be encoded
    (with _encode_host, per the original contract).
    """
    if host is None:
        return ""
    hostport = host if port is None else f"{host}:{port}"
    if user is None and password is None:
        return hostport

    if password is None:
        # User only; an empty user adds nothing to the netloc.
        userinfo = QUOTER(user) if (user and encode) else user
    else:
        # A password forces the ":" separator even when the user is missing.
        name = user if user else ""
        if name and encode:
            name = QUOTER(name)
        secret = QUOTER(password) if encode else password
        userinfo = f"{name}:{secret}"
    return f"{userinfo}@{hostport}" if userinfo else hostport