diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/future/backports/urllib/parse.py')
| -rw-r--r-- | .venv/lib/python3.12/site-packages/future/backports/urllib/parse.py | 991 | 
1 files changed, 991 insertions, 0 deletions
| diff --git a/.venv/lib/python3.12/site-packages/future/backports/urllib/parse.py b/.venv/lib/python3.12/site-packages/future/backports/urllib/parse.py new file mode 100644 index 00000000..04e52d49 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/future/backports/urllib/parse.py @@ -0,0 +1,991 @@ +""" +Ported using Python-Future from the Python 3.3 standard library. + +Parse (absolute and relative) URLs. + +urlparse module is based upon the following RFC specifications. + +RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding +and L. Masinter, January 2005. + +RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter +and L.Masinter, December 1999. + +RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. +Berners-Lee, R. Fielding, and L. Masinter, August 1998. + +RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. + +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June +1995. + +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. +McCahill, December 1994 + +RFC 3986 is considered the current standard and any future changes to +urlparse module should conform with it. The urlparse module is +currently not entirely compliant with this RFC due to defacto +scenarios for parsing, and for backward compatibility purposes, some +parsing quirks from older RFCs are retained. The testcases in +test_urlparse.py provides a good indicator of parsing behavior. +""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import bytes, chr, dict, int, range, str +from future.utils import raise_with_traceback + +import re +import sys +import collections + +__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", + "parse_qsl", "quote", "quote_plus", "quote_from_bytes", + "unquote", "unquote_plus", "unquote_to_bytes"] + +# A classification of schemes ('' means apply by default) +uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', '', 'sftp', + 'svn', 'svn+ssh'] +uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] +uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', + 'mms', '', 'sftp', 'tel'] + +# These are not actually used anymore, but should stay for backwards +# compatibility. (They are undocumented, but have a public-looking name.) +non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] +uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] +uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero', ''] + +# Characters valid in scheme names +scheme_chars = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789' + '+-.') + +# XXX: Consider replacing with functools.lru_cache +MAX_CACHE_SIZE = 20 +_parse_cache = {} + +def clear_cache(): + """Clear the parse cache and the quoters cache.""" + _parse_cache.clear() + _safe_quoters.clear() + + +# Helpers for bytes handling +# For 3.2, we deliberately require applications that +# handle improperly quoted URLs to do their own +# decoding and encoding. If valid use cases are +# presented, we may relax this by using latin-1 +# decoding internally for 3.3 +_implicit_encoding = 'ascii' +_implicit_errors = 'strict' + +def _noop(obj): + return obj + +def _encode_result(obj, encoding=_implicit_encoding, + errors=_implicit_errors): + return obj.encode(encoding, errors) + +def _decode_args(args, encoding=_implicit_encoding, + errors=_implicit_errors): + return tuple(x.decode(encoding, errors) if x else '' for x in args) + +def _coerce_args(*args): + # Invokes decode if necessary to create str args + # and returns the coerced inputs along with + # an appropriate result coercion function + # - noop for str inputs + # - encoding function otherwise + str_input = isinstance(args[0], str) + for arg in args[1:]: + # We special-case the empty string to support the + # "scheme=''" default argument to some functions + if arg and isinstance(arg, str) != str_input: + raise TypeError("Cannot mix str and non-str arguments") + if str_input: + return args + (_noop,) + return _decode_args(args) + (_encode_result,) + +# Result objects are more helpful than simple tuples +class _ResultMixinStr(object): + """Standard approach to encoding parsed results from str to bytes""" + __slots__ = () + + def encode(self, encoding='ascii', errors='strict'): + return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + + +class _ResultMixinBytes(object): + """Standard approach to decoding parsed results from bytes to str""" + __slots__ = () + + def decode(self, encoding='ascii', errors='strict'): + return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + + +class _NetlocResultMixinBase(object): + """Shared methods for the parsed result objects containing a netloc element""" + __slots__ = () + + @property + def username(self): + return self._userinfo[0] + + @property + def password(self): + return self._userinfo[1] + + @property + def hostname(self): + hostname = self._hostinfo[0] + if not hostname: + hostname = None + elif hostname is not None: + hostname = hostname.lower() + return hostname + + @property + def port(self): + port = self._hostinfo[1] + if port is not None: + port = int(port, 10) + # Return None on an illegal port + if not ( 0 <= port <= 65535): + return None + return port + + +class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition('@') + if have_info: + username, have_password, password = userinfo.partition(':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: + hostname, _, port = bracketed.partition(']') + _, have_port, port = port.partition(':') + else: + hostname, have_port, port = hostinfo.partition(':') + if not have_port: + port = None + return hostname, port + + +class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition(b'@') + if have_info: + username, have_password, password = userinfo.partition(b':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition(b'@') + _, have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: + hostname, _, port = bracketed.partition(b']') + _, have_port, port = port.partition(b':') + else: + hostname, have_port, port = hostinfo.partition(b':') + if not have_port: + port = None + return hostname, port + + +from collections import namedtuple + +_DefragResultBase = namedtuple('DefragResult', 'url fragment') +_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') +_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') + +# For backwards compatibility, alias _NetlocResultMixinStr +# ResultBase is no longer part of the documented API, but it is +# retained since deprecating it isn't worth the hassle +ResultBase = _NetlocResultMixinStr + +# Structured result objects for string data +class DefragResult(_DefragResultBase, _ResultMixinStr): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + '#' + self.fragment + else: + return self.url + +class SplitResult(_SplitResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResult(_ParseResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Structured result objects for bytes data +class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + b'#' + self.fragment + else: + return self.url + +class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Set up the encode/decode result pairs +def _fix_result_transcoding(): + _result_pairs = ( + (DefragResult, DefragResultBytes), + (SplitResult, SplitResultBytes), + (ParseResult, ParseResultBytes), + ) + for _decoded, _encoded in _result_pairs: + _decoded._encoded_counterpart = _encoded + _encoded._decoded_counterpart = _decoded + +_fix_result_transcoding() +del _fix_result_transcoding + +def urlparse(url, scheme='', allow_fragments=True): + """Parse a URL into 6 components: + <scheme>://<netloc>/<path>;<params>?<query>#<fragment> + Return a 6-tuple: (scheme, netloc, path, params, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + splitresult = urlsplit(url, scheme, allow_fragments) + scheme, netloc, url, query, fragment = splitresult + if scheme in uses_params and ';' in url: + url, params = _splitparams(url) + else: + params = '' + result = ParseResult(scheme, netloc, url, params, query, fragment) + return _coerce_result(result) + +def _splitparams(url): + if '/' in url: + i = url.find(';', url.rfind('/')) + if i < 0: + return url, '' + else: + i = url.find(';') + return url[:i], url[i+1:] + +def _splitnetloc(url, start=0): + delim = len(url) # position of end of domain part of url, default is end + for c in '/?#': # look for delimiters; the order is NOT important + wdelim = url.find(c, start) # find first of this delim + if wdelim >= 0: # if found + delim = min(delim, wdelim) # use earliest delim position + return url[start:delim], url[delim:] # return (domain, rest) + +def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + <scheme>://<netloc>/<path>?<query>#<fragment> + Return a 5-tuple: (scheme, netloc, path, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + allow_fragments = bool(allow_fragments) + key = url, scheme, allow_fragments, type(url), type(scheme) + cached = _parse_cache.get(key, None) + if cached: + return _coerce_result(cached) + if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth + clear_cache() + netloc = query = fragment = '' + i = url.find(':') + if i > 0: + if url[:i] == 'http': # optimize the common case + scheme = url[:i].lower() + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + for c in url[:i]: + if c not in scheme_chars: + break + else: + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + +def urlunparse(components): + """Put a parsed URL back together again. This may result in a + slightly different, but equivalent URL, if the URL that was parsed + originally had redundant delimiters, e.g. a ? with an empty query + (the draft states that these are equivalent).""" + scheme, netloc, url, params, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if params: + url = "%s;%s" % (url, params) + return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) + +def urlunsplit(components): + """Combine the elements of a tuple as returned by urlsplit() into a + complete URL as a string. The data argument can be any five-item iterable. + This may result in a slightly different, but equivalent URL, if the URL that + was parsed originally had unnecessary delimiters (for example, a ? with an + empty query; the RFC states that these are equivalent).""" + scheme, netloc, url, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if url and url[:1] != '/': url = '/' + url + url = '//' + (netloc or '') + url + if scheme: + url = scheme + ':' + url + if query: + url = url + '?' + query + if fragment: + url = url + '#' + fragment + return _coerce_result(url) + +def urljoin(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter.""" + if not base: + return url + if not url: + return base + base, url, _coerce_result = _coerce_args(base, url) + bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ + urlparse(base, '', allow_fragments) + scheme, netloc, path, params, query, fragment = \ + urlparse(url, bscheme, allow_fragments) + if scheme != bscheme or scheme not in uses_relative: + return _coerce_result(url) + if scheme in uses_netloc: + if netloc: + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + netloc = bnetloc + if path[:1] == '/': + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + if not path and not params: + path = bpath + params = bparams + if not query: + query = bquery + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + segments = bpath.split('/')[:-1] + path.split('/') + # XXX The stuff below is bogus in various ways... + if segments[-1] == '.': + segments[-1] = '' + while '.' in segments: + segments.remove('.') + while 1: + i = 1 + n = len(segments) - 1 + while i < n: + if (segments[i] == '..' + and segments[i-1] not in ('', '..')): + del segments[i-1:i+1] + break + i = i+1 + else: + break + if segments == ['', '..']: + segments[-1] = '' + elif len(segments) >= 2 and segments[-1] == '..': + segments[-2:] = [''] + return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), + params, query, fragment))) + +def urldefrag(url): + """Removes any existing fragment from URL. + + Returns a tuple of the defragmented URL and the fragment. If + the URL contained no fragments, the second element is the + empty string. + """ + url, _coerce_result = _coerce_args(url) + if '#' in url: + s, n, p, a, q, frag = urlparse(url) + defrag = urlunparse((s, n, p, a, q, '')) + else: + frag = '' + defrag = url + return _coerce_result(DefragResult(defrag, frag)) + +_hexdig = '0123456789ABCDEFabcdef' +_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)])) + for a in _hexdig for b in _hexdig) + +def unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? + string.split + return bytes(b'') + if isinstance(string, str): + string = string.encode('utf-8') + ### For Python-Future: + # It is already a byte-string object, but force it to be newbytes here on + # Py2: + string = bytes(string) + ### + bits = string.split(b'%') + if len(bits) == 1: + return string + res = [bits[0]] + append = res.append + for item in bits[1:]: + try: + append(_hextobyte[item[:2]]) + append(item[2:]) + except KeyError: + append(b'%') + append(item) + return bytes(b'').join(res) + +_asciire = re.compile('([\x00-\x7f]+)') + +def unquote(string, encoding='utf-8', errors='replace'): + """Replace %xx escapes by their single-character equivalent. The optional + encoding and errors parameters specify how to decode percent-encoded + sequences into Unicode characters, as accepted by the bytes.decode() + method. + By default, percent-encoded sequences are decoded with UTF-8, and invalid + sequences are replaced by a placeholder character. + + unquote('abc%20def') -> 'abc def'. + """ + if '%' not in string: + string.split + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'replace' + bits = _asciire.split(string) + res = [bits[0]] + append = res.append + for i in range(1, len(bits), 2): + append(unquote_to_bytes(bits[i]).decode(encoding, errors)) + append(bits[i + 1]) + return ''.join(res) + +def parse_qs(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + """Parse a query given as a string argument. + + Arguments: + + qs: percent-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + percent-encoded queries should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. + If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + + encoding and errors: specify how to decode percent-encoded sequences + into Unicode characters, as accepted by the bytes.decode() method. + """ + parsed_result = {} + pairs = parse_qsl(qs, keep_blank_values, strict_parsing, + encoding=encoding, errors=errors) + for name, value in pairs: + if name in parsed_result: + parsed_result[name].append(value) + else: + parsed_result[name] = [value] + return parsed_result + +def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + """Parse a query given as a string argument. + + Arguments: + + qs: percent-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + percent-encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + encoding and errors: specify how to decode percent-encoded sequences + into Unicode characters, as accepted by the bytes.decode() method. + + Returns a list, as G-d intended. + """ + qs, _coerce_result = _coerce_args(qs) + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = unquote(name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = unquote(value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + +def unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return unquote(string, encoding, errors) + +_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + b'abcdefghijklmnopqrstuvwxyz' + b'0123456789' + b'_.-')) +_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) +_safe_quoters = {} + +class Quoter(collections.defaultdict): + """A mapping from bytes (in range(0,256)) to strings. + + String values are percent-encoded byte values, unless the key < 128, and + in the "safe" set (either the specified safe set, or default set). + """ + # Keeps a cache internally, using defaultdict, for efficiency (lookups + # of cached keys don't call Python code at all). + def __init__(self, safe): + """safe: bytes object.""" + self.safe = _ALWAYS_SAFE.union(bytes(safe)) + + def __repr__(self): + # Without this, will just display as a defaultdict + return "<Quoter %r>" % dict(self) + + def __missing__(self, b): + # Handle a cache miss. Store quoted string in cache and return. + res = chr(b) if b in self.safe else '%{0:02X}'.format(b) + self[b] = res + return res + +def quote(string, safe='/', encoding=None, errors=None): + """quote('abc def') -> 'abc%20def' + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + + string and safe may be either str or bytes objects. encoding must + not be specified if string is a str. + + The optional encoding and errors parameters specify how to deal with + non-ASCII characters, as accepted by the str.encode method. + By default, encoding='utf-8' (characters are encoded with UTF-8), and + errors='strict' (unsupported characters raise a UnicodeEncodeError). + """ + if isinstance(string, str): + if not string: + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'strict' + string = string.encode(encoding, errors) + else: + if encoding is not None: + raise TypeError("quote() doesn't support 'encoding' for bytes") + if errors is not None: + raise TypeError("quote() doesn't support 'errors' for bytes") + return quote_from_bytes(string, safe) + +def quote_plus(string, safe='', encoding=None, errors=None): + """Like quote(), but also replace ' ' with '+', as required for quoting + HTML form values. Plus signs in the original string are escaped unless + they are included in safe. It also does not have safe default to '/'. + """ + # Check if ' ' in string, where string may either be a str or bytes. If + # there are no spaces, the regular quote will produce the right answer. + if ((isinstance(string, str) and ' ' not in string) or + (isinstance(string, bytes) and b' ' not in string)): + return quote(string, safe, encoding, errors) + if isinstance(safe, str): + space = str(' ') + else: + space = bytes(b' ') + string = quote(string, safe + space, encoding, errors) + return string.replace(' ', '+') + +def quote_from_bytes(bs, safe='/'): + """Like quote(), but accepts a bytes object rather than a str, and does + not perform string-to-bytes encoding. It always returns an ASCII string. + quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' + """ + if not isinstance(bs, (bytes, bytearray)): + raise TypeError("quote_from_bytes() expected bytes") + if not bs: + return str('') + ### For Python-Future: + bs = bytes(bs) + ### + if isinstance(safe, str): + # Normalize 'safe' by converting to bytes and removing non-ASCII chars + safe = str(safe).encode('ascii', 'ignore') + else: + ### For Python-Future: + safe = bytes(safe) + ### + safe = bytes([c for c in safe if c < 128]) + if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): + return bs.decode() + try: + quoter = _safe_quoters[safe] + except KeyError: + _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ + return str('').join([quoter(char) for char in bs]) + +def urlencode(query, doseq=False, safe='', encoding=None, errors=None): + """Encode a sequence of two-element tuples or dictionary into a URL query string. + + If any values in the query arg are sequences and doseq is true, each + sequence element is converted to a separate parameter. + + If the query arg is a sequence of two-element tuples, the order of the + parameters in the output will match the order of parameters in the + input. + + The query arg may be either a string or a bytes type. When query arg is a + string, the safe, encoding and error parameters are sent the quote_plus for + encoding. + """ + + if hasattr(query, "items"): + query = query.items() + else: + # It's a bother at times that strings and string-like objects are + # sequences. + try: + # non-sequence items should not work with len() + # non-empty strings will fail this + if len(query) and not isinstance(query[0], tuple): + raise TypeError + # Zero-length sequences of all types will get here and succeed, + # but that's a minor nit. Since the original implementation + # allowed empty dicts that type of behavior probably should be + # preserved for consistency + except TypeError: + ty, va, tb = sys.exc_info() + raise_with_traceback(TypeError("not a valid non-string sequence " + "or mapping object"), tb) + + l = [] + if not doseq: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) + + if isinstance(v, bytes): + v = quote_plus(v, safe) + else: + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) + + if isinstance(v, bytes): + v = quote_plus(v, safe) + l.append(k + '=' + v) + elif isinstance(v, str): + v = quote_plus(v, safe, encoding, errors) + l.append(k + '=' + v) + else: + try: + # Is this a sufficient test for sequence-ness? + x = len(v) + except TypeError: + # not a sequence + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + # loop over the sequence + for elt in v: + if isinstance(elt, bytes): + elt = quote_plus(elt, safe) + else: + elt = quote_plus(str(elt), safe, encoding, errors) + l.append(k + '=' + elt) + return str('&').join(l) + +# Utilities to parse URLs (most of these return None for missing parts): +# unwrap('<URL:type://host/path>') --> 'type://host/path' +# splittype('type:opaquestring') --> 'type', 'opaquestring' +# splithost('//host[:port]/path') --> 'host[:port]', '/path' +# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' +# splitpasswd('user:passwd') -> 'user', 'passwd' +# splitport('host:port') --> 'host', 'port' +# splitquery('/path?query') --> '/path', 'query' +# splittag('/path#tag') --> '/path', 'tag' +# splitattr('/path;attr1=value1;attr2=value2;...') -> +# '/path', ['attr1=value1', 'attr2=value2', ...] +# splitvalue('attr=value') --> 'attr', 'value' +# urllib.parse.unquote('abc%20def') -> 'abc def' +# quote('abc def') -> 'abc%20def') + +def to_bytes(url): + """to_bytes(u"URL") --> 'URL'.""" + # Most URL schemes require ASCII. If that changes, the conversion + # can be relaxed. + # XXX get rid of to_bytes() + if isinstance(url, str): + try: + url = url.encode("ASCII").decode() + except UnicodeError: + raise UnicodeError("URL " + repr(url) + + " contains non-ASCII characters") + return url + +def unwrap(url): + """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" + url = str(url).strip() + if url[:1] == '<' and url[-1:] == '>': + url = url[1:-1].strip() + if url[:4] == 'URL:': url = url[4:].strip() + return url + +_typeprog = None +def splittype(url): + """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" + global _typeprog + if _typeprog is None: + import re + _typeprog = re.compile('^([^/:]+):') + + match = _typeprog.match(url) + if match: + scheme = match.group(1) + return scheme.lower(), url[len(scheme) + 1:] + return None, url + +_hostprog = None +def splithost(url): + """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" + global _hostprog + if _hostprog is None: + import re + _hostprog = re.compile('^//([^/?]*)(.*)$') + + match = _hostprog.match(url) + if match: + host_port = match.group(1) + path = match.group(2) + if path and not path.startswith('/'): + path = '/' + path + return host_port, path + return None, url + +_userprog = None +def splituser(host): + """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" + global _userprog + if _userprog is None: + import re + _userprog = re.compile('^(.*)@(.*)$') + + match = _userprog.match(host) + if match: return match.group(1, 2) + return None, host + +_passwdprog = None +def splitpasswd(user): + """splitpasswd('user:passwd') -> 'user', 'passwd'.""" + global _passwdprog + if _passwdprog is None: + import re + _passwdprog = re.compile('^([^:]*):(.*)$',re.S) + + match = _passwdprog.match(user) + if match: return match.group(1, 2) + return user, None + +# splittag('/path#tag') --> '/path', 'tag' +_portprog = None +def splitport(host): + """splitport('host:port') --> 'host', 'port'.""" + global _portprog + if _portprog is None: + import re + _portprog = re.compile('^(.*):([0-9]+)$') + + match = _portprog.match(host) + if match: return match.group(1, 2) + return host, None + +_nportprog = None +def splitnport(host, defport=-1): + """Split host and port, returning numeric port. + Return given default port if no ':' found; defaults to -1. + Return numerical port if a valid number are found after ':'. + Return None if ':' but not a valid number.""" + global _nportprog + if _nportprog is None: + import re + _nportprog = re.compile('^(.*):(.*)$') + + match = _nportprog.match(host) + if match: + host, port = match.group(1, 2) + try: + if not port: raise ValueError("no digits") + nport = int(port) + except ValueError: + nport = None + return host, nport + return host, defport + +_queryprog = None +def splitquery(url): + """splitquery('/path?query') --> '/path', 'query'.""" + global _queryprog + if _queryprog is None: + import re + _queryprog = re.compile('^(.*)\?([^?]*)$') + + match = _queryprog.match(url) + if match: return match.group(1, 2) + return url, None + +_tagprog = None +def splittag(url): + """splittag('/path#tag') --> '/path', 'tag'.""" + global _tagprog + if _tagprog is None: + import re + _tagprog = re.compile('^(.*)#([^#]*)$') + + match = _tagprog.match(url) + if match: return match.group(1, 2) + return url, None + +def splitattr(url): + """splitattr('/path;attr1=value1;attr2=value2;...') -> + '/path', ['attr1=value1', 'attr2=value2', ...].""" + words = url.split(';') + return words[0], words[1:] + +_valueprog = None +def splitvalue(attr): + """splitvalue('attr=value') --> 'attr', 'value'.""" + global _valueprog + if _valueprog is None: + import re + _valueprog = re.compile('^([^=]*)=(.*)$') + + match = _valueprog.match(attr) + if match: return match.group(1, 2) + return attr, None | 
