about | summary | refs | log | tree | commit | diff
path: root/.venv/lib/python3.12/site-packages/future/utils/surrogateescape.py
diff options
context:
space:
mode:
author: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/future/utils/surrogateescape.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/future/utils/surrogateescape.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/future/utils/surrogateescape.py | 198
1 files changed, 198 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/future/utils/surrogateescape.py b/.venv/lib/python3.12/site-packages/future/utils/surrogateescape.py
new file mode 100644
index 00000000..0dcc9fa6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/future/utils/surrogateescape.py
@@ -0,0 +1,198 @@
+"""
+This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
+handler of Python 3.
+
+Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
+"""
+
+# This code is released under the Python license and the BSD 2-clause license
+
+import codecs
+import sys
+
+from future import utils
+
+
# Name of the codec error handler. It is passed to encode()/decode() by
# encodefilename()/decodefilename() and is the name under which
# register_surrogateescape() registers the pure-Python handler.
FS_ERRORS = 'surrogateescape'

# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'
+
def u(text):
    """
    Return ``text`` as a text (unicode) string on Python 2 and Python 3.

    On Python 3 the argument is returned unchanged.  On Python 2 it is
    decoded with the ``unicode_escape`` codec, so escape sequences spelled
    out in the literal are turned into the characters they denote.
    """
    if not utils.PY3:
        return text.decode('unicode_escape')
    return text
+
def b(data):
    """
    Return ``data`` as a byte string on Python 2 and Python 3.

    On Python 3 the argument is encoded with ``latin1``; on Python 2 it is
    returned unchanged.
    """
    return data.encode('latin1') if utils.PY3 else data
+
# Version-specific helpers:
#   _unichr(code)   -> one-character text string for the given ordinal
#   bytes_chr(code) -> one-byte byte string for the given ordinal
if not utils.PY3:
    _unichr = unichr
    bytes_chr = chr
else:
    _unichr = chr

    def bytes_chr(code):
        return bytes((code,))
+
def surrogateescape_handler(exc):
    """
    Codec error handler: pure-Python version of the PEP 383
    "surrogateescape" handler of Python 3.

    When decoding, undecodable bytes are replaced by U+DCxx characters;
    when encoding, those characters are translated back.

    ``exc`` is the UnicodeDecodeError or UnicodeEncodeError handed over by
    the codec machinery.  The return value is the tuple
    ``(replacement, exc.end)``.  ``exc`` itself is re-raised when it is of
    any other type or when the offending slice cannot be translated.
    """
    offending = exc.object[exc.start:exc.end]

    if isinstance(exc, UnicodeDecodeError):
        # The offending slice is a byte string in this case.
        translate = replace_surrogate_decode
    elif isinstance(exc, UnicodeEncodeError):
        # In the case of u'\udcc3'.encode('ascii',
        # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
        # exception anyway after this function is called, even though the
        # handler seems to do what it should.  It appears that the strict
        # encoder is called to encode the unicode string returned here ...
        translate = replace_surrogate_encode
    else:
        raise exc

    try:
        replacement = translate(offending)
    except NotASurrogateError:
        raise exc
    return (replacement, exc.end)
+
+
class NotASurrogateError(Exception):
    """
    Raised by replace_surrogate_encode() / replace_surrogate_decode() when
    the input contains a value they cannot translate.
    """
+
+
def replace_surrogate_encode(mystring):
    """
    Translate escaped surrogate characters back to the code points they
    stand for.

    Each character U+DCxx of ``mystring`` is mapped to the character with
    ordinal 0xxx (i.e. ``code - 0xDC00``).

    Returns a (unicode) string, not the more logical bytes, because the
    codecs register_error functionality expects this.

    Raises NotASurrogateError if any character lies outside U+DC00-U+DCFF.
    This includes the high surrogates U+D800-U+DBFF: subtracting 0xDC00 from
    them yields a negative ordinal, which must not reach ``_unichr`` (it
    would fail with ValueError instead of letting the caller re-raise the
    original codec error).
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # Only U+DC00-U+DCFF can be mapped back.  (CPython itself accepts
        # only U+DC80-U+DCFF; U+DC00-U+DC7F is still translated here to
        # stay compatible with what this function has always accepted.)
        if not 0xDC00 <= code <= 0xDCFF:
            # Not an escaped surrogate. Fail with the original exception.
            raise NotASurrogateError
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)
+
+
def replace_surrogate_decode(mybytes):
    """
    Translate a run of bytes into a (unicode) string.

    Values up to 0x7F become the character with the same ordinal; values
    0x80-0xFF become the character U+DC00 + value.  Anything larger raises
    NotASurrogateError.
    """
    pieces = []
    for item in mybytes:
        # Iterating newbytes yields ints; iterating a native str on Py2
        # yields one-character strings.
        code = item if isinstance(item, int) else ord(item)
        if code <= 0x7F:
            pieces.append(_unichr(code))
        elif code <= 0xFF:
            pieces.append(_unichr(0xDC00 + code))
        else:
            raise NotASurrogateError
    return str().join(pieces)
+
+
def encodefilename(fn):
    """
    Encode the text filename ``fn`` to bytes according to FS_ENCODING.

    For 'ascii' and 'utf-8' the encoding is done by hand, character by
    character, translating U+DC80-U+DCFF back to the bytes 0x80-0xFF:

    * the ASCII encoder of Python 2 expects the error handler to return a
      Unicode string encodable to ASCII, whereas the surrogateescape handler
      has to produce bytes in the 0x80-0xFF range;
    * the UTF-8 encoder of Python 2 encodes surrogates itself, so
      U+DC80-U+DCFF never reaches the error handler.

    Any other encoding is delegated to ``fn.encode(FS_ENCODING, FS_ERRORS)``.

    Raises UnicodeEncodeError for a non-ASCII character that is not an
    escaped byte ('ascii'), or for any other surrogate ('utf-8').
    """
    if FS_ENCODING not in ('ascii', 'utf-8'):
        return fn.encode(FS_ENCODING, FS_ERRORS)

    ascii_only = FS_ENCODING == 'ascii'
    chunks = []
    for pos, char in enumerate(fn):
        codepoint = ord(char)
        if 0xDC80 <= codepoint <= 0xDCFF:
            # Escaped byte: restore the original value 0x80-0xFF.
            chunks.append(bytes_chr(codepoint - 0xDC00))
        elif ascii_only:
            if codepoint >= 128:
                raise UnicodeEncodeError(FS_ENCODING,
                                         fn, pos, pos + 1,
                                         'ordinal not in range(128)')
            chunks.append(bytes_chr(codepoint))
        elif 0xD800 <= codepoint <= 0xDFFF:
            raise UnicodeEncodeError(
                FS_ENCODING,
                fn, pos, pos + 1, 'surrogates not allowed')
        else:
            chunks.append(char.encode('utf-8'))
    return bytes().join(chunks)
+
def decodefilename(fn):
    """
    Decode the byte-string filename ``fn`` using FS_ENCODING with the
    FS_ERRORS error handler and return the result.
    """
    decoded = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded
+
# Active encoding plus a sample (bytes filename, expected decoded text) pair;
# the commented-out lines are alternative configurations.
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# Normalize the filesystem encoding name through the codec registry.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name
+
+
def register_surrogateescape():
    """
    Register surrogateescape_handler under the name FS_ERRORS on Python 2.

    Does nothing on Python 3, and nothing when an error handler with that
    name is already registered.
    """
    if not utils.PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            # No handler with this name yet: install the pure-Python one.
            codecs.register_error(FS_ERRORS, surrogateescape_handler)
+
+
if __name__ == '__main__':
    # Running the module as a script does nothing; the round-trip self-test
    # below (decode the sample filename, compare, encode it back, compare)
    # is kept disabled.
    pass
    # # Tests:
    # register_surrogateescape()

    # b = decodefilename(fn)
    # assert b == encoded, "%r != %r" % (b, encoded)
    # c = encodefilename(b)
    # assert c == fn, '%r != %r' % (c, fn)
    # # print("ok")