aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py')
-rw-r--r--.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py106
1 files changed, 106 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py b/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
new file mode 100644
index 00000000..03c27dc1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
@@ -0,0 +1,106 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Encoding related utilities."""
+import re
+
+def _AsciiIsPrint(i):
+ return i >= 32 and i < 127
+
def _MakeStrEscapes():
  """Build the translation table used to escape text held in a str.

  Returns:
    A dict keyed by code point (int) whose values are the escaped spelling
    (str) of that character. Only characters that need or conventionally get
    an escape are present; anything absent is left as-is by str.translate.
  """
  # Every non-printable ASCII code point gets a three-digit octal escape.
  table = {
      code: r'\%03o' % code
      for code in range(0, 128)
      if not _AsciiIsPrint(code)
  }
  # Layer the short C-style spellings on top; the first three replace the
  # octal form generated above, the last three add printable characters.
  table.update({
      ord('\t'): r'\t',  # optional escape
      ord('\n'): r'\n',  # optional escape
      ord('\r'): r'\r',  # optional escape
      ord('"'): r'\"',  # necessary escape
      ord('\''): r"\'",  # optional escape
      ord('\\'): r'\\',  # necessary escape
  })
  return table
+
# Maps code point (int) -> escaped text (str); used to escape str input.
_str_escapes = _MakeStrEscapes()

# Maps byte value (int, 0-255) -> text (str). ASCII bytes map to themselves
# unless _str_escapes overrides them; bytes 128-255 always become three-digit
# octal escapes.
_byte_escapes = {
    **{value: chr(value) for value in range(0, 128)},
    **_str_escapes,
    **{value: r'\%03o' % value for value in range(128, 256)},
}
+
+
def _DecodeUtf8EscapeErrors(text_bytes):
  """Decode UTF-8 bytes to escaped text, escaping bytes that fail to decode.

  Valid UTF-8 runs are decoded and passed through the _str_escapes table.
  Whenever decoding fails, the single byte at the reported error position is
  replaced by its _byte_escapes entry and decoding resumes right after it.

  Args:
    text_bytes: The byte string to decode.
  Returns:
    The escaped text as a str.
  """
  pieces = []  # Collected chunks; joined once at the end.
  while text_bytes:
    try:
      decoded = text_bytes.decode('utf-8')
    except UnicodeDecodeError as e:
      # Bytes before e.start are valid UTF-8; decode and escape them normally.
      pieces.append(
          text_bytes[:e.start].decode('utf-8').translate(_str_escapes))
      # Escape only the first offending byte, then retry on what follows so
      # that any valid sequence starting right after it is still decoded.
      pieces.append(_byte_escapes[text_bytes[e.start]])
      text_bytes = text_bytes[e.start + 1:]
    else:
      # The whole remainder decoded cleanly; nothing is left to process.
      pieces.append(decoded.translate(_str_escapes))
      break
  return ''.join(pieces)
+
+
def CEscape(text, as_utf8) -> str:
  """Escape a string for use in a text-format protocol buffer.

  Args:
    text: The data to escape, either a byte string or a str.
    as_utf8: If true, non-ASCII characters may appear unescaped in the result
      (bytes input is decoded as UTF-8, and only bytes that fail to decode are
      escaped). If false, str input is first encoded as UTF-8 and every byte
      is mapped through the byte escape table, which octal-escapes all bytes
      in the range 128-255.
  Returns:
    Escaped string (str).
  """
  # The built-in 'string_escape' / 'unicode_escape' codecs are deliberately not
  # used: they emit two-digit hex escapes for unprintable characters, whereas
  # the C++ unescaping function lets a hex escape be any length. An escaped
  # "\x01" followed by a literal "1" would therefore be read back by C++ as
  # the single character 0x11.
  if isinstance(text, str):
    if as_utf8:
      return text.translate(_str_escapes)
    raw = text.encode('utf-8')
  elif as_utf8:
    return _DecodeUtf8EscapeErrors(text)
  else:
    raw = text
  # Iterating a byte string yields ints, which are the table's keys.
  return ''.join([_byte_escapes[byte] for byte in raw])
+
+
# Matches a run of backslashes, an `x`, and one hex digit that is not followed
# by a second hex digit (i.e. a candidate single-digit hex escape).
_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')


def CUnescape(text: str) -> bytes:
  """Convert a text string containing C-style escapes into raw bytes.

  Characters that are not part of an escape sequence end up UTF-8 encoded in
  the result.

  Args:
    text: The escaped data, as a str.
  Returns:
    The unescaped data as a byte string.
  """

  def _PadSingleHexDigit(match):
    slashes = match.group(1)
    # An even number of backslashes means the backslash before the `x` is
    # itself escaped, so this is not a hex escape; leave it untouched.
    if len(slashes) % 2 == 0:
      return match.group(0)
    return slashes + 'x0' + match.group(2)

  # The 'unicode_escape' codec used below rejects single-digit hex escapes, so
  # widen them to two digits first.
  padded = _CUNESCAPE_HEX.sub(_PadSingleHexDigit, text)

  # Round-trip through 'raw_unicode_escape' so that Unicode escape sequences
  # in the text become the characters they denote.
  expanded = padded.encode('raw_unicode_escape').decode('raw_unicode_escape')

  # Encode characters as UTF-8, then let 'unicode_escape' resolve the
  # remaining backslash escapes; it reads each input byte as one Latin-1
  # character.
  latin1_text = expanded.encode('utf-8').decode('unicode_escape')

  # Every character is now in the Latin-1 range, so this recovers the bytes.
  return latin1_text.encode('latin-1')