about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py')
-rw-r--r--.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py106
1 files changed, 106 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py b/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
new file mode 100644
index 00000000..03c27dc1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/google/protobuf/text_encoding.py
@@ -0,0 +1,106 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Encoding related utilities."""
+import re
+
+def _AsciiIsPrint(i):
+  return i >= 32 and i < 127
+
+def _MakeStrEscapes():
+  ret = {}
+  for i in range(0, 128):
+    if not _AsciiIsPrint(i):
+      ret[i] = r'\%03o' % i
+  ret[ord('\t')] = r'\t'  # optional escape
+  ret[ord('\n')] = r'\n'  # optional escape
+  ret[ord('\r')] = r'\r'  # optional escape
+  ret[ord('"')] = r'\"'  # necessary escape
+  ret[ord('\'')] = r"\'"  # optional escape
+  ret[ord('\\')] = r'\\'  # necessary escape
+  return ret
+
+# Maps int -> char, performing string escapes.
+_str_escapes = _MakeStrEscapes()
+
+# Maps int -> char, performing byte escaping and string escapes
+_byte_escapes = {i: chr(i) for i in range(0, 256)}
+_byte_escapes.update(_str_escapes)
+_byte_escapes.update({i: r'\%03o' % i for i in range(128, 256)})
+
+
+def _DecodeUtf8EscapeErrors(text_bytes):
+  ret = ''
+  while text_bytes:
+    try:
+      ret += text_bytes.decode('utf-8').translate(_str_escapes)
+      text_bytes = ''
+    except UnicodeDecodeError as e:
+      ret += text_bytes[:e.start].decode('utf-8').translate(_str_escapes)
+      ret += _byte_escapes[text_bytes[e.start]]
+      text_bytes = text_bytes[e.start+1:]
+  return ret
+
+
+def CEscape(text, as_utf8) -> str:
+  """Escape a bytes string for use in an text protocol buffer.
+
+  Args:
+    text: A byte string to be escaped.
+    as_utf8: Specifies if result may contain non-ASCII characters.
+        In Python 3 this allows unescaped non-ASCII Unicode characters.
+        In Python 2 the return value will be valid UTF-8 rather than only ASCII.
+  Returns:
+    Escaped string (str).
+  """
+  # Python's text.encode() 'string_escape' or 'unicode_escape' codecs do not
+  # satisfy our needs; they encodes unprintable characters using two-digit hex
+  # escapes whereas our C++ unescaping function allows hex escapes to be any
+  # length.  So, "\0011".encode('string_escape') ends up being "\\x011", which
+  # will be decoded in C++ as a single-character string with char code 0x11.
+  text_is_unicode = isinstance(text, str)
+  if as_utf8:
+    if text_is_unicode:
+      return text.translate(_str_escapes)
+    else:
+      return _DecodeUtf8EscapeErrors(text)
+  else:
+    if text_is_unicode:
+      text = text.encode('utf-8')
+    return ''.join([_byte_escapes[c] for c in text])
+
+
+_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')
+
+
+def CUnescape(text: str) -> bytes:
+  """Unescape a text string with C-style escape sequences to UTF-8 bytes.
+
+  Args:
+    text: The data to parse in a str.
+  Returns:
+    A byte string.
+  """
+
+  def ReplaceHex(m):
+    # Only replace the match if the number of leading back slashes is odd. i.e.
+    # the slash itself is not escaped.
+    if len(m.group(1)) & 1:
+      return m.group(1) + 'x0' + m.group(2)
+    return m.group(0)
+
+  # This is required because the 'string_escape' encoding doesn't
+  # allow single-digit hex escapes (like '\xf').
+  result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
+
+  # Replaces Unicode escape sequences with their character equivalents.
+  result = result.encode('raw_unicode_escape').decode('raw_unicode_escape')
+  # Encode Unicode characters as UTF-8, then decode to Latin-1 escaping
+  # unprintable characters.
+  result = result.encode('utf-8').decode('unicode_escape')
+  # Convert Latin-1 text back to a byte string (latin-1 codec also works here).
+  return result.encode('latin-1')