about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/sentry_sdk/serializer.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/sentry_sdk/serializer.py')
-rw-r--r--.venv/lib/python3.12/site-packages/sentry_sdk/serializer.py388
1 files changed, 388 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/sentry_sdk/serializer.py b/.venv/lib/python3.12/site-packages/sentry_sdk/serializer.py
new file mode 100644
index 00000000..bc8e38c6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/sentry_sdk/serializer.py
@@ -0,0 +1,388 @@
+import sys
+import math
+from collections.abc import Mapping, Sequence, Set
+from datetime import datetime
+
+from sentry_sdk.utils import (
+    AnnotatedValue,
+    capture_internal_exception,
+    disable_capture_event,
+    format_timestamp,
+    safe_repr,
+    strip_string,
+)
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from types import TracebackType
+
+    from typing import Any
+    from typing import Callable
+    from typing import ContextManager
+    from typing import Dict
+    from typing import List
+    from typing import Optional
+    from typing import Type
+    from typing import Union
+
+    from sentry_sdk._types import NotImplementedType
+
+    Span = Dict[str, Any]
+
+    ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]]
+    Segment = Union[str, int]
+
+
# Bytes are technically not strings in Python 3, but we can serialize them
serializable_str_types = (str, bytes, bytearray, memoryview)


# Maximum length of JSON-serialized event payloads that can be safely sent
# before the server may reject the event due to its size. This is not intended
# to reflect actual values defined server-side, but rather only be an upper
# bound for events sent by the SDK.
#
# Can be overwritten if wanting to send more bytes, e.g. with a custom server.
# When changing this, keep in mind that events may be a little bit larger than
# this value due to attached metadata, so keep the number conservative.
MAX_EVENT_BYTES = 10**6

# Maximum depth and breadth of databags. Excess data will be trimmed. If
# max_request_body_size is "always", request bodies won't be trimmed.
MAX_DATABAG_DEPTH = 5
MAX_DATABAG_BREADTH = 10
# Placeholder emitted in place of an object that is already being serialized
# further up the current path (i.e. a reference cycle was detected).
CYCLE_MARKER = "<cyclic>"
+
+
# Registry of user-supplied repr processors; serialize() consults these for
# databag values before falling back to safe_repr().
global_repr_processors = []  # type: List[ReprProcessor]


def add_global_repr_processor(processor):
    # type: (ReprProcessor) -> None
    """Register *processor* to run on databag values during serialization."""
    global_repr_processors.extend((processor,))
+
class Memo:
    """Tracks the objects on the current serialization path to detect cycles.

    ``memoize(obj)`` pushes *obj* and returns this object as a context
    manager: ``__enter__`` yields ``True`` when *obj* is already being
    serialized further up the stack (a reference cycle), ``False``
    otherwise, and ``__exit__`` unwinds the bookkeeping.
    """

    __slots__ = ("_ids", "_objs")

    def __init__(self):
        # type: () -> None
        self._ids = {}  # type: Dict[int, Any]
        self._objs = []  # type: List[Any]

    def memoize(self, obj):
        # type: (Any) -> ContextManager[bool]
        # Push immediately; __enter__/__exit__ act on the stack top.
        self._objs.append(obj)
        return self

    def __enter__(self):
        # type: () -> bool
        current = self._objs[-1]
        key = id(current)
        if key in self._ids:
            return True
        # First visit on this path: remember it (keeping a reference so the
        # id cannot be recycled) and report "no cycle".
        self._ids[key] = current
        return False

    def __exit__(
        self,
        ty,  # type: Optional[Type[BaseException]]
        value,  # type: Optional[BaseException]
        tb,  # type: Optional[TracebackType]
    ):
        # type: (...) -> None
        popped = self._objs.pop()
        self._ids.pop(id(popped), None)
+
+
def serialize(event, **kwargs):
    # type: (Dict[str, Any], **Any) -> Dict[str, Any]
    """
    A very smart serializer that takes a dict and emits a json-friendly dict.
    Currently used for serializing the final Event and also prematurely while fetching the stack
    local variables for each frame in a stacktrace.

    It works internally with 'databags' which are arbitrary data structures like Mapping, Sequence and Set.
    The algorithm itself is a recursive graph walk down the data structures it encounters.

    It has the following responsibilities:
    * Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
    * Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
    * Annotating the payload with the _meta field whenever trimming happens.

    :param max_request_body_size: If set to "always", will never trim request bodies.
    :param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
    :param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.
    :param custom_repr: A custom repr function that runs before safe_repr on the object to be serialized. If it returns None or throws internally, we will fallback to safe_repr.

    """
    # Shared walk state, closed over by the nested helpers below:
    # - memo detects reference cycles,
    # - path is the stack of dict keys / list indices from the event root
    #   down to the node currently being serialized,
    # - meta_stack mirrors path and accumulates the "_meta" annotations.
    memo = Memo()
    path = []  # type: List[Segment]
    meta_stack = []  # type: List[Dict[str, Any]]

    # Remaining kwargs (after the pops below) are forwarded verbatim to
    # _serialize_node at the bottom of this function.
    keep_request_bodies = (
        kwargs.pop("max_request_body_size", None) == "always"
    )  # type: bool
    max_value_length = kwargs.pop("max_value_length", None)  # type: Optional[int]
    is_vars = kwargs.pop("is_vars", False)  # type: bool
    custom_repr = kwargs.pop("custom_repr", None)  # type: Optional[Callable[..., Optional[str]]]

    def _safe_repr_wrapper(value):
        # type: (Any) -> str
        """repr() `value`, preferring custom_repr but falling back to
        safe_repr() whenever it returns a falsy value or raises."""
        try:
            repr_value = None
            if custom_repr is not None:
                repr_value = custom_repr(value)
            return repr_value or safe_repr(value)
        except Exception:
            return safe_repr(value)

    def _annotate(**meta):
        # type: (**Any) -> None
        """Merge `meta` into the _meta node for the current path, growing
        meta_stack lazily until it covers every segment of path."""
        while len(meta_stack) <= len(path):
            try:
                # On the very first iteration meta_stack is empty, so
                # meta_stack[-1] raises IndexError and the root node is
                # created by the except branch below.
                segment = path[len(meta_stack) - 1]
                node = meta_stack[-1].setdefault(str(segment), {})
            except IndexError:
                node = {}

            meta_stack.append(node)

        # Annotations for a node live under its "" key.
        meta_stack[-1].setdefault("", {}).update(meta)

    def _is_databag():
        # type: () -> Optional[bool]
        """
        A databag is any value that we need to trim.
        True for stuff like vars, request bodies, breadcrumbs and extra.

        :returns: `True` for "yes", `False` for "no", `None` for "maybe soon".
        """
        try:
            if is_vars:
                return True

            is_request_body = _is_request_body()
            if is_request_body in (True, None):
                return is_request_body

            p0 = path[0]
            if p0 == "breadcrumbs" and path[1] == "values":
                # Bare probe for a third path segment: raises IndexError
                # (-> "maybe soon") while we are still at the values list
                # itself rather than inside one of its entries.
                path[2]
                return True

            if p0 == "extra":
                return True

        except IndexError:
            # Not deep enough into the event yet to decide.
            return None

        return False

    def _is_request_body():
        # type: () -> Optional[bool]
        """True under request.data, None when too shallow to tell, else False."""
        try:
            if path[0] == "request" and path[1] == "data":
                return True
        except IndexError:
            return None

        return False

    def _serialize_node(
        obj,  # type: Any
        is_databag=None,  # type: Optional[bool]
        is_request_body=None,  # type: Optional[bool]
        should_repr_strings=None,  # type: Optional[bool]
        segment=None,  # type: Optional[Segment]
        remaining_breadth=None,  # type: Optional[Union[int, float]]
        remaining_depth=None,  # type: Optional[Union[int, float]]
    ):
        # type: (...) -> Any
        """Serialize one node: maintain path, detect cycles, and guard
        against internal errors so serialization itself never raises."""
        if segment is not None:
            path.append(segment)

        try:
            with memo.memoize(obj) as result:
                # memoize() yields True when obj is already on the current
                # path, i.e. we hit a reference cycle.
                if result:
                    return CYCLE_MARKER

                return _serialize_node_impl(
                    obj,
                    is_databag=is_databag,
                    is_request_body=is_request_body,
                    should_repr_strings=should_repr_strings,
                    remaining_depth=remaining_depth,
                    remaining_breadth=remaining_breadth,
                )
        except BaseException:
            # Never let serialization errors escape; route them through the
            # SDK's internal-error reporting instead.
            capture_internal_exception(sys.exc_info())

            if is_databag:
                return "<failed to serialize, use init(debug=True) to see error logs>"

            return None
        finally:
            if segment is not None:
                path.pop()
                # Discard meta nodes recorded below the segment we just left.
                del meta_stack[len(path) + 1 :]

    def _flatten_annotated(obj):
        # type: (Any) -> Any
        # Record an AnnotatedValue's metadata at the current path and
        # continue with the wrapped value.
        if isinstance(obj, AnnotatedValue):
            _annotate(**obj.metadata)
            obj = obj.value
        return obj

    def _serialize_node_impl(
        obj,
        is_databag,
        is_request_body,
        should_repr_strings,
        remaining_depth,
        remaining_breadth,
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[Union[float, int]], Optional[Union[float, int]]) -> Any
        if isinstance(obj, AnnotatedValue):
            should_repr_strings = False
        if should_repr_strings is None:
            should_repr_strings = is_vars

        # Resolve "maybe" (None) flags from the current path.
        if is_databag is None:
            is_databag = _is_databag()

        if is_request_body is None:
            is_request_body = _is_request_body()

        if is_databag:
            if is_request_body and keep_request_bodies:
                # max_request_body_size == "always": no trimming at all.
                remaining_depth = float("inf")
                remaining_breadth = float("inf")
            else:
                if remaining_depth is None:
                    remaining_depth = MAX_DATABAG_DEPTH
                if remaining_breadth is None:
                    remaining_breadth = MAX_DATABAG_BREADTH

        obj = _flatten_annotated(obj)

        # Depth budget exhausted: annotate the trim and collapse the rest of
        # this subtree into a (length-limited) repr string.
        if remaining_depth is not None and remaining_depth <= 0:
            _annotate(rem=[["!limit", "x"]])
            if is_databag:
                return _flatten_annotated(
                    strip_string(_safe_repr_wrapper(obj), max_length=max_value_length)
                )
            return None

        # Give registered repr processors first shot at databag values;
        # NotImplemented means "not handled, keep going".
        if is_databag and global_repr_processors:
            hints = {"memo": memo, "remaining_depth": remaining_depth}
            for processor in global_repr_processors:
                result = processor(obj, hints)
                if result is not NotImplemented:
                    return _flatten_annotated(result)

        # Looked up on the type (not the instance) so instance attributes
        # cannot spoof it.
        sentry_repr = getattr(type(obj), "__sentry_repr__", None)

        if obj is None or isinstance(obj, (bool, int, float)):
            # inf/nan are not valid JSON, so they are always repr()'d.
            if should_repr_strings or (
                isinstance(obj, float) and (math.isinf(obj) or math.isnan(obj))
            ):
                return _safe_repr_wrapper(obj)
            else:
                return obj

        elif callable(sentry_repr):
            return sentry_repr(obj)

        elif isinstance(obj, datetime):
            return (
                str(format_timestamp(obj))
                if not should_repr_strings
                else _safe_repr_wrapper(obj)
            )

        elif isinstance(obj, Mapping):
            # Create temporary copy here to avoid calling too much code that
            # might mutate our dictionary while we're still iterating over it.
            obj = dict(obj.items())

            rv_dict = {}  # type: Dict[str, Any]
            i = 0

            for k, v in obj.items():
                # Breadth budget exhausted: record the true length and stop.
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                str_k = str(k)
                v = _serialize_node(
                    v,
                    segment=str_k,
                    should_repr_strings=should_repr_strings,
                    is_databag=is_databag,
                    is_request_body=is_request_body,
                    remaining_depth=(
                        remaining_depth - 1 if remaining_depth is not None else None
                    ),
                    remaining_breadth=remaining_breadth,
                )
                rv_dict[str_k] = v
                i += 1

            return rv_dict

        elif not isinstance(obj, serializable_str_types) and isinstance(
            obj, (Set, Sequence)
        ):
            rv_list = []

            for i, v in enumerate(obj):
                # Breadth budget exhausted: record the true length and stop.
                if remaining_breadth is not None and i >= remaining_breadth:
                    _annotate(len=len(obj))
                    break

                rv_list.append(
                    _serialize_node(
                        v,
                        segment=i,
                        should_repr_strings=should_repr_strings,
                        is_databag=is_databag,
                        is_request_body=is_request_body,
                        remaining_depth=(
                            remaining_depth - 1 if remaining_depth is not None else None
                        ),
                        remaining_breadth=remaining_breadth,
                    )
                )

            return rv_list

        # Fallthrough: strings, bytes-likes, and arbitrary objects.
        if should_repr_strings:
            obj = _safe_repr_wrapper(obj)
        else:
            if isinstance(obj, bytes) or isinstance(obj, bytearray):
                obj = obj.decode("utf-8", "replace")

            if not isinstance(obj, str):
                obj = _safe_repr_wrapper(obj)

        # Span descriptions are exempt from string trimming.
        is_span_description = (
            len(path) == 3 and path[0] == "spans" and path[-1] == "description"
        )
        if is_span_description:
            return obj

        return _flatten_annotated(strip_string(obj, max_length=max_value_length))

    #
    # Start of serialize() function
    #
    # Suppress event capture while serializing so that an internal error
    # raised here cannot recursively produce (and serialize) more events.
    disable_capture_event.set(True)
    try:
        serialized_event = _serialize_node(event, **kwargs)
        # Attach collected trim annotations as the top-level "_meta" field
        # (final events only; vars serialization has no _meta).
        if not is_vars and meta_stack and isinstance(serialized_event, dict):
            serialized_event["_meta"] = meta_stack[0]

        return serialized_event
    finally:
        disable_capture_event.set(False)