Diffstat (limited to '.venv/lib/python3.12/site-packages/sentry_sdk/profiler')
.venv/lib/python3.12/site-packages/sentry_sdk/profiler/__init__.py              |  49
.venv/lib/python3.12/site-packages/sentry_sdk/profiler/continuous_profiler.py   | 704
.venv/lib/python3.12/site-packages/sentry_sdk/profiler/transaction_profiler.py  | 837
.venv/lib/python3.12/site-packages/sentry_sdk/profiler/utils.py                 | 199
4 files changed, 1789 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/__init__.py b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/__init__.py new file mode 100644 index 00000000..0bc63e3a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/__init__.py @@ -0,0 +1,49 @@ +from sentry_sdk.profiler.continuous_profiler import ( + start_profile_session, + start_profiler, + stop_profile_session, + stop_profiler, +) +from sentry_sdk.profiler.transaction_profiler import ( + MAX_PROFILE_DURATION_NS, + PROFILE_MINIMUM_SAMPLES, + Profile, + Scheduler, + ThreadScheduler, + GeventScheduler, + has_profiling_enabled, + setup_profiler, + teardown_profiler, +) +from sentry_sdk.profiler.utils import ( + DEFAULT_SAMPLING_FREQUENCY, + MAX_STACK_DEPTH, + get_frame_name, + extract_frame, + extract_stack, + frame_id, +) + +__all__ = [ + "start_profile_session", # TODO: Deprecate this in favor of `start_profiler` + "start_profiler", + "stop_profile_session", # TODO: Deprecate this in favor of `stop_profiler` + "stop_profiler", + # DEPRECATED: The following was re-exported for backwards compatibility. It + # will be removed from sentry_sdk.profiler in a future release. + "MAX_PROFILE_DURATION_NS", + "PROFILE_MINIMUM_SAMPLES", + "Profile", + "Scheduler", + "ThreadScheduler", + "GeventScheduler", + "has_profiling_enabled", + "setup_profiler", + "teardown_profiler", + "DEFAULT_SAMPLING_FREQUENCY", + "MAX_STACK_DEPTH", + "get_frame_name", + "extract_frame", + "extract_stack", + "frame_id", +] diff --git a/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/continuous_profiler.py b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/continuous_profiler.py new file mode 100644 index 00000000..77ba60db --- /dev/null +++ b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/continuous_profiler.py @@ -0,0 +1,704 @@ +import atexit +import os +import random +import sys +import threading +import time +import uuid +import warnings +from collections import deque +from datetime import datetime, timezone + +from sentry_sdk.consts import VERSION +from sentry_sdk.envelope import Envelope +from sentry_sdk._lru_cache import LRUCache +from sentry_sdk.profiler.utils import ( + DEFAULT_SAMPLING_FREQUENCY, + extract_stack, +) +from sentry_sdk.utils import ( + capture_internal_exception, + is_gevent, + logger, + now, + set_in_app_in_frames, +) + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + from typing import Callable + from typing import Deque + from typing import Dict + from typing import List + from typing import Optional + from typing import Set + from typing import Type + from typing import Union + from typing_extensions import TypedDict + from sentry_sdk._types import ContinuousProfilerMode, SDKInfo + from sentry_sdk.profiler.utils import ( + ExtractedSample, + FrameId, + StackId, + ThreadId, + ProcessedFrame, + ProcessedStack, + ) + + ProcessedSample = TypedDict( + "ProcessedSample", + { + "timestamp": float, + "thread_id": ThreadId, + "stack_id": int, + }, + ) + + +try: + from gevent.monkey import get_original + from gevent.threadpool import ThreadPool as _ThreadPool + + ThreadPool = _ThreadPool # type: Optional[Type[_ThreadPool]] + thread_sleep = get_original("time", "sleep") +except ImportError: + thread_sleep = time.sleep + ThreadPool = None + + +_scheduler = None # type: Optional[ContinuousScheduler] + + +def setup_continuous_profiler(options, sdk_info, capture_func): + # type: (Dict[str, Any], SDKInfo, Callable[[Envelope], None]) -> bool + global 
_scheduler + + if _scheduler is not None: + logger.debug("[Profiling] Continuous Profiler is already setup") + return False + + if is_gevent(): + # If gevent has patched the threading modules then we cannot rely on + # them to spawn a native thread for sampling. + # Instead we default to the GeventContinuousScheduler which is capable of + # spawning native threads within gevent. + default_profiler_mode = GeventContinuousScheduler.mode + else: + default_profiler_mode = ThreadContinuousScheduler.mode + + if options.get("profiler_mode") is not None: + profiler_mode = options["profiler_mode"] + else: + # TODO: deprecate this and just use the existing `profiler_mode` + experiments = options.get("_experiments", {}) + + profiler_mode = ( + experiments.get("continuous_profiling_mode") or default_profiler_mode + ) + + frequency = DEFAULT_SAMPLING_FREQUENCY + + if profiler_mode == ThreadContinuousScheduler.mode: + _scheduler = ThreadContinuousScheduler( + frequency, options, sdk_info, capture_func + ) + elif profiler_mode == GeventContinuousScheduler.mode: + _scheduler = GeventContinuousScheduler( + frequency, options, sdk_info, capture_func + ) + else: + raise ValueError("Unknown continuous profiler mode: {}".format(profiler_mode)) + + logger.debug( + "[Profiling] Setting up continuous profiler in {mode} mode".format( + mode=_scheduler.mode + ) + ) + + atexit.register(teardown_continuous_profiler) + + return True + + +def try_autostart_continuous_profiler(): + # type: () -> None + + # TODO: deprecate this as it'll be replaced by the auto lifecycle option + + if _scheduler is None: + return + + if not _scheduler.is_auto_start_enabled(): + return + + _scheduler.manual_start() + + +def try_profile_lifecycle_trace_start(): + # type: () -> Union[ContinuousProfile, None] + if _scheduler is None: + return None + + return _scheduler.auto_start() + + +def start_profiler(): + # type: () -> None + if _scheduler is None: + return + + _scheduler.manual_start() + + +def start_profile_session(): + # type: () -> None + + warnings.warn( + "The `start_profile_session` function is deprecated. Please use `start_profile` instead.", + DeprecationWarning, + stacklevel=2, + ) + start_profiler() + + +def stop_profiler(): + # type: () -> None + if _scheduler is None: + return + + _scheduler.manual_stop() + + +def stop_profile_session(): + # type: () -> None + + warnings.warn( + "The `stop_profile_session` function is deprecated. 
Please use `stop_profile` instead.", + DeprecationWarning, + stacklevel=2, + ) + stop_profiler() + + +def teardown_continuous_profiler(): + # type: () -> None + stop_profiler() + + global _scheduler + _scheduler = None + + +def get_profiler_id(): + # type: () -> Union[str, None] + if _scheduler is None: + return None + return _scheduler.profiler_id + + +def determine_profile_session_sampling_decision(sample_rate): + # type: (Union[float, None]) -> bool + + # `None` is treated as `0.0` + if not sample_rate: + return False + + return random.random() < float(sample_rate) + + +class ContinuousProfile: + active: bool = True + + def stop(self): + # type: () -> None + self.active = False + + +class ContinuousScheduler: + mode = "unknown" # type: ContinuousProfilerMode + + def __init__(self, frequency, options, sdk_info, capture_func): + # type: (int, Dict[str, Any], SDKInfo, Callable[[Envelope], None]) -> None + self.interval = 1.0 / frequency + self.options = options + self.sdk_info = sdk_info + self.capture_func = capture_func + + self.lifecycle = self.options.get("profile_lifecycle") + profile_session_sample_rate = self.options.get("profile_session_sample_rate") + self.sampled = determine_profile_session_sampling_decision( + profile_session_sample_rate + ) + + self.sampler = self.make_sampler() + self.buffer = None # type: Optional[ProfileBuffer] + self.pid = None # type: Optional[int] + + self.running = False + + self.new_profiles = deque(maxlen=128) # type: Deque[ContinuousProfile] + self.active_profiles = set() # type: Set[ContinuousProfile] + + def is_auto_start_enabled(self): + # type: () -> bool + + # Ensure that the scheduler only autostarts once per process. + # This is necessary because many web servers use forks to spawn + # additional processes. And the profiler is only spawned on the + # master process, then it often only profiles the main process + # and not the ones where the requests are being handled. + if self.pid == os.getpid(): + return False + + experiments = self.options.get("_experiments") + if not experiments: + return False + + return experiments.get("continuous_profiling_auto_start") + + def auto_start(self): + # type: () -> Union[ContinuousProfile, None] + if not self.sampled: + return None + + if self.lifecycle != "trace": + return None + + logger.debug("[Profiling] Auto starting profiler") + + profile = ContinuousProfile() + + self.new_profiles.append(profile) + self.ensure_running() + + return profile + + def manual_start(self): + # type: () -> None + if not self.sampled: + return + + if self.lifecycle != "manual": + return + + self.ensure_running() + + def manual_stop(self): + # type: () -> None + if self.lifecycle != "manual": + return + + self.teardown() + + def ensure_running(self): + # type: () -> None + raise NotImplementedError + + def teardown(self): + # type: () -> None + raise NotImplementedError + + def pause(self): + # type: () -> None + raise NotImplementedError + + def reset_buffer(self): + # type: () -> None + self.buffer = ProfileBuffer( + self.options, self.sdk_info, PROFILE_BUFFER_SECONDS, self.capture_func + ) + + @property + def profiler_id(self): + # type: () -> Union[str, None] + if self.buffer is None: + return None + return self.buffer.profiler_id + + def make_sampler(self): + # type: () -> Callable[..., None] + cwd = os.getcwd() + + cache = LRUCache(max_size=256) + + if self.lifecycle == "trace": + + def _sample_stack(*args, **kwargs): + # type: (*Any, **Any) -> None + """ + Take a sample of the stack on all the threads in the process. 
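# A minimal, self-contained sketch of the sampling primitive the samplers in
# this module rely on: sys._current_frames() returns a mapping of thread id
# to that thread's topmost frame, and each frame can be walked back to the
# root. The traceback-based formatting here is only for illustration.
import sys
import traceback

def sample_all_threads():
    for tid, frame in sys._current_frames().items():
        summary = traceback.extract_stack(frame)
        print(tid, [f"{entry.name}:{entry.lineno}" for entry in summary])

sample_all_threads()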
+ This should be called at a regular interval to collect samples. + """ + + # no profiles taking place, so we can stop early + if not self.new_profiles and not self.active_profiles: + self.running = False + return + + # This is the number of profiles we want to pop off. + # It's possible another thread adds a new profile to + # the list and we spend longer than we want inside + # the loop below. + # + # Also make sure to set this value before extracting + # frames so we do not write to any new profiles that + # were started after this point. + new_profiles = len(self.new_profiles) + + ts = now() + + try: + sample = [ + (str(tid), extract_stack(frame, cache, cwd)) + for tid, frame in sys._current_frames().items() + ] + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. + # When this happens, we abandon the current sample as it's bad. + capture_internal_exception(sys.exc_info()) + return + + # Move the new profiles into the active_profiles set. + # + # We cannot directly add the to active_profiles set + # in `start_profiling` because it is called from other + # threads which can cause a RuntimeError when it the + # set sizes changes during iteration without a lock. + # + # We also want to avoid using a lock here so threads + # that are starting profiles are not blocked until it + # can acquire the lock. + for _ in range(new_profiles): + self.active_profiles.add(self.new_profiles.popleft()) + inactive_profiles = [] + + for profile in self.active_profiles: + if profile.active: + pass + else: + # If a profile is marked inactive, we buffer it + # to `inactive_profiles` so it can be removed. + # We cannot remove it here as it would result + # in a RuntimeError. + inactive_profiles.append(profile) + + for profile in inactive_profiles: + self.active_profiles.remove(profile) + + if self.buffer is not None: + self.buffer.write(ts, sample) + + else: + + def _sample_stack(*args, **kwargs): + # type: (*Any, **Any) -> None + """ + Take a sample of the stack on all the threads in the process. + This should be called at a regular interval to collect samples. + """ + + ts = now() + + try: + sample = [ + (str(tid), extract_stack(frame, cache, cwd)) + for tid, frame in sys._current_frames().items() + ] + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. + # When this happens, we abandon the current sample as it's bad. + capture_internal_exception(sys.exc_info()) + return + + if self.buffer is not None: + self.buffer.write(ts, sample) + + return _sample_stack + + def run(self): + # type: () -> None + last = time.perf_counter() + + while self.running: + self.sampler() + + # some time may have elapsed since the last time + # we sampled, so we need to account for that and + # not sleep for too long + elapsed = time.perf_counter() - last + if elapsed < self.interval: + thread_sleep(self.interval - elapsed) + + # after sleeping, make sure to take the current + # timestamp so we can use it next iteration + last = time.perf_counter() + + if self.buffer is not None: + self.buffer.flush() + self.buffer = None + + +class ThreadContinuousScheduler(ContinuousScheduler): + """ + This scheduler is based on running a daemon thread that will call + the sampler at a regular interval. 
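# Standalone sketch of the drift-compensated loop that run() implements: sleep
# only for whatever is left of the interval after the sampler itself ran, so
# the effective rate stays close to the target frequency (101 Hz by default).
# The sampler and should_run callables are illustrative stand-ins.
import time

def sampling_loop(sampler, should_run, frequency=101):
    interval = 1.0 / frequency
    last = time.perf_counter()
    while should_run():
        sampler()
        elapsed = time.perf_counter() - last
        if elapsed < interval:
            time.sleep(interval - elapsed)
        last = time.perf_counter()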
+ """ + + mode = "thread" # type: ContinuousProfilerMode + name = "sentry.profiler.ThreadContinuousScheduler" + + def __init__(self, frequency, options, sdk_info, capture_func): + # type: (int, Dict[str, Any], SDKInfo, Callable[[Envelope], None]) -> None + super().__init__(frequency, options, sdk_info, capture_func) + + self.thread = None # type: Optional[threading.Thread] + self.lock = threading.Lock() + + def ensure_running(self): + # type: () -> None + + pid = os.getpid() + + # is running on the right process + if self.running and self.pid == pid: + return + + with self.lock: + # another thread may have tried to acquire the lock + # at the same time so it may start another thread + # make sure to check again before proceeding + if self.running and self.pid == pid: + return + + self.pid = pid + self.running = True + + # if the profiler thread is changing, + # we should create a new buffer along with it + self.reset_buffer() + + # make sure the thread is a daemon here otherwise this + # can keep the application running after other threads + # have exited + self.thread = threading.Thread(name=self.name, target=self.run, daemon=True) + + try: + self.thread.start() + except RuntimeError: + # Unfortunately at this point the interpreter is in a state that no + # longer allows us to spawn a thread and we have to bail. + self.running = False + self.thread = None + + def teardown(self): + # type: () -> None + if self.running: + self.running = False + + if self.thread is not None: + self.thread.join() + self.thread = None + + self.buffer = None + + +class GeventContinuousScheduler(ContinuousScheduler): + """ + This scheduler is based on the thread scheduler but adapted to work with + gevent. When using gevent, it may monkey patch the threading modules + (`threading` and `_thread`). This results in the use of greenlets instead + of native threads. + + This is an issue because the sampler CANNOT run in a greenlet because + 1. Other greenlets doing sync work will prevent the sampler from running + 2. The greenlet runs in the same thread as other greenlets so when taking + a sample, other greenlets will have been evicted from the thread. This + results in a sample containing only the sampler's code. + """ + + mode = "gevent" # type: ContinuousProfilerMode + + def __init__(self, frequency, options, sdk_info, capture_func): + # type: (int, Dict[str, Any], SDKInfo, Callable[[Envelope], None]) -> None + + if ThreadPool is None: + raise ValueError("Profiler mode: {} is not available".format(self.mode)) + + super().__init__(frequency, options, sdk_info, capture_func) + + self.thread = None # type: Optional[_ThreadPool] + self.lock = threading.Lock() + + def ensure_running(self): + # type: () -> None + pid = os.getpid() + + # is running on the right process + if self.running and self.pid == pid: + return + + with self.lock: + # another thread may have tried to acquire the lock + # at the same time so it may start another thread + # make sure to check again before proceeding + if self.running and self.pid == pid: + return + + self.pid = pid + self.running = True + + # if the profiler thread is changing, + # we should create a new buffer along with it + self.reset_buffer() + + self.thread = ThreadPool(1) # type: ignore[misc] + try: + self.thread.spawn(self.run) + except RuntimeError: + # Unfortunately at this point the interpreter is in a state that no + # longer allows us to spawn a thread and we have to bail. 
+ self.running = False + self.thread = None + + def teardown(self): + # type: () -> None + if self.running: + self.running = False + + if self.thread is not None: + self.thread.join() + self.thread = None + + self.buffer = None + + +PROFILE_BUFFER_SECONDS = 60 + + +class ProfileBuffer: + def __init__(self, options, sdk_info, buffer_size, capture_func): + # type: (Dict[str, Any], SDKInfo, int, Callable[[Envelope], None]) -> None + self.options = options + self.sdk_info = sdk_info + self.buffer_size = buffer_size + self.capture_func = capture_func + + self.profiler_id = uuid.uuid4().hex + self.chunk = ProfileChunk() + + # Make sure to use the same clock to compute a sample's monotonic timestamp + # to ensure the timestamps are correctly aligned. + self.start_monotonic_time = now() + + # Make sure the start timestamp is defined only once per profiler id. + # This prevents issues with clock drift within a single profiler session. + # + # Subtracting the start_monotonic_time here to find a fixed starting position + # for relative monotonic timestamps for each sample. + self.start_timestamp = ( + datetime.now(timezone.utc).timestamp() - self.start_monotonic_time + ) + + def write(self, monotonic_time, sample): + # type: (float, ExtractedSample) -> None + if self.should_flush(monotonic_time): + self.flush() + self.chunk = ProfileChunk() + self.start_monotonic_time = now() + + self.chunk.write(self.start_timestamp + monotonic_time, sample) + + def should_flush(self, monotonic_time): + # type: (float) -> bool + + # If the delta between the new monotonic time and the start monotonic time + # exceeds the buffer size, it means we should flush the chunk + return monotonic_time - self.start_monotonic_time >= self.buffer_size + + def flush(self): + # type: () -> None + chunk = self.chunk.to_json(self.profiler_id, self.options, self.sdk_info) + envelope = Envelope() + envelope.add_profile_chunk(chunk) + self.capture_func(envelope) + + +class ProfileChunk: + def __init__(self): + # type: () -> None + self.chunk_id = uuid.uuid4().hex + + self.indexed_frames = {} # type: Dict[FrameId, int] + self.indexed_stacks = {} # type: Dict[StackId, int] + self.frames = [] # type: List[ProcessedFrame] + self.stacks = [] # type: List[ProcessedStack] + self.samples = [] # type: List[ProcessedSample] + + def write(self, ts, sample): + # type: (float, ExtractedSample) -> None + for tid, (stack_id, frame_ids, frames) in sample: + try: + # Check if the stack is indexed first, this lets us skip + # indexing frames if it's not necessary + if stack_id not in self.indexed_stacks: + for i, frame_id in enumerate(frame_ids): + if frame_id not in self.indexed_frames: + self.indexed_frames[frame_id] = len(self.indexed_frames) + self.frames.append(frames[i]) + + self.indexed_stacks[stack_id] = len(self.indexed_stacks) + self.stacks.append( + [self.indexed_frames[frame_id] for frame_id in frame_ids] + ) + + self.samples.append( + { + "timestamp": ts, + "thread_id": tid, + "stack_id": self.indexed_stacks[stack_id], + } + ) + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. + # When this happens, we abandon the current sample as it's bad. 
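# Condensed sketch of the frame/stack deduplication used in write() above:
# each unique frame and each unique stack is stored once, and samples refer
# to them by index. The module-level names here stand in for the instance
# attributes of the same name.
indexed_frames = {}   # frame_id -> position in `frames`
indexed_stacks = {}   # stack_id -> position in `stacks`
frames, stacks, samples = [], [], []

def record(ts, tid, stack_id, frame_ids, processed_frames):
    if stack_id not in indexed_stacks:
        for i, fid in enumerate(frame_ids):
            if fid not in indexed_frames:
                indexed_frames[fid] = len(indexed_frames)
                frames.append(processed_frames[i])
        indexed_stacks[stack_id] = len(indexed_stacks)
        stacks.append([indexed_frames[fid] for fid in frame_ids])
    # A sample is only a timestamp, a thread id, and an index into `stacks`.
    samples.append(
        {"timestamp": ts, "thread_id": tid, "stack_id": indexed_stacks[stack_id]}
    )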
+ capture_internal_exception(sys.exc_info()) + + def to_json(self, profiler_id, options, sdk_info): + # type: (str, Dict[str, Any], SDKInfo) -> Dict[str, Any] + profile = { + "frames": self.frames, + "stacks": self.stacks, + "samples": self.samples, + "thread_metadata": { + str(thread.ident): { + "name": str(thread.name), + } + for thread in threading.enumerate() + }, + } + + set_in_app_in_frames( + profile["frames"], + options["in_app_exclude"], + options["in_app_include"], + options["project_root"], + ) + + payload = { + "chunk_id": self.chunk_id, + "client_sdk": { + "name": sdk_info["name"], + "version": VERSION, + }, + "platform": "python", + "profile": profile, + "profiler_id": profiler_id, + "version": "2", + } + + for key in "release", "environment", "dist": + if options[key] is not None: + payload[key] = str(options[key]).strip() + + return payload diff --git a/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/transaction_profiler.py b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/transaction_profiler.py new file mode 100644 index 00000000..3743b7c9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/transaction_profiler.py @@ -0,0 +1,837 @@ +""" +This file is originally based on code from https://github.com/nylas/nylas-perftools, +which is published under the following license: + +The MIT License (MIT) + +Copyright (c) 2014 Nylas + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" + +import atexit +import os +import platform +import random +import sys +import threading +import time +import uuid +import warnings +from abc import ABC, abstractmethod +from collections import deque + +import sentry_sdk +from sentry_sdk._lru_cache import LRUCache +from sentry_sdk.profiler.utils import ( + DEFAULT_SAMPLING_FREQUENCY, + extract_stack, +) +from sentry_sdk.utils import ( + capture_internal_exception, + get_current_thread_meta, + is_gevent, + is_valid_sample_rate, + logger, + nanosecond_time, + set_in_app_in_frames, +) + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + from typing import Callable + from typing import Deque + from typing import Dict + from typing import List + from typing import Optional + from typing import Set + from typing import Type + from typing_extensions import TypedDict + + from sentry_sdk.profiler.utils import ( + ProcessedStack, + ProcessedFrame, + ProcessedThreadMetadata, + FrameId, + StackId, + ThreadId, + ExtractedSample, + ) + from sentry_sdk._types import Event, SamplingContext, ProfilerMode + + ProcessedSample = TypedDict( + "ProcessedSample", + { + "elapsed_since_start_ns": str, + "thread_id": ThreadId, + "stack_id": int, + }, + ) + + ProcessedProfile = TypedDict( + "ProcessedProfile", + { + "frames": List[ProcessedFrame], + "stacks": List[ProcessedStack], + "samples": List[ProcessedSample], + "thread_metadata": Dict[ThreadId, ProcessedThreadMetadata], + }, + ) + + +try: + from gevent.monkey import get_original + from gevent.threadpool import ThreadPool as _ThreadPool + + ThreadPool = _ThreadPool # type: Optional[Type[_ThreadPool]] + thread_sleep = get_original("time", "sleep") +except ImportError: + thread_sleep = time.sleep + + ThreadPool = None + + +_scheduler = None # type: Optional[Scheduler] + + +# The minimum number of unique samples that must exist in a profile to be +# considered valid. +PROFILE_MINIMUM_SAMPLES = 2 + + +def has_profiling_enabled(options): + # type: (Dict[str, Any]) -> bool + profiles_sampler = options["profiles_sampler"] + if profiles_sampler is not None: + return True + + profiles_sample_rate = options["profiles_sample_rate"] + if profiles_sample_rate is not None and profiles_sample_rate > 0: + return True + + profiles_sample_rate = options["_experiments"].get("profiles_sample_rate") + if profiles_sample_rate is not None: + logger.warning( + "_experiments['profiles_sample_rate'] is deprecated. " + "Please use the non-experimental profiles_sample_rate option " + "directly." + ) + if profiles_sample_rate > 0: + return True + + return False + + +def setup_profiler(options): + # type: (Dict[str, Any]) -> bool + global _scheduler + + if _scheduler is not None: + logger.debug("[Profiling] Profiler is already setup") + return False + + frequency = DEFAULT_SAMPLING_FREQUENCY + + if is_gevent(): + # If gevent has patched the threading modules then we cannot rely on + # them to spawn a native thread for sampling. + # Instead we default to the GeventScheduler which is capable of + # spawning native threads within gevent. + default_profiler_mode = GeventScheduler.mode + else: + default_profiler_mode = ThreadScheduler.mode + + if options.get("profiler_mode") is not None: + profiler_mode = options["profiler_mode"] + else: + profiler_mode = options.get("_experiments", {}).get("profiler_mode") + if profiler_mode is not None: + logger.warning( + "_experiments['profiler_mode'] is deprecated. Please use the " + "non-experimental profiler_mode option directly." 
+ ) + profiler_mode = profiler_mode or default_profiler_mode + + if ( + profiler_mode == ThreadScheduler.mode + # for legacy reasons, we'll keep supporting sleep mode for this scheduler + or profiler_mode == "sleep" + ): + _scheduler = ThreadScheduler(frequency=frequency) + elif profiler_mode == GeventScheduler.mode: + _scheduler = GeventScheduler(frequency=frequency) + else: + raise ValueError("Unknown profiler mode: {}".format(profiler_mode)) + + logger.debug( + "[Profiling] Setting up profiler in {mode} mode".format(mode=_scheduler.mode) + ) + _scheduler.setup() + + atexit.register(teardown_profiler) + + return True + + +def teardown_profiler(): + # type: () -> None + + global _scheduler + + if _scheduler is not None: + _scheduler.teardown() + + _scheduler = None + + +MAX_PROFILE_DURATION_NS = int(3e10) # 30 seconds + + +class Profile: + def __init__( + self, + sampled, # type: Optional[bool] + start_ns, # type: int + hub=None, # type: Optional[sentry_sdk.Hub] + scheduler=None, # type: Optional[Scheduler] + ): + # type: (...) -> None + self.scheduler = _scheduler if scheduler is None else scheduler + + self.event_id = uuid.uuid4().hex # type: str + + self.sampled = sampled # type: Optional[bool] + + # Various framework integrations are capable of overwriting the active thread id. + # If it is set to `None` at the end of the profile, we fall back to the default. + self._default_active_thread_id = get_current_thread_meta()[0] or 0 # type: int + self.active_thread_id = None # type: Optional[int] + + try: + self.start_ns = start_ns # type: int + except AttributeError: + self.start_ns = 0 + + self.stop_ns = 0 # type: int + self.active = False # type: bool + + self.indexed_frames = {} # type: Dict[FrameId, int] + self.indexed_stacks = {} # type: Dict[StackId, int] + self.frames = [] # type: List[ProcessedFrame] + self.stacks = [] # type: List[ProcessedStack] + self.samples = [] # type: List[ProcessedSample] + + self.unique_samples = 0 + + # Backwards compatibility with the old hub property + self._hub = None # type: Optional[sentry_sdk.Hub] + if hub is not None: + self._hub = hub + warnings.warn( + "The `hub` parameter is deprecated. Please do not use it.", + DeprecationWarning, + stacklevel=2, + ) + + def update_active_thread_id(self): + # type: () -> None + self.active_thread_id = get_current_thread_meta()[0] + logger.debug( + "[Profiling] updating active thread id to {tid}".format( + tid=self.active_thread_id + ) + ) + + def _set_initial_sampling_decision(self, sampling_context): + # type: (SamplingContext) -> None + """ + Sets the profile's sampling decision according to the following + precedence rules: + + 1. If the transaction to be profiled is not sampled, that decision + will be used, regardless of anything else. + + 2. Use `profiles_sample_rate` to decide. + """ + + # The corresponding transaction was not sampled, + # so don't generate a profile for it. + if not self.sampled: + logger.debug( + "[Profiling] Discarding profile because transaction is discarded." + ) + self.sampled = False + return + + # The profiler hasn't been properly initialized. + if self.scheduler is None: + logger.debug( + "[Profiling] Discarding profile because profiler was not started." 
+ ) + self.sampled = False + return + + client = sentry_sdk.get_client() + if not client.is_active(): + self.sampled = False + return + + options = client.options + + if callable(options.get("profiles_sampler")): + sample_rate = options["profiles_sampler"](sampling_context) + elif options["profiles_sample_rate"] is not None: + sample_rate = options["profiles_sample_rate"] + else: + sample_rate = options["_experiments"].get("profiles_sample_rate") + + # The profiles_sample_rate option was not set, so profiling + # was never enabled. + if sample_rate is None: + logger.debug( + "[Profiling] Discarding profile because profiling was not enabled." + ) + self.sampled = False + return + + if not is_valid_sample_rate(sample_rate, source="Profiling"): + logger.warning( + "[Profiling] Discarding profile because of invalid sample rate." + ) + self.sampled = False + return + + # Now we roll the dice. random.random is inclusive of 0, but not of 1, + # so strict < is safe here. In case sample_rate is a boolean, cast it + # to a float (True becomes 1.0 and False becomes 0.0) + self.sampled = random.random() < float(sample_rate) + + if self.sampled: + logger.debug("[Profiling] Initializing profile") + else: + logger.debug( + "[Profiling] Discarding profile because it's not included in the random sample (sample rate = {sample_rate})".format( + sample_rate=float(sample_rate) + ) + ) + + def start(self): + # type: () -> None + if not self.sampled or self.active: + return + + assert self.scheduler, "No scheduler specified" + logger.debug("[Profiling] Starting profile") + self.active = True + if not self.start_ns: + self.start_ns = nanosecond_time() + self.scheduler.start_profiling(self) + + def stop(self): + # type: () -> None + if not self.sampled or not self.active: + return + + assert self.scheduler, "No scheduler specified" + logger.debug("[Profiling] Stopping profile") + self.active = False + self.stop_ns = nanosecond_time() + + def __enter__(self): + # type: () -> Profile + scope = sentry_sdk.get_isolation_scope() + old_profile = scope.profile + scope.profile = self + + self._context_manager_state = (scope, old_profile) + + self.start() + + return self + + def __exit__(self, ty, value, tb): + # type: (Optional[Any], Optional[Any], Optional[Any]) -> None + self.stop() + + scope, old_profile = self._context_manager_state + del self._context_manager_state + + scope.profile = old_profile + + def write(self, ts, sample): + # type: (int, ExtractedSample) -> None + if not self.active: + return + + if ts < self.start_ns: + return + + offset = ts - self.start_ns + if offset > MAX_PROFILE_DURATION_NS: + self.stop() + return + + self.unique_samples += 1 + + elapsed_since_start_ns = str(offset) + + for tid, (stack_id, frame_ids, frames) in sample: + try: + # Check if the stack is indexed first, this lets us skip + # indexing frames if it's not necessary + if stack_id not in self.indexed_stacks: + for i, frame_id in enumerate(frame_ids): + if frame_id not in self.indexed_frames: + self.indexed_frames[frame_id] = len(self.indexed_frames) + self.frames.append(frames[i]) + + self.indexed_stacks[stack_id] = len(self.indexed_stacks) + self.stacks.append( + [self.indexed_frames[frame_id] for frame_id in frame_ids] + ) + + self.samples.append( + { + "elapsed_since_start_ns": elapsed_since_start_ns, + "thread_id": tid, + "stack_id": self.indexed_stacks[stack_id], + } + ) + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. 
+ # When this happens, we abandon the current sample as it's bad. + capture_internal_exception(sys.exc_info()) + + def process(self): + # type: () -> ProcessedProfile + + # This collects the thread metadata at the end of a profile. Doing it + # this way means that any threads that terminate before the profile ends + # will not have any metadata associated with it. + thread_metadata = { + str(thread.ident): { + "name": str(thread.name), + } + for thread in threading.enumerate() + } # type: Dict[str, ProcessedThreadMetadata] + + return { + "frames": self.frames, + "stacks": self.stacks, + "samples": self.samples, + "thread_metadata": thread_metadata, + } + + def to_json(self, event_opt, options): + # type: (Event, Dict[str, Any]) -> Dict[str, Any] + profile = self.process() + + set_in_app_in_frames( + profile["frames"], + options["in_app_exclude"], + options["in_app_include"], + options["project_root"], + ) + + return { + "environment": event_opt.get("environment"), + "event_id": self.event_id, + "platform": "python", + "profile": profile, + "release": event_opt.get("release", ""), + "timestamp": event_opt["start_timestamp"], + "version": "1", + "device": { + "architecture": platform.machine(), + }, + "os": { + "name": platform.system(), + "version": platform.release(), + }, + "runtime": { + "name": platform.python_implementation(), + "version": platform.python_version(), + }, + "transactions": [ + { + "id": event_opt["event_id"], + "name": event_opt["transaction"], + # we start the transaction before the profile and this is + # the transaction start time relative to the profile, so we + # hardcode it to 0 until we can start the profile before + "relative_start_ns": "0", + # use the duration of the profile instead of the transaction + # because we end the transaction after the profile + "relative_end_ns": str(self.stop_ns - self.start_ns), + "trace_id": event_opt["contexts"]["trace"]["trace_id"], + "active_thread_id": str( + self._default_active_thread_id + if self.active_thread_id is None + else self.active_thread_id + ), + } + ], + } + + def valid(self): + # type: () -> bool + client = sentry_sdk.get_client() + if not client.is_active(): + return False + + if not has_profiling_enabled(client.options): + return False + + if self.sampled is None or not self.sampled: + if client.transport: + client.transport.record_lost_event( + "sample_rate", data_category="profile" + ) + return False + + if self.unique_samples < PROFILE_MINIMUM_SAMPLES: + if client.transport: + client.transport.record_lost_event( + "insufficient_data", data_category="profile" + ) + logger.debug("[Profiling] Discarding profile because insufficient samples.") + return False + + return True + + @property + def hub(self): + # type: () -> Optional[sentry_sdk.Hub] + warnings.warn( + "The `hub` attribute is deprecated. Please do not access it.", + DeprecationWarning, + stacklevel=2, + ) + return self._hub + + @hub.setter + def hub(self, value): + # type: (Optional[sentry_sdk.Hub]) -> None + warnings.warn( + "The `hub` attribute is deprecated. 
Please do not set it.", + DeprecationWarning, + stacklevel=2, + ) + self._hub = value + + +class Scheduler(ABC): + mode = "unknown" # type: ProfilerMode + + def __init__(self, frequency): + # type: (int) -> None + self.interval = 1.0 / frequency + + self.sampler = self.make_sampler() + + # cap the number of new profiles at any time so it does not grow infinitely + self.new_profiles = deque(maxlen=128) # type: Deque[Profile] + self.active_profiles = set() # type: Set[Profile] + + def __enter__(self): + # type: () -> Scheduler + self.setup() + return self + + def __exit__(self, ty, value, tb): + # type: (Optional[Any], Optional[Any], Optional[Any]) -> None + self.teardown() + + @abstractmethod + def setup(self): + # type: () -> None + pass + + @abstractmethod + def teardown(self): + # type: () -> None + pass + + def ensure_running(self): + # type: () -> None + """ + Ensure the scheduler is running. By default, this method is a no-op. + The method should be overridden by any implementation for which it is + relevant. + """ + return None + + def start_profiling(self, profile): + # type: (Profile) -> None + self.ensure_running() + self.new_profiles.append(profile) + + def make_sampler(self): + # type: () -> Callable[..., None] + cwd = os.getcwd() + + cache = LRUCache(max_size=256) + + def _sample_stack(*args, **kwargs): + # type: (*Any, **Any) -> None + """ + Take a sample of the stack on all the threads in the process. + This should be called at a regular interval to collect samples. + """ + # no profiles taking place, so we can stop early + if not self.new_profiles and not self.active_profiles: + # make sure to clear the cache if we're not profiling so we dont + # keep a reference to the last stack of frames around + return + + # This is the number of profiles we want to pop off. + # It's possible another thread adds a new profile to + # the list and we spend longer than we want inside + # the loop below. + # + # Also make sure to set this value before extracting + # frames so we do not write to any new profiles that + # were started after this point. + new_profiles = len(self.new_profiles) + + now = nanosecond_time() + + try: + sample = [ + (str(tid), extract_stack(frame, cache, cwd)) + for tid, frame in sys._current_frames().items() + ] + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. + # When this happens, we abandon the current sample as it's bad. + capture_internal_exception(sys.exc_info()) + return + + # Move the new profiles into the active_profiles set. + # + # We cannot directly add the to active_profiles set + # in `start_profiling` because it is called from other + # threads which can cause a RuntimeError when it the + # set sizes changes during iteration without a lock. + # + # We also want to avoid using a lock here so threads + # that are starting profiles are not blocked until it + # can acquire the lock. + for _ in range(new_profiles): + self.active_profiles.add(self.new_profiles.popleft()) + + inactive_profiles = [] + + for profile in self.active_profiles: + if profile.active: + profile.write(now, sample) + else: + # If a profile is marked inactive, we buffer it + # to `inactive_profiles` so it can be removed. + # We cannot remove it here as it would result + # in a RuntimeError. 
+ inactive_profiles.append(profile) + + for profile in inactive_profiles: + self.active_profiles.remove(profile) + + return _sample_stack + + +class ThreadScheduler(Scheduler): + """ + This scheduler is based on running a daemon thread that will call + the sampler at a regular interval. + """ + + mode = "thread" # type: ProfilerMode + name = "sentry.profiler.ThreadScheduler" + + def __init__(self, frequency): + # type: (int) -> None + super().__init__(frequency=frequency) + + # used to signal to the thread that it should stop + self.running = False + self.thread = None # type: Optional[threading.Thread] + self.pid = None # type: Optional[int] + self.lock = threading.Lock() + + def setup(self): + # type: () -> None + pass + + def teardown(self): + # type: () -> None + if self.running: + self.running = False + if self.thread is not None: + self.thread.join() + + def ensure_running(self): + # type: () -> None + """ + Check that the profiler has an active thread to run in, and start one if + that's not the case. + + Note that this might fail (e.g. in Python 3.12 it's not possible to + spawn new threads at interpreter shutdown). In that case self.running + will be False after running this function. + """ + pid = os.getpid() + + # is running on the right process + if self.running and self.pid == pid: + return + + with self.lock: + # another thread may have tried to acquire the lock + # at the same time so it may start another thread + # make sure to check again before proceeding + if self.running and self.pid == pid: + return + + self.pid = pid + self.running = True + + # make sure the thread is a daemon here otherwise this + # can keep the application running after other threads + # have exited + self.thread = threading.Thread(name=self.name, target=self.run, daemon=True) + try: + self.thread.start() + except RuntimeError: + # Unfortunately at this point the interpreter is in a state that no + # longer allows us to spawn a thread and we have to bail. + self.running = False + self.thread = None + return + + def run(self): + # type: () -> None + last = time.perf_counter() + + while self.running: + self.sampler() + + # some time may have elapsed since the last time + # we sampled, so we need to account for that and + # not sleep for too long + elapsed = time.perf_counter() - last + if elapsed < self.interval: + thread_sleep(self.interval - elapsed) + + # after sleeping, make sure to take the current + # timestamp so we can use it next iteration + last = time.perf_counter() + + +class GeventScheduler(Scheduler): + """ + This scheduler is based on the thread scheduler but adapted to work with + gevent. When using gevent, it may monkey patch the threading modules + (`threading` and `_thread`). This results in the use of greenlets instead + of native threads. + + This is an issue because the sampler CANNOT run in a greenlet because + 1. Other greenlets doing sync work will prevent the sampler from running + 2. The greenlet runs in the same thread as other greenlets so when taking + a sample, other greenlets will have been evicted from the thread. This + results in a sample containing only the sampler's code. 
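# Hedged sketch of the native-thread workaround the gevent schedulers use:
# the sampler runs on a single real OS thread via ThreadPool(1), outside the
# gevent hub, and sleeps with the unpatched time.sleep so it never yields to
# other greenlets. Assumes gevent is installed; the sampler body is illustrative.
from gevent.monkey import get_original
from gevent.threadpool import ThreadPool

thread_sleep = get_original("time", "sleep")

def sampler_loop():
    for _ in range(3):
        print("sample taken on a native thread")
        thread_sleep(0.01)

pool = ThreadPool(1)      # one native worker thread
pool.spawn(sampler_loop)
pool.join()               # wait for the worker to finish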
+ """ + + mode = "gevent" # type: ProfilerMode + name = "sentry.profiler.GeventScheduler" + + def __init__(self, frequency): + # type: (int) -> None + + if ThreadPool is None: + raise ValueError("Profiler mode: {} is not available".format(self.mode)) + + super().__init__(frequency=frequency) + + # used to signal to the thread that it should stop + self.running = False + self.thread = None # type: Optional[_ThreadPool] + self.pid = None # type: Optional[int] + + # This intentionally uses the gevent patched threading.Lock. + # The lock will be required when first trying to start profiles + # as we need to spawn the profiler thread from the greenlets. + self.lock = threading.Lock() + + def setup(self): + # type: () -> None + pass + + def teardown(self): + # type: () -> None + if self.running: + self.running = False + if self.thread is not None: + self.thread.join() + + def ensure_running(self): + # type: () -> None + pid = os.getpid() + + # is running on the right process + if self.running and self.pid == pid: + return + + with self.lock: + # another thread may have tried to acquire the lock + # at the same time so it may start another thread + # make sure to check again before proceeding + if self.running and self.pid == pid: + return + + self.pid = pid + self.running = True + + self.thread = ThreadPool(1) # type: ignore[misc] + try: + self.thread.spawn(self.run) + except RuntimeError: + # Unfortunately at this point the interpreter is in a state that no + # longer allows us to spawn a thread and we have to bail. + self.running = False + self.thread = None + return + + def run(self): + # type: () -> None + last = time.perf_counter() + + while self.running: + self.sampler() + + # some time may have elapsed since the last time + # we sampled, so we need to account for that and + # not sleep for too long + elapsed = time.perf_counter() - last + if elapsed < self.interval: + thread_sleep(self.interval - elapsed) + + # after sleeping, make sure to take the current + # timestamp so we can use it next iteration + last = time.perf_counter() diff --git a/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/utils.py b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/utils.py new file mode 100644 index 00000000..3554cddb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/sentry_sdk/profiler/utils.py @@ -0,0 +1,199 @@ +import os +from collections import deque + +from sentry_sdk._compat import PY311 +from sentry_sdk.utils import filename_for_module + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from sentry_sdk._lru_cache import LRUCache + from types import FrameType + from typing import Deque + from typing import List + from typing import Optional + from typing import Sequence + from typing import Tuple + from typing_extensions import TypedDict + + ThreadId = str + + ProcessedStack = List[int] + + ProcessedFrame = TypedDict( + "ProcessedFrame", + { + "abs_path": str, + "filename": Optional[str], + "function": str, + "lineno": int, + "module": Optional[str], + }, + ) + + ProcessedThreadMetadata = TypedDict( + "ProcessedThreadMetadata", + {"name": str}, + ) + + FrameId = Tuple[ + str, # abs_path + int, # lineno + str, # function + ] + FrameIds = Tuple[FrameId, ...] + + # The exact value of this id is not very meaningful. The purpose + # of this id is to give us a compact and unique identifier for a + # raw stack that can be used as a key to a dictionary so that it + # can be used during the sampled format generation. 
+ StackId = Tuple[int, int] + + ExtractedStack = Tuple[StackId, FrameIds, List[ProcessedFrame]] + ExtractedSample = Sequence[Tuple[ThreadId, ExtractedStack]] + +# The default sampling frequency to use. This is set at 101 in order to +# mitigate the effects of lockstep sampling. +DEFAULT_SAMPLING_FREQUENCY = 101 + + +# We want to impose a stack depth limit so that samples aren't too large. +MAX_STACK_DEPTH = 128 + + +if PY311: + + def get_frame_name(frame): + # type: (FrameType) -> str + return frame.f_code.co_qualname + +else: + + def get_frame_name(frame): + # type: (FrameType) -> str + + f_code = frame.f_code + co_varnames = f_code.co_varnames + + # co_name only contains the frame name. If the frame was a method, + # the class name will NOT be included. + name = f_code.co_name + + # if it was a method, we can get the class name by inspecting + # the f_locals for the `self` argument + try: + if ( + # the co_varnames start with the frame's positional arguments + # and we expect the first to be `self` if its an instance method + co_varnames + and co_varnames[0] == "self" + and "self" in frame.f_locals + ): + for cls in type(frame.f_locals["self"]).__mro__: + if name in cls.__dict__: + return "{}.{}".format(cls.__name__, name) + except (AttributeError, ValueError): + pass + + # if it was a class method, (decorated with `@classmethod`) + # we can get the class name by inspecting the f_locals for the `cls` argument + try: + if ( + # the co_varnames start with the frame's positional arguments + # and we expect the first to be `cls` if its a class method + co_varnames + and co_varnames[0] == "cls" + and "cls" in frame.f_locals + ): + for cls in frame.f_locals["cls"].__mro__: + if name in cls.__dict__: + return "{}.{}".format(cls.__name__, name) + except (AttributeError, ValueError): + pass + + # nothing we can do if it is a staticmethod (decorated with @staticmethod) + + # we've done all we can, time to give up and return what we have + return name + + +def frame_id(raw_frame): + # type: (FrameType) -> FrameId + return (raw_frame.f_code.co_filename, raw_frame.f_lineno, get_frame_name(raw_frame)) + + +def extract_frame(fid, raw_frame, cwd): + # type: (FrameId, FrameType, str) -> ProcessedFrame + abs_path = raw_frame.f_code.co_filename + + try: + module = raw_frame.f_globals["__name__"] + except Exception: + module = None + + # namedtuples can be many times slower when initialing + # and accessing attribute so we opt to use a tuple here instead + return { + # This originally was `os.path.abspath(abs_path)` but that had + # a large performance overhead. + # + # According to docs, this is equivalent to + # `os.path.normpath(os.path.join(os.getcwd(), path))`. + # The `os.getcwd()` call is slow here, so we precompute it. + # + # Additionally, since we are using normalized path already, + # we skip calling `os.path.normpath` entirely. + "abs_path": os.path.join(cwd, abs_path), + "module": module, + "filename": filename_for_module(module, abs_path) or None, + "function": fid[2], + "lineno": raw_frame.f_lineno, + } + + +def extract_stack( + raw_frame, # type: Optional[FrameType] + cache, # type: LRUCache + cwd, # type: str + max_stack_depth=MAX_STACK_DEPTH, # type: int +): + # type: (...) -> ExtractedStack + """ + Extracts the stack starting the specified frame. The extracted stack + assumes the specified frame is the top of the stack, and works back + to the bottom of the stack. 
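# Small sketch of the frame-name logic in get_frame_name above: on Python
# 3.11+ co_qualname already includes the class ("Widget.render"), while older
# versions only expose co_name ("render"), hence the self/cls inspection.
# The Widget class is purely illustrative.
import sys

class Widget:
    def render(self):
        code = sys._getframe().f_code
        return getattr(code, "co_qualname", code.co_name)

print(Widget().render())  # "Widget.render" on 3.11+, "render" before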
+ + In the event that the stack is more than `MAX_STACK_DEPTH` frames deep, + only the first `MAX_STACK_DEPTH` frames will be returned. + """ + + raw_frames = deque(maxlen=max_stack_depth) # type: Deque[FrameType] + + while raw_frame is not None: + f_back = raw_frame.f_back + raw_frames.append(raw_frame) + raw_frame = f_back + + frame_ids = tuple(frame_id(raw_frame) for raw_frame in raw_frames) + frames = [] + for i, fid in enumerate(frame_ids): + frame = cache.get(fid) + if frame is None: + frame = extract_frame(fid, raw_frames[i], cwd) + cache.set(fid, frame) + frames.append(frame) + + # Instead of mapping the stack into frame ids and hashing + # that as a tuple, we can directly hash the stack. + # This saves us from having to generate yet another list. + # Additionally, using the stack as the key directly is + # costly because the stack can be large, so we pre-hash + # the stack, and use the hash as the key as this will be + # needed a few times to improve performance. + # + # To Reduce the likelihood of hash collisions, we include + # the stack depth. This means that only stacks of the same + # depth can suffer from hash collisions. + stack_id = len(raw_frames), hash(frame_ids) + + return stack_id, frame_ids, frames |
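# Hedged usage sketch tying this module's helpers together: extract the
# current thread's stack once, reusing an LRU cache of processed frames.
# Output values depend on where this runs.
import os
import sys

from sentry_sdk._lru_cache import LRUCache
from sentry_sdk.profiler.utils import extract_stack

cache = LRUCache(max_size=256)   # reuses ProcessedFrame dicts across samples
cwd = os.getcwd()

frame = sys._getframe()          # current frame is treated as the top of the stack
stack_id, frame_ids, frames = extract_stack(frame, cache, cwd)

# stack_id is (depth, hash of the frame id tuple); frames are JSON-ready dicts.
print(stack_id[0], frames[0]["function"], frames[0]["lineno"])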