about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/litellm/integrations/prometheus_services.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/litellm/integrations/prometheus_services.py')
-rw-r--r--.venv/lib/python3.12/site-packages/litellm/integrations/prometheus_services.py222
1 files changed, 222 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/litellm/integrations/prometheus_services.py b/.venv/lib/python3.12/site-packages/litellm/integrations/prometheus_services.py
new file mode 100644
index 00000000..4bf293fb
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/litellm/integrations/prometheus_services.py
@@ -0,0 +1,222 @@
+# used for monitoring litellm services health on `/metrics` endpoint on LiteLLM Proxy
+#### What this does ####
+#    On success + failure, log events to Prometheus for litellm / adjacent services (litellm, redis, postgres, llm api providers)
+
+
+from typing import List, Optional, Union
+
+from litellm._logging import print_verbose, verbose_logger
+from litellm.types.integrations.prometheus import LATENCY_BUCKETS
+from litellm.types.services import ServiceLoggerPayload, ServiceTypes
+
+FAILED_REQUESTS_LABELS = ["error_class", "function_name"]
+
+
+class PrometheusServicesLogger:
+    # Class variables or attributes
+    litellm_service_latency = None  # Class-level attribute to store the Histogram
+
+    def __init__(
+        self,
+        mock_testing: bool = False,
+        **kwargs,
+    ):
+        try:
+            try:
+                from prometheus_client import REGISTRY, Counter, Histogram
+            except ImportError:
+                raise Exception(
+                    "Missing prometheus_client. Run `pip install prometheus-client`"
+                )
+
+            self.Histogram = Histogram
+            self.Counter = Counter
+            self.REGISTRY = REGISTRY
+
+            verbose_logger.debug("in init prometheus services metrics")
+
+            self.services = [item.value for item in ServiceTypes]
+
+            self.payload_to_prometheus_map = (
+                {}
+            )  # store the prometheus histogram/counter we need to call for each field in payload
+
+            for service in self.services:
+                histogram = self.create_histogram(service, type_of_request="latency")
+                counter_failed_request = self.create_counter(
+                    service,
+                    type_of_request="failed_requests",
+                    additional_labels=FAILED_REQUESTS_LABELS,
+                )
+                counter_total_requests = self.create_counter(
+                    service, type_of_request="total_requests"
+                )
+                self.payload_to_prometheus_map[service] = [
+                    histogram,
+                    counter_failed_request,
+                    counter_total_requests,
+                ]
+
+            self.prometheus_to_amount_map: dict = (
+                {}
+            )  # the field / value in ServiceLoggerPayload the object needs to be incremented by
+
+            ### MOCK TESTING ###
+            self.mock_testing = mock_testing
+            self.mock_testing_success_calls = 0
+            self.mock_testing_failure_calls = 0
+
+        except Exception as e:
+            print_verbose(f"Got exception on init prometheus client {str(e)}")
+            raise e
+
+    def is_metric_registered(self, metric_name) -> bool:
+        for metric in self.REGISTRY.collect():
+            if metric_name == metric.name:
+                return True
+        return False
+
+    def _get_metric(self, metric_name):
+        """
+        Helper function to get a metric from the registry by name.
+        """
+        return self.REGISTRY._names_to_collectors.get(metric_name)
+
+    def create_histogram(self, service: str, type_of_request: str):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
+        is_registered = self.is_metric_registered(metric_name)
+        if is_registered:
+            return self._get_metric(metric_name)
+        return self.Histogram(
+            metric_name,
+            "Latency for {} service".format(service),
+            labelnames=[service],
+            buckets=LATENCY_BUCKETS,
+        )
+
+    def create_counter(
+        self,
+        service: str,
+        type_of_request: str,
+        additional_labels: Optional[List[str]] = None,
+    ):
+        metric_name = "litellm_{}_{}".format(service, type_of_request)
+        is_registered = self.is_metric_registered(metric_name)
+        if is_registered:
+            return self._get_metric(metric_name)
+        return self.Counter(
+            metric_name,
+            "Total {} for {} service".format(type_of_request, service),
+            labelnames=[service] + (additional_labels or []),
+        )
+
+    def observe_histogram(
+        self,
+        histogram,
+        labels: str,
+        amount: float,
+    ):
+        assert isinstance(histogram, self.Histogram)
+
+        histogram.labels(labels).observe(amount)
+
+    def increment_counter(
+        self,
+        counter,
+        labels: str,
+        amount: float,
+        additional_labels: Optional[List[str]] = [],
+    ):
+        assert isinstance(counter, self.Counter)
+
+        if additional_labels:
+            counter.labels(labels, *additional_labels).inc(amount)
+        else:
+            counter.labels(labels).inc(amount)
+
+    def service_success_hook(self, payload: ServiceLoggerPayload):
+        if self.mock_testing:
+            self.mock_testing_success_calls += 1
+
+        if payload.service.value in self.payload_to_prometheus_map:
+            prom_objects = self.payload_to_prometheus_map[payload.service.value]
+            for obj in prom_objects:
+                if isinstance(obj, self.Histogram):
+                    self.observe_histogram(
+                        histogram=obj,
+                        labels=payload.service.value,
+                        amount=payload.duration,
+                    )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )
+
+    def service_failure_hook(self, payload: ServiceLoggerPayload):
+        if self.mock_testing:
+            self.mock_testing_failure_calls += 1
+
+        if payload.service.value in self.payload_to_prometheus_map:
+            prom_objects = self.payload_to_prometheus_map[payload.service.value]
+            for obj in prom_objects:
+                if isinstance(obj, self.Counter):
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1,  # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS
+                    )
+
+    async def async_service_success_hook(self, payload: ServiceLoggerPayload):
+        """
+        Log successful call to prometheus
+        """
+        if self.mock_testing:
+            self.mock_testing_success_calls += 1
+
+        if payload.service.value in self.payload_to_prometheus_map:
+            prom_objects = self.payload_to_prometheus_map[payload.service.value]
+            for obj in prom_objects:
+                if isinstance(obj, self.Histogram):
+                    self.observe_histogram(
+                        histogram=obj,
+                        labels=payload.service.value,
+                        amount=payload.duration,
+                    )
+                elif isinstance(obj, self.Counter) and "total_requests" in obj._name:
+                    self.increment_counter(
+                        counter=obj,
+                        labels=payload.service.value,
+                        amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                    )
+
+    async def async_service_failure_hook(
+        self,
+        payload: ServiceLoggerPayload,
+        error: Union[str, Exception],
+    ):
+        if self.mock_testing:
+            self.mock_testing_failure_calls += 1
+        error_class = error.__class__.__name__
+        function_name = payload.call_type
+
+        if payload.service.value in self.payload_to_prometheus_map:
+            prom_objects = self.payload_to_prometheus_map[payload.service.value]
+            for obj in prom_objects:
+                # increment both failed and total requests
+                if isinstance(obj, self.Counter):
+                    if "failed_requests" in obj._name:
+                        self.increment_counter(
+                            counter=obj,
+                            labels=payload.service.value,
+                            # log additional_labels=["error_class", "function_name"], used for debugging what's going wrong with the DB
+                            additional_labels=[error_class, function_name],
+                            amount=1,  # LOG ERROR COUNT TO PROMETHEUS
+                        )
+                    else:
+                        self.increment_counter(
+                            counter=obj,
+                            labels=payload.service.value,
+                            amount=1,  # LOG TOTAL REQUESTS TO PROMETHEUS
+                        )