about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py')
-rw-r--r--.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py126
1 files changed, 126 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py
new file mode 100644
index 00000000..2ba4a634
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/utils/_telemetry.py
@@ -0,0 +1,126 @@
+from queue import Queue
+from threading import Lock, Thread
+from typing import Dict, Optional, Union
+from urllib.parse import quote
+
+from .. import constants, logging
+from . import build_hf_headers, get_session, hf_raise_for_status
+
+
+logger = logging.get_logger(__name__)
+
+# Telemetry is sent by a separate thread to avoid blocking the main thread.
+# A daemon thread is started once and consume tasks from the _TELEMETRY_QUEUE.
+# If the thread stops for some reason -shouldn't happen-, we restart a new one.
+_TELEMETRY_THREAD: Optional[Thread] = None
+_TELEMETRY_THREAD_LOCK = Lock()  # Lock to avoid starting multiple threads in parallel
+_TELEMETRY_QUEUE: Queue = Queue()
+
+
+def send_telemetry(
+    topic: str,
+    *,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Union[Dict, str, None] = None,
+) -> None:
+    """
+    Sends telemetry that helps tracking usage of different HF libraries.
+
+    This usage data helps us debug issues and prioritize new features. However, we understand that not everyone wants
+    to share additional information, and we respect your privacy. You can disable telemetry collection by setting the
+    `HF_HUB_DISABLE_TELEMETRY=1` as environment variable. Telemetry is also disabled in offline mode (i.e. when setting
+    `HF_HUB_OFFLINE=1`).
+
+    Telemetry collection is run in a separate thread to minimize impact for the user.
+
+    Args:
+        topic (`str`):
+            Name of the topic that is monitored. The topic is directly used to build the URL. If you want to monitor
+            subtopics, just use "/" separation. Examples: "gradio", "transformers/examples",...
+        library_name (`str`, *optional*):
+            The name of the library that is making the HTTP request. Will be added to the user-agent header.
+        library_version (`str`, *optional*):
+            The version of the library that is making the HTTP request. Will be added to the user-agent header.
+        user_agent (`str`, `dict`, *optional*):
+            The user agent info in the form of a dictionary or a single string. It will be completed with information about the installed packages.
+
+    Example:
+    ```py
+    >>> from huggingface_hub.utils import send_telemetry
+
+    # Send telemetry without library information
+    >>> send_telemetry("ping")
+
+    # Send telemetry to subtopic with library information
+    >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")
+
+    # Send telemetry with additional data
+    >>> send_telemetry(
+    ...     topic="examples",
+    ...     library_name="transformers",
+    ...     library_version="4.26.0",
+    ...     user_agent={"pipeline": "text_classification", "framework": "flax"},
+    ... )
+    ```
+    """
+    if constants.HF_HUB_OFFLINE or constants.HF_HUB_DISABLE_TELEMETRY:
+        return
+
+    _start_telemetry_thread()  # starts thread only if doesn't exist yet
+    _TELEMETRY_QUEUE.put(
+        {"topic": topic, "library_name": library_name, "library_version": library_version, "user_agent": user_agent}
+    )
+
+
+def _start_telemetry_thread():
+    """Start a daemon thread to consume tasks from the telemetry queue.
+
+    If the thread is interrupted, start a new one.
+    """
+    with _TELEMETRY_THREAD_LOCK:  # avoid to start multiple threads if called concurrently
+        global _TELEMETRY_THREAD
+        if _TELEMETRY_THREAD is None or not _TELEMETRY_THREAD.is_alive():
+            _TELEMETRY_THREAD = Thread(target=_telemetry_worker, daemon=True)
+            _TELEMETRY_THREAD.start()
+
+
+def _telemetry_worker():
+    """Wait for a task and consume it."""
+    while True:
+        kwargs = _TELEMETRY_QUEUE.get()
+        _send_telemetry_in_thread(**kwargs)
+        _TELEMETRY_QUEUE.task_done()
+
+
+def _send_telemetry_in_thread(
+    topic: str,
+    *,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Union[Dict, str, None] = None,
+) -> None:
+    """Contains the actual data sending data to the Hub.
+
+    This function is called directly in gradio's analytics because
+    it is not possible to send telemetry from a daemon thread.
+
+    See here: https://github.com/gradio-app/gradio/pull/8180
+
+    Please do not rename or remove this function.
+    """
+    path = "/".join(quote(part) for part in topic.split("/") if len(part) > 0)
+    try:
+        r = get_session().head(
+            f"{constants.ENDPOINT}/api/telemetry/{path}",
+            headers=build_hf_headers(
+                token=False,  # no need to send a token for telemetry
+                library_name=library_name,
+                library_version=library_version,
+                user_agent=user_agent,
+            ),
+        )
+        hf_raise_for_status(r)
+    except Exception as e:
+        # We don't want to error in case of connection errors of any kind.
+        logger.debug(f"Error while sending telemetry: {e}")