two version of R2R are here HEAD master

author: S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/storage/filedatalake/_shared/request_handlers.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
1 files changed, 270 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/_shared/request_handlers.py b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/_shared/request_handlers.py
new file mode 100644
index 00000000..54927cc7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/storage/filedatalake/_shared/request_handlers.py
@@ -0,0 +1,270 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import logging
+import stat
+from io import (SEEK_END, SEEK_SET, UnsupportedOperation)
+from os import fstat
+from typing import Dict, Optional
+
+import isodate
+
+
+_LOGGER = logging.getLogger(__name__)
+
+_REQUEST_DELIMITER_PREFIX = "batch_"
+_HTTP1_1_IDENTIFIER = "HTTP/1.1"
+_HTTP_LINE_ENDING = "\r\n"
+
+
+def serialize_iso(attr):
+    """Serialize Datetime object into ISO-8601 formatted string.
+
+    :param Datetime attr: Object to be serialized.
+    :rtype: str
+    :raises: ValueError if format invalid.
+    """
+    if not attr:
+        return None
+    if isinstance(attr, str):
+        attr = isodate.parse_datetime(attr)
+    try:
+        utc = attr.utctimetuple()
+        if utc.tm_year > 9999 or utc.tm_year < 1:
+            raise OverflowError("Hit max or min date")
+
+        date = f"{utc.tm_year:04}-{utc.tm_mon:02}-{utc.tm_mday:02}T{utc.tm_hour:02}:{utc.tm_min:02}:{utc.tm_sec:02}"
+        return date + 'Z'
+    except (ValueError, OverflowError) as err:
+        raise ValueError("Unable to serialize datetime object.") from err
+    except AttributeError as err:
+        raise TypeError("ISO-8601 object must be valid datetime object.") from err
+
+def get_length(data):
+    length = None
+    # Check if object implements the __len__ method, covers most input cases such as bytearray.
+    try:
+        length = len(data)
+    except:  # pylint: disable=bare-except
+        pass
+
+    if not length:
+        # Check if the stream is a file-like stream object.
+        # If so, calculate the size using the file descriptor.
+        try:
+            fileno = data.fileno()
+        except (AttributeError, UnsupportedOperation):
+            pass
+        else:
+            try:
+                mode = fstat(fileno).st_mode
+                if stat.S_ISREG(mode) or stat.S_ISLNK(mode):
+                    #st_size only meaningful if regular file or symlink, other types
+                    # e.g. sockets may return misleading sizes like 0
+                    return fstat(fileno).st_size
+            except OSError:
+                # Not a valid fileno, may be possible requests returned
+                # a socket number?
+                pass
+
+        # If the stream is seekable and tell() is implemented, calculate the stream size.
+        try:
+            current_position = data.tell()
+            data.seek(0, SEEK_END)
+            length = data.tell() - current_position
+            data.seek(current_position, SEEK_SET)
+        except (AttributeError, OSError, UnsupportedOperation):
+            pass
+
+    return length
+
+
+def read_length(data):
+    try:
+        if hasattr(data, 'read'):
+            read_data = b''
+            for chunk in iter(lambda: data.read(4096), b""):
+                read_data += chunk
+            return len(read_data), read_data
+        if hasattr(data, '__iter__'):
+            read_data = b''
+            for chunk in data:
+                read_data += chunk
+            return len(read_data), read_data
+    except:  # pylint: disable=bare-except
+        pass
+    raise ValueError("Unable to calculate content length, please specify.")
+
+
+def validate_and_format_range_headers(
+        start_range, end_range, start_range_required=True,
+        end_range_required=True, check_content_md5=False, align_to_page=False):
+    # If end range is provided, start range must be provided
+    if (start_range_required or end_range is not None) and start_range is None:
+        raise ValueError("start_range value cannot be None.")
+    if end_range_required and end_range is None:
+        raise ValueError("end_range value cannot be None.")
+
+    # Page ranges must be 512 aligned
+    if align_to_page:
+        if start_range is not None and start_range % 512 != 0:
+            raise ValueError(f"Invalid page blob start_range: {start_range}. "
+                             "The size must be aligned to a 512-byte boundary.")
+        if end_range is not None and end_range % 512 != 511:
+            raise ValueError(f"Invalid page blob end_range: {end_range}. "
+                             "The size must be aligned to a 512-byte boundary.")
+
+    # Format based on whether end_range is present
+    range_header = None
+    if end_range is not None:
+        range_header = f'bytes={start_range}-{end_range}'
+    elif start_range is not None:
+        range_header = f"bytes={start_range}-"
+
+    # Content MD5 can only be provided for a complete range less than 4MB in size
+    range_validation = None
+    if check_content_md5:
+        if start_range is None or end_range is None:
+            raise ValueError("Both start and end range required for MD5 content validation.")
+        if end_range - start_range > 4 * 1024 * 1024:
+            raise ValueError("Getting content MD5 for a range greater than 4MB is not supported.")
+        range_validation = 'true'
+
+    return range_header, range_validation
+
+
+def add_metadata_headers(metadata=None):
+    # type: (Optional[Dict[str, str]]) -> Dict[str, str]
+    headers = {}
+    if metadata:
+        for key, value in metadata.items():
+            headers[f'x-ms-meta-{key.strip()}'] = value.strip() if value else value
+    return headers
+
+
+def serialize_batch_body(requests, batch_id):
+    """
+    --<delimiter>
+    <subrequest>
+    --<delimiter>
+    <subrequest>    (repeated as needed)
+    --<delimiter>--
+
+    Serializes the requests in this batch to a single HTTP mixed/multipart body.
+
+    :param List[~azure.core.pipeline.transport.HttpRequest] requests:
+        a list of sub-request for the batch request
+    :param str batch_id:
+        to be embedded in batch sub-request delimiter
+    :returns: The body bytes for this batch.
+    :rtype: bytes
+    """
+
+    if requests is None or len(requests) == 0:
+        raise ValueError('Please provide sub-request(s) for this batch request')
+
+    delimiter_bytes = (_get_batch_request_delimiter(batch_id, True, False) + _HTTP_LINE_ENDING).encode('utf-8')
+    newline_bytes = _HTTP_LINE_ENDING.encode('utf-8')
+    batch_body = []
+
+    content_index = 0
+    for request in requests:
+        request.headers.update({
+            "Content-ID": str(content_index),
+            "Content-Length": str(0)
+        })
+        batch_body.append(delimiter_bytes)
+        batch_body.append(_make_body_from_sub_request(request))
+        batch_body.append(newline_bytes)
+        content_index += 1
+
+    batch_body.append(_get_batch_request_delimiter(batch_id, True, True).encode('utf-8'))
+    # final line of body MUST have \r\n at the end, or it will not be properly read by the service
+    batch_body.append(newline_bytes)
+
+    return b"".join(batch_body)
+
+
+def _get_batch_request_delimiter(batch_id, is_prepend_dashes=False, is_append_dashes=False):
+    """
+    Gets the delimiter used for this batch request's mixed/multipart HTTP format.
+
+    :param str batch_id:
+        Randomly generated id
+    :param bool is_prepend_dashes:
+        Whether to include the starting dashes. Used in the body, but non on defining the delimiter.
+    :param bool is_append_dashes:
+        Whether to include the ending dashes. Used in the body on the closing delimiter only.
+    :returns: The delimiter, WITHOUT a trailing newline.
+    :rtype: str
+    """
+
+    prepend_dashes = '--' if is_prepend_dashes else ''
+    append_dashes = '--' if is_append_dashes else ''
+
+    return prepend_dashes + _REQUEST_DELIMITER_PREFIX + batch_id + append_dashes
+
+
+def _make_body_from_sub_request(sub_request):
+    """
+     Content-Type: application/http
+     Content-ID: <sequential int ID>
+     Content-Transfer-Encoding: <value> (if present)
+
+     <verb> <path><query> HTTP/<version>
+     <header key>: <header value> (repeated as necessary)
+     Content-Length: <value>
+     (newline if content length > 0)
+     <body> (if content length > 0)
+
+     Serializes an http request.
+
+     :param ~azure.core.pipeline.transport.HttpRequest sub_request:
+        Request to serialize.
+     :returns: The serialized sub-request in bytes
+     :rtype: bytes
+     """
+
+    # put the sub-request's headers into a list for efficient str concatenation
+    sub_request_body = []
+
+    # get headers for ease of manipulation; remove headers as they are used
+    headers = sub_request.headers
+
+    # append opening headers
+    sub_request_body.append("Content-Type: application/http")
+    sub_request_body.append(_HTTP_LINE_ENDING)
+
+    sub_request_body.append("Content-ID: ")
+    sub_request_body.append(headers.pop("Content-ID", ""))
+    sub_request_body.append(_HTTP_LINE_ENDING)
+
+    sub_request_body.append("Content-Transfer-Encoding: binary")
+    sub_request_body.append(_HTTP_LINE_ENDING)
+
+    # append blank line
+    sub_request_body.append(_HTTP_LINE_ENDING)
+
+    # append HTTP verb and path and query and HTTP version
+    sub_request_body.append(sub_request.method)
+    sub_request_body.append(' ')
+    sub_request_body.append(sub_request.url)
+    sub_request_body.append(' ')
+    sub_request_body.append(_HTTP1_1_IDENTIFIER)
+    sub_request_body.append(_HTTP_LINE_ENDING)
+
+    # append remaining headers (this will set the Content-Length, as it was set on `sub-request`)
+    for header_name, header_value in headers.items():
+        if header_value is not None:
+            sub_request_body.append(header_name)
+            sub_request_body.append(": ")
+            sub_request_body.append(header_value)
+            sub_request_body.append(_HTTP_LINE_ENDING)
+
+    # append blank line
+    sub_request_body.append(_HTTP_LINE_ENDING)
+
+    return ''.join(sub_request_body).encode()
author	S. Solomon Darnell	2025-03-28 21:52:21 -0500
committer	S. Solomon Darnell	2025-03-28 21:52:21 -0500
commit	4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree	ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/storage/filedatalake/_shared/request_handlers.py
parent	cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download	gn-ai-master.tar.gz