Diffstat (limited to '.venv/lib/python3.12/site-packages/unstructured_client/_hooks')
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/__init__.py                          5
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/__init__.py                   5
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/clean_server_url_hook.py     34
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/common.py                      1
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/form_utils.py                228
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/logger_hook.py                83
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/pdf_utils.py                  79
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/request_utils.py             190
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py            445
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/suggest_defining_url.py       30
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/registration.py                      48
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/sdkhooks.py                          57
-rw-r--r--  .venv/lib/python3.12/site-packages/unstructured_client/_hooks/types.py                             74
13 files changed, 1279 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/__init__.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/__init__.py
new file mode 100644
index 00000000..2ee66cdd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/__init__.py
@@ -0,0 +1,5 @@
+"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
+
+from .sdkhooks import *
+from .types import *
+from .registration import *
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/__init__.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/__init__.py
new file mode 100644
index 00000000..321a6b9c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/__init__.py
@@ -0,0 +1,5 @@
+from .clean_server_url_hook import CleanServerUrlSDKInitHook
+from .logger_hook import LoggerHook
+from .suggest_defining_url import SuggestDefiningUrlIf401AfterErrorHook
+from .split_pdf_hook import SplitPdfHook
+import logging
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/clean_server_url_hook.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/clean_server_url_hook.py
new file mode 100644
index 00000000..5a2d6e3f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/clean_server_url_hook.py
@@ -0,0 +1,34 @@
+from typing import Tuple
+from urllib.parse import ParseResult, urlparse, urlunparse
+
+import requests
+
+from unstructured_client._hooks.types import SDKInitHook
+
+
+class CleanServerUrlSDKInitHook(SDKInitHook):
+ """Hook fixing common mistakes by users in defining `server_url` in the unstructured-client"""
+
+ def clean_server_url(self, base_url) -> str:
+ """Fix url scheme and remove the '/general/v0/general' path."""
+
+ # -- add a url scheme if not present (urllib.parse does not work reliably without it)
+ if "http" not in base_url:
+ base_url = "http://" + base_url
+
+ parsed_url: ParseResult = urlparse(base_url)
+
+ if "api.unstructuredapp.io" in base_url:
+ if parsed_url.scheme != "https":
+ parsed_url = parsed_url._replace(scheme="https")
+
+ # -- path should always be empty
+ return urlunparse(parsed_url._replace(path=""))
+
+ def sdk_init(
+ self, base_url: str, client: requests.Session
+ ) -> Tuple[str, requests.Session]:
+ """Concrete implementation for SDKInitHook."""
+ cleaned_url = self.clean_server_url(base_url)
+
+ return cleaned_url, client
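For reference, a minimal sketch of how the hook above normalizes a few representative inputs; the expected values follow directly from clean_server_url() as defined in this file:

    from unstructured_client._hooks.custom.clean_server_url_hook import CleanServerUrlSDKInitHook

    hook = CleanServerUrlSDKInitHook()

    # A missing scheme is defaulted to http:// and the partition path is stripped
    assert hook.clean_server_url("localhost:8000/general/v0/general") == "http://localhost:8000"

    # The hosted API host is always forced to https, again with the path removed
    assert hook.clean_server_url("http://api.unstructuredapp.io/general/v0/general") == "https://api.unstructuredapp.io"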
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/common.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/common.py
new file mode 100644
index 00000000..ec759932
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/common.py
@@ -0,0 +1 @@
+UNSTRUCTURED_CLIENT_LOGGER_NAME = "unstructured-client"
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/form_utils.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/form_utils.py
new file mode 100644
index 00000000..bf97faa3
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/form_utils.py
@@ -0,0 +1,228 @@
+from __future__ import annotations
+
+import logging
+from typing import Union
+
+from requests_toolbelt.multipart.decoder import MultipartDecoder
+
+from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
+from unstructured_client.models import shared
+
+logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
+FormData = dict[str, Union[str, shared.Files, list[str]]]
+
+PARTITION_FORM_FILES_KEY = "files"
+PARTITION_FORM_SPLIT_PDF_PAGE_KEY = "split_pdf_page"
+PARTITION_FORM_PAGE_RANGE_KEY = "split_pdf_page_range[]"
+PARTITION_FORM_SPLIT_PDF_ALLOW_FAILED_KEY = "split_pdf_allow_failed"
+PARTITION_FORM_STARTING_PAGE_NUMBER_KEY = "starting_page_number"
+PARTITION_FORM_CONCURRENCY_LEVEL_KEY = "split_pdf_concurrency_level"
+
+
+def get_page_range(form_data: FormData, key: str, max_pages: int) -> tuple[int, int]:
+ """Retrieves the split page range from the given form data.
+
+ If the range is not provided, returns (1, max_pages), i.e. the full range.
+ Raises ValueError if the range is invalid or outside the bounds of the page count.
+
+ Args:
+ form_data: The form data containing the page range.
+ key: The key to look for in the form data.
+ max_pages: The total number of pages in the document.
+
+ Returns:
+ The range of pages to send in the request in the form (start, end)
+ """
+ try:
+ _page_range = form_data.get(key)
+
+ if _page_range is not None:
+ page_range = (int(_page_range[0]), int(_page_range[1]))
+ else:
+ page_range = (1, max_pages)
+
+ except (ValueError, IndexError) as exc:
+ msg = f"{_page_range} is not a valid page range."
+ logger.error(msg)
+ raise ValueError(msg) from exc
+
+ start, end = page_range
+
+ if not 0 < start <= max_pages or not 0 < end <= max_pages or not start <= end:
+ msg = f"Page range {page_range} is out of bounds. Start and end values should be between 1 and {max_pages}."
+ logger.error(msg)
+ raise ValueError(msg)
+
+ return page_range
+
+
+def get_starting_page_number(form_data: FormData, key: str, fallback_value: int) -> int:
+ """Retrieves the starting page number from the given form data.
+
+ If the given starting page number is not a valid integer or is less than 1,
+ the fallback value is used.
+
+ Args:
+ form_data: The form data containing the starting page number.
+ key: The key to look for in the form data.
+ fallback_value: The default value to use in case of an error.
+
+ Returns:
+ The starting page number.
+ """
+ starting_page_number = fallback_value
+ try:
+ _starting_page_number = form_data.get(key) or fallback_value
+ starting_page_number = int(_starting_page_number) # type: ignore
+ except ValueError:
+ logger.warning(
+ "'%s' is not a valid integer. Using default value '%d'.",
+ key,
+ fallback_value,
+ )
+
+ if starting_page_number < 1:
+ logger.warning(
+ "'%s' is less than 1. Using default value '%d'.",
+ key,
+ fallback_value,
+ )
+ starting_page_number = fallback_value
+
+ return starting_page_number
+
+def get_split_pdf_allow_failed_param(
+ form_data: FormData, key: str, fallback_value: bool,
+) -> bool:
+ """Retrieves the value for allow failed that should be used for splitting pdf.
+
+ In case given the number is not a "false" or "true" literal, it will use the
+ default value.
+
+ Args:
+ form_data: The form data containing the desired concurrency level.
+ key: The key to look for in the form data.
+ fallback_value: The default value to use in case of an error.
+
+ Returns:
+ The concurrency level after validation.
+ """
+ allow_failed = form_data.get(key)
+
+ if allow_failed is None:
+ return fallback_value
+
+ if allow_failed.lower() not in ["true", "false"]:
+ logger.warning(
+ "'%s' is not a valid boolean. Using default value '%s'.",
+ key,
+ fallback_value,
+ )
+ return fallback_value
+
+ return allow_failed.lower() == "true"
+
+def get_split_pdf_concurrency_level_param(
+ form_data: FormData, key: str, fallback_value: int, max_allowed: int
+) -> int:
+ """Retrieves the value for concurreny level that should be used for splitting pdf.
+
+ In case given the number is not a valid integer or less than 1, it will use the
+ default value.
+
+ Args:
+ form_data: The form data containing the desired concurrency level.
+ key: The key to look for in the form data.
+ fallback_value: The default value to use in case of an error.
+ max_allowed: The maximum allowed value for the concurrency level.
+
+ Returns:
+ The concurrency level after validation.
+ """
+ concurrency_level_str = form_data.get(key)
+
+ if concurrency_level_str is None:
+ return fallback_value
+
+ try:
+ concurrency_level = int(concurrency_level_str)
+ except ValueError:
+ logger.warning(
+ "'%s' is not a valid integer. Using default value '%s'.",
+ key,
+ fallback_value,
+ )
+ return fallback_value
+
+ if concurrency_level < 1:
+ logger.warning(
+ "'%s' is less than 1. Using the default value = %s.",
+ key,
+ fallback_value,
+ )
+ return fallback_value
+
+ if concurrency_level > max_allowed:
+ logger.warning(
+ "'%s' is greater than %s. Using the maximum allowed value = %s.",
+ key,
+ max_allowed,
+ max_allowed,
+ )
+ return max_allowed
+
+ return concurrency_level
+
+
+def decode_content_disposition(content_disposition: bytes) -> dict[str, str]:
+ """Decode the `Content-Disposition` header and return the parameters as a dictionary.
+
+ Args:
+ content_disposition: The `Content-Disposition` header as bytes.
+
+ Returns:
+ A dictionary containing the parameters extracted from the
+ `Content-Disposition` header.
+ """
+ data = content_disposition.decode().split("; ")[1:]
+ parameters = [d.split("=") for d in data]
+ parameters_dict = {p[0]: p[1].strip('"') for p in parameters}
+ return parameters_dict
+
+
+def parse_form_data(decoded_data: MultipartDecoder) -> FormData:
+ """Parses the form data from the decoded multipart data.
+
+ Args:
+ decoded_data: The decoded multipart data.
+
+ Returns:
+ The parsed form data.
+ """
+ form_data: FormData = {}
+
+ for part in decoded_data.parts:
+ content_disposition = part.headers.get(b"Content-Disposition")
+ if content_disposition is None:
+ raise RuntimeError("Content-Disposition header not found. Can't split pdf file.")
+ part_params = decode_content_disposition(content_disposition)
+ name = part_params.get("name")
+
+ if name is None:
+ continue
+
+ if name == PARTITION_FORM_FILES_KEY:
+ filename = part_params.get("filename")
+ if filename is None or not filename.strip():
+ raise ValueError("Filename can't be an empty string.")
+ form_data[PARTITION_FORM_FILES_KEY] = shared.Files(part.content, filename)
+ else:
+ content = part.content.decode()
+ if name in form_data:
+ if isinstance(form_data[name], list):
+ form_data[name].append(content)
+ else:
+ form_data[name] = [form_data[name], content]
+ else:
+ form_data[name] = content
+
+ return form_data
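A short sketch of how the validation helpers above behave, using the form keys defined at the top of this file; the input values are illustrative:

    from unstructured_client._hooks.custom import form_utils

    # A well-formed range within a 10-page document is returned unchanged
    assert form_utils.get_page_range(
        {"split_pdf_page_range[]": ["2", "5"]}, "split_pdf_page_range[]", max_pages=10
    ) == (2, 5)

    # A missing range falls back to the full document
    assert form_utils.get_page_range({}, "split_pdf_page_range[]", max_pages=10) == (1, 10)

    # An out-of-bounds range raises ValueError rather than being clamped
    try:
        form_utils.get_page_range(
            {"split_pdf_page_range[]": ["0", "99"]}, "split_pdf_page_range[]", max_pages=10
        )
    except ValueError:
        pass

    # A non-integer starting page number falls back with a warning instead of raising
    assert form_utils.get_starting_page_number({"starting_page_number": "abc"}, "starting_page_number", 1) == 1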
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/logger_hook.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/logger_hook.py
new file mode 100644
index 00000000..354f9ccd
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/logger_hook.py
@@ -0,0 +1,83 @@
+import logging
+from typing import Optional, Tuple, Union, DefaultDict
+
+import requests
+
+from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
+from unstructured_client._hooks.types import (
+ AfterSuccessContext,
+ AfterErrorContext,
+ AfterErrorHook,
+ SDKInitHook,
+ AfterSuccessHook,
+)
+from collections import defaultdict
+
+logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
+
+
+class LoggerHook(AfterErrorHook, AfterSuccessHook, SDKInitHook):
+ """Hook providing custom logging"""
+
+ def __init__(self) -> None:
+ self.retries_counter: DefaultDict[str, int] = defaultdict(int)
+
+ def log_retries(self, response: Optional[requests.Response], error: Optional[Exception], operation_id: str,):
+ """Log retries to give users visibility into requests."""
+
+ if response is not None and response.status_code // 100 == 5:
+ logger.info(
+ "Failed to process a request due to API server error with status code %d. "
+ "Attempting retry number %d after sleep.",
+ response.status_code,
+ self.retries_counter[operation_id],
+ )
+ if response.text:
+ logger.info("Server message - %s", response.text)
+
+ elif error is not None and isinstance(error, requests.exceptions.ConnectionError):
+ logger.info(
+ "Failed to process a request due to connection error - %s. "
+ "Attempting retry number %d after sleep.",
+ error,
+ self.retries_counter[operation_id],
+ )
+
+
+ def sdk_init(
+ self, base_url: str, client: requests.Session
+ ) -> Tuple[str, requests.Session]:
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+ return base_url, client
+
+ def after_success(
+ self, hook_ctx: AfterSuccessContext, response: requests.Response
+ ) -> Union[requests.Response, Exception]:
+ self.retries_counter.pop(hook_ctx.operation_id, None)
+ # NOTE: In case of split page partition this means - at least one of the splits was partitioned successfully
+ logger.info("Successfully partitioned the document.")
+ return response
+
+ def after_error(
+ self,
+ hook_ctx: AfterErrorContext,
+ response: Optional[requests.Response],
+ error: Optional[Exception],
+ ) -> Union[Tuple[Optional[requests.Response], Optional[Exception]], Exception]:
+ """Concrete implementation for AfterErrorHook."""
+ self.retries_counter[hook_ctx.operation_id] += 1
+ self.log_retries(response, error, hook_ctx.operation_id)
+
+ if response and response.status_code == 200:
+ # NOTE: Even though this is an after_error method, due to split_pdf_hook logic we may get
+ # a success here when one of the split requests was partitioned successfully
+ logger.info("Successfully partitioned the document.")
+
+ else:
+ logger.error("Failed to partition the document.")
+ if response:
+ logger.error("Server responded with %d - %s", response.status_code, response.text)
+ if error is not None:
+ logger.error("Following error occurred - %s", error)
+
+ return response, error
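Because all of these hooks log through the shared "unstructured-client" logger named in common.py, an application can tune their verbosity without touching the SDK; a minimal sketch:

    import logging

    # LoggerHook.sdk_init calls logging.basicConfig(level=logging.INFO); raising the
    # level on the SDK's named logger silences the per-request INFO messages
    logging.getLogger("unstructured-client").setLevel(logging.WARNING)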
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/pdf_utils.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/pdf_utils.py
new file mode 100644
index 00000000..27dc5e03
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/pdf_utils.py
@@ -0,0 +1,79 @@
+import io
+import logging
+from typing import Generator, Tuple, Optional
+
+from pypdf import PdfReader, PdfWriter
+from pypdf.errors import PdfReadError
+
+from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
+from unstructured_client.models import shared
+
+logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
+
+# Loading pdfs with strict=False can dump a lot of warnings
+# We don't need to display these
+pdf_logger = logging.getLogger("pypdf")
+pdf_logger.setLevel(logging.ERROR)
+
+
+def get_pdf_pages(
+ pdf: PdfReader, split_size: int = 1, page_start: int = 1, page_end: Optional[int] = None
+) -> Generator[Tuple[io.BytesIO, int, int], None, None]:
+ """Reads given bytes of a pdf file and split it into n file-like objects, each
+ with `split_size` pages.
+
+ Args:
+ file_content: Content of the PDF file.
+ split_size: Split size, e.g. if the given file has 10 pages
+ and this value is set to 2 it will yield 5 documents, each containing 2 pages
+ of the original document. By default it will split each page to a separate file.
+ page_start: Begin splitting at this page number
+ page_end: If provided, split up to and including this page number
+
+ Yields:
+ The file contents with their page number and overall pages number of the original document.
+ """
+
+ offset = page_start - 1
+ offset_end = page_end or len(pdf.pages)
+
+ while offset < offset_end:
+ new_pdf = PdfWriter()
+ pdf_buffer = io.BytesIO()
+
+ end = min(offset + split_size, offset_end)
+
+ for page in list(pdf.pages[offset:end]):
+ new_pdf.add_page(page)
+
+ new_pdf.write(pdf_buffer)
+ pdf_buffer.seek(0)
+
+ yield pdf_buffer, offset, offset_end
+ offset += split_size
+
+
+def is_pdf(file: shared.Files) -> bool:
+ """Checks if the given file is a PDF.
+
+ First it checks that the file extension is `.pdf`, then it tries to read the file.
+ If there is no error, we assume it is a valid PDF.
+
+ Args:
+ file: The file to be checked.
+
+ Returns:
+ True if the file is a PDF, False otherwise.
+ """
+ if not file.file_name.endswith(".pdf"):
+ logger.info("Given file doesn't have '.pdf' extension, so splitting is not enabled.")
+ return False
+
+ try:
+ PdfReader(io.BytesIO(file.content), strict=True)
+ except (PdfReadError, UnicodeDecodeError) as exc:
+ logger.error(exc)
+ logger.warning("The file does not appear to be a valid PDF.")
+ return False
+
+ return True
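A minimal sketch of get_pdf_pages() in isolation, assuming "sample.pdf" is a local 10-page document (the filename is only an example):

    from pypdf import PdfReader
    from unstructured_client._hooks.custom.pdf_utils import get_pdf_pages

    pdf = PdfReader("sample.pdf")
    for buffer, offset, total in get_pdf_pages(pdf, split_size=4):
        # Each buffer is an in-memory PDF of up to 4 pages; offset is the zero-based
        # index of its first page. For a 10-page file this loop runs three times,
        # with offsets 0, 4 and 8 and splits of 4, 4 and 2 pages.
        print(offset, total, len(PdfReader(buffer).pages))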
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/request_utils.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/request_utils.py
new file mode 100644
index 00000000..1512e80b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/request_utils.py
@@ -0,0 +1,190 @@
+from __future__ import annotations
+
+import asyncio
+import copy
+import io
+import json
+import logging
+from typing import Optional, Tuple, Any
+
+import httpx
+import requests
+from requests.structures import CaseInsensitiveDict
+from requests_toolbelt.multipart.encoder import MultipartEncoder
+
+from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
+from unstructured_client._hooks.custom.form_utils import (
+ PARTITION_FORM_FILES_KEY,
+ PARTITION_FORM_SPLIT_PDF_PAGE_KEY,
+ PARTITION_FORM_SPLIT_PDF_ALLOW_FAILED_KEY,
+ PARTITION_FORM_PAGE_RANGE_KEY,
+ PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
+ FormData,
+)
+
+logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
+
+
+def create_request_body(
+ form_data: FormData, page_content: io.BytesIO, filename: str, page_number: int
+) -> MultipartEncoder:
+ payload = prepare_request_payload(form_data)
+
+ payload_fields: list[tuple[str, Any]] = []
+ for key, value in payload.items():
+ if isinstance(value, list):
+ payload_fields.extend([(key, list_value) for list_value in value])
+ else:
+ payload_fields.append((key, value))
+
+ payload_fields.append((PARTITION_FORM_FILES_KEY, (
+ filename,
+ page_content,
+ "application/pdf",
+ )))
+
+ payload_fields.append((PARTITION_FORM_STARTING_PAGE_NUMBER_KEY, str(page_number)))
+
+ body = MultipartEncoder(
+ fields=payload_fields
+ )
+ return body
+
+
+def create_httpx_request(
+ original_request: requests.Request, body: MultipartEncoder
+) -> httpx.Request:
+ headers = prepare_request_headers(original_request.headers)
+ return httpx.Request(
+ method="POST",
+ url=original_request.url or "",
+ content=body.to_string(),
+ headers={**headers, "Content-Type": body.content_type},
+ )
+
+
+def create_request(
+ request: requests.PreparedRequest,
+ body: MultipartEncoder,
+) -> requests.Request:
+ headers = prepare_request_headers(request.headers)
+ return requests.Request(
+ method="POST",
+ url=request.url or "",
+ data=body,
+ headers={**headers, "Content-Type": body.content_type},
+ )
+
+
+async def call_api_async(
+ client: httpx.AsyncClient,
+ page: Tuple[io.BytesIO, int],
+ original_request: requests.Request,
+ form_data: FormData,
+ filename: str,
+ limiter: asyncio.Semaphore,
+) -> tuple[int, dict]:
+ page_content, page_number = page
+ body = create_request_body(form_data, page_content, filename, page_number)
+ new_request = create_httpx_request(original_request, body)
+ async with limiter:
+ try:
+ response = await client.send(new_request)
+ return response.status_code, response.json()
+ except Exception:
+ logger.error("Failed to send request for page %d", page_number)
+ return 500, {}
+
+
+def call_api(
+ client: Optional[requests.Session],
+ page: Tuple[io.BytesIO, int],
+ request: requests.PreparedRequest,
+ form_data: FormData,
+ filename: str,
+) -> requests.Response:
+ if client is None:
+ raise RuntimeError("HTTP client not accessible!")
+ page_content, page_number = page
+
+ body = create_request_body(form_data, page_content, filename, page_number)
+ new_request = create_request(request, body)
+ prepared_request = client.prepare_request(new_request)
+
+ try:
+ return client.send(prepared_request)
+ except Exception:
+ logger.error("Failed to send request for page %d", page_number)
+ return requests.Response()
+
+
+def prepare_request_headers(
+ headers: CaseInsensitiveDict[str],
+) -> CaseInsensitiveDict[str]:
+ """Prepare the request headers by removing the 'Content-Type' and 'Content-Length' headers.
+
+ Args:
+ headers: The original request headers.
+
+ Returns:
+ The modified request headers.
+ """
+ headers = copy.deepcopy(headers)
+ headers.pop("Content-Type", None)
+ headers.pop("Content-Length", None)
+ return headers
+
+
+def prepare_request_payload(form_data: FormData) -> FormData:
+ """Prepares the request payload by removing unnecessary keys and updating the file.
+
+ Args:
+ form_data: The original form data.
+
+ Returns:
+ The updated request payload.
+ """
+ payload = copy.deepcopy(form_data)
+ payload.pop(PARTITION_FORM_SPLIT_PDF_PAGE_KEY, None)
+ payload.pop(PARTITION_FORM_SPLIT_PDF_ALLOW_FAILED_KEY, None)
+ payload.pop(PARTITION_FORM_FILES_KEY, None)
+ payload.pop(PARTITION_FORM_PAGE_RANGE_KEY, None)
+ payload.pop(PARTITION_FORM_STARTING_PAGE_NUMBER_KEY, None)
+ updated_parameters = {
+ PARTITION_FORM_SPLIT_PDF_PAGE_KEY: "false",
+ }
+ payload.update(updated_parameters)
+ return payload
+
+
+def create_response(response: requests.Response, elements: list) -> requests.Response:
+ """
+ Creates a modified response object with updated content.
+
+ Args:
+ response: The original response object.
+ elements: The list of elements to be serialized and added to
+ the response.
+
+ Returns:
+ The modified response object with updated content.
+ """
+ response_copy = copy.deepcopy(response)
+ content = json.dumps(elements).encode()
+ content_length = str(len(content))
+ response_copy.headers.update({"Content-Length": content_length})
+ setattr(response_copy, "_content", content)
+ return response_copy
+
+
+def log_after_split_response(status_code: int, split_number: int):
+ if status_code == 200:
+ logger.info(
+ "Successfully partitioned set #%d, elements added to the final result.",
+ split_number,
+ )
+ else:
+ logger.warning(
+ "Failed to partition set #%d, its elements will be omitted in the final result.",
+ split_number,
+ )
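To make the payload handling above concrete, a small sketch of prepare_request_payload(); the extra "strategy" field is just an arbitrary example of a parameter that passes through untouched:

    from unstructured_client._hooks.custom import request_utils

    form_data = {
        "strategy": "fast",
        "split_pdf_page": "true",
        "starting_page_number": "1",
    }
    payload = request_utils.prepare_request_payload(form_data)

    # The file and split-control keys are stripped, and split_pdf_page is forced to
    # "false" so the per-page requests are not split again server-side
    assert payload == {"strategy": "fast", "split_pdf_page": "false"}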
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py
new file mode 100644
index 00000000..54a3b89e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py
@@ -0,0 +1,445 @@
+from __future__ import annotations
+
+import asyncio
+import io
+import json
+import logging
+import math
+from collections.abc import Awaitable
+from typing import Any, Coroutine, Optional, Tuple, Union
+
+import httpx
+import nest_asyncio
+import requests
+from pypdf import PdfReader
+from requests_toolbelt.multipart.decoder import MultipartDecoder
+
+from unstructured_client._hooks.custom import form_utils, pdf_utils, request_utils
+from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
+from unstructured_client._hooks.custom.form_utils import (
+ PARTITION_FORM_CONCURRENCY_LEVEL_KEY,
+ PARTITION_FORM_FILES_KEY,
+ PARTITION_FORM_PAGE_RANGE_KEY,
+ PARTITION_FORM_SPLIT_PDF_PAGE_KEY,
+ PARTITION_FORM_SPLIT_PDF_ALLOW_FAILED_KEY,
+ PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
+)
+from unstructured_client._hooks.types import (
+ AfterErrorContext,
+ AfterErrorHook,
+ AfterSuccessContext,
+ AfterSuccessHook,
+ BeforeRequestContext,
+ BeforeRequestHook,
+ SDKInitHook,
+)
+from unstructured_client.models import shared
+
+logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
+
+DEFAULT_STARTING_PAGE_NUMBER = 1
+DEFAULT_ALLOW_FAILED = False
+DEFAULT_CONCURRENCY_LEVEL = 8
+MAX_CONCURRENCY_LEVEL = 15
+MIN_PAGES_PER_SPLIT = 2
+MAX_PAGES_PER_SPLIT = 20
+
+
+async def _order_keeper(index: int, coro: Awaitable) -> Tuple[int, requests.Response]:
+ response = await coro
+ return index, response
+
+
+async def run_tasks(coroutines: list[Awaitable], allow_failed: bool = False) -> list[tuple[int, requests.Response]]:
+ if allow_failed:
+ responses = await asyncio.gather(*coroutines, return_exceptions=False)
+ return list(enumerate(responses, 1))
+ # TODO: replace with asyncio.TaskGroup for python >3.11 # pylint: disable=fixme
+ tasks = [asyncio.create_task(_order_keeper(index, coro)) for index, coro in enumerate(coroutines, 1)]
+ results = []
+ remaining_tasks = dict(enumerate(tasks, 1))
+ for future in asyncio.as_completed(tasks):
+ index, response = await future
+ if response.status_code != 200:
+ # cancel all remaining tasks
+ for remaining_task in remaining_tasks.values():
+ remaining_task.cancel()
+ results.append((index, response))
+ break
+ results.append((index, response))
+ # remove task from remaining_tasks that should be cancelled in case of failure
+ del remaining_tasks[index]
+ # return results in the original order
+ return sorted(results, key=lambda x: x[0])
+
+
+def context_is_uvloop():
+ """Return true if uvloop is installed and we're currently in a uvloop context. Our asyncio splitting code currently doesn't work under uvloop."""
+ try:
+ import uvloop # pylint: disable=import-outside-toplevel
+ loop = asyncio.get_event_loop()
+ return isinstance(loop, uvloop.Loop)
+ except (ImportError, RuntimeError):
+ return False
+
+def get_optimal_split_size(num_pages: int, concurrency_level: int) -> int:
+ """Distributes pages to workers evenly based on the number of pages and desired concurrency level."""
+ if num_pages < MAX_PAGES_PER_SPLIT * concurrency_level:
+ split_size = math.ceil(num_pages / concurrency_level)
+ else:
+ split_size = MAX_PAGES_PER_SPLIT
+
+ return max(split_size, MIN_PAGES_PER_SPLIT)
+
+
+class SplitPdfHook(SDKInitHook, BeforeRequestHook, AfterSuccessHook, AfterErrorHook):
+ """
+ A hook class that splits a PDF file into multiple pages and sends each page as
+ a separate request. This hook is designed to be used with a Speakeasy SDK.
+
+ Usage:
+ 1. Create an instance of the `SplitPdfHook` class.
+ 2. Register SDK Init, Before Request, After Success and After Error hooks.
+ """
+
+ def __init__(self) -> None:
+ self.client: Optional[requests.Session] = None
+ self.coroutines_to_execute: dict[
+ str, list[Coroutine[Any, Any, requests.Response]]
+ ] = {}
+ self.api_successful_responses: dict[str, list[requests.Response]] = {}
+ self.api_failed_responses: dict[str, list[requests.Response]] = {}
+ self.allow_failed: bool = DEFAULT_ALLOW_FAILED
+
+ def sdk_init(
+ self, base_url: str, client: requests.Session
+ ) -> Tuple[str, requests.Session]:
+ """Initializes Split PDF Hook.
+
+ Args:
+ base_url (str): URL of the API.
+ client (requests.Session): HTTP Client.
+
+ Returns:
+ Tuple[str, requests.Session]: The initialized SDK options.
+ """
+ self.client = client
+ return base_url, client
+
+
+ # pylint: disable=too-many-return-statements
+ def before_request(
+ self, hook_ctx: BeforeRequestContext, request: requests.PreparedRequest
+ ) -> Union[requests.PreparedRequest, Exception]:
+ """If `splitPdfPage` is set to `true` in the request, the PDF file is split into
+ separate pages. Each page is sent as a separate request in parallel. The last
+ page request is returned by this method. It will return the original request
+ when `splitPdfPage` is set to `false`, the file is not a PDF, or the HTTP
+ client has not been initialized.
+
+ Args:
+ hook_ctx (BeforeRequestContext): The hook context containing information about
+ the operation.
+ request (requests.PreparedRequest): The request object.
+
+ Returns:
+ Union[requests.PreparedRequest, Exception]: If `splitPdfPage` is set to `true`,
+ the last page request; otherwise, the original request.
+ """
+ if self.client is None:
+ logger.warning("HTTP client not accessible! Continuing without splitting.")
+ return request
+
+ if context_is_uvloop():
+ logger.warning("Splitting is currently incompatible with uvloop. Continuing without splitting.")
+ return request
+
+ # This allows us to use an event loop in an env with an existing loop
+ # Temporary fix until we can improve the async splitting behavior
+ nest_asyncio.apply()
+ operation_id = hook_ctx.operation_id
+ content_type = request.headers.get("Content-Type")
+ body = request.body
+ if not isinstance(body, bytes) or content_type is None:
+ return request
+
+ decoded_body = MultipartDecoder(body, content_type)
+ form_data = form_utils.parse_form_data(decoded_body)
+ split_pdf_page = form_data.get(PARTITION_FORM_SPLIT_PDF_PAGE_KEY)
+ if split_pdf_page is None or split_pdf_page == "false":
+ logger.info("Partitioning without split.")
+ return request
+
+ logger.info("Preparing to split document for partition.")
+ file = form_data.get(PARTITION_FORM_FILES_KEY)
+ if (
+ file is None
+ or not isinstance(file, shared.Files)
+ or not pdf_utils.is_pdf(file)
+ ):
+ logger.info("Partitioning without split.")
+ return request
+
+ starting_page_number = form_utils.get_starting_page_number(
+ form_data,
+ key=PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
+ fallback_value=DEFAULT_STARTING_PAGE_NUMBER,
+ )
+ if starting_page_number > 1:
+ logger.info("Starting page number set to %d", starting_page_number)
+ logger.info("Starting page number set to %d", starting_page_number)
+
+ self.allow_failed = form_utils.get_split_pdf_allow_failed_param(
+ form_data,
+ key=PARTITION_FORM_SPLIT_PDF_ALLOW_FAILED_KEY,
+ fallback_value=DEFAULT_ALLOW_FAILED,
+ )
+ logger.info("Allow failed set to %d", self.allow_failed)
+
+ concurrency_level = form_utils.get_split_pdf_concurrency_level_param(
+ form_data,
+ key=PARTITION_FORM_CONCURRENCY_LEVEL_KEY,
+ fallback_value=DEFAULT_CONCURRENCY_LEVEL,
+ max_allowed=MAX_CONCURRENCY_LEVEL,
+ )
+ logger.info("Concurrency level set to %d", concurrency_level)
+ limiter = asyncio.Semaphore(concurrency_level)
+
+ pdf = PdfReader(io.BytesIO(file.content))
+
+ page_range_start, page_range_end = form_utils.get_page_range(
+ form_data,
+ key=PARTITION_FORM_PAGE_RANGE_KEY,
+ max_pages=len(pdf.pages),
+ )
+
+ page_count = page_range_end - page_range_start + 1
+ logger.info(
+ "Splitting pages %d to %d (%d total)",
+ page_range_start,
+ page_range_end,
+ page_count,
+ )
+
+ split_size = get_optimal_split_size(
+ num_pages=page_count, concurrency_level=concurrency_level
+ )
+ logger.info("Determined optimal split size of %d pages.", split_size)
+
+ # If the doc is small enough, and we aren't slicing it with a page range:
+ # do not split, just continue with the original request
+ if split_size >= page_count and page_count == len(pdf.pages):
+ logger.info(
+ "Document has too few pages (%d) to be split efficiently. Partitioning without split.",
+ page_count,
+ )
+ return request
+
+ pages = pdf_utils.get_pdf_pages(pdf, split_size=split_size, page_start=page_range_start, page_end=page_range_end)
+ logger.info(
+ "Partitioning %d files with %d page(s) each.",
+ math.floor(page_count / split_size),
+ split_size,
+ )
+
+ # Log the remainder pages if there are any
+ if page_count % split_size > 0:
+ logger.info(
+ "Partitioning 1 file with %d page(s).",
+ page_count % split_size,
+ )
+
+ async def call_api_partial(page):
+ async with httpx.AsyncClient() as client:
+ status_code, json_response = await request_utils.call_api_async(
+ client=client,
+ original_request=request,
+ form_data=form_data,
+ filename=file.file_name,
+ page=page,
+ limiter=limiter,
+ )
+
+ # convert httpx response to requests.Response to preserve
+ # compatibility with the synchronous SDK generated by speakeasy
+ response = requests.Response()
+ response.status_code = status_code
+ response._content = json.dumps( # pylint: disable=W0212
+ json_response
+ ).encode()
+ response.headers["Content-Type"] = "application/json"
+ return response
+
+ self.coroutines_to_execute[operation_id] = []
+ last_page_content = io.BytesIO()
+ last_page_number = 0
+ set_index = 1
+ for page_content, page_index, all_pages_number in pages:
+ page_number = page_index + starting_page_number
+ logger.info(
+ "Partitioning set #%d (pages %d-%d).",
+ set_index,
+ page_number,
+ min(page_number + split_size - 1, all_pages_number),
+ )
+ # Check if this set of pages is the last one
+ if page_index + split_size >= all_pages_number:
+ last_page_content = page_content
+ last_page_number = page_number
+ break
+ coroutine = call_api_partial((page_content, page_number))
+ self.coroutines_to_execute[operation_id].append(coroutine)
+ set_index += 1
+ # `before_request` method needs to return a request so we skip sending the last page in parallel
+ # and return that last page at the end of this method
+
+ body = request_utils.create_request_body(
+ form_data, last_page_content, file.file_name, last_page_number
+ )
+ last_page_request = request_utils.create_request(request, body)
+ last_page_prepared_request = self.client.prepare_request(last_page_request)
+ return last_page_prepared_request
+
+ def _await_elements(
+ self, operation_id: str, response: requests.Response
+ ) -> Optional[list]:
+ """
+ Waits for the partition requests to complete and returns the flattened
+ elements.
+
+ Args:
+ operation_id (str): The ID of the operation.
+ response (requests.Response): The response object.
+
+ Returns:
+ Optional[list]: The flattened elements if the partition requests are
+ completed, otherwise None.
+ """
+ tasks = self.coroutines_to_execute.get(operation_id)
+ if tasks is None:
+ return None
+
+ ioloop = asyncio.get_event_loop()
+ task_responses: list[tuple[int, requests.Response]] = ioloop.run_until_complete(
+ run_tasks(tasks, allow_failed=self.allow_failed)
+ )
+
+ if task_responses is None:
+ return None
+
+ successful_responses = []
+ failed_responses = []
+ elements = []
+ for response_number, res in task_responses:
+ request_utils.log_after_split_response(res.status_code, response_number)
+ if res.status_code == 200:
+ successful_responses.append(res)
+ elements.append(res.json())
+ else:
+ failed_responses.append(res)
+
+ if self.allow_failed or not failed_responses:
+ last_response_number = len(task_responses) + 1
+ request_utils.log_after_split_response(
+ response.status_code, last_response_number
+ )
+ if response.status_code == 200:
+ elements.append(response.json())
+ successful_responses.append(response)
+ else:
+ failed_responses.append(response)
+
+ self.api_successful_responses[operation_id] = successful_responses
+ self.api_failed_responses[operation_id] = failed_responses
+ flattened_elements = [element for sublist in elements for element in sublist]
+ return flattened_elements
+
+ def after_success(
+ self, hook_ctx: AfterSuccessContext, response: requests.Response
+ ) -> Union[requests.Response, Exception]:
+ """Executes after a successful API request. Awaits all parallel requests and
+ combines the responses into a single response object.
+
+ Args:
+ hook_ctx (AfterSuccessContext): The context object containing information
+ about the hook execution.
+ response (requests.Response): The response object returned from the API
+ request.
+
+ Returns:
+ Union[requests.Response, Exception]: If requests were run in parallel, a
+ combined response object; otherwise, the original response. An exception
+ can be returned if one occurred during execution.
+ """
+ operation_id = hook_ctx.operation_id
+ # Because in `before_request` method we skipped sending last page in parallel
+ # we need to pass response, which contains last page, to `_await_elements` method
+ elements = self._await_elements(operation_id, response)
+
+ # if fails are disallowed, return the first failed response
+ if not self.allow_failed and self.api_failed_responses.get(operation_id):
+ return self.api_failed_responses[operation_id][0]
+
+ if elements is None:
+ return response
+
+ updated_response = request_utils.create_response(response, elements)
+ self._clear_operation(operation_id)
+ return updated_response
+
+ def after_error(
+ self,
+ hook_ctx: AfterErrorContext,
+ response: Optional[requests.Response],
+ error: Optional[Exception],
+ ) -> Union[Tuple[Optional[requests.Response], Optional[Exception]], Exception]:
+ """Executes after an unsuccessful API request. Awaits all parallel requests,
+ if at least one request was successful, combines the responses into a single
+ response object and doesn't throw an error. It will return an error only if
+ all requests failed, or there was no PDF split.
+
+ Args:
+ hook_ctx (AfterErrorContext): The AfterErrorContext object containing
+ information about the hook context.
+ response (Optional[requests.Response]): The Response object representing
+ the response received before the exception occurred.
+ error (Optional[Exception]): The exception object that was thrown.
+
+ Returns:
+ Union[Tuple[Optional[requests.Response], Optional[Exception]], Exception]:
+ If requests were run in parallel, and at least one was successful, a combined
+ response object; otherwise, the original response and exception.
+ """
+
+ # if fails are disallowed - return response and error objects immediately
+ if not self.allow_failed:
+ return (response, error)
+
+ operation_id = hook_ctx.operation_id
+ # We know that this request failed so we pass a failed or empty response to `_await_elements` method
+ # where it checks if at least one of the other requests succeeded
+ elements = self._await_elements(operation_id, response or requests.Response())
+ successful_responses = self.api_successful_responses.get(operation_id)
+
+ if elements is None or successful_responses is None:
+ return (response, error)
+
+ if len(successful_responses) == 0:
+ self._clear_operation(operation_id)
+ return (response, error)
+
+ updated_response = request_utils.create_response(
+ successful_responses[0], elements
+ )
+ self._clear_operation(operation_id)
+ return (updated_response, None)
+
+ def _clear_operation(self, operation_id: str) -> None:
+ """
+ Clears the operation data associated with the given operation ID.
+
+ Args:
+ operation_id (str): The ID of the operation to clear.
+ """
+ self.coroutines_to_execute.pop(operation_id, None)
+ self.api_successful_responses.pop(operation_id, None)
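A worked sketch of the split-size math used in before_request above, with the module constants defined at the top of this file (MIN_PAGES_PER_SPLIT=2, MAX_PAGES_PER_SPLIT=20):

    from unstructured_client._hooks.custom.split_pdf_hook import get_optimal_split_size

    # 10 pages across 8 workers: ceil(10 / 8) = 2 pages per split
    assert get_optimal_split_size(num_pages=10, concurrency_level=8) == 2

    # 100 pages across 8 workers: ceil(100 / 8) = 13 pages per split
    assert get_optimal_split_size(num_pages=100, concurrency_level=8) == 13

    # Very large documents are capped at MAX_PAGES_PER_SPLIT
    assert get_optimal_split_size(num_pages=1000, concurrency_level=8) == 20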
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/suggest_defining_url.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/suggest_defining_url.py
new file mode 100644
index 00000000..d9268159
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/custom/suggest_defining_url.py
@@ -0,0 +1,30 @@
+from typing import Optional, Tuple, Union
+
+import logging
+import requests
+
+from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME
+from unstructured_client._hooks.types import AfterErrorContext, AfterErrorHook
+
+logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME)
+
+
+class SuggestDefiningUrlIf401AfterErrorHook(AfterErrorHook):
+ """Hook advising users to check that 'server_url' is defined if a 401 error is encountered."""
+
+ def warn_if_401(self, response: Optional[requests.Response]):
+ """If the paid API returns 401, warn the user in case they meant to use the free api."""
+ if response is not None and response.status_code == 401:
+ logger.warning(
+ "This API key is invalid against the paid API. If intending to use the free API, please initialize UnstructuredClient with `server='free-api'`."
+ )
+
+ def after_error(
+ self,
+ hook_ctx: AfterErrorContext,
+ response: Optional[requests.Response],
+ error: Optional[Exception],
+ ) -> Union[Tuple[Optional[requests.Response], Optional[Exception]], Exception]:
+ """Concrete implementation for AfterErrorHook."""
+ self.warn_if_401(response)
+ return response, error
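For context, the initialization that the warning above suggests might look like the sketch below; the environment variable name is only an example, while server="free-api" is the named-server option the message refers to:

    import os
    from unstructured_client import UnstructuredClient

    client = UnstructuredClient(
        api_key_auth=os.environ["UNSTRUCTURED_FREE_API_KEY"],  # example variable name
        server="free-api",
    )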
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/registration.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/registration.py
new file mode 100644
index 00000000..f2a7fc60
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/registration.py
@@ -0,0 +1,48 @@
+"""Registration of custom, human-written hooks."""
+
+from .custom import (
+ CleanServerUrlSDKInitHook,
+ LoggerHook,
+ SuggestDefiningUrlIf401AfterErrorHook,
+ SplitPdfHook,
+)
+from .types import Hooks
+
+
+# This file is only ever generated once on the first generation and then is free to be modified.
+# Any hooks you wish to add should be registered in the init_hooks function. Feel free to define
+# them in this file or in separate files in the hooks folder.
+
+
+def init_hooks(hooks: Hooks):
+ # pylint: disable=unused-argument
+ """Add hooks by calling `hooks.register_<type_or>_hook` with an instance of that hook.
+
+ Hooks are registered per SDK instance, and are valid for the lifetime of the SDK instance
+ """
+
+ # Initialize custom hooks
+ clean_server_url_hook = CleanServerUrlSDKInitHook()
+ suggest_defining_url_hook = SuggestDefiningUrlIf401AfterErrorHook()
+ logger_hook = LoggerHook()
+ split_pdf_hook = SplitPdfHook()
+
+ # NOTE: logger_hook should stay registered last as it logs the status of the
+ # request and whether it will be retried, which can be changed by e.g. split_pdf_hook
+
+ # Register SDK Init hooks
+ hooks.register_sdk_init_hook(clean_server_url_hook)
+ hooks.register_sdk_init_hook(logger_hook)
+ hooks.register_sdk_init_hook(split_pdf_hook)
+
+ # Register Before Request hooks
+ hooks.register_before_request_hook(split_pdf_hook)
+
+ # Register After Success hooks
+ hooks.register_after_success_hook(split_pdf_hook)
+ hooks.register_after_success_hook(logger_hook)
+
+ # Register After Error hooks
+ hooks.register_after_error_hook(suggest_defining_url_hook)
+ hooks.register_after_error_hook(split_pdf_hook)
+ hooks.register_after_error_hook(logger_hook)
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/sdkhooks.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/sdkhooks.py
new file mode 100644
index 00000000..1fdae95a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/sdkhooks.py
@@ -0,0 +1,57 @@
+"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
+
+import requests
+from .types import SDKInitHook, BeforeRequestContext, BeforeRequestHook, AfterSuccessContext, AfterSuccessHook, AfterErrorContext, AfterErrorHook, Hooks
+from .registration import init_hooks
+from typing import List, Optional, Tuple
+
+
+class SDKHooks(Hooks):
+ def __init__(self):
+ self.sdk_init_hooks: List[SDKInitHook] = []
+ self.before_request_hooks: List[BeforeRequestHook] = []
+ self.after_success_hooks: List[AfterSuccessHook] = []
+ self.after_error_hooks: List[AfterErrorHook] = []
+ init_hooks(self)
+
+ def register_sdk_init_hook(self, hook: SDKInitHook) -> None:
+ self.sdk_init_hooks.append(hook)
+
+ def register_before_request_hook(self, hook: BeforeRequestHook) -> None:
+ self.before_request_hooks.append(hook)
+
+ def register_after_success_hook(self, hook: AfterSuccessHook) -> None:
+ self.after_success_hooks.append(hook)
+
+ def register_after_error_hook(self, hook: AfterErrorHook) -> None:
+ self.after_error_hooks.append(hook)
+
+ def sdk_init(self, base_url: str, client: requests.Session) -> Tuple[str, requests.Session]:
+ for hook in self.sdk_init_hooks:
+ base_url, client = hook.sdk_init(base_url, client)
+ return base_url, client
+
+ def before_request(self, hook_ctx: BeforeRequestContext, request: requests.PreparedRequest) -> requests.PreparedRequest:
+ for hook in self.before_request_hooks:
+ out = hook.before_request(hook_ctx, request)
+ if isinstance(out, Exception):
+ raise out
+ request = out
+
+ return request
+
+ def after_success(self, hook_ctx: AfterSuccessContext, response: requests.Response) -> requests.Response:
+ for hook in self.after_success_hooks:
+ out = hook.after_success(hook_ctx, response)
+ if isinstance(out, Exception):
+ raise out
+ response = out
+ return response
+
+ def after_error(self, hook_ctx: AfterErrorContext, response: Optional[requests.Response], error: Optional[Exception]) -> Tuple[Optional[requests.Response], Optional[Exception]]:
+ for hook in self.after_error_hooks:
+ result = hook.after_error(hook_ctx, response, error)
+ if isinstance(result, Exception):
+ raise result
+ response, error = result
+ return response, error
diff --git a/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/types.py b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/types.py
new file mode 100644
index 00000000..72ab059b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/unstructured_client/_hooks/types.py
@@ -0,0 +1,74 @@
+"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
+
+import requests as requests_http
+from abc import ABC, abstractmethod
+from typing import Any, Callable, List, Optional, Tuple, Union
+
+
+class HookContext:
+ operation_id: str
+ oauth2_scopes: Optional[List[str]] = None
+ security_source: Optional[Union[Any, Callable[[], Any]]] = None
+
+ def __init__(self, operation_id: str, oauth2_scopes: Optional[List[str]], security_source: Optional[Union[Any, Callable[[], Any]]]):
+ self.operation_id = operation_id
+ self.oauth2_scopes = oauth2_scopes
+ self.security_source = security_source
+
+
+class BeforeRequestContext(HookContext):
+ def __init__(self, hook_ctx: HookContext):
+ super().__init__(hook_ctx.operation_id, hook_ctx.oauth2_scopes, hook_ctx.security_source)
+
+
+class AfterSuccessContext(HookContext):
+ def __init__(self, hook_ctx: HookContext):
+ super().__init__(hook_ctx.operation_id, hook_ctx.oauth2_scopes, hook_ctx.security_source)
+
+
+
+class AfterErrorContext(HookContext):
+ def __init__(self, hook_ctx: HookContext):
+ super().__init__(hook_ctx.operation_id, hook_ctx.oauth2_scopes, hook_ctx.security_source)
+
+
+class SDKInitHook(ABC):
+ @abstractmethod
+ def sdk_init(self, base_url: str, client: requests_http.Session) -> Tuple[str, requests_http.Session]:
+ pass
+
+
+class BeforeRequestHook(ABC):
+ @abstractmethod
+ def before_request(self, hook_ctx: BeforeRequestContext, request: requests_http.PreparedRequest) -> Union[requests_http.PreparedRequest, Exception]:
+ pass
+
+
+class AfterSuccessHook(ABC):
+ @abstractmethod
+ def after_success(self, hook_ctx: AfterSuccessContext, response: requests_http.Response) -> Union[requests_http.Response, Exception]:
+ pass
+
+
+class AfterErrorHook(ABC):
+ @abstractmethod
+ def after_error(self, hook_ctx: AfterErrorContext, response: Optional[requests_http.Response], error: Optional[Exception]) -> Union[Tuple[Optional[requests_http.Response], Optional[Exception]], Exception]:
+ pass
+
+
+class Hooks(ABC):
+ @abstractmethod
+ def register_sdk_init_hook(self, hook: SDKInitHook):
+ pass
+
+ @abstractmethod
+ def register_before_request_hook(self, hook: BeforeRequestHook):
+ pass
+
+ @abstractmethod
+ def register_after_success_hook(self, hook: AfterSuccessHook):
+ pass
+
+ @abstractmethod
+ def register_after_error_hook(self, hook: AfterErrorHook):
+ pass
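As an illustration of these interfaces, a hypothetical user-defined hook is sketched below; RequestIdHook is not part of this package and would need to be registered in registration.init_hooks to take effect:

    import requests

    from unstructured_client._hooks.types import BeforeRequestContext, BeforeRequestHook


    class RequestIdHook(BeforeRequestHook):
        """Stamp every outgoing request with the operation id for easier tracing."""

        def before_request(
            self, hook_ctx: BeforeRequestContext, request: requests.PreparedRequest
        ) -> requests.PreparedRequest:
            request.headers["X-Example-Request-Id"] = hook_ctx.operation_id
            return request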