about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/pdf2image
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/pdf2image')
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/__init__.py8
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/exceptions.py33
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/generators.py46
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/parsers.py98
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/pdf2image.py683
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/py.typed0
6 files changed, 868 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/__init__.py b/.venv/lib/python3.12/site-packages/pdf2image/__init__.py
new file mode 100644
index 00000000..72601399
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/__init__.py
@@ -0,0 +1,8 @@
+"""
+    __init__ of the pdf2image module
+"""
+
+from .pdf2image import convert_from_bytes as convert_from_bytes
+from .pdf2image import convert_from_path as convert_from_path
+from .pdf2image import pdfinfo_from_bytes as pdfinfo_from_bytes
+from .pdf2image import pdfinfo_from_path as pdfinfo_from_path
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/exceptions.py b/.venv/lib/python3.12/site-packages/pdf2image/exceptions.py
new file mode 100644
index 00000000..bf201089
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/exceptions.py
@@ -0,0 +1,33 @@
+"""
+    Define exceptions specific to pdf2image
+"""
+
+
+class PopplerNotInstalledError(Exception):
+    """Raised when poppler is not installed"""
+
+    pass
+
+
+class PDFInfoNotInstalledError(PopplerNotInstalledError):
+    """Raised when pdfinfo is not installed"""
+
+    pass
+
+
+class PDFPageCountError(Exception):
+    """Raised when the pdfinfo was unable to retrieve the page count"""
+
+    pass
+
+
+class PDFSyntaxError(Exception):
+    """Raised when a syntax error was thrown during rendering"""
+
+    pass
+
+
+class PDFPopplerTimeoutError(Exception):
+    """Raised when the timeout is exceeded while converting a PDF"""
+
+    pass
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/generators.py b/.venv/lib/python3.12/site-packages/pdf2image/generators.py
new file mode 100644
index 00000000..5d79a3ce
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/generators.py
@@ -0,0 +1,46 @@
+"""
+    pdf2image filename generators
+"""
+
+import uuid
+import threading
+
+
+class ThreadSafeGenerator(object):
+    """Wrapper around generator that protects concurrent access"""
+
+    def __init__(self, gen):
+        self.gen = gen
+        self.lock = threading.Lock()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        with self.lock:
+            return next(self.gen)
+
+
+def threadsafe(f):
+    """Decorator to make generator threadsafe. Fix #125"""
+
+    def g(*a, **kw):
+        return ThreadSafeGenerator(f(*a, **kw))
+
+    return g
+
+
+@threadsafe
+def uuid_generator():
+    """Returns a UUID4"""
+    while True:
+        yield str(uuid.uuid4())
+
+
+@threadsafe
+def counter_generator(prefix="", suffix="", padding_goal=4):
+    """Returns a joined prefix, iteration number, and suffix"""
+    i = 0
+    while True:
+        i += 1
+        yield str(prefix) + str(i).zfill(padding_goal) + str(suffix)
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/parsers.py b/.venv/lib/python3.12/site-packages/pdf2image/parsers.py
new file mode 100644
index 00000000..72f51250
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/parsers.py
@@ -0,0 +1,98 @@
+"""
+    pdf2image custom buffer parsers
+"""
+
+from io import BytesIO
+from typing import List
+
+from PIL import Image
+
+
+def parse_buffer_to_ppm(data: bytes) -> List[Image.Image]:
+    """Parse PPM file bytes to Pillow Image
+
+    :param data: pdftoppm/pdftocairo output bytes
+    :type data: bytes
+    :return: List of PPM images parsed from the output
+    :rtype: List[Image.Image]
+    """
+
+    images = []
+
+    index = 0
+
+    while index < len(data):
+        code, size, rgb = tuple(data[index : index + 40].split(b"\n")[0:3])
+        size_x, size_y = tuple(size.split(b" "))
+        file_size = len(code) + len(size) + len(rgb) + 3 + int(size_x) * int(size_y) * 3
+        images.append(Image.open(BytesIO(data[index : index + file_size])))
+        index += file_size
+
+    return images
+
+
+def parse_buffer_to_pgm(data: bytes) -> List[Image.Image]:
+    """Parse PGM file bytes to Pillow Image
+
+    :param data: pdftoppm/pdftocairo output bytes
+    :type data: bytes
+    :return: List of PGM images parsed from the output
+    :rtype: List[Image.Image]
+    """
+
+    images = []
+
+    index = 0
+
+    while index < len(data):
+        code, size, maxval = tuple(data[index : index + 40].split(b"\n")[0:3])
+        size_x, size_y = tuple(size.split(b" "))
+        file_size = len(code) + len(size) + len(maxval) + 3 + int(size_x) * int(size_y)
+        images.append(Image.open(BytesIO(data[index : index + file_size])))
+        index += file_size
+
+    return images
+
+
+def parse_buffer_to_jpeg(data: bytes) -> List[Image.Image]:
+    """Parse JPEG file bytes to Pillow Image
+
+    :param data: pdftoppm/pdftocairo output bytes
+    :type data: bytes
+    :return: List of JPEG images parsed from the output
+    :rtype: List[Image.Image]
+    """
+
+    return [
+        Image.open(BytesIO(image_data + b"\xff\xd9"))
+        for image_data in data.split(b"\xff\xd9")[
+            :-1
+        ]  # Last element is obviously empty
+    ]
+
+
+def parse_buffer_to_png(data: bytes) -> List[Image.Image]:
+    """Parse PNG file bytes to Pillow Image
+
+    :param data: pdftoppm/pdftocairo output bytes
+    :type data: bytes
+    :return: List of PNG images parsed from the output
+    :rtype: List[Image.Image]
+    """
+
+    images = []
+
+    c1 = 0
+    c2 = 0
+    data_len = len(data)
+    while c1 < data_len:
+        # IEND can appear in a PNG without being the actual end
+        if data[c2 : c2 + 4] == b"IEND" and (
+            c2 + 8 == data_len or data[c2 + 9 : c2 + 12] == b"PNG"
+        ):
+            images.append(Image.open(BytesIO(data[c1 : c2 + 8])))
+            c1 = c2 + 8
+            c2 = c1
+        c2 += 1
+
+    return images
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/pdf2image.py b/.venv/lib/python3.12/site-packages/pdf2image/pdf2image.py
new file mode 100644
index 00000000..21d19c88
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/pdf2image.py
@@ -0,0 +1,683 @@
+"""
+    pdf2image is a light wrapper for the poppler-utils tools that can convert your
+    PDFs into Pillow images.
+"""
+
+import os
+import platform
+import tempfile
+import types
+import shutil
+import subprocess
+from subprocess import Popen, PIPE, TimeoutExpired
+from typing import Any, Union, Tuple, List, Dict, Callable
+from pathlib import PurePath
+from PIL import Image
+
+from pdf2image.generators import uuid_generator, counter_generator, ThreadSafeGenerator
+
+from pdf2image.parsers import (
+    parse_buffer_to_pgm,
+    parse_buffer_to_ppm,
+    parse_buffer_to_jpeg,
+    parse_buffer_to_png,
+)
+
+from pdf2image.exceptions import (
+    PDFInfoNotInstalledError,
+    PDFPageCountError,
+    PDFSyntaxError,
+    PDFPopplerTimeoutError,
+)
+
+TRANSPARENT_FILE_TYPES = ["png", "tiff"]
+PDFINFO_CONVERT_TO_INT = ["Pages"]
+
+
+def convert_from_path(
+    pdf_path: Union[str, PurePath],
+    dpi: int = 200,
+    output_folder: Union[str, PurePath] = None,
+    first_page: int = None,
+    last_page: int = None,
+    fmt: str = "ppm",
+    jpegopt: Dict = None,
+    thread_count: int = 1,
+    userpw: str = None,
+    ownerpw: str = None,
+    use_cropbox: bool = False,
+    strict: bool = False,
+    transparent: bool = False,
+    single_file: bool = False,
+    output_file: Any = uuid_generator(),
+    poppler_path: Union[str, PurePath] = None,
+    grayscale: bool = False,
+    size: Union[Tuple, int] = None,
+    paths_only: bool = False,
+    use_pdftocairo: bool = False,
+    timeout: int = None,
+    hide_annotations: bool = False,
+) -> List[Image.Image]:
+    """Function wrapping pdftoppm and pdftocairo
+
+    :param pdf_path: Path to the PDF that you want to convert
+    :type pdf_path: Union[str, PurePath]
+    :param dpi: Image quality in DPI (default 200), defaults to 200
+    :type dpi: int, optional
+    :param output_folder: Write the resulting images to a folder (instead of directly in memory), defaults to None
+    :type output_folder: Union[str, PurePath], optional
+    :param first_page: First page to process, defaults to None
+    :type first_page: int, optional
+    :param last_page: Last page to process before stopping, defaults to None
+    :type last_page: int, optional
+    :param fmt: Output image format, defaults to "ppm"
+    :type fmt: str, optional
+    :param jpegopt: jpeg options `quality`, `progressive`, and `optimize` (only for jpeg format), defaults to None
+    :type jpegopt: Dict, optional
+    :param thread_count: How many threads we are allowed to spawn for processing, defaults to 1
+    :type thread_count: int, optional
+    :param userpw: PDF's password, defaults to None
+    :type userpw: str, optional
+    :param ownerpw: PDF's owner password, defaults to None
+    :type ownerpw: str, optional
+    :param use_cropbox: Use cropbox instead of mediabox, defaults to False
+    :type use_cropbox: bool, optional
+    :param strict: When a Syntax Error is thrown, it will be raised as an Exception, defaults to False
+    :type strict: bool, optional
+    :param transparent: Output with a transparent background instead of a white one, defaults to False
+    :type transparent: bool, optional
+    :param single_file: Uses the -singlefile option from pdftoppm/pdftocairo, defaults to False
+    :type single_file: bool, optional
+    :param output_file: What is the output filename or generator, defaults to uuid_generator()
+    :type output_file: Any, optional
+    :param poppler_path: Path to look for poppler binaries, defaults to None
+    :type poppler_path: Union[str, PurePath], optional
+    :param grayscale: Output grayscale image(s), defaults to False
+    :type grayscale: bool, optional
+    :param size: Size of the resulting image(s), uses the Pillow (width, height) standard, defaults to None
+    :type size: Union[Tuple, int], optional
+    :param paths_only: Don't load image(s), return paths instead (requires output_folder), defaults to False
+    :type paths_only: bool, optional
+    :param use_pdftocairo: Use pdftocairo instead of pdftoppm, may help performance, defaults to False
+    :type use_pdftocairo: bool, optional
+    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
+    :type timeout: int, optional
+    :param hide_annotations: Hide PDF annotations in the output, defaults to False
+    :type hide_annotations: bool, optional
+    :raises NotImplementedError: Raised when conflicting parameters are given (hide_annotations for pdftocairo)
+    :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
+    :raises PDFSyntaxError: Raised if there is a syntax error in the PDF and strict=True
+    :return: A list of Pillow images, one for each page between first_page and last_page
+    :rtype: List[Image.Image]
+    """
+
+    if use_pdftocairo and fmt == "ppm":
+        fmt = "png"
+
+    # We make sure that if passed arguments are Path objects, they're converted to strings
+    if isinstance(pdf_path, PurePath):
+        pdf_path = pdf_path.as_posix()
+
+    if isinstance(output_folder, PurePath):
+        output_folder = output_folder.as_posix()
+
+    if isinstance(poppler_path, PurePath):
+        poppler_path = poppler_path.as_posix()
+
+    page_count = pdfinfo_from_path(
+        pdf_path, userpw, ownerpw, poppler_path=poppler_path
+    )["Pages"]
+
+    # We start by getting the output format, the buffer processing function and if we need pdftocairo
+    parsed_fmt, final_extension, parse_buffer_func, use_pdfcairo_format = _parse_format(
+        fmt, grayscale
+    )
+
+    # We use pdftocairo is the format requires it OR we need a transparent output
+    use_pdfcairo = (
+        use_pdftocairo
+        or use_pdfcairo_format
+        or (transparent and parsed_fmt in TRANSPARENT_FILE_TYPES)
+    )
+
+    poppler_version_major, poppler_version_minor = _get_poppler_version(
+        "pdftocairo" if use_pdfcairo else "pdftoppm", poppler_path=poppler_path
+    )
+
+    if poppler_version_major == 0 and poppler_version_minor <= 57:
+        jpegopt = None
+
+    if poppler_version_major == 0 and poppler_version_minor <= 83:
+        hide_annotations = False
+
+    # If output_file isn't a generator, it will be turned into one
+    if not isinstance(output_file, types.GeneratorType) and not isinstance(
+        output_file, ThreadSafeGenerator
+    ):
+        if single_file:
+            output_file = iter([output_file])
+            thread_count = 1
+        else:
+            output_file = counter_generator(output_file)
+
+    if thread_count < 1:
+        thread_count = 1
+
+    if first_page is None or first_page < 1:
+        first_page = 1
+
+    if last_page is None or last_page > page_count:
+        last_page = page_count
+
+    if first_page > last_page:
+        return []
+
+    try:
+        auto_temp_dir = False
+        if output_folder is None and use_pdfcairo:
+            output_folder = tempfile.mkdtemp()
+            auto_temp_dir = True
+
+        # Recalculate page count based on first and last page
+        page_count = last_page - first_page + 1
+
+        if thread_count > page_count:
+            thread_count = page_count
+
+        reminder = page_count % thread_count
+        current_page = first_page
+        processes = []
+        for _ in range(thread_count):
+            thread_output_file = next(output_file)
+
+            # Get the number of pages the thread will be processing
+            thread_page_count = page_count // thread_count + int(reminder > 0)
+            # Build the command accordingly
+            args = _build_command(
+                ["-r", str(dpi), pdf_path],
+                output_folder,
+                current_page,
+                current_page + thread_page_count - 1,
+                parsed_fmt,
+                jpegopt,
+                thread_output_file,
+                userpw,
+                ownerpw,
+                use_cropbox,
+                transparent,
+                single_file,
+                grayscale,
+                size,
+                hide_annotations,
+            )
+
+            if use_pdfcairo:
+                if hide_annotations:
+                    raise NotImplementedError(
+                        "Hide annotations flag not implemented in pdftocairo."
+                    )
+                args = [_get_command_path("pdftocairo", poppler_path)] + args
+            else:
+                args = [_get_command_path("pdftoppm", poppler_path)] + args
+
+            # Update page values
+            current_page = current_page + thread_page_count
+            reminder -= int(reminder > 0)
+            # Add poppler path to LD_LIBRARY_PATH
+            env = os.environ.copy()
+            if poppler_path is not None:
+                env["LD_LIBRARY_PATH"] = (
+                    poppler_path + ":" + env.get("LD_LIBRARY_PATH", "")
+                )
+            # Spawn the process and save its uuid
+            startupinfo = None
+            if platform.system() == "Windows":
+                # this startupinfo structure prevents a console window from popping up on Windows
+                startupinfo = subprocess.STARTUPINFO()
+                startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+            processes.append(
+                (
+                    thread_output_file,
+                    Popen(
+                        args, env=env, stdout=PIPE, stderr=PIPE, startupinfo=startupinfo
+                    ),
+                )
+            )
+
+        images = []
+
+        for uid, proc in processes:
+            try:
+                data, err = proc.communicate(timeout=timeout)
+            except TimeoutExpired:
+                proc.kill()
+                outs, errs = proc.communicate()
+                raise PDFPopplerTimeoutError("Run poppler timeout.")
+
+            if b"Syntax Error" in err and strict:
+                raise PDFSyntaxError(err.decode("utf8", "ignore"))
+
+            if output_folder is not None:
+                images += _load_from_output_folder(
+                    output_folder,
+                    uid,
+                    final_extension,
+                    paths_only,
+                    in_memory=auto_temp_dir,
+                )
+            else:
+                images += parse_buffer_func(data)
+    finally:
+        if auto_temp_dir:
+            shutil.rmtree(output_folder)
+
+    return images
+
+
+def convert_from_bytes(
+    pdf_file: bytes,
+    dpi: int = 200,
+    output_folder: Union[str, PurePath] = None,
+    first_page: int = None,
+    last_page: int = None,
+    fmt: str = "ppm",
+    jpegopt: Dict = None,
+    thread_count: int = 1,
+    userpw: str = None,
+    ownerpw: str = None,
+    use_cropbox: bool = False,
+    strict: bool = False,
+    transparent: bool = False,
+    single_file: bool = False,
+    output_file: Union[str, PurePath] = uuid_generator(),
+    poppler_path: Union[str, PurePath] = None,
+    grayscale: bool = False,
+    size: Union[Tuple, int] = None,
+    paths_only: bool = False,
+    use_pdftocairo: bool = False,
+    timeout: int = None,
+    hide_annotations: bool = False,
+) -> List[Image.Image]:
+    """Function wrapping pdftoppm and pdftocairo.
+
+    :param pdf_bytes: Bytes of the PDF that you want to convert
+    :type pdf_bytes: bytes
+    :param dpi: Image quality in DPI (default 200), defaults to 200
+    :type dpi: int, optional
+    :param output_folder: Write the resulting images to a folder (instead of directly in memory), defaults to None
+    :type output_folder: Union[str, PurePath], optional
+    :param first_page: First page to process, defaults to None
+    :type first_page: int, optional
+    :param last_page: Last page to process before stopping, defaults to None
+    :type last_page: int, optional
+    :param fmt: Output image format, defaults to "ppm"
+    :type fmt: str, optional
+    :param jpegopt: jpeg options `quality`, `progressive`, and `optimize` (only for jpeg format), defaults to None
+    :type jpegopt: Dict, optional
+    :param thread_count: How many threads we are allowed to spawn for processing, defaults to 1
+    :type thread_count: int, optional
+    :param userpw: PDF's password, defaults to None
+    :type userpw: str, optional
+    :param ownerpw: PDF's owner password, defaults to None
+    :type ownerpw: str, optional
+    :param use_cropbox: Use cropbox instead of mediabox, defaults to False
+    :type use_cropbox: bool, optional
+    :param strict: When a Syntax Error is thrown, it will be raised as an Exception, defaults to False
+    :type strict: bool, optional
+    :param transparent: Output with a transparent background instead of a white one, defaults to False
+    :type transparent: bool, optional
+    :param single_file: Uses the -singlefile option from pdftoppm/pdftocairo, defaults to False
+    :type single_file: bool, optional
+    :param output_file: What is the output filename or generator, defaults to uuid_generator()
+    :type output_file: Any, optional
+    :param poppler_path: Path to look for poppler binaries, defaults to None
+    :type poppler_path: Union[str, PurePath], optional
+    :param grayscale: Output grayscale image(s), defaults to False
+    :type grayscale: bool, optional
+    :param size: Size of the resulting image(s), uses the Pillow (width, height) standard, defaults to None
+    :type size: Union[Tuple, int], optional
+    :param paths_only: Don't load image(s), return paths instead (requires output_folder), defaults to False
+    :type paths_only: bool, optional
+    :param use_pdftocairo: Use pdftocairo instead of pdftoppm, may help performance, defaults to False
+    :type use_pdftocairo: bool, optional
+    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
+    :type timeout: int, optional
+    :param hide_annotations: Hide PDF annotations in the output, defaults to False
+    :type hide_annotations: bool, optional
+    :raises NotImplementedError: Raised when conflicting parameters are given (hide_annotations for pdftocairo)
+    :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
+    :raises PDFSyntaxError: Raised if there is a syntax error in the PDF and strict=True
+    :return: A list of Pillow images, one for each page between first_page and last_page
+    :rtype: List[Image.Image]
+    """
+
+    fh, temp_filename = tempfile.mkstemp()
+    try:
+        with open(temp_filename, "wb") as f:
+            f.write(pdf_file)
+            f.flush()
+            return convert_from_path(
+                f.name,
+                dpi=dpi,
+                output_folder=output_folder,
+                first_page=first_page,
+                last_page=last_page,
+                fmt=fmt,
+                jpegopt=jpegopt,
+                thread_count=thread_count,
+                userpw=userpw,
+                ownerpw=ownerpw,
+                use_cropbox=use_cropbox,
+                strict=strict,
+                transparent=transparent,
+                single_file=single_file,
+                output_file=output_file,
+                poppler_path=poppler_path,
+                grayscale=grayscale,
+                size=size,
+                paths_only=paths_only,
+                use_pdftocairo=use_pdftocairo,
+                timeout=timeout,
+                hide_annotations=hide_annotations,
+            )
+    finally:
+        os.close(fh)
+        os.remove(temp_filename)
+
+
+def _build_command(
+    args: List,
+    output_folder: str,
+    first_page: int,
+    last_page: int,
+    fmt: str,
+    jpegopt: Dict,
+    output_file: str,
+    userpw: str,
+    ownerpw: str,
+    use_cropbox: bool,
+    transparent: bool,
+    single_file: bool,
+    grayscale: bool,
+    size: Union[int, Tuple[int, int]],
+    hide_annotations: bool,
+) -> List[str]:
+    if use_cropbox:
+        args.append("-cropbox")
+
+    if hide_annotations:
+        args.append("-hide-annotations")
+
+    if transparent and fmt in TRANSPARENT_FILE_TYPES:
+        args.append("-transp")
+
+    if first_page is not None:
+        args.extend(["-f", str(first_page)])
+
+    if last_page is not None:
+        args.extend(["-l", str(last_page)])
+
+    if fmt not in ["pgm", "ppm"]:
+        args.append("-" + fmt)
+
+    if fmt in ["jpeg", "jpg"] and jpegopt:
+        args.extend(["-jpegopt", _parse_jpegopt(jpegopt)])
+
+    if single_file:
+        args.append("-singlefile")
+
+    if output_folder is not None:
+        args.append(os.path.join(output_folder, output_file))
+
+    if userpw is not None:
+        args.extend(["-upw", userpw])
+
+    if ownerpw is not None:
+        args.extend(["-opw", ownerpw])
+
+    if grayscale:
+        args.append("-gray")
+
+    if size is None:
+        pass
+    elif isinstance(size, tuple) and len(size) == 2:
+        if size[0] is not None:
+            args.extend(["-scale-to-x", str(int(size[0]))])
+        else:
+            args.extend(["-scale-to-x", str(-1)])
+        if size[1] is not None:
+            args.extend(["-scale-to-y", str(int(size[1]))])
+        else:
+            args.extend(["-scale-to-y", str(-1)])
+    elif isinstance(size, tuple) and len(size) == 1:
+        args.extend(["-scale-to", str(int(size[0]))])
+    elif isinstance(size, int) or isinstance(size, float):
+        args.extend(["-scale-to", str(int(size))])
+    else:
+        raise ValueError(f"Size {size} is not a tuple or an integer")
+
+    return args
+
+
+def _parse_format(fmt: str, grayscale: bool = False) -> Tuple[str, str, Callable, bool]:
+    fmt = fmt.lower()
+    if fmt[0] == ".":
+        fmt = fmt[1:]
+    if fmt in ("jpeg", "jpg"):
+        return "jpeg", "jpg", parse_buffer_to_jpeg, False
+    if fmt == "png":
+        return "png", "png", parse_buffer_to_png, False
+    if fmt in ("tif", "tiff"):
+        return "tiff", "tif", None, True
+    if fmt == "ppm" and grayscale:
+        return "pgm", "pgm", parse_buffer_to_pgm, False
+    # Unable to parse the format so we'll use the default
+    return "ppm", "ppm", parse_buffer_to_ppm, False
+
+
+def _parse_jpegopt(jpegopt: Dict) -> str:
+    parts = []
+    for k, v in jpegopt.items():
+        if v is True:
+            v = "y"
+        if v is False:
+            v = "n"
+        parts.append("{}={}".format(k, v))
+    return ",".join(parts)
+
+
+def _get_command_path(command: str, poppler_path: str = None) -> str:
+    if platform.system() == "Windows":
+        command = command + ".exe"
+
+    if poppler_path is not None:
+        command = os.path.join(poppler_path, command)
+
+    return command
+
+
+def _get_poppler_version(
+    command: str, poppler_path: str = None, timeout: int = None
+) -> Tuple[int, int]:
+    command = [_get_command_path(command, poppler_path), "-v"]
+
+    env = os.environ.copy()
+    if poppler_path is not None:
+        env["LD_LIBRARY_PATH"] = poppler_path + ":" + env.get("LD_LIBRARY_PATH", "")
+    proc = Popen(command, env=env, stdout=PIPE, stderr=PIPE)
+
+    try:
+        data, err = proc.communicate(timeout=timeout)
+    except TimeoutExpired:
+        proc.kill()
+        outs, errs = proc.communicate()
+        raise PDFPopplerTimeoutError("Run poppler poppler timeout.")
+
+    try:
+        # TODO: Make this more robust
+        version = err.decode("utf8", "ignore").split("\n")[0].split(" ")[-1].split(".")
+        return int(version[0]), int(version[1])
+    except:
+        # Lowest version that includes pdftocairo (2011)
+        return 0, 17
+
+
+def pdfinfo_from_path(
+    pdf_path: str,
+    userpw: str = None,
+    ownerpw: str = None,
+    poppler_path: str = None,
+    rawdates: bool = False,
+    timeout: int = None,
+    first_page: int = None,
+    last_page: int = None,
+) -> Dict:
+    """Function wrapping poppler's pdfinfo utility and returns the result as a dictionary.
+
+    :param pdf_path: Path to the PDF that you want to convert
+    :type pdf_path: str
+    :param userpw: PDF's password, defaults to None
+    :type userpw: str, optional
+    :param ownerpw: PDF's owner password, defaults to None
+    :type ownerpw: str, optional
+    :param poppler_path: Path to look for poppler binaries, defaults to None
+    :type poppler_path: Union[str, PurePath], optional
+    :param rawdates: Return the undecoded data strings, defaults to False
+    :type rawdates: bool, optional
+    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
+    :type timeout: int, optional
+    :param first_page: First page to process, defaults to None
+    :type first_page: int, optional
+    :param last_page: Last page to process before stopping, defaults to None
+    :type last_page: int, optional
+    :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
+    :raises PDFInfoNotInstalledError: Raised if pdfinfo is not installed
+    :raises PDFPageCountError: Raised if the output could not be parsed
+    :return: Dictionary containing various information on the PDF
+    :rtype: Dict
+    """
+    try:
+        command = [_get_command_path("pdfinfo", poppler_path), pdf_path]
+
+        if userpw is not None:
+            command.extend(["-upw", userpw])
+
+        if ownerpw is not None:
+            command.extend(["-opw", ownerpw])
+
+        if rawdates:
+            command.extend(["-rawdates"])
+
+        if first_page:
+            command.extend(["-f", str(first_page)])
+
+        if last_page:
+            command.extend(["-l", str(last_page)])
+
+        # Add poppler path to LD_LIBRARY_PATH
+        env = os.environ.copy()
+        if poppler_path is not None:
+            env["LD_LIBRARY_PATH"] = poppler_path + ":" + env.get("LD_LIBRARY_PATH", "")
+        proc = Popen(command, env=env, stdout=PIPE, stderr=PIPE)
+
+        try:
+            out, err = proc.communicate(timeout=timeout)
+        except TimeoutExpired:
+            proc.kill()
+            outs, errs = proc.communicate()
+            raise PDFPopplerTimeoutError("Run poppler poppler timeout.")
+
+        d = {}
+        for field in out.decode("utf8", "ignore").split("\n"):
+            sf = field.split(":")
+            key, value = sf[0], ":".join(sf[1:])
+            if key != "":
+                d[key] = (
+                    int(value.strip())
+                    if key in PDFINFO_CONVERT_TO_INT
+                    else value.strip()
+                )
+
+        if "Pages" not in d:
+            raise ValueError
+
+        return d
+
+    except OSError:
+        raise PDFInfoNotInstalledError(
+            "Unable to get page count. Is poppler installed and in PATH?"
+        )
+    except ValueError:
+        raise PDFPageCountError(
+            f"Unable to get page count.\n{err.decode('utf8', 'ignore')}"
+        )
+
+
+def pdfinfo_from_bytes(
+    pdf_bytes: bytes,
+    userpw: str = None,
+    ownerpw: str = None,
+    poppler_path: str = None,
+    rawdates: bool = False,
+    timeout: int = None,
+    first_page: int = None,
+    last_page: int = None,
+) -> Dict:
+    """Function wrapping poppler's pdfinfo utility and returns the result as a dictionary.
+
+    :param pdf_bytes: Bytes of the PDF that you want to convert
+    :type pdf_bytes: bytes
+    :param userpw: PDF's password, defaults to None
+    :type userpw: str, optional
+    :param ownerpw: PDF's owner password, defaults to None
+    :type ownerpw: str, optional
+    :param poppler_path: Path to look for poppler binaries, defaults to None
+    :type poppler_path: Union[str, PurePath], optional
+    :param rawdates: Return the undecoded data strings, defaults to False
+    :type rawdates: bool, optional
+    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
+    :type timeout: int, optional
+    :param first_page: First page to process, defaults to None
+    :type first_page: int, optional
+    :param last_page: Last page to process before stopping, defaults to None
+    :type last_page: int, optional
+    :return: Dictionary containing various information on the PDF
+    :rtype: Dict
+    """
+    fh, temp_filename = tempfile.mkstemp()
+    try:
+        with open(temp_filename, "wb") as f:
+            f.write(pdf_bytes)
+            f.flush()
+        return pdfinfo_from_path(
+            temp_filename,
+            userpw=userpw,
+            ownerpw=ownerpw,
+            poppler_path=poppler_path,
+            rawdates=rawdates,
+            timeout=timeout,
+            first_page=first_page,
+            last_page=last_page,
+        )
+    finally:
+        os.close(fh)
+        os.remove(temp_filename)
+
+
+def _load_from_output_folder(
+    output_folder: str,
+    output_file: str,
+    ext: str,
+    paths_only: bool,
+    in_memory: bool = False,
+) -> List[Image.Image]:
+    images = []
+    for f in sorted(os.listdir(output_folder)):
+        if f.startswith(output_file) and f.split(".")[-1] == ext:
+            if paths_only:
+                images.append(os.path.join(output_folder, f))
+            else:
+                images.append(Image.open(os.path.join(output_folder, f)))
+                if in_memory:
+                    images[-1].load()
+    return images
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/py.typed b/.venv/lib/python3.12/site-packages/pdf2image/py.typed
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/py.typed