aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/pdf2image/parsers.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/pdf2image/parsers.py')
-rw-r--r--.venv/lib/python3.12/site-packages/pdf2image/parsers.py98
1 files changed, 98 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pdf2image/parsers.py b/.venv/lib/python3.12/site-packages/pdf2image/parsers.py
new file mode 100644
index 00000000..72f51250
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pdf2image/parsers.py
@@ -0,0 +1,98 @@
+"""
+ pdf2image custom buffer parsers
+"""
+
+from io import BytesIO
+from typing import List
+
+from PIL import Image
+
+
def parse_buffer_to_ppm(data: bytes) -> List[Image.Image]:
    """Split a buffer of concatenated PPM files into Pillow images

    Each file is expected to start with three newline-terminated header
    lines (magic code, "<width> <height>", max value) found within its
    first 40 bytes, followed by 3 bytes per pixel.

    :param data: pdftoppm/pdftocairo output bytes
    :type data: bytes
    :return: List of PPM images parsed from the output
    :rtype: List[Image.Image]
    """

    frames = []
    offset = 0
    total = len(data)

    while offset < total:
        # Only the start of the frame is inspected to read the header lines
        header_lines = data[offset : offset + 40].split(b"\n")
        magic, dimensions, maxval = header_lines[:3]
        width, height = dimensions.split(b" ")

        # Three header lines plus the newline that terminates each of them
        header_len = len(magic) + len(dimensions) + len(maxval) + 3
        # Three bytes (one per channel) for every pixel
        frame_end = offset + header_len + int(width) * int(height) * 3

        frames.append(Image.open(BytesIO(data[offset:frame_end])))
        offset = frame_end

    return frames
+
+
def parse_buffer_to_pgm(data: bytes) -> List[Image.Image]:
    """Split a buffer of concatenated PGM files into Pillow images

    Each file is expected to start with three newline-terminated header
    lines (magic code, "<width> <height>", max value) found within its
    first 40 bytes, followed by 1 byte per pixel.

    :param data: pdftoppm/pdftocairo output bytes
    :type data: bytes
    :return: List of PGM images parsed from the output
    :rtype: List[Image.Image]
    """

    frames = []
    offset = 0
    total = len(data)

    while offset < total:
        # Only the start of the frame is inspected to read the header lines
        header_lines = data[offset : offset + 40].split(b"\n")
        magic, dimensions, maxval = header_lines[:3]
        width, height = dimensions.split(b" ")

        # Three header lines plus the newline that terminates each of them
        header_len = len(magic) + len(dimensions) + len(maxval) + 3
        # A single grey-level byte for every pixel
        frame_end = offset + header_len + int(width) * int(height)

        frames.append(Image.open(BytesIO(data[offset:frame_end])))
        offset = frame_end

    return frames
+
+
def parse_buffer_to_jpeg(data: bytes) -> List[Image.Image]:
    """Split a buffer of concatenated JPEG files into Pillow images

    The buffer is cut at every end-of-image marker (``FF D9``); this assumes
    the marker never occurs inside an image's own payload. Whatever follows
    the last marker is discarded.

    :param data: pdftoppm/pdftocairo output bytes
    :type data: bytes
    :return: List of JPEG images parsed from the output
    :rtype: List[Image.Image]
    """

    end_of_image = b"\xff\xd9"

    pieces = data.split(end_of_image)
    # split() always yields one trailing piece after the final marker
    # (empty for well-formed output); it is not an image, so drop it.
    pieces.pop()

    images = []
    for piece in pieces:
        # Restore the marker that split() consumed
        images.append(Image.open(BytesIO(piece + end_of_image)))

    return images
+
+
def parse_buffer_to_png(data: bytes) -> List[Image.Image]:
    """Split a buffer of concatenated PNG files into Pillow images

    An image ends at an ``IEND`` chunk type that, together with the 4 bytes
    following it, is either the very end of the buffer or is immediately
    followed by the signature of another PNG. Bytes left over after the last
    complete image (e.g. truncated output) are ignored instead of being
    scanned forever.

    :param data: pdftoppm/pdftocairo output bytes
    :type data: bytes
    :return: List of PNG images parsed from the output
    :rtype: List[Image.Image]
    """

    images = []

    data_len = len(data)
    image_start = 0
    search_from = 0

    while image_start < data_len:
        marker = data.find(b"IEND", search_from)
        if marker == -1:
            # No terminating chunk left: the remaining bytes are not a
            # complete PNG, so stop rather than loop indefinitely.
            break

        # The 4-byte chunk type is followed by 4 more bytes closing the file
        image_end = marker + 8

        # IEND can appear in a PNG without being the actual end, so only
        # accept it when the buffer ends here or another PNG signature
        # ("\x89PNG") starts right after it.
        if image_end == data_len or data[image_end + 1 : image_end + 4] == b"PNG":
            images.append(Image.open(BytesIO(data[image_start:image_end])))
            image_start = image_end
            search_from = image_end
        else:
            search_from = marker + 1

    return images