From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .venv/lib/python3.12/site-packages/docx/package.py | 110 +++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/docx/package.py (limited to '.venv/lib/python3.12/site-packages/docx/package.py') diff --git a/.venv/lib/python3.12/site-packages/docx/package.py b/.venv/lib/python3.12/site-packages/docx/package.py new file mode 100644 index 00000000..7ea47e6e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/package.py @@ -0,0 +1,110 @@ +"""WordprocessingML Package class and related objects.""" + +from __future__ import annotations + +from typing import IO, cast + +from docx.image.image import Image +from docx.opc.constants import RELATIONSHIP_TYPE as RT +from docx.opc.package import OpcPackage +from docx.opc.packuri import PackURI +from docx.parts.image import ImagePart +from docx.shared import lazyproperty + + +class Package(OpcPackage): + """Customizations specific to a WordprocessingML package.""" + + def after_unmarshal(self): + """Called by loading code after all parts and relationships have been loaded. + + This method affords the opportunity for any required post-processing. + """ + self._gather_image_parts() + + def get_or_add_image_part(self, image_descriptor: str | IO[bytes]) -> ImagePart: + """Return |ImagePart| containing image specified by `image_descriptor`. + + The image-part is newly created if a matching one is not already present in the + collection. + """ + return self.image_parts.get_or_add_image_part(image_descriptor) + + @lazyproperty + def image_parts(self) -> ImageParts: + """|ImageParts| collection object for this package.""" + return ImageParts() + + def _gather_image_parts(self): + """Load the image part collection with all the image parts in package.""" + for rel in self.iter_rels(): + if rel.is_external: + continue + if rel.reltype != RT.IMAGE: + continue + if rel.target_part in self.image_parts: + continue + self.image_parts.append(cast("ImagePart", rel.target_part)) + + +class ImageParts: + """Collection of |ImagePart| objects corresponding to images in the package.""" + + def __init__(self): + self._image_parts: list[ImagePart] = [] + + def __contains__(self, item: object): + return self._image_parts.__contains__(item) + + def __iter__(self): + return self._image_parts.__iter__() + + def __len__(self): + return self._image_parts.__len__() + + def append(self, item: ImagePart): + self._image_parts.append(item) + + def get_or_add_image_part(self, image_descriptor: str | IO[bytes]) -> ImagePart: + """Return |ImagePart| object containing image identified by `image_descriptor`. + + The image-part is newly created if a matching one is not present in the + collection. + """ + image = Image.from_file(image_descriptor) + matching_image_part = self._get_by_sha1(image.sha1) + if matching_image_part is not None: + return matching_image_part + return self._add_image_part(image) + + def _add_image_part(self, image: Image): + """Return |ImagePart| instance newly created from `image` and appended to the collection.""" + partname = self._next_image_partname(image.ext) + image_part = ImagePart.from_image(image, partname) + self.append(image_part) + return image_part + + def _get_by_sha1(self, sha1: str) -> ImagePart | None: + """Return the image part in this collection having a SHA1 hash matching `sha1`, + or |None| if not found.""" + for image_part in self._image_parts: + if image_part.sha1 == sha1: + return image_part + return None + + def _next_image_partname(self, ext: str) -> PackURI: + """The next available image partname, starting from ``/word/media/image1.{ext}`` + where unused numbers are reused. + + The partname is unique by number, without regard to the extension. `ext` does + not include the leading period. + """ + + def image_partname(n: int) -> PackURI: + return PackURI("/word/media/image%d.%s" % (n, ext)) + + used_numbers = [image_part.partname.idx for image_part in self] + for n in range(1, len(self) + 1): + if n not in used_numbers: + return image_partname(n) + return image_partname(len(self) + 1) -- cgit v1.2.3