about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/pptx/opc/serialized.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/pptx/opc/serialized.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/pptx/opc/serialized.py')
-rw-r--r--.venv/lib/python3.12/site-packages/pptx/opc/serialized.py296
1 files changed, 296 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pptx/opc/serialized.py b/.venv/lib/python3.12/site-packages/pptx/opc/serialized.py
new file mode 100644
index 00000000..92366708
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pptx/opc/serialized.py
@@ -0,0 +1,296 @@
+"""API for reading/writing serialized Open Packaging Convention (OPC) package."""
+
+from __future__ import annotations
+
+import os
+import posixpath
+import zipfile
+from typing import IO, TYPE_CHECKING, Any, Container, Sequence
+
+from pptx.exc import PackageNotFoundError
+from pptx.opc.constants import CONTENT_TYPE as CT
+from pptx.opc.oxml import CT_Types, serialize_part_xml
+from pptx.opc.packuri import CONTENT_TYPES_URI, PACKAGE_URI, PackURI
+from pptx.opc.shared import CaseInsensitiveDict
+from pptx.opc.spec import default_content_types
+from pptx.util import lazyproperty
+
+if TYPE_CHECKING:
+    from pptx.opc.package import Part, _Relationships  # pyright: ignore[reportPrivateUsage]
+
+
+class PackageReader(Container[bytes]):
+    """Provides access to package-parts of an OPC package with dict semantics.
+
+    The package may be in zip-format (a .pptx file) or expanded into a directory structure,
+    perhaps by unzipping a .pptx file.
+    """
+
+    def __init__(self, pkg_file: str | IO[bytes]):
+        self._pkg_file = pkg_file
+
+    def __contains__(self, pack_uri: object) -> bool:
+        """Return True when part identified by `pack_uri` is present in package."""
+        return pack_uri in self._blob_reader
+
+    def __getitem__(self, pack_uri: PackURI) -> bytes:
+        """Return bytes for part corresponding to `pack_uri`."""
+        return self._blob_reader[pack_uri]
+
+    def rels_xml_for(self, partname: PackURI) -> bytes | None:
+        """Return optional rels item XML for `partname`.
+
+        Returns `None` if no rels item is present for `partname`. `partname` is a |PackURI|
+        instance.
+        """
+        blob_reader, uri = self._blob_reader, partname.rels_uri
+        return blob_reader[uri] if uri in blob_reader else None
+
+    @lazyproperty
+    def _blob_reader(self) -> _PhysPkgReader:
+        """|_PhysPkgReader| subtype providing read access to the package file."""
+        return _PhysPkgReader.factory(self._pkg_file)
+
+
+class PackageWriter:
+    """Writes a zip-format OPC package to `pkg_file`.
+
+    `pkg_file` can be either a path to a zip file (a string) or a file-like object. `pkg_rels` is
+    the |_Relationships| object containing relationships for the package. `parts` is a sequence of
+    |Part| subtype instance to be written to the package.
+
+    Its single API classmethod is :meth:`write`. This class is not intended to be instantiated.
+    """
+
+    def __init__(self, pkg_file: str | IO[bytes], pkg_rels: _Relationships, parts: Sequence[Part]):
+        self._pkg_file = pkg_file
+        self._pkg_rels = pkg_rels
+        self._parts = parts
+
+    @classmethod
+    def write(
+        cls, pkg_file: str | IO[bytes], pkg_rels: _Relationships, parts: Sequence[Part]
+    ) -> None:
+        """Write a physical package (.pptx file) to `pkg_file`.
+
+        The serialized package contains `pkg_rels` and `parts`, a content-types stream based on
+        the content type of each part, and a .rels file for each part that has relationships.
+        """
+        cls(pkg_file, pkg_rels, parts)._write()
+
+    def _write(self) -> None:
+        """Write physical package (.pptx file)."""
+        with _PhysPkgWriter.factory(self._pkg_file) as phys_writer:
+            self._write_content_types_stream(phys_writer)
+            self._write_pkg_rels(phys_writer)
+            self._write_parts(phys_writer)
+
+    def _write_content_types_stream(self, phys_writer: _PhysPkgWriter) -> None:
+        """Write `[Content_Types].xml` part to the physical package.
+
+        This part must contain an appropriate content type lookup target for each part in the
+        package.
+        """
+        phys_writer.write(
+            CONTENT_TYPES_URI,
+            serialize_part_xml(_ContentTypesItem.xml_for(self._parts)),
+        )
+
+    def _write_parts(self, phys_writer: _PhysPkgWriter) -> None:
+        """Write blob of each part in `parts` to the package.
+
+        A rels item for each part is also written when the part has relationships.
+        """
+        for part in self._parts:
+            phys_writer.write(part.partname, part.blob)
+            if part._rels:  # pyright: ignore[reportPrivateUsage]
+                phys_writer.write(part.partname.rels_uri, part.rels.xml)
+
+    def _write_pkg_rels(self, phys_writer: _PhysPkgWriter) -> None:
+        """Write the XML rels item for `pkg_rels` ('/_rels/.rels') to the package."""
+        phys_writer.write(PACKAGE_URI.rels_uri, self._pkg_rels.xml)
+
+
+class _PhysPkgReader(Container[PackURI]):
+    """Base class for physical package reader objects."""
+
+    def __contains__(self, item: object) -> bool:
+        """Must be implemented by each subclass."""
+        raise NotImplementedError(  # pragma: no cover
+            "`%s` must implement `.__contains__()`" % type(self).__name__
+        )
+
+    def __getitem__(self, pack_uri: PackURI) -> bytes:
+        """Blob for part corresponding to `pack_uri`."""
+        raise NotImplementedError(  # pragma: no cover
+            f"`{type(self).__name__}` must implement `.__contains__()`"
+        )
+
+    @classmethod
+    def factory(cls, pkg_file: str | IO[bytes]) -> _PhysPkgReader:
+        """Return |_PhysPkgReader| subtype instance appropriage for `pkg_file`."""
+        # --- for pkg_file other than str, assume it's a stream and pass it to Zip
+        # --- reader to sort out
+        if not isinstance(pkg_file, str):
+            return _ZipPkgReader(pkg_file)
+
+        # --- otherwise we treat `pkg_file` as a path ---
+        if os.path.isdir(pkg_file):
+            return _DirPkgReader(pkg_file)
+
+        if zipfile.is_zipfile(pkg_file):
+            return _ZipPkgReader(pkg_file)
+
+        raise PackageNotFoundError("Package not found at '%s'" % pkg_file)
+
+
+class _DirPkgReader(_PhysPkgReader):
+    """Implements |PhysPkgReader| interface for OPC package extracted into directory.
+
+    `path` is the path to a directory containing an expanded package.
+    """
+
+    def __init__(self, path: str):
+        self._path = os.path.abspath(path)
+
+    def __contains__(self, pack_uri: object) -> bool:
+        """Return True when part identified by `pack_uri` is present in zip archive."""
+        if not isinstance(pack_uri, PackURI):
+            return False
+        return os.path.exists(posixpath.join(self._path, pack_uri.membername))
+
+    def __getitem__(self, pack_uri: PackURI) -> bytes:
+        """Return bytes of file corresponding to `pack_uri` in package directory."""
+        path = os.path.join(self._path, pack_uri.membername)
+        try:
+            with open(path, "rb") as f:
+                return f.read()
+        except IOError:
+            raise KeyError("no member '%s' in package" % pack_uri)
+
+
+class _ZipPkgReader(_PhysPkgReader):
+    """Implements |PhysPkgReader| interface for a zip-file OPC package."""
+
+    def __init__(self, pkg_file: str | IO[bytes]):
+        self._pkg_file = pkg_file
+
+    def __contains__(self, pack_uri: object) -> bool:
+        """Return True when part identified by `pack_uri` is present in zip archive."""
+        return pack_uri in self._blobs
+
+    def __getitem__(self, pack_uri: PackURI) -> bytes:
+        """Return bytes for part corresponding to `pack_uri`.
+
+        Raises |KeyError| if no matching member is present in zip archive.
+        """
+        if pack_uri not in self._blobs:
+            raise KeyError("no member '%s' in package" % pack_uri)
+        return self._blobs[pack_uri]
+
+    @lazyproperty
+    def _blobs(self) -> dict[PackURI, bytes]:
+        """dict mapping partname to package part binaries."""
+        with zipfile.ZipFile(self._pkg_file, "r") as z:
+            return {PackURI("/%s" % name): z.read(name) for name in z.namelist()}
+
+
+class _PhysPkgWriter:
+    """Base class for physical package writer objects."""
+
+    @classmethod
+    def factory(cls, pkg_file: str | IO[bytes]) -> _ZipPkgWriter:
+        """Return |_PhysPkgWriter| subtype instance appropriage for `pkg_file`.
+
+        Currently the only subtype is `_ZipPkgWriter`, but a `_DirPkgWriter` could be implemented
+        or even a `_StreamPkgWriter`.
+        """
+        return _ZipPkgWriter(pkg_file)
+
+    def write(self, pack_uri: PackURI, blob: bytes) -> None:
+        """Write `blob` to package with membername corresponding to `pack_uri`."""
+        raise NotImplementedError(  # pragma: no cover
+            f"`{type(self).__name__}` must implement `.write()`"
+        )
+
+
+class _ZipPkgWriter(_PhysPkgWriter):
+    """Implements |PhysPkgWriter| interface for a zip-file (.pptx file) OPC package."""
+
+    def __init__(self, pkg_file: str | IO[bytes]):
+        self._pkg_file = pkg_file
+
+    def __enter__(self) -> _ZipPkgWriter:
+        """Enable use as a context-manager. Opening zip for writing happens here."""
+        return self
+
+    def __exit__(self, *exc: list[Any]) -> None:
+        """Close the zip archive on exit from context.
+
+        Closing flushes any pending physical writes and releasing any resources it's using.
+        """
+        self._zipf.close()
+
+    def write(self, pack_uri: PackURI, blob: bytes) -> None:
+        """Write `blob` to zip package with membername corresponding to `pack_uri`."""
+        self._zipf.writestr(pack_uri.membername, blob)
+
+    @lazyproperty
+    def _zipf(self) -> zipfile.ZipFile:
+        """`ZipFile` instance open for writing."""
+        return zipfile.ZipFile(
+            self._pkg_file, "w", compression=zipfile.ZIP_DEFLATED, strict_timestamps=False
+        )
+
+
+class _ContentTypesItem:
+    """Composes content-types "part" ([Content_Types].xml) for a collection of parts."""
+
+    def __init__(self, parts: Sequence[Part]):
+        self._parts = parts
+
+    @classmethod
+    def xml_for(cls, parts: Sequence[Part]) -> CT_Types:
+        """Return content-types XML mapping each part in `parts` to a content-type.
+
+        The resulting XML is suitable for storage as `[Content_Types].xml` in an OPC package.
+        """
+        return cls(parts)._xml
+
+    @lazyproperty
+    def _xml(self) -> CT_Types:
+        """lxml.etree._Element containing the content-types item.
+
+        This XML object is suitable for serialization to the `[Content_Types].xml` item for an OPC
+        package. Although the sequence of elements is not strictly significant, as an aid to
+        testing and readability Default elements are sorted by extension and Override elements are
+        sorted by partname.
+        """
+        defaults, overrides = self._defaults_and_overrides
+        _types_elm = CT_Types.new()
+
+        for ext, content_type in sorted(defaults.items()):
+            _types_elm.add_default(ext, content_type)
+        for partname, content_type in sorted(overrides.items()):
+            _types_elm.add_override(partname, content_type)
+
+        return _types_elm
+
+    @lazyproperty
+    def _defaults_and_overrides(self) -> tuple[dict[str, str], dict[PackURI, str]]:
+        """pair of dict (defaults, overrides) accounting for all parts.
+
+        `defaults` is {ext: content_type} and overrides is {partname: content_type}.
+        """
+        defaults = CaseInsensitiveDict(rels=CT.OPC_RELATIONSHIPS, xml=CT.XML)
+        overrides: dict[PackURI, str] = {}
+
+        for part in self._parts:
+            partname, content_type = part.partname, part.content_type
+            ext = partname.ext
+            if (ext.lower(), content_type) in default_content_types:
+                defaults[ext] = content_type
+            else:
+                overrides[partname] = content_type
+
+        return defaults, overrides