about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/docx/image/jpeg.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/image/jpeg.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/docx/image/jpeg.py 429
1 files changed, 429 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/image/jpeg.py b/.venv/lib/python3.12/site-packages/docx/image/jpeg.py
new file mode 100644
index 00000000..b0114a99
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/image/jpeg.py
@@ -0,0 +1,429 @@
+"""Objects related to parsing headers of JPEG image streams.
+
+Includes both JFIF and Exif sub-formats.
+"""
+
+import io
+
+from docx.image.constants import JPEG_MARKER_CODE, MIME_TYPE
+from docx.image.helpers import BIG_ENDIAN, StreamReader
+from docx.image.image import BaseImageHeader
+from docx.image.tiff import Tiff
+
+
+class Jpeg(BaseImageHeader):
+    """Common base for the JFIF and Exif JPEG header classes.
+
+    Supplies the two values that do not depend on the JPEG sub-format: the MIME
+    content type and the default filename extension.
+    """
+
+    @property
+    def default_ext(self):
+        """Filename extension used by default for JPEG images; always `"jpg"`."""
+        return "jpg"
+
+    @property
+    def content_type(self):
+        """MIME content type of this image; always `MIME_TYPE.JPEG`."""
+        return MIME_TYPE.JPEG
+
+
+class Exif(Jpeg):
+    """Header parser for JPEG images in the Exif sub-format."""
+
+    @classmethod
+    def from_stream(cls, stream):
+        """Return an |Exif| instance built from the Exif image in `stream`.
+
+        Pixel dimensions come from the first start-of-frame (SOFn) marker and the
+        dots-per-inch values from the first APP1 marker. The marker lookups raise
+        `KeyError` when the stream contains no such marker.
+        """
+        markers = _JfifMarkers.from_stream(stream)
+        frame, app1 = markers.sof, markers.app1
+        return cls(frame.px_width, frame.px_height, app1.horz_dpi, app1.vert_dpi)
+
+
+class Jfif(Jpeg):
+    """Header parser for JPEG images in the JFIF sub-format."""
+
+    @classmethod
+    def from_stream(cls, stream):
+        """Return a |Jfif| instance built from the JFIF image in `stream`.
+
+        Pixel dimensions come from the first start-of-frame (SOFn) marker and the
+        dots-per-inch values from the first APP0 marker. The marker lookups raise
+        `KeyError` when the stream contains no such marker.
+        """
+        markers = _JfifMarkers.from_stream(stream)
+        frame, app0 = markers.sof, markers.app0
+        return cls(frame.px_width, frame.px_height, app0.horz_dpi, app0.vert_dpi)
+
+
+class _JfifMarkers:
+    """Ordered collection of the markers found in a JPEG stream.
+
+    When built with `from_stream()`, collection stops at the first SOS marker, so
+    markers occurring after it are not included.
+    """
+
+    def __init__(self, markers):
+        super().__init__()
+        # materialize `markers` into a list owned by this instance
+        self._markers = list(markers)
+
+    def __str__(self):  # pragma: no cover
+        """Return a table of the markers, one row per marker, as a debugging aid.
+
+        Columns are stream offset, segment length, marker code in hex, and name.
+        """
+        row_fmt = "%7d  %6d  %02X  %s"
+        table = [" offset  seglen  mc  name\n=======  ======  ==  ====="]
+        table.extend(
+            row_fmt % (m.offset, m.segment_length, ord(m.marker_code), m.name)
+            for m in self._markers
+        )
+        return "\n".join(table)
+
+    @classmethod
+    def from_stream(cls, stream):
+        """Return a |_JfifMarkers| instance holding the markers parsed from `stream`.
+
+        Markers are collected in stream order up to and including the first SOS
+        marker; if none is encountered, every marker the parser produces is kept.
+        """
+        collected = []
+        for marker in _MarkerParser.from_stream(stream).iter_markers():
+            collected.append(marker)
+            if marker.marker_code == JPEG_MARKER_CODE.SOS:
+                break
+        return cls(collected)
+
+    @property
+    def app0(self):
+        """First APP0 marker in this sequence; raises `KeyError` if there is none."""
+        found = next(
+            (m for m in self._markers if m.marker_code == JPEG_MARKER_CODE.APP0),
+            None,
+        )
+        if found is None:
+            raise KeyError("no APP0 marker in image")
+        return found
+
+    @property
+    def app1(self):
+        """First APP1 marker in this sequence; raises `KeyError` if there is none."""
+        found = next(
+            (m for m in self._markers if m.marker_code == JPEG_MARKER_CODE.APP1),
+            None,
+        )
+        if found is None:
+            raise KeyError("no APP1 marker in image")
+        return found
+
+    @property
+    def sof(self):
+        """First start-of-frame (SOFn) marker in this sequence.
+
+        Raises `KeyError` if the sequence contains no SOFn marker.
+        """
+        sof_codes = JPEG_MARKER_CODE.SOF_MARKER_CODES
+        found = next((m for m in self._markers if m.marker_code in sof_codes), None)
+        if found is None:
+            raise KeyError("no start of frame (SOFn) marker in image")
+        return found
+
+
+class _MarkerParser:
+    """Service class that parses a JFIF stream and iterates over its markers."""
+
+    def __init__(self, stream_reader):
+        super().__init__()
+        self._stream = stream_reader
+
+    @classmethod
+    def from_stream(cls, stream):
+        """Return a |_MarkerParser| instance to parse JFIF markers from `stream`.
+
+        `stream` is wrapped in a big-endian |StreamReader| before use.
+        """
+        return cls(StreamReader(stream, BIG_ENDIAN))
+
+    def iter_markers(self):
+        """Generate a marker object for each marker in the JPEG stream, in order.
+
+        Each item is the object `_MarkerFactory()` returns for the marker code and
+        segment offset located by |_MarkerFinder|; it is not a bare
+        (marker_code, segment_offset) tuple. Generation ends once the end-of-image
+        (EOI) marker has been yielded.
+        """
+        marker_finder = _MarkerFinder.from_stream(self._stream)
+        start = 0
+        marker_code = None
+        while marker_code != JPEG_MARKER_CODE.EOI:
+            marker_code, segment_offset = marker_finder.next(start)
+            marker = _MarkerFactory(marker_code, self._stream, segment_offset)
+            yield marker
+            # resume the search just past the segment this marker reports
+            start = segment_offset + marker.segment_length
+
+
+class _MarkerFinder:
+    """Service class that locates the next JFIF marker in a stream."""
+
+    def __init__(self, stream):
+        super().__init__()
+        self._stream = stream
+
+    @classmethod
+    def from_stream(cls, stream):
+        """Return a |_MarkerFinder| instance to find JFIF markers in `stream`."""
+        return cls(stream)
+
+    def next(self, start):
+        """Return (marker_code, segment_offset) for the first marker at or after
+        offset `start`.
+
+        `segment_offset` is the position immediately following the marker code
+        byte, which is where the marker's segment begins for those markers that
+        have one. An FF byte followed by 00 is not a marker and is skipped.
+        """
+        cursor = start
+        while True:
+            # advance to an FF byte, then past it and any FF padding after it
+            ff_offset = self._offset_of_next_ff_byte(start=cursor)
+            cursor, code = self._next_non_ff_byte(start=ff_offset + 1)
+            # anything other than 00 after the FF run is a real marker code
+            if code != b"\x00":
+                return code, cursor + 1
+
+    def _next_non_ff_byte(self, start):
+        """Return (offset, byte) for the first byte at or after `start` that is not
+        FF.
+
+        When the byte at `start` itself is not FF, the returned offset is `start`.
+        """
+        self._stream.seek(start)
+        while True:
+            candidate = self._read_byte()
+            if candidate != b"\xFF":
+                # the stream now sits one past the byte just read
+                return self._stream.tell() - 1, candidate
+
+    def _offset_of_next_ff_byte(self, start):
+        """Return the offset of the first FF byte at or after `start`.
+
+        When the byte at `start` is already FF, `start` is returned.
+        """
+        self._stream.seek(start)
+        while True:
+            if self._read_byte() == b"\xFF":
+                # the stream now sits one past the byte just read
+                return self._stream.tell() - 1
+
+    def _read_byte(self):
+        """Return the next single byte read from the stream.
+
+        Raises `Exception` when the read returns nothing, i.e. at end of file.
+        """
+        chunk = self._stream.read(1)
+        if chunk:
+            return chunk
+        raise Exception("unexpected end of file")  # pragma: no cover
+
+
+def _MarkerFactory(marker_code, stream, offset):
+    """Return the marker object for the marker having `marker_code` at `offset`.
+
+    APP0, APP1 and SOFn codes are built with their specialized classes; any other
+    code is built with the generic |_Marker|. In every case the instance comes
+    from the chosen class's `from_stream()`.
+    """
+    if marker_code == JPEG_MARKER_CODE.APP0:
+        return _App0Marker.from_stream(stream, marker_code, offset)
+    if marker_code == JPEG_MARKER_CODE.APP1:
+        return _App1Marker.from_stream(stream, marker_code, offset)
+    if marker_code in JPEG_MARKER_CODE.SOF_MARKER_CODES:
+        return _SofMarker.from_stream(stream, marker_code, offset)
+    return _Marker.from_stream(stream, marker_code, offset)
+
+
+class _Marker:
+    """Base class for JFIF marker classes.
+
+    Represents a marker and its segment occurring in a JPEG byte stream.
+    """
+
+    def __init__(self, marker_code, offset, segment_length):
+        super().__init__()
+        self._marker_code = marker_code
+        self._offset = offset
+        self._segment_length = segment_length
+
+    @classmethod
+    def from_stream(cls, stream, marker_code, offset):
+        """Return a generic |_Marker| instance for the marker at `offset` in `stream`.
+
+        Standalone markers have no segment, so their segment length is recorded as
+        zero; for every other marker it is the short read at `offset`.
+        """
+        if JPEG_MARKER_CODE.is_standalone(marker_code):
+            segment_length = 0
+        else:
+            segment_length = stream.read_short(offset)
+        return cls(marker_code, offset, segment_length)
+
+    @property
+    def marker_code(self):
+        """The single-byte code that identifies the type of this marker, e.g.
+        ``b"\\xD8"`` for start of image (SOI) or ``b"\\xE0"`` for APP0."""
+        return self._marker_code
+
+    @property
+    def name(self):  # pragma: no cover
+        """Name looked up for this marker's code in `JPEG_MARKER_CODE.marker_names`."""
+        return JPEG_MARKER_CODE.marker_names[self._marker_code]
+
+    @property
+    def offset(self):  # pragma: no cover
+        """The stream offset this marker was constructed with."""
+        return self._offset
+
+    @property
+    def segment_length(self):
+        """The length in bytes of this marker's segment."""
+        return self._segment_length
+
+
+class _App0Marker(_Marker):
+    """Represents a JFIF APP0 marker segment."""
+
+    def __init__(
+        self, marker_code, offset, length, density_units, x_density, y_density
+    ):
+        super().__init__(marker_code, offset, length)
+        self._density_units = density_units
+        self._x_density = x_density
+        self._y_density = y_density
+
+    @property
+    def horz_dpi(self):
+        """Horizontal dots per inch specified in this marker, defaults to 72 if not
+        specified."""
+        return self._dpi(self._x_density)
+
+    @property
+    def vert_dpi(self):
+        """Vertical dots per inch specified in this marker, defaults to 72 if not
+        specified."""
+        return self._dpi(self._y_density)
+
+    def _dpi(self, density):
+        """Return dots per inch corresponding to `density` value.
+
+        A density in dots-per-inch (units 1) is returned unchanged and one in
+        dots-per-cm (units 2) is converted and rounded to the nearest integer. Any
+        other units value, or a `density` of zero, yields the default of 72.
+        """
+        # a zero density cannot describe a real resolution; report the default
+        # rather than a 0 dpi value that a caller might divide by
+        if not density:
+            return 72
+        if self._density_units == 1:
+            return density
+        if self._density_units == 2:
+            return int(round(density * 2.54))
+        return 72
+
+    @classmethod
+    def from_stream(cls, stream, marker_code, offset):
+        """Return an |_App0Marker| instance for the APP0 marker at `offset` in
+        `stream`."""
+        # field               off  type   notes
+        # ------------------  ---  -----  -------------------
+        # segment length       0   short
+        # JFIF identifier      2   5 chr  'JFIF\x00'
+        # major JPEG version   7   byte   typically 1
+        # minor JPEG version   8   byte   typically 1 or 2
+        # density units        9   byte   1=inches, 2=cm
+        # horz dots per unit  10   short
+        # vert dots per unit  12   short
+        # ------------------  ---  -----  -------------------
+        segment_length = stream.read_short(offset)
+        density_units = stream.read_byte(offset, 9)
+        x_density = stream.read_short(offset, 10)
+        y_density = stream.read_short(offset, 12)
+        return cls(
+            marker_code, offset, segment_length, density_units, x_density, y_density
+        )
+
+
+class _App1Marker(_Marker):
+    """Represents a JFIF APP1 (Exif) marker segment."""
+
+    def __init__(self, marker_code, offset, length, horz_dpi, vert_dpi):
+        super().__init__(marker_code, offset, length)
+        self._horz_dpi = horz_dpi
+        self._vert_dpi = vert_dpi
+
+    @classmethod
+    def from_stream(cls, stream, marker_code, offset):
+        """Return an |_App1Marker| for the APP1 segment at `offset` in `stream`.
+
+        The dots-per-inch values are taken from the TIFF data embedded in an Exif
+        segment. An APP1 segment without the Exif signature gets 72 for both.
+        """
+        # field                 off  len  type   notes
+        # --------------------  ---  ---  -----  ----------------------------
+        # segment length         0    2   short
+        # Exif identifier        2    6   6 chr  'Exif\x00\x00'
+        # TIFF byte order        8    2   2 chr  'II'=little 'MM'=big endian
+        # TIFF magic number     10    2   2 chr  '*\x00' or '\x00*' per byte order
+        # IFD0 offset           12    4   long   relative to TIFF header (off 8)
+        # --------------------  ---  ---  -----  ----------------------------
+        segment_length = stream.read_short(offset)
+        if cls._is_non_Exif_APP1_segment(stream, offset):
+            horz_dpi = vert_dpi = 72
+        else:
+            tiff = cls._tiff_from_exif_segment(stream, offset, segment_length)
+            horz_dpi, vert_dpi = tiff.horz_dpi, tiff.vert_dpi
+        return cls(marker_code, offset, segment_length, horz_dpi, vert_dpi)
+
+    @property
+    def horz_dpi(self):
+        """Horizontal dots per inch specified in this marker, defaults to 72 if not
+        specified."""
+        return self._horz_dpi
+
+    @property
+    def vert_dpi(self):
+        """Vertical dots per inch specified in this marker, defaults to 72 if not
+        specified."""
+        return self._vert_dpi
+
+    @classmethod
+    def _is_non_Exif_APP1_segment(cls, stream, offset):
+        """Return True if the APP1 segment at `offset` in `stream` is NOT an Exif
+        segment.
+
+        An Exif segment carries the six-byte signature ``b"Exif\\x00\\x00"`` at
+        offset 2 in the segment.
+        """
+        stream.seek(offset + 2)
+        return stream.read(6) != b"Exif\x00\x00"
+
+    @classmethod
+    def _tiff_from_exif_segment(cls, stream, offset, segment_length):
+        """Return a |Tiff| instance parsed from the Exif APP1 segment of
+        `segment_length` at `offset` in `stream`."""
+        # the TIFF data starts 8 bytes into the segment, after the length field and
+        # Exif signature; copy it into its own stream so Tiff sees it at offset 0
+        stream.seek(offset + 8)
+        tiff_bytes = stream.read(segment_length - 8)
+        return Tiff.from_stream(io.BytesIO(tiff_bytes))
+
+
+class _SofMarker(_Marker):
+    """A JFIF start-of-frame (SOFn) marker segment, which carries the image's
+    pixel dimensions."""
+
+    def __init__(self, marker_code, offset, segment_length, px_width, px_height):
+        super().__init__(marker_code, offset, segment_length)
+        self._px_height = px_height
+        self._px_width = px_width
+
+    @classmethod
+    def from_stream(cls, stream, marker_code, offset):
+        """Return an |_SofMarker| instance for the SOFn marker at `offset` in
+        `stream`."""
+        # field               off  type   notes
+        # ------------------  ---  -----  ---------
+        # segment length       0   short
+        # data precision       2   byte
+        # vertical lines       3   short  px_height
+        # horizontal lines     5   short  px_width
+        # ------------------  ---  -----  ---------
+        length = stream.read_short(offset)
+        height = stream.read_short(offset, 3)
+        width = stream.read_short(offset, 5)
+        return cls(marker_code, offset, length, width, height)
+
+    @property
+    def px_width(self):
+        """Width of the image in pixels."""
+        return self._px_width
+
+    @property
+    def px_height(self):
+        """Height of the image in pixels."""
+        return self._px_height