about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/oxml/text/run.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/run.py276
1 files changed, 276 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
new file mode 100644
index 00000000..88efae83
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
@@ -0,0 +1,276 @@
+"""Custom element classes related to text runs (CT_R)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, Iterator, List
+
+from docx.oxml.drawing import CT_Drawing
+from docx.oxml.ns import qn
+from docx.oxml.simpletypes import ST_BrClear, ST_BrType
+from docx.oxml.text.font import CT_RPr
+from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
+from docx.shared import TextAccumulator
+
+if TYPE_CHECKING:
+    from docx.oxml.shape import CT_Anchor, CT_Inline
+    from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+    from docx.oxml.text.parfmt import CT_TabStop
+
+# ------------------------------------------------------------------------------------
+# Run-level elements
+
+
+class CT_R(BaseOxmlElement):
+    """`<w:r>` element, containing the properties and text for a run."""
+
+    add_br: Callable[[], CT_Br]
+    add_tab: Callable[[], CT_TabStop]
+    get_or_add_rPr: Callable[[], CT_RPr]
+    _add_drawing: Callable[[], CT_Drawing]
+    _add_t: Callable[..., CT_Text]
+
+    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
+    br = ZeroOrMore("w:br")
+    cr = ZeroOrMore("w:cr")
+    drawing = ZeroOrMore("w:drawing")
+    t = ZeroOrMore("w:t")
+    tab = ZeroOrMore("w:tab")
+
+    def add_t(self, text: str) -> CT_Text:
+        """Return a newly added `<w:t>` element containing `text`."""
+        t = self._add_t(text=text)
+        if len(text.strip()) < len(text):
+            t.set(qn("xml:space"), "preserve")
+        return t
+
+    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
+        """Return newly appended `CT_Drawing` (`w:drawing`) child element.
+
+        The `w:drawing` element has `inline_or_anchor` as its child.
+        """
+        drawing = self._add_drawing()
+        drawing.append(inline_or_anchor)
+        return drawing
+
+    def clear_content(self) -> None:
+        """Remove all child elements except a `w:rPr` element if present."""
+        # -- remove all run inner-content except a `w:rPr` when present. --
+        for e in self.xpath("./*[not(self::w:rPr)]"):
+            self.remove(e)
+
+    @property
+    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
+        """Run text interleaved with `w:drawing` and `w:lastRenderedPageBreak` items."""
+        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+
+        accum = TextAccumulator()
+
+        def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
+            for e in self.xpath(
+                "w:br"
+                " | w:cr"
+                " | w:drawing"
+                " | w:lastRenderedPageBreak"
+                " | w:noBreakHyphen"
+                " | w:ptab"
+                " | w:t"
+                " | w:tab"
+            ):
+                if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
+                    yield from accum.pop()
+                    yield e
+                else:
+                    accum.push(str(e))
+
+            # -- don't forget the "tail" string --
+            yield from accum.pop()
+
+        return list(iter_items())
+
+    @property
+    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+        """All `w:lastRenderedPageBreak` child elements of this run."""
+        return self.xpath("./w:lastRenderedPageBreak")
+
+    @property
+    def style(self) -> str | None:
+        """String contained in `w:val` attribute of `w:rStyle` grandchild.
+
+        |None| if that element is not present.
+        """
+        rPr = self.rPr
+        if rPr is None:
+            return None
+        return rPr.style
+
+    @style.setter
+    def style(self, style: str | None):
+        """Set character style of this `w:r` element to `style`.
+
+        If `style` is None, remove the style element.
+        """
+        rPr = self.get_or_add_rPr()
+        rPr.style = style
+
+    @property
+    def text(self) -> str:
+        """The textual content of this run.
+
+        Inner-content child elements like `w:tab` are translated to their text
+        equivalent.
+        """
+        return "".join(
+            str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
+        )
+
+    @text.setter
+    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
+        self.clear_content()
+        _RunContentAppender.append_to_run_from_text(self, text)
+
+    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
+        self.insert(0, rPr)
+        return rPr
+
+
+# ------------------------------------------------------------------------------------
+# Run inner-content elements
+
+
+class CT_Br(BaseOxmlElement):
+    """`<w:br>` element, indicating a line, page, or column break in a run."""
+
+    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:type", ST_BrType, default="textWrapping"
+    )
+    clear: str | None = OptionalAttribute("w:clear", ST_BrClear)  # pyright: ignore
+
+    def __str__(self) -> str:
+        """Text equivalent of this element. Actual value depends on break type.
+
+        A line break is translated as "\n". Column and page breaks produce the empty
+        string ("").
+
+        This allows the text of run inner-content to be accessed in a consistent way
+        for all run inner-content text elements.
+        """
+        return "\n" if self.type == "textWrapping" else ""
+
+
+class CT_Cr(BaseOxmlElement):
+    """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.
+
+    In Word, this represents a "soft carriage-return" in the sense that it does not end
+    the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
+    text equivalent is considered to be newline ("\n") since in plain-text that's the
+    closest Python equivalent.
+
+    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
+    `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
+    for many elements.
+    """
+
+    def __str__(self) -> str:
+        """Text equivalent of this element, a single newline ("\n")."""
+        return "\n"
+
+
+class CT_NoBreakHyphen(BaseOxmlElement):
+    """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.
+
+    This maps to a plain-text dash ("-").
+
+    NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
+    maps to `CT_Empty`. This name was added to give it behavior distinguished from the
+    many other elements represented in the schema by CT_Empty.
+    """
+
+    def __str__(self) -> str:
+        """Text equivalent of this element, a single dash character ("-")."""
+        return "-"
+
+
+class CT_PTab(BaseOxmlElement):
+    """`<w:ptab>` element, representing an absolute-position tab character within a run.
+
+    This character advances the rendering position to the specified position regardless
+    of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
+    """
+
+    def __str__(self) -> str:
+        """Text equivalent of this element, a single tab ("\t") character.
+
+        This allows the text of run inner-content to be accessed in a consistent way
+        for all run inner-content text elements.
+        """
+        return "\t"
+
+
+# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
+# -- element class provides the __str__() method for this empty element, unconditionally
+# -- returning "\t".
+
+
+class CT_Text(BaseOxmlElement):
+    """`<w:t>` element, containing a sequence of characters within a run."""
+
+    def __str__(self) -> str:
+        """Text contained in this element, the empty string if it has no content.
+
+        This method allows this run inner-content element to be queried for its text
+        the same way as other run-content elements are. In particular, this never
+        returns None, as etree._Element does when there is no content.
+        """
+        return self.text or ""
+
+
+# ------------------------------------------------------------------------------------
+# Utility
+
+
+class _RunContentAppender:
+    """Translates a Python string into run content elements appended in a `w:r` element.
+
+    Contiguous sequences of regular characters are appended in a single `<w:t>` element.
+    Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a
+    newline or carriage return character ('\n', '\r') causes a `<w:br>` element to be
+    appended.
+    """
+
+    def __init__(self, r: CT_R):
+        self._r = r
+        self._bfr: List[str] = []
+
+    @classmethod
+    def append_to_run_from_text(cls, r: CT_R, text: str):
+        """Append inner-content elements for `text` to `r` element."""
+        appender = cls(r)
+        appender.add_text(text)
+
+    def add_text(self, text: str):
+        """Append inner-content elements for `text` to the `w:r` element."""
+        for char in text:
+            self.add_char(char)
+        self.flush()
+
+    def add_char(self, char: str):
+        """Process next character of input through finite state machine (FSM).
+
+        There are two possible states, buffer pending and not pending, but those are
+        hidden behind the `.flush()` method which must be called at the end of text to
+        ensure any pending `<w:t>` element is written.
+        """
+        if char == "\t":
+            self.flush()
+            self._r.add_tab()
+        elif char in "\r\n":
+            self.flush()
+            self._r.add_br()
+        else:
+            self._bfr.append(char)
+
+    def flush(self):
+        text = "".join(self._bfr)
+        if text:
+            self._r.add_t(text)
+        self._bfr.clear()