two versions of R2R are here — HEAD master

author: S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/oxml/text
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
7 files changed, 1454 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/font.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/font.py
new file mode 100644
index 00000000..140086aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/font.py
@@ -0,0 +1,356 @@
+"""Custom element classes related to run properties (font)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+from docx.enum.dml import MSO_THEME_COLOR
+from docx.enum.text import WD_COLOR_INDEX, WD_UNDERLINE
+from docx.oxml.ns import nsdecls
+from docx.oxml.parser import parse_xml
+from docx.oxml.simpletypes import (
+    ST_HexColor,
+    ST_HpsMeasure,
+    ST_String,
+    ST_VerticalAlignRun,
+)
+from docx.oxml.xmlchemy import (
+    BaseOxmlElement,
+    OptionalAttribute,
+    RequiredAttribute,
+    ZeroOrOne,
+)
+
+if TYPE_CHECKING:
+    from docx.oxml.shared import CT_OnOff, CT_String
+    from docx.shared import Length
+
+
+class CT_Color(BaseOxmlElement):
+    """`<w:color>` element, specifying the color of a font and perhaps other objects."""
+
+    val = RequiredAttribute("w:val", ST_HexColor)
+    themeColor = OptionalAttribute("w:themeColor", MSO_THEME_COLOR)
+
+
+class CT_Fonts(BaseOxmlElement):
+    """`<w:rFonts>` element.
+
+    Specifies typeface name per language type; only `w:ascii` and `w:hAnsi` are mapped.
+    """
+
+    ascii: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:ascii", ST_String
+    )
+    hAnsi: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:hAnsi", ST_String
+    )
+
+
+class CT_Highlight(BaseOxmlElement):
+    """`<w:highlight>` element, specifying font highlighting/background color."""
+
+    val: WD_COLOR_INDEX = RequiredAttribute(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:val", WD_COLOR_INDEX
+    )
+
+
+class CT_HpsMeasure(BaseOxmlElement):
+    """Used for `<w:sz>` element and others; `w:val` specifies font size in half-points."""
+
+    val: Length = RequiredAttribute(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:val", ST_HpsMeasure
+    )
+
+
+class CT_RPr(BaseOxmlElement):
+    """`<w:rPr>` element, containing the properties for a run."""
+
+    get_or_add_highlight: Callable[[], CT_Highlight]
+    get_or_add_rFonts: Callable[[], CT_Fonts]
+    get_or_add_sz: Callable[[], CT_HpsMeasure]
+    get_or_add_vertAlign: Callable[[], CT_VerticalAlignRun]
+    _add_rStyle: Callable[..., CT_String]
+    _add_u: Callable[[], CT_Underline]
+    _remove_highlight: Callable[[], None]
+    _remove_rFonts: Callable[[], None]
+    _remove_rStyle: Callable[[], None]
+    _remove_sz: Callable[[], None]
+    _remove_u: Callable[[], None]
+    _remove_vertAlign: Callable[[], None]
+
+    _tag_seq = (
+        "w:rStyle",
+        "w:rFonts",
+        "w:b",
+        "w:bCs",
+        "w:i",
+        "w:iCs",
+        "w:caps",
+        "w:smallCaps",
+        "w:strike",
+        "w:dstrike",
+        "w:outline",
+        "w:shadow",
+        "w:emboss",
+        "w:imprint",
+        "w:noProof",
+        "w:snapToGrid",
+        "w:vanish",
+        "w:webHidden",
+        "w:color",
+        "w:spacing",
+        "w:w",
+        "w:kern",
+        "w:position",
+        "w:sz",
+        "w:szCs",
+        "w:highlight",
+        "w:u",
+        "w:effect",
+        "w:bdr",
+        "w:shd",
+        "w:fitText",
+        "w:vertAlign",
+        "w:rtl",
+        "w:cs",
+        "w:em",
+        "w:lang",
+        "w:eastAsianLayout",
+        "w:specVanish",
+        "w:oMath",
+    )
+    rStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:rStyle", successors=_tag_seq[1:]
+    )
+    rFonts: CT_Fonts | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:rFonts", successors=_tag_seq[2:]
+    )
+    b: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:b", successors=_tag_seq[3:]
+    )
+    bCs = ZeroOrOne("w:bCs", successors=_tag_seq[4:])
+    i = ZeroOrOne("w:i", successors=_tag_seq[5:])
+    iCs = ZeroOrOne("w:iCs", successors=_tag_seq[6:])
+    caps = ZeroOrOne("w:caps", successors=_tag_seq[7:])
+    smallCaps = ZeroOrOne("w:smallCaps", successors=_tag_seq[8:])
+    strike = ZeroOrOne("w:strike", successors=_tag_seq[9:])
+    dstrike = ZeroOrOne("w:dstrike", successors=_tag_seq[10:])
+    outline = ZeroOrOne("w:outline", successors=_tag_seq[11:])
+    shadow = ZeroOrOne("w:shadow", successors=_tag_seq[12:])
+    emboss = ZeroOrOne("w:emboss", successors=_tag_seq[13:])
+    imprint = ZeroOrOne("w:imprint", successors=_tag_seq[14:])
+    noProof = ZeroOrOne("w:noProof", successors=_tag_seq[15:])
+    snapToGrid = ZeroOrOne("w:snapToGrid", successors=_tag_seq[16:])
+    vanish = ZeroOrOne("w:vanish", successors=_tag_seq[17:])
+    webHidden = ZeroOrOne("w:webHidden", successors=_tag_seq[18:])
+    color = ZeroOrOne("w:color", successors=_tag_seq[19:])
+    sz: CT_HpsMeasure | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:sz", successors=_tag_seq[24:]
+    )
+    highlight: CT_Highlight | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:highlight", successors=_tag_seq[26:]
+    )
+    u: CT_Underline | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:u", successors=_tag_seq[27:]
+    )
+    vertAlign: CT_VerticalAlignRun | None = ZeroOrOne(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:vertAlign", successors=_tag_seq[32:]
+    )
+    rtl = ZeroOrOne("w:rtl", successors=_tag_seq[33:])
+    cs = ZeroOrOne("w:cs", successors=_tag_seq[34:])
+    specVanish = ZeroOrOne("w:specVanish", successors=_tag_seq[38:])
+    oMath = ZeroOrOne("w:oMath", successors=_tag_seq[39:])
+    del _tag_seq
+
+    def _new_color(self):
+        """Override metaclass method to set `w:color/@val` to RGB black on create."""
+        return parse_xml('<w:color %s w:val="000000"/>' % nsdecls("w"))
+
+    @property
+    def highlight_val(self) -> WD_COLOR_INDEX | None:
+        """Value of `./w:highlight/@val`.
+
+        Specifies font's highlight color, or `None` if the text is not highlighted.
+        """
+        highlight = self.highlight
+        if highlight is None:
+            return None
+        return highlight.val
+
+    @highlight_val.setter
+    def highlight_val(self, value: WD_COLOR_INDEX | None) -> None:
+        if value is None:
+            self._remove_highlight()
+            return
+        highlight = self.get_or_add_highlight()
+        highlight.val = value
+
+    @property
+    def rFonts_ascii(self) -> str | None:
+        """The value of `w:rFonts/@w:ascii` or |None| if not present.
+
+        Represents the assigned typeface name. The rFonts element also specifies other
+        special-case typeface names; this method handles the case where just the common
+        name is required.
+        """
+        rFonts = self.rFonts
+        if rFonts is None:
+            return None
+        return rFonts.ascii
+
+    @rFonts_ascii.setter
+    def rFonts_ascii(self, value: str | None) -> None:
+        if value is None:
+            self._remove_rFonts()
+            return
+        rFonts = self.get_or_add_rFonts()
+        rFonts.ascii = value
+
+    @property
+    def rFonts_hAnsi(self) -> str | None:
+        """The value of `w:rFonts/@w:hAnsi` or |None| if not present."""
+        rFonts = self.rFonts
+        if rFonts is None:
+            return None
+        return rFonts.hAnsi
+
+    @rFonts_hAnsi.setter
+    def rFonts_hAnsi(self, value: str | None):
+        if value is None and self.rFonts is None:
+            return
+        rFonts = self.get_or_add_rFonts()
+        rFonts.hAnsi = value
+
+    @property
+    def style(self) -> str | None:
+        """String in `./w:rStyle/@val`, or None if `w:rStyle` is not present."""
+        rStyle = self.rStyle
+        if rStyle is None:
+            return None
+        return rStyle.val
+
+    @style.setter
+    def style(self, style: str | None) -> None:
+        """Set `./w:rStyle/@val` to `style`, adding the `w:rStyle` element if necessary.
+
+        If `style` is |None|, remove `w:rStyle` element if present.
+        """
+        if style is None:
+            self._remove_rStyle()
+        elif self.rStyle is None:
+            self._add_rStyle(val=style)
+        else:
+            self.rStyle.val = style
+
+    @property
+    def subscript(self) -> bool | None:
+        """|True| if `./w:vertAlign/@w:val` is "subscript".
+
+        |False| if `w:vertAlign/@w:val` contains any other value. |None| if
+        `w:vertAlign` is not present.
+        """
+        vertAlign = self.vertAlign
+        if vertAlign is None:
+            return None
+        if vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT:
+            return True
+        return False
+
+    @subscript.setter
+    def subscript(self, value: bool | None) -> None:
+        if value is None:
+            self._remove_vertAlign()
+        elif bool(value) is True:
+            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT
+        # -- assert bool(value) is False --
+        elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT:
+            self._remove_vertAlign()
+
+    @property
+    def superscript(self) -> bool | None:
+        """|True| if `w:vertAlign/@w:val` is 'superscript'.
+
+        |False| if `w:vertAlign/@w:val` contains any other value. |None| if
+        `w:vertAlign` is not present.
+        """
+        vertAlign = self.vertAlign
+        if vertAlign is None:
+            return None
+        if vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT:
+            return True
+        return False
+
+    @superscript.setter
+    def superscript(self, value: bool | None):
+        if value is None:
+            self._remove_vertAlign()
+        elif bool(value) is True:
+            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUPERSCRIPT
+        # -- assert bool(value) is False --
+        elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT:
+            self._remove_vertAlign()
+
+    @property
+    def sz_val(self) -> Length | None:
+        """The value of `w:sz/@w:val` or |None| if not present."""
+        sz = self.sz
+        if sz is None:
+            return None
+        return sz.val
+
+    @sz_val.setter
+    def sz_val(self, value: Length | None):
+        if value is None:
+            self._remove_sz()
+            return
+        sz = self.get_or_add_sz()
+        sz.val = value
+
+    @property
+    def u_val(self) -> WD_UNDERLINE | None:
+        """Value of `w:u/@val`, or None if not present.
+
+        The enum member is returned unchanged; mapping `WD_UNDERLINE.SINGLE` and
+        `WD_UNDERLINE.NONE` to `True`/`False` is not done here (presumably by callers).
+        """
+        u = self.u
+        if u is None:
+            return None
+        return u.val
+
+    @u_val.setter
+    def u_val(self, value: WD_UNDERLINE | None):
+        self._remove_u()
+        if value is not None:
+            self._add_u().val = value
+
+    def _get_bool_val(self, name: str) -> bool | None:
+        """Value of boolean child named by attribute `name`, e.g. "b", "i", "smallCaps"."""
+        element = getattr(self, name)
+        if element is None:
+            return None
+        return element.val
+
+    def _set_bool_val(self, name: str, value: bool | None):
+        if value is None:
+            getattr(self, "_remove_%s" % name)()
+            return
+        element = getattr(self, "get_or_add_%s" % name)()
+        element.val = value
+
+
+class CT_Underline(BaseOxmlElement):
+    """`<w:u>` element, specifying the underlining style for a run (`w:val` is optional)."""
+
+    val: WD_UNDERLINE | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:val", WD_UNDERLINE
+    )
+
+
+class CT_VerticalAlignRun(BaseOxmlElement):
+    """`<w:vertAlign>` element, specifying subscript or superscript via required `w:val`."""
+
+    val: str = RequiredAttribute(  # pyright: ignore[reportGeneralTypeIssues]
+        "w:val", ST_VerticalAlignRun
+    )
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py
new file mode 100644
index 00000000..38a33ff1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py
@@ -0,0 +1,45 @@
+"""Custom element classes related to hyperlinks (CT_Hyperlink)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, List
+
+from docx.oxml.simpletypes import ST_OnOff, ST_String, XsdString
+from docx.oxml.text.run import CT_R
+from docx.oxml.xmlchemy import (
+    BaseOxmlElement,
+    OptionalAttribute,
+    ZeroOrMore,
+)
+
+if TYPE_CHECKING:
+    from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+
+
+class CT_Hyperlink(BaseOxmlElement):
+    """`<w:hyperlink>` element, containing the text and address for a hyperlink."""
+
+    r_lst: List[CT_R]
+
+    rId: str | None = OptionalAttribute("r:id", XsdString)  # pyright: ignore[reportAssignmentType]
+    anchor: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:anchor", ST_String
+    )
+    history: bool = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:history", ST_OnOff, default=True
+    )
+
+    r = ZeroOrMore("w:r")
+
+    @property
+    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+        """All `w:lastRenderedPageBreak` elements in `w:r` children of this hyperlink."""
+        return self.xpath("./w:r/w:lastRenderedPageBreak")
+
+    @property
+    def text(self) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
+        """The textual content of this hyperlink.
+
+        `CT_Hyperlink` stores the hyperlink-text as one or more `w:r` children.
+        """
+        return "".join(r.text for r in self.xpath("w:r"))
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
new file mode 100644
index 00000000..943f9b6c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
@@ -0,0 +1,284 @@
+"""Custom element class for rendered page-break (CT_LastRenderedPageBreak)."""
+
+from __future__ import annotations
+
+import copy
+from typing import TYPE_CHECKING
+
+from docx.oxml.xmlchemy import BaseOxmlElement
+from docx.shared import lazyproperty
+
+if TYPE_CHECKING:
+    from docx.oxml.text.hyperlink import CT_Hyperlink
+    from docx.oxml.text.paragraph import CT_P
+
+
+class CT_LastRenderedPageBreak(BaseOxmlElement):
+    """`<w:lastRenderedPageBreak>` element, indicating page break inserted by renderer.
+
+    A rendered page-break is one inserted by the renderer when it runs out of room on a
+    page. It is an empty element (no attrs or children) and is a child of CT_R, peer to
+    CT_Text.
+
+    NOTE: this complex-type name does not exist in the schema, where
+    `w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
+    distinguished behavior. CT_Empty is used for many elements.
+    """
+
+    @property
+    def following_fragment_p(self) -> CT_P:
+        """A "loose" `CT_P` containing only the paragraph content after this break.
+
+        Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
+        page-break in its paragraph.
+
+        The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
+        page-break with this `w:lastRenderedPageBreak` element and all content preceding
+        it removed.
+
+        NOTE: this `w:p` can itself contain `w:lastRenderedPageBreak` elements (when the
+        paragraph contained more than one). While this is rare, the caller should treat
+        this paragraph the same as other paragraphs and split it if necessary in a
+        following step or recursion.
+        """
+        if not self == self._first_lrpb_in_p(self._enclosing_p):
+            raise ValueError("only defined on first rendered page-break in paragraph")
+
+        # -- splitting approach is different when break is inside a hyperlink --
+        return (
+            self._following_frag_in_hlink
+            if self._is_in_hyperlink
+            else self._following_frag_in_run
+        )
+
+    @property
+    def follows_all_content(self) -> bool:
+        """True when this page-break element is the last "content" in the paragraph.
+
+        This is a very uncommon case and may only occur in contrived cases or where the
+        XML is edited by hand, but it is not precluded by the spec.
+        """
+        # -- a page-break inside a hyperlink never meets these criteria (for our
+        # -- purposes at least) because it is considered "atomic" and always associated
+        # -- with the page it starts on.
+        if self._is_in_hyperlink:
+            return False
+
+        return bool(
+            # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
+            self._enclosing_p.xpath(
+                # -- in last run of paragraph --
+                f"(./w:r)[last()]"
+                # -- all page-breaks --
+                f"/w:lastRenderedPageBreak"
+                # -- that are not followed by any content-bearing elements --
+                f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
+            )
+        )
+
+    @property
+    def precedes_all_content(self) -> bool:
+        """True when a `w:lastRenderedPageBreak` precedes all paragraph content.
+
+        This is a common case; it occurs whenever the page breaks on an even paragraph
+        boundary.
+        """
+        # -- a page-break inside a hyperlink never meets these criteria because there
+        # -- is always part of the hyperlink text before the page-break.
+        if self._is_in_hyperlink:
+            return False
+
+        return bool(
+            # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
+            self._enclosing_p.xpath(
+                # -- in first run of paragraph --
+                f"./w:r[1]"
+                # -- all page-breaks --
+                f"/w:lastRenderedPageBreak"
+                # -- that are not preceded by any content-bearing elements --
+                f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]"
+            )
+        )
+
+    @property
+    def preceding_fragment_p(self) -> CT_P:
+        """A "loose" `CT_P` containing only the paragraph content before this break.
+
+        Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
+        page-break in its paragraph.
+
+        The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
+        page-break with this `w:lastRenderedPageBreak` element and all its following
+        siblings removed.
+        """
+        if not self == self._first_lrpb_in_p(self._enclosing_p):
+            raise ValueError("only defined on first rendered page-break in paragraph")
+
+        # -- splitting approach is different when break is inside a hyperlink --
+        return (
+            self._preceding_frag_in_hlink
+            if self._is_in_hyperlink
+            else self._preceding_frag_in_run
+        )
+
+    def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
+        """The `w:hyperlink` grandparent of the `lrpb` page-break element.
+
+        Raises `IndexError` when `lrpb` has a `w:p` grandparent, so only call when
+        `._is_in_hyperlink` is True.
+        """
+        return lrpb.xpath("./parent::w:r/parent::w:hyperlink")[0]
+
+    @property
+    def _enclosing_p(self) -> CT_P:
+        """The `w:p` element parent or grandparent of this `w:lastRenderedPageBreak`."""
+        return self.xpath("./ancestor::w:p[1]")[0]
+
+    def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
+        """The first `w:lastRenderedPageBreak` element in `p`.
+
+        Raises `ValueError` if there are no rendered page-breaks in `p`.
+        """
+        lrpbs = p.xpath(
+            "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
+        )
+        if not lrpbs:
+            raise ValueError("no rendered page-breaks in paragraph element")
+        return lrpbs[0]
+
+    @lazyproperty
+    def _following_frag_in_hlink(self) -> CT_P:
+        """Following CT_P fragment when break occurs within a hyperlink.
+
+        Note this is a *partial-function* and raises when `lrpb` is not inside a
+        hyperlink.
+        """
+        if not self._is_in_hyperlink:
+            raise ValueError("only defined on a rendered page-break in a hyperlink")
+
+        # -- work on a clone `w:p` so our mutations don't persist --
+        p = copy.deepcopy(self._enclosing_p)
+
+        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+        lrpb = self._first_lrpb_in_p(p)
+
+        # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
+        hyperlink = lrpb._enclosing_hyperlink(lrpb)
+
+        # -- delete all w:p inner-content preceding the hyperlink --
+        for e in hyperlink.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
+            p.remove(e)
+
+        # -- remove the whole hyperlink, it belongs to the preceding-fragment-p --
+        hyperlink.getparent().remove(hyperlink)
+
+        # -- that's it, return the remaining fragment of `w:p` clone --
+        return p
+
+    @lazyproperty
+    def _following_frag_in_run(self) -> CT_P:
+        """Following CT_P fragment when break does not occur in a hyperlink.
+
+        Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
+        """
+        if self._is_in_hyperlink:
+            raise ValueError("only defined on a rendered page-break not in a hyperlink")
+
+        # -- work on a clone `w:p` so our mutations don't persist --
+        p = copy.deepcopy(self._enclosing_p)
+
+        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+        lrpb = self._first_lrpb_in_p(p)
+
+        # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
+        enclosing_r = lrpb.xpath("./parent::w:r")[0]
+
+        # -- delete all w:p inner-content preceding that run (but not w:pPr) --
+        for e in enclosing_r.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
+            p.remove(e)
+
+        # -- then remove all run inner-content preceding this lrpb in its run (but not
+        # -- the `w:rPr`) and also remove the page-break itself
+        for e in lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"):
+            enclosing_r.remove(e)
+        enclosing_r.remove(lrpb)
+
+        return p
+
+    @lazyproperty
+    def _is_in_hyperlink(self) -> bool:
+        """True when this page-break is embedded in a hyperlink run."""
+        return bool(self.xpath("./parent::w:r/parent::w:hyperlink"))
+
+    @lazyproperty
+    def _preceding_frag_in_hlink(self) -> CT_P:
+        """Preceding CT_P fragment when break occurs within a hyperlink.
+
+        Note this is a *partial-function* and raises when `lrpb` is not inside a
+        hyperlink.
+        """
+        if not self._is_in_hyperlink:
+            raise ValueError("only defined on a rendered page-break in a hyperlink")
+
+        # -- work on a clone `w:p` so our mutations don't persist --
+        p = copy.deepcopy(self._enclosing_p)
+
+        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+        lrpb = self._first_lrpb_in_p(p)
+
+        # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
+        hyperlink = lrpb._enclosing_hyperlink(lrpb)
+
+        # -- delete all w:p inner-content following the hyperlink --
+        for e in hyperlink.xpath("./following-sibling::*"):
+            p.remove(e)
+
+        # -- remove this page-break from inside the hyperlink --
+        lrpb.getparent().remove(lrpb)
+
+        # -- that's it, the entire hyperlink goes into the preceding fragment so
+        # -- the hyperlink is not "split".
+        return p
+
+    @lazyproperty
+    def _preceding_frag_in_run(self) -> CT_P:
+        """Preceding CT_P fragment when break does not occur in a hyperlink.
+
+        Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
+        """
+        if self._is_in_hyperlink:
+            raise ValueError("only defined on a rendered page-break not in a hyperlink")
+
+        # -- work on a clone `w:p` so our mutations don't persist --
+        p = copy.deepcopy(self._enclosing_p)
+
+        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+        lrpb = self._first_lrpb_in_p(p)
+
+        # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
+        enclosing_r = lrpb.xpath("./parent::w:r")[0]
+
+        # -- delete all `w:p` inner-content following that run --
+        for e in enclosing_r.xpath("./following-sibling::*"):
+            p.remove(e)
+
+        # -- then delete all `w:r` inner-content following this lrpb in its run and
+        # -- also remove the page-break itself
+        for e in lrpb.xpath("./following-sibling::*"):
+            enclosing_r.remove(e)
+        enclosing_r.remove(lrpb)
+
+        return p
+
+    @lazyproperty
+    def _run_inner_content_xpath(self) -> str:
+        """XPath fragment matching any run inner-content elements."""
+        return (
+            "self::w:br"
+            " | self::w:cr"
+            " | self::w:drawing"
+            " | self::w:noBreakHyphen"
+            " | self::w:ptab"
+            " | self::w:t"
+            " | self::w:tab"
+        )
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py
new file mode 100644
index 00000000..63e96f31
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py
@@ -0,0 +1,106 @@
+# pyright: reportPrivateUsage=false
+
+"""Custom element classes related to paragraphs (CT_P)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, List, cast
+
+from docx.oxml.parser import OxmlElement
+from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
+
+if TYPE_CHECKING:
+    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+    from docx.oxml.section import CT_SectPr
+    from docx.oxml.text.hyperlink import CT_Hyperlink
+    from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+    from docx.oxml.text.parfmt import CT_PPr
+    from docx.oxml.text.run import CT_R
+
+
+class CT_P(BaseOxmlElement):
+    """`<w:p>` element, containing the properties and text for a paragraph."""
+
+    add_r: Callable[[], CT_R]
+    get_or_add_pPr: Callable[[], CT_PPr]
+    hyperlink_lst: List[CT_Hyperlink]
+    r_lst: List[CT_R]
+
+    pPr: CT_PPr | None = ZeroOrOne("w:pPr")  # pyright: ignore[reportAssignmentType]
+    hyperlink = ZeroOrMore("w:hyperlink")
+    r = ZeroOrMore("w:r")
+
+    def add_p_before(self) -> CT_P:
+        """Return a new `<w:p>` element inserted directly prior to this one."""
+        new_p = cast(CT_P, OxmlElement("w:p"))
+        self.addprevious(new_p)
+        return new_p
+
+    @property
+    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
+        """The value of the `<w:jc>` grandchild element or |None| if not present."""
+        pPr = self.pPr
+        if pPr is None:
+            return None
+        return pPr.jc_val
+
+    @alignment.setter
+    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
+        pPr = self.get_or_add_pPr()
+        pPr.jc_val = value
+
+    def clear_content(self):
+        """Remove all child elements, except the `<w:pPr>` element if present."""
+        for child in self.xpath("./*[not(self::w:pPr)]"):
+            self.remove(child)
+
+    @property
+    def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
+        """Run and hyperlink children of the `w:p` element, in document order."""
+        return self.xpath("./w:r | ./w:hyperlink")
+
+    @property
+    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+        """All `w:lastRenderedPageBreak` descendants of this paragraph.
+
+        Rendered page-breaks commonly occur in a run but can also occur in a run inside
+        a hyperlink. This returns both.
+        """
+        return self.xpath(
+            "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
+        )
+
+    def set_sectPr(self, sectPr: CT_SectPr):
+        """Unconditionally replace or add `sectPr` as grandchild in correct sequence."""
+        pPr = self.get_or_add_pPr()
+        pPr._remove_sectPr()
+        pPr._insert_sectPr(sectPr)
+
+    @property
+    def style(self) -> str | None:
+        """String contained in `w:val` attribute of `./w:pPr/w:pStyle` grandchild.
+
+        |None| if not present.
+        """
+        pPr = self.pPr
+        if pPr is None:
+            return None
+        return pPr.style
+
+    @style.setter
+    def style(self, style: str | None):
+        pPr = self.get_or_add_pPr()
+        pPr.style = style
+
+    @property
+    def text(self):  # pyright: ignore[reportIncompatibleMethodOverride]
+        """The textual content of this paragraph.
+
+        Inner-content child elements like `w:r` and `w:hyperlink` are translated to
+        their text equivalent.
+        """
+        return "".join(e.text for e in self.xpath("w:r | w:hyperlink"))
+
+    def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
+        self.insert(0, pPr)  # -- `w:pPr` goes in as the first child of this `w:p` --
+        return pPr
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py
new file mode 100644
index 00000000..de560963
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py
@@ -0,0 +1,387 @@
+"""Custom element classes related to paragraph properties (CT_PPr)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+from docx.enum.text import (
+    WD_ALIGN_PARAGRAPH,
+    WD_LINE_SPACING,
+    WD_TAB_ALIGNMENT,
+    WD_TAB_LEADER,
+)
+from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure
+from docx.oxml.xmlchemy import (
+    BaseOxmlElement,
+    OneOrMore,
+    OptionalAttribute,
+    RequiredAttribute,
+    ZeroOrOne,
+)
+from docx.shared import Length
+
+if TYPE_CHECKING:
+    from docx.oxml.section import CT_SectPr
+    from docx.oxml.shared import CT_String
+
+
+class CT_Ind(BaseOxmlElement):
+    """``<w:ind>`` element, specifying paragraph indentation."""
+
+    left: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:left", ST_SignedTwipsMeasure
+    )
+    right: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:right", ST_SignedTwipsMeasure
+    )
+    firstLine: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:firstLine", ST_TwipsMeasure
+    )
+    hanging: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:hanging", ST_TwipsMeasure
+    )
+
+
+class CT_Jc(BaseOxmlElement):
+    """``<w:jc>`` element, specifying paragraph justification."""
+
+    val: WD_ALIGN_PARAGRAPH = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:val", WD_ALIGN_PARAGRAPH
+    )
+
+
+class CT_PPr(BaseOxmlElement):
+    """``<w:pPr>`` element, containing the properties for a paragraph."""
+
+    get_or_add_ind: Callable[[], CT_Ind]
+    get_or_add_pStyle: Callable[[], CT_String]
+    _insert_sectPr: Callable[[CT_SectPr], None]
+    _remove_pStyle: Callable[[], None]
+    _remove_sectPr: Callable[[], None]
+
+    _tag_seq = (
+        "w:pStyle",
+        "w:keepNext",
+        "w:keepLines",
+        "w:pageBreakBefore",
+        "w:framePr",
+        "w:widowControl",
+        "w:numPr",
+        "w:suppressLineNumbers",
+        "w:pBdr",
+        "w:shd",
+        "w:tabs",
+        "w:suppressAutoHyphens",
+        "w:kinsoku",
+        "w:wordWrap",
+        "w:overflowPunct",
+        "w:topLinePunct",
+        "w:autoSpaceDE",
+        "w:autoSpaceDN",
+        "w:bidi",
+        "w:adjustRightInd",
+        "w:snapToGrid",
+        "w:spacing",
+        "w:ind",
+        "w:contextualSpacing",
+        "w:mirrorIndents",
+        "w:suppressOverlap",
+        "w:jc",
+        "w:textDirection",
+        "w:textAlignment",
+        "w:textboxTightWrap",
+        "w:outlineLvl",
+        "w:divId",
+        "w:cnfStyle",
+        "w:rPr",
+        "w:sectPr",
+        "w:pPrChange",
+    )
+    pStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
+        "w:pStyle", successors=_tag_seq[1:]
+    )
+    keepNext = ZeroOrOne("w:keepNext", successors=_tag_seq[2:])
+    keepLines = ZeroOrOne("w:keepLines", successors=_tag_seq[3:])
+    pageBreakBefore = ZeroOrOne("w:pageBreakBefore", successors=_tag_seq[4:])
+    widowControl = ZeroOrOne("w:widowControl", successors=_tag_seq[6:])
+    numPr = ZeroOrOne("w:numPr", successors=_tag_seq[7:])
+    tabs = ZeroOrOne("w:tabs", successors=_tag_seq[11:])
+    spacing = ZeroOrOne("w:spacing", successors=_tag_seq[22:])
+    ind: CT_Ind | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
+        "w:ind", successors=_tag_seq[23:]
+    )
+    jc = ZeroOrOne("w:jc", successors=_tag_seq[27:])
+    sectPr = ZeroOrOne("w:sectPr", successors=_tag_seq[35:])
+    del _tag_seq
+
+    @property
+    def first_line_indent(self) -> Length | None:
+        """A |Length| value calculated from the values of `w:ind/@w:firstLine` and
+        `w:ind/@w:hanging`.
+
+        Returns |None| if the `w:ind` child is not present.
+        """
+        ind = self.ind
+        if ind is None:
+            return None
+        hanging = ind.hanging
+        if hanging is not None:
+            return Length(-hanging)
+        firstLine = ind.firstLine
+        if firstLine is None:
+            return None
+        return firstLine
+
+    @first_line_indent.setter
+    def first_line_indent(self, value: Length | None):
+        if self.ind is None and value is None:
+            return
+        ind = self.get_or_add_ind()
+        ind.firstLine = ind.hanging = None
+        if value is None:
+            return
+        elif value < 0:
+            ind.hanging = -value
+        else:
+            ind.firstLine = value
+
+    @property
+    def ind_left(self) -> Length | None:
+        """The value of `w:ind/@w:left` or |None| if not present."""
+        ind = self.ind
+        if ind is None:
+            return None
+        return ind.left
+
+    @ind_left.setter
+    def ind_left(self, value: Length | None):
+        if value is None and self.ind is None:
+            return
+        ind = self.get_or_add_ind()
+        ind.left = value
+
+    @property
+    def ind_right(self) -> Length | None:
+        """The value of `w:ind/@w:right` or |None| if not present."""
+        ind = self.ind
+        if ind is None:
+            return None
+        return ind.right
+
+    @ind_right.setter
+    def ind_right(self, value: Length | None):
+        if value is None and self.ind is None:
+            return
+        ind = self.get_or_add_ind()
+        ind.right = value
+
+    @property
+    def jc_val(self) -> WD_ALIGN_PARAGRAPH | None:
+        """Value of the `<w:jc>` child element or |None| if not present."""
+        return self.jc.val if self.jc is not None else None
+
+    @jc_val.setter
+    def jc_val(self, value):
+        if value is None:
+            self._remove_jc()
+            return
+        self.get_or_add_jc().val = value
+
+    @property
+    def keepLines_val(self):
+        """The value of `keepLines/@val` or |None| if not present."""
+        keepLines = self.keepLines
+        if keepLines is None:
+            return None
+        return keepLines.val
+
+    @keepLines_val.setter
+    def keepLines_val(self, value):
+        if value is None:
+            self._remove_keepLines()
+        else:
+            self.get_or_add_keepLines().val = value
+
+    @property
+    def keepNext_val(self):
+        """The value of `keepNext/@val` or |None| if not present."""
+        keepNext = self.keepNext
+        if keepNext is None:
+            return None
+        return keepNext.val
+
+    @keepNext_val.setter
+    def keepNext_val(self, value):
+        if value is None:
+            self._remove_keepNext()
+        else:
+            self.get_or_add_keepNext().val = value
+
+    @property
+    def pageBreakBefore_val(self):
+        """The value of `pageBreakBefore/@val` or |None| if not present."""
+        pageBreakBefore = self.pageBreakBefore
+        if pageBreakBefore is None:
+            return None
+        return pageBreakBefore.val
+
+    @pageBreakBefore_val.setter
+    def pageBreakBefore_val(self, value):
+        if value is None:
+            self._remove_pageBreakBefore()
+        else:
+            self.get_or_add_pageBreakBefore().val = value
+
+    @property
+    def spacing_after(self):
+        """The value of `w:spacing/@w:after` or |None| if not present."""
+        spacing = self.spacing
+        if spacing is None:
+            return None
+        return spacing.after
+
+    @spacing_after.setter
+    def spacing_after(self, value):
+        if value is None and self.spacing is None:
+            return
+        self.get_or_add_spacing().after = value
+
+    @property
+    def spacing_before(self):
+        """The value of `w:spacing/@w:before` or |None| if not present."""
+        spacing = self.spacing
+        if spacing is None:
+            return None
+        return spacing.before
+
+    @spacing_before.setter
+    def spacing_before(self, value):
+        if value is None and self.spacing is None:
+            return
+        self.get_or_add_spacing().before = value
+
+    @property
+    def spacing_line(self):
+        """The value of `w:spacing/@w:line` or |None| if not present."""
+        spacing = self.spacing
+        if spacing is None:
+            return None
+        return spacing.line
+
+    @spacing_line.setter
+    def spacing_line(self, value):
+        if value is None and self.spacing is None:
+            return
+        self.get_or_add_spacing().line = value
+
+    @property
+    def spacing_lineRule(self):
+        """The value of `w:spacing/@w:lineRule` as a member of the :ref:`WdLineSpacing`
+        enumeration.
+
+        Only the `MULTIPLE`, `EXACTLY`, and `AT_LEAST` members are used. It is the
+        responsibility of the client to calculate the use of `SINGLE`, `DOUBLE`, and
+        `MULTIPLE` based on the value of `w:spacing/@w:line` if that behavior is
+        desired.
+        """
+        spacing = self.spacing
+        if spacing is None:
+            return None
+        lineRule = spacing.lineRule
+        if lineRule is None and spacing.line is not None:
+            return WD_LINE_SPACING.MULTIPLE
+        return lineRule
+
+    @spacing_lineRule.setter
+    def spacing_lineRule(self, value):
+        if value is None and self.spacing is None:
+            return
+        self.get_or_add_spacing().lineRule = value
+
+    @property
+    def style(self) -> str | None:
+        """String contained in `./w:pStyle/@val`, or None if child is not present."""
+        pStyle = self.pStyle
+        if pStyle is None:
+            return None
+        return pStyle.val
+
+    @style.setter
+    def style(self, style: str | None):
+        """Set `./w:pStyle/@val` to `style`, adding a new element if necessary.
+
+        If `style` is |None|, remove `./w:pStyle` when present.
+        """
+        if style is None:
+            self._remove_pStyle()
+            return
+        pStyle = self.get_or_add_pStyle()
+        pStyle.val = style
+
+    @property
+    def widowControl_val(self):
+        """The value of `widowControl/@val` or |None| if not present."""
+        widowControl = self.widowControl
+        if widowControl is None:
+            return None
+        return widowControl.val
+
+    @widowControl_val.setter
+    def widowControl_val(self, value):
+        if value is None:
+            self._remove_widowControl()
+        else:
+            self.get_or_add_widowControl().val = value
+
+
+class CT_Spacing(BaseOxmlElement):
+    """``<w:spacing>`` element, specifying paragraph spacing attributes such as space
+    before and line spacing."""
+
+    after = OptionalAttribute("w:after", ST_TwipsMeasure)
+    before = OptionalAttribute("w:before", ST_TwipsMeasure)
+    line = OptionalAttribute("w:line", ST_SignedTwipsMeasure)
+    lineRule = OptionalAttribute("w:lineRule", WD_LINE_SPACING)
+
+
+class CT_TabStop(BaseOxmlElement):
+    """`<w:tab>` element, representing an individual tab stop.
+
+    Overloaded to use for a tab-character in a run, which also uses the w:tab tag but
+    only needs a __str__ method.
+    """
+
+    val: WD_TAB_ALIGNMENT = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:val", WD_TAB_ALIGNMENT
+    )
+    leader: WD_TAB_LEADER | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:leader", WD_TAB_LEADER, default=WD_TAB_LEADER.SPACES
+    )
+    pos: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:pos", ST_SignedTwipsMeasure
+    )
+
+    def __str__(self) -> str:
+        """Text equivalent of a `w:tab` element appearing in a run.
+
+        Allows text of run inner-content to be accessed consistently across all text
+        inner-content.
+        """
+        return "\t"
+
+
+class CT_TabStops(BaseOxmlElement):
+    """``<w:tabs>`` element, container for a sorted sequence of tab stops."""
+
+    tab = OneOrMore("w:tab", successors=())
+
+    def insert_tab_in_order(self, pos, align, leader):
+        """Insert a newly created `w:tab` child element in `pos` order."""
+        new_tab = self._new_tab()
+        new_tab.pos, new_tab.val, new_tab.leader = pos, align, leader
+        for tab in self.tab_lst:
+            if new_tab.pos < tab.pos:
+                tab.addprevious(new_tab)
+                return new_tab
+        self.append(new_tab)
+        return new_tab
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
new file mode 100644
index 00000000..88efae83
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
@@ -0,0 +1,276 @@
+"""Custom element classes related to text runs (CT_R)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, Iterator, List
+
+from docx.oxml.drawing import CT_Drawing
+from docx.oxml.ns import qn
+from docx.oxml.simpletypes import ST_BrClear, ST_BrType
+from docx.oxml.text.font import CT_RPr
+from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
+from docx.shared import TextAccumulator
+
+if TYPE_CHECKING:
+    from docx.oxml.shape import CT_Anchor, CT_Inline
+    from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+    from docx.oxml.text.parfmt import CT_TabStop
+
+# ------------------------------------------------------------------------------------
+# Run-level elements
+
+
+class CT_R(BaseOxmlElement):
+    """`<w:r>` element, containing the properties and text for a run."""
+
+    add_br: Callable[[], CT_Br]
+    add_tab: Callable[[], CT_TabStop]
+    get_or_add_rPr: Callable[[], CT_RPr]
+    _add_drawing: Callable[[], CT_Drawing]
+    _add_t: Callable[..., CT_Text]
+
+    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
+    br = ZeroOrMore("w:br")
+    cr = ZeroOrMore("w:cr")
+    drawing = ZeroOrMore("w:drawing")
+    t = ZeroOrMore("w:t")
+    tab = ZeroOrMore("w:tab")
+
+    def add_t(self, text: str) -> CT_Text:
+        """Return a newly added `<w:t>` element containing `text`."""
+        t = self._add_t(text=text)
+        if len(text.strip()) < len(text):
+            t.set(qn("xml:space"), "preserve")
+        return t
+
+    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
+        """Return newly appended `CT_Drawing` (`w:drawing`) child element.
+
+        The `w:drawing` element has `inline_or_anchor` as its child.
+        """
+        drawing = self._add_drawing()
+        drawing.append(inline_or_anchor)
+        return drawing
+
+    def clear_content(self) -> None:
+        """Remove all child elements except a `w:rPr` element if present."""
+        # -- remove all run inner-content except a `w:rPr` when present. --
+        for e in self.xpath("./*[not(self::w:rPr)]"):
+            self.remove(e)
+
+    @property
+    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
+        """Text of run, possibly punctuated by `w:lastRenderedPageBreak` elements."""
+        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+
+        accum = TextAccumulator()
+
+        def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
+            for e in self.xpath(
+                "w:br"
+                " | w:cr"
+                " | w:drawing"
+                " | w:lastRenderedPageBreak"
+                " | w:noBreakHyphen"
+                " | w:ptab"
+                " | w:t"
+                " | w:tab"
+            ):
+                if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
+                    yield from accum.pop()
+                    yield e
+                else:
+                    accum.push(str(e))
+
+            # -- don't forget the "tail" string --
+            yield from accum.pop()
+
+        return list(iter_items())
+
+    @property
+    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+        """All `w:lastRenderedPageBreak` child elements of this run."""
+        return self.xpath("./w:lastRenderedPageBreak")
+
+    @property
+    def style(self) -> str | None:
+        """String contained in `w:val` attribute of `w:rStyle` grandchild.
+
+        |None| if that element is not present.
+        """
+        rPr = self.rPr
+        if rPr is None:
+            return None
+        return rPr.style
+
+    @style.setter
+    def style(self, style: str | None):
+        """Set character style of this `w:r` element to `style`.
+
+        If `style` is None, remove the style element.
+        """
+        rPr = self.get_or_add_rPr()
+        rPr.style = style
+
+    @property
+    def text(self) -> str:
+        """The textual content of this run.
+
+        Inner-content child elements like `w:tab` are translated to their text
+        equivalent.
+        """
+        return "".join(
+            str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
+        )
+
+    @text.setter
+    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
+        self.clear_content()
+        _RunContentAppender.append_to_run_from_text(self, text)
+
+    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
+        self.insert(0, rPr)
+        return rPr
+
+
+# ------------------------------------------------------------------------------------
+# Run inner-content elements
+
+
+class CT_Br(BaseOxmlElement):
+    """`<w:br>` element, indicating a line, page, or column break in a run."""
+
+    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
+        "w:type", ST_BrType, default="textWrapping"
+    )
+    clear: str | None = OptionalAttribute("w:clear", ST_BrClear)  # pyright: ignore
+
+    def __str__(self) -> str:
+        """Text equivalent of this element. Actual value depends on break type.
+
+        A line break is translated as "\n". Column and page breaks produce the empty
+        string ("").
+
+        This allows the text of run inner-content to be accessed in a consistent way
+        for all run inner-content text elements.
+        """
+        return "\n" if self.type == "textWrapping" else ""
+
+
+class CT_Cr(BaseOxmlElement):
+    """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.
+
+    In Word, this represents a "soft carriage-return" in the sense that it does not end
+    the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
+    text equivalent is considered to be newline ("\n") since in plain-text that's the
+    closest Python equivalent.
+
+    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
+    `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
+    for many elements.
+    """
+
+    def __str__(self) -> str:
+        """Text equivalent of this element, a single newline ("\n")."""
+        return "\n"
+
+
+class CT_NoBreakHyphen(BaseOxmlElement):
+    """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.
+
+    This maps to a plain-text dash ("-").
+
+    NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
+    maps to `CT_Empty`. This name was added to give it behavior distinguished from the
+    many other elements represented in the schema by CT_Empty.
+    """
+
+    def __str__(self) -> str:
+        """Text equivalent of this element, a single dash character ("-")."""
+        return "-"
+
+
+class CT_PTab(BaseOxmlElement):
+    """`<w:ptab>` element, representing an absolute-position tab character within a run.
+
+    This character advances the rendering position to the specified position regardless
+    of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
+    """
+
+    def __str__(self) -> str:
+        """Text equivalent of this element, a single tab ("\t") character.
+
+        This allows the text of run inner-content to be accessed in a consistent way
+        for all run inner-content text elements.
+        """
+        return "\t"
+
+
+# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
+# -- element class provides the __str__() method for this empty element, unconditionally
+# -- returning "\t".
+
+
+class CT_Text(BaseOxmlElement):
+    """`<w:t>` element, containing a sequence of characters within a run."""
+
+    def __str__(self) -> str:
+        """Text contained in this element, the empty string if it has no content.
+
+        This property allows this run inner-content element to be queried for its text
+        the same way as other run-content elements are. In particular, this never
+        returns None, as etree._Element does when there is no content.
+        """
+        return self.text or ""
+
+
+# ------------------------------------------------------------------------------------
+# Utility
+
+
+class _RunContentAppender:
+    """Translates a Python string into run content elements appended in a `w:r` element.
+
+    Contiguous sequences of regular characters are appended in a single `<w:t>` element.
+    Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a
+    newline or carriage return character ('\n', '\r') causes a `<w:br>` element to be
+    appended.
+    """
+
+    def __init__(self, r: CT_R):
+        self._r = r
+        self._bfr: List[str] = []
+
+    @classmethod
+    def append_to_run_from_text(cls, r: CT_R, text: str):
+        """Append inner-content elements for `text` to `r` element."""
+        appender = cls(r)
+        appender.add_text(text)
+
+    def add_text(self, text: str):
+        """Append inner-content elements for `text` to the `w:r` element."""
+        for char in text:
+            self.add_char(char)
+        self.flush()
+
+    def add_char(self, char: str):
+        """Process next character of input through finite state machine (FSM).
+
+        There are two possible states, buffer pending and not pending, but those are
+        hidden behind the `.flush()` method which must be called at the end of text to
+        ensure any pending `<w:t>` element is written.
+        """
+        if char == "\t":
+            self.flush()
+            self._r.add_tab()
+        elif char in "\r\n":
+            self.flush()
+            self._r.add_br()
+        else:
+            self._bfr.append(char)
+
+    def flush(self):
+        text = "".join(self._bfr)
+        if text:
+            self._r.add_t(text)
+        self._bfr.clear()
author	S. Solomon Darnell	2025-03-28 21:52:21 -0500
committer	S. Solomon Darnell	2025-03-28 21:52:21 -0500
commit	4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree	ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/oxml/text
parent	cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download	gn-ai-master.tar.gz