aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/docx/oxml/text
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/oxml/text
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/oxml/text')
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py0
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/font.py356
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py45
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py284
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py106
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py387
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/run.py276
7 files changed, 1454 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/font.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/font.py
new file mode 100644
index 00000000..140086aa
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/font.py
@@ -0,0 +1,356 @@
+"""Custom element classes related to run properties (font)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+from docx.enum.dml import MSO_THEME_COLOR
+from docx.enum.text import WD_COLOR_INDEX, WD_UNDERLINE
+from docx.oxml.ns import nsdecls
+from docx.oxml.parser import parse_xml
+from docx.oxml.simpletypes import (
+ ST_HexColor,
+ ST_HpsMeasure,
+ ST_String,
+ ST_VerticalAlignRun,
+)
+from docx.oxml.xmlchemy import (
+ BaseOxmlElement,
+ OptionalAttribute,
+ RequiredAttribute,
+ ZeroOrOne,
+)
+
+if TYPE_CHECKING:
+ from docx.oxml.shared import CT_OnOff, CT_String
+ from docx.shared import Length
+
+
+class CT_Color(BaseOxmlElement):
+ """`w:color` element, specifying the color of a font and perhaps other objects."""
+
+ val = RequiredAttribute("w:val", ST_HexColor)
+ themeColor = OptionalAttribute("w:themeColor", MSO_THEME_COLOR)
+
+
+class CT_Fonts(BaseOxmlElement):
+ """`<w:rFonts>` element.
+
+ Specifies typeface name for the various language types.
+ """
+
+ ascii: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:ascii", ST_String
+ )
+ hAnsi: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:hAnsi", ST_String
+ )
+
+
+class CT_Highlight(BaseOxmlElement):
+ """`w:highlight` element, specifying font highlighting/background color."""
+
+ val: WD_COLOR_INDEX = RequiredAttribute( # pyright: ignore[reportGeneralTypeIssues]
+ "w:val", WD_COLOR_INDEX
+ )
+
+
+class CT_HpsMeasure(BaseOxmlElement):
+ """Used for `<w:sz>` element and others, specifying font size in half-points."""
+
+ val: Length = RequiredAttribute( # pyright: ignore[reportGeneralTypeIssues]
+ "w:val", ST_HpsMeasure
+ )
+
+
+class CT_RPr(BaseOxmlElement):
+ """`<w:rPr>` element, containing the properties for a run."""
+
+ get_or_add_highlight: Callable[[], CT_Highlight]
+ get_or_add_rFonts: Callable[[], CT_Fonts]
+ get_or_add_sz: Callable[[], CT_HpsMeasure]
+ get_or_add_vertAlign: Callable[[], CT_VerticalAlignRun]
+ _add_rStyle: Callable[..., CT_String]
+ _add_u: Callable[[], CT_Underline]
+ _remove_highlight: Callable[[], None]
+ _remove_rFonts: Callable[[], None]
+ _remove_rStyle: Callable[[], None]
+ _remove_sz: Callable[[], None]
+ _remove_u: Callable[[], None]
+ _remove_vertAlign: Callable[[], None]
+
+ _tag_seq = (
+ "w:rStyle",
+ "w:rFonts",
+ "w:b",
+ "w:bCs",
+ "w:i",
+ "w:iCs",
+ "w:caps",
+ "w:smallCaps",
+ "w:strike",
+ "w:dstrike",
+ "w:outline",
+ "w:shadow",
+ "w:emboss",
+ "w:imprint",
+ "w:noProof",
+ "w:snapToGrid",
+ "w:vanish",
+ "w:webHidden",
+ "w:color",
+ "w:spacing",
+ "w:w",
+ "w:kern",
+ "w:position",
+ "w:sz",
+ "w:szCs",
+ "w:highlight",
+ "w:u",
+ "w:effect",
+ "w:bdr",
+ "w:shd",
+ "w:fitText",
+ "w:vertAlign",
+ "w:rtl",
+ "w:cs",
+ "w:em",
+ "w:lang",
+ "w:eastAsianLayout",
+ "w:specVanish",
+ "w:oMath",
+ )
+ rStyle: CT_String | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:rStyle", successors=_tag_seq[1:]
+ )
+ rFonts: CT_Fonts | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:rFonts", successors=_tag_seq[2:]
+ )
+ b: CT_OnOff | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:b", successors=_tag_seq[3:]
+ )
+ bCs = ZeroOrOne("w:bCs", successors=_tag_seq[4:])
+ i = ZeroOrOne("w:i", successors=_tag_seq[5:])
+ iCs = ZeroOrOne("w:iCs", successors=_tag_seq[6:])
+ caps = ZeroOrOne("w:caps", successors=_tag_seq[7:])
+ smallCaps = ZeroOrOne("w:smallCaps", successors=_tag_seq[8:])
+ strike = ZeroOrOne("w:strike", successors=_tag_seq[9:])
+ dstrike = ZeroOrOne("w:dstrike", successors=_tag_seq[10:])
+ outline = ZeroOrOne("w:outline", successors=_tag_seq[11:])
+ shadow = ZeroOrOne("w:shadow", successors=_tag_seq[12:])
+ emboss = ZeroOrOne("w:emboss", successors=_tag_seq[13:])
+ imprint = ZeroOrOne("w:imprint", successors=_tag_seq[14:])
+ noProof = ZeroOrOne("w:noProof", successors=_tag_seq[15:])
+ snapToGrid = ZeroOrOne("w:snapToGrid", successors=_tag_seq[16:])
+ vanish = ZeroOrOne("w:vanish", successors=_tag_seq[17:])
+ webHidden = ZeroOrOne("w:webHidden", successors=_tag_seq[18:])
+ color = ZeroOrOne("w:color", successors=_tag_seq[19:])
+ sz: CT_HpsMeasure | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:sz", successors=_tag_seq[24:]
+ )
+ highlight: CT_Highlight | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:highlight", successors=_tag_seq[26:]
+ )
+ u: CT_Underline | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:u", successors=_tag_seq[27:]
+ )
+ vertAlign: CT_VerticalAlignRun | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues]
+ "w:vertAlign", successors=_tag_seq[32:]
+ )
+ rtl = ZeroOrOne("w:rtl", successors=_tag_seq[33:])
+ cs = ZeroOrOne("w:cs", successors=_tag_seq[34:])
+ specVanish = ZeroOrOne("w:specVanish", successors=_tag_seq[38:])
+ oMath = ZeroOrOne("w:oMath", successors=_tag_seq[39:])
+ del _tag_seq
+
+ def _new_color(self):
+ """Override metaclass method to set `w:color/@val` to RGB black on create."""
+ return parse_xml('<w:color %s w:val="000000"/>' % nsdecls("w"))
+
+ @property
+ def highlight_val(self) -> WD_COLOR_INDEX | None:
+ """Value of `./w:highlight/@val`.
+
+ Specifies font's highlight color, or `None` if the text is not highlighted.
+ """
+ highlight = self.highlight
+ if highlight is None:
+ return None
+ return highlight.val
+
+ @highlight_val.setter
+ def highlight_val(self, value: WD_COLOR_INDEX | None) -> None:
+ if value is None:
+ self._remove_highlight()
+ return
+ highlight = self.get_or_add_highlight()
+ highlight.val = value
+
+ @property
+ def rFonts_ascii(self) -> str | None:
+ """The value of `w:rFonts/@w:ascii` or |None| if not present.
+
+ Represents the assigned typeface name. The rFonts element also specifies other
+ special-case typeface names; this method handles the case where just the common
+ name is required.
+ """
+ rFonts = self.rFonts
+ if rFonts is None:
+ return None
+ return rFonts.ascii
+
+ @rFonts_ascii.setter
+ def rFonts_ascii(self, value: str | None) -> None:
+ if value is None:
+ self._remove_rFonts()
+ return
+ rFonts = self.get_or_add_rFonts()
+ rFonts.ascii = value
+
+ @property
+ def rFonts_hAnsi(self) -> str | None:
+ """The value of `w:rFonts/@w:hAnsi` or |None| if not present."""
+ rFonts = self.rFonts
+ if rFonts is None:
+ return None
+ return rFonts.hAnsi
+
+ @rFonts_hAnsi.setter
+ def rFonts_hAnsi(self, value: str | None):
+ if value is None and self.rFonts is None:
+ return
+ rFonts = self.get_or_add_rFonts()
+ rFonts.hAnsi = value
+
+ @property
+ def style(self) -> str | None:
+ """String in `./w:rStyle/@val`, or None if `w:rStyle` is not present."""
+ rStyle = self.rStyle
+ if rStyle is None:
+ return None
+ return rStyle.val
+
+ @style.setter
+ def style(self, style: str | None) -> None:
+ """Set `./w:rStyle/@val` to `style`, adding the `w:rStyle` element if necessary.
+
+ If `style` is |None|, remove `w:rStyle` element if present.
+ """
+ if style is None:
+ self._remove_rStyle()
+ elif self.rStyle is None:
+ self._add_rStyle(val=style)
+ else:
+ self.rStyle.val = style
+
+ @property
+ def subscript(self) -> bool | None:
+ """|True| if `./w:vertAlign/@w:val` is "subscript".
+
+ |False| if `w:vertAlign/@w:val` contains any other value. |None| if
+ `w:vertAlign` is not present.
+ """
+ vertAlign = self.vertAlign
+ if vertAlign is None:
+ return None
+ if vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT:
+ return True
+ return False
+
+ @subscript.setter
+ def subscript(self, value: bool | None) -> None:
+ if value is None:
+ self._remove_vertAlign()
+ elif bool(value) is True:
+ self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT
+ # -- assert bool(value) is False --
+ elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT:
+ self._remove_vertAlign()
+
+ @property
+ def superscript(self) -> bool | None:
+ """|True| if `w:vertAlign/@w:val` is 'superscript'.
+
+ |False| if `w:vertAlign/@w:val` contains any other value. |None| if
+ `w:vertAlign` is not present.
+ """
+ vertAlign = self.vertAlign
+ if vertAlign is None:
+ return None
+ if vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT:
+ return True
+ return False
+
+ @superscript.setter
+ def superscript(self, value: bool | None):
+ if value is None:
+ self._remove_vertAlign()
+ elif bool(value) is True:
+ self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUPERSCRIPT
+ # -- assert bool(value) is False --
+ elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT:
+ self._remove_vertAlign()
+
+ @property
+ def sz_val(self) -> Length | None:
+ """The value of `w:sz/@w:val` or |None| if not present."""
+ sz = self.sz
+ if sz is None:
+ return None
+ return sz.val
+
+ @sz_val.setter
+ def sz_val(self, value: Length | None):
+ if value is None:
+ self._remove_sz()
+ return
+ sz = self.get_or_add_sz()
+ sz.val = value
+
+ @property
+ def u_val(self) -> WD_UNDERLINE | None:
+ """Value of `w:u/@val`, or None if not present.
+
+ Values `WD_UNDERLINE.SINGLE` and `WD_UNDERLINE.NONE` are mapped to `True` and
+ `False` respectively.
+ """
+ u = self.u
+ if u is None:
+ return None
+ return u.val
+
+ @u_val.setter
+ def u_val(self, value: WD_UNDERLINE | None):
+ self._remove_u()
+ if value is not None:
+ self._add_u().val = value
+
+ def _get_bool_val(self, name: str) -> bool | None:
+ """Value of boolean child with `name`, e.g. "w:b", "w:i", and "w:smallCaps"."""
+ element = getattr(self, name)
+ if element is None:
+ return None
+ return element.val
+
+ def _set_bool_val(self, name: str, value: bool | None):
+ if value is None:
+ getattr(self, "_remove_%s" % name)()
+ return
+ element = getattr(self, "get_or_add_%s" % name)()
+ element.val = value
+
+
+class CT_Underline(BaseOxmlElement):
+ """`<w:u>` element, specifying the underlining style for a run."""
+
+ val: WD_UNDERLINE | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:val", WD_UNDERLINE
+ )
+
+
+class CT_VerticalAlignRun(BaseOxmlElement):
+ """`<w:vertAlign>` element, specifying subscript or superscript."""
+
+ val: str = RequiredAttribute( # pyright: ignore[reportGeneralTypeIssues]
+ "w:val", ST_VerticalAlignRun
+ )
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py
new file mode 100644
index 00000000..38a33ff1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/hyperlink.py
@@ -0,0 +1,45 @@
+"""Custom element classes related to hyperlinks (CT_Hyperlink)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, List
+
+from docx.oxml.simpletypes import ST_OnOff, ST_String, XsdString
+from docx.oxml.text.run import CT_R
+from docx.oxml.xmlchemy import (
+ BaseOxmlElement,
+ OptionalAttribute,
+ ZeroOrMore,
+)
+
+if TYPE_CHECKING:
+ from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+
+
+class CT_Hyperlink(BaseOxmlElement):
+ """`<w:hyperlink>` element, containing the text and address for a hyperlink."""
+
+ r_lst: List[CT_R]
+
+ rId: str | None = OptionalAttribute("r:id", XsdString) # pyright: ignore[reportAssignmentType]
+ anchor: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:anchor", ST_String
+ )
+ history: bool = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:history", ST_OnOff, default=True
+ )
+
+ r = ZeroOrMore("w:r")
+
+ @property
+ def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+ """All `w:lastRenderedPageBreak` descendants of this hyperlink."""
+ return self.xpath("./w:r/w:lastRenderedPageBreak")
+
+ @property
+ def text(self) -> str: # pyright: ignore[reportIncompatibleMethodOverride]
+ """The textual content of this hyperlink.
+
+ `CT_Hyperlink` stores the hyperlink-text as one or more `w:r` children.
+ """
+ return "".join(r.text for r in self.xpath("w:r"))
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
new file mode 100644
index 00000000..943f9b6c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
@@ -0,0 +1,284 @@
+"""Custom element class for rendered page-break (CT_LastRenderedPageBreak)."""
+
+from __future__ import annotations
+
+import copy
+from typing import TYPE_CHECKING
+
+from docx.oxml.xmlchemy import BaseOxmlElement
+from docx.shared import lazyproperty
+
+if TYPE_CHECKING:
+ from docx.oxml.text.hyperlink import CT_Hyperlink
+ from docx.oxml.text.paragraph import CT_P
+
+
+class CT_LastRenderedPageBreak(BaseOxmlElement):
+ """`<w:lastRenderedPageBreak>` element, indicating page break inserted by renderer.
+
+ A rendered page-break is one inserted by the renderer when it runs out of room on a
+ page. It is an empty element (no attrs or children) and is a child of CT_R, peer to
+ CT_Text.
+
+ NOTE: this complex-type name does not exist in the schema, where
+ `w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
+ distinguished behavior. CT_Empty is used for many elements.
+ """
+
+ @property
+ def following_fragment_p(self) -> CT_P:
+ """A "loose" `CT_P` containing only the paragraph content after this break.
+
+ Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
+ page-break in its paragraph.
+
+ The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
+ page-break with this `w:lastRenderedPageBreak` element and all content preceding
+ it removed.
+
+ NOTE: this `w:p` can itself contain one or more `w:lastRenderedPageBreak` elements
+ (when the paragraph contained more than one). While this is rare, the caller
+ should treat this paragraph the same as other paragraphs and split it if
+ necessary in a following step or recursion.
+ """
+ if not self == self._first_lrpb_in_p(self._enclosing_p):
+ raise ValueError("only defined on first rendered page-break in paragraph")
+
+ # -- splitting approach is different when break is inside a hyperlink --
+ return (
+ self._following_frag_in_hlink
+ if self._is_in_hyperlink
+ else self._following_frag_in_run
+ )
+
+ @property
+ def follows_all_content(self) -> bool:
+ """True when this page-break element is the last "content" in the paragraph.
+
+ This is a very uncommon case and may only occur in contrived cases or where the
+ XML is edited by hand, but it is not precluded by the spec.
+ """
+ # -- a page-break inside a hyperlink never meets these criteria (for our
+ # -- purposes at least) because it is considered "atomic" and always associated
+ # -- with the page it starts on.
+ if self._is_in_hyperlink:
+ return False
+
+ return bool(
+ # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
+ self._enclosing_p.xpath(
+ # -- in last run of paragraph --
+ f"(./w:r)[last()]"
+ # -- all page-breaks --
+ f"/w:lastRenderedPageBreak"
+ # -- that are not followed by any content-bearing elements --
+ f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
+ )
+ )
+
+ @property
+ def precedes_all_content(self) -> bool:
+ """True when a `w:lastRenderedPageBreak` precedes all paragraph content.
+
+ This is a common case; it occurs whenever the page breaks on an even paragraph
+ boundary.
+ """
+ # -- a page-break inside a hyperlink never meets these criteria because there
+ # -- is always part of the hyperlink text before the page-break.
+ if self._is_in_hyperlink:
+ return False
+
+ return bool(
+ # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
+ self._enclosing_p.xpath(
+ # -- in first run of paragraph --
+ f"./w:r[1]"
+ # -- all page-breaks --
+ f"/w:lastRenderedPageBreak"
+ # -- that are not preceded by any content-bearing elements --
+ f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]"
+ )
+ )
+
+ @property
+ def preceding_fragment_p(self) -> CT_P:
+ """A "loose" `CT_P` containing only the paragraph content before this break.
+
+ Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
+ page-break in its paragraph.
+
+ The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
+ page-break with this `w:lastRenderedPageBreak` element and all its following
+ siblings removed.
+ """
+ if not self == self._first_lrpb_in_p(self._enclosing_p):
+ raise ValueError("only defined on first rendered page-break in paragraph")
+
+ # -- splitting approach is different when break is inside a hyperlink --
+ return (
+ self._preceding_frag_in_hlink
+ if self._is_in_hyperlink
+ else self._preceding_frag_in_run
+ )
+
+ def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
+ """The `w:hyperlink` grandparent of this `w:lastRenderedPageBreak`.
+
+ Raises `IndexError` when this page-break has a `w:p` grandparent, so only call
+ when `._is_in_hyperlink` is True.
+ """
+ return lrpb.xpath("./parent::w:r/parent::w:hyperlink")[0]
+
+ @property
+ def _enclosing_p(self) -> CT_P:
+ """The `w:p` element parent or grandparent of this `w:lastRenderedPageBreak`."""
+ return self.xpath("./ancestor::w:p[1]")[0]
+
+ def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
+ """The first `w:lastRenderedPageBreak` element in `p`.
+
+ Raises `ValueError` if there are no rendered page-breaks in `p`.
+ """
+ lrpbs = p.xpath(
+ "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
+ )
+ if not lrpbs:
+ raise ValueError("no rendered page-breaks in paragraph element")
+ return lrpbs[0]
+
+ @lazyproperty
+ def _following_frag_in_hlink(self) -> CT_P:
+ """Following CT_P fragment when break occurs within a hyperlink.
+
+ Note this is a *partial-function* and raises when `lrpb` is not inside a
+ hyperlink.
+ """
+ if not self._is_in_hyperlink:
+ raise ValueError("only defined on a rendered page-break in a hyperlink")
+
+ # -- work on a clone `w:p` so our mutations don't persist --
+ p = copy.deepcopy(self._enclosing_p)
+
+ # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+ lrpb = self._first_lrpb_in_p(p)
+
+ # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
+ hyperlink = lrpb._enclosing_hyperlink(lrpb)
+
+ # -- delete all w:p inner-content preceding the hyperlink --
+ for e in hyperlink.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
+ p.remove(e)
+
+ # -- remove the whole hyperlink, it belongs to the preceding-fragment-p --
+ hyperlink.getparent().remove(hyperlink)
+
+ # -- that's it, return the remaining fragment of `w:p` clone --
+ return p
+
+ @lazyproperty
+ def _following_frag_in_run(self) -> CT_P:
+ """Following CT_P fragment when break does not occur in a hyperlink.
+
+ Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
+ """
+ if self._is_in_hyperlink:
+ raise ValueError("only defined on a rendered page-break not in a hyperlink")
+
+ # -- work on a clone `w:p` so our mutations don't persist --
+ p = copy.deepcopy(self._enclosing_p)
+
+ # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+ lrpb = self._first_lrpb_in_p(p)
+
+ # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
+ enclosing_r = lrpb.xpath("./parent::w:r")[0]
+
+ # -- delete all w:p inner-content preceding that run (but not w:pPr) --
+ for e in enclosing_r.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
+ p.remove(e)
+
+ # -- then remove all run inner-content preceding this lrpb in its run (but not
+ # -- the `w:rPr`) and also remove the page-break itself
+ for e in lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"):
+ enclosing_r.remove(e)
+ enclosing_r.remove(lrpb)
+
+ return p
+
+ @lazyproperty
+ def _is_in_hyperlink(self) -> bool:
+ """True when this page-break is embedded in a hyperlink run."""
+ return bool(self.xpath("./parent::w:r/parent::w:hyperlink"))
+
+ @lazyproperty
+ def _preceding_frag_in_hlink(self) -> CT_P:
+ """Preceding CT_P fragment when break occurs within a hyperlink.
+
+ Note this is a *partial-function* and raises when `lrpb` is not inside a
+ hyperlink.
+ """
+ if not self._is_in_hyperlink:
+ raise ValueError("only defined on a rendered page-break in a hyperlink")
+
+ # -- work on a clone `w:p` so our mutations don't persist --
+ p = copy.deepcopy(self._enclosing_p)
+
+ # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+ lrpb = self._first_lrpb_in_p(p)
+
+ # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
+ hyperlink = lrpb._enclosing_hyperlink(lrpb)
+
+ # -- delete all w:p inner-content following the hyperlink --
+ for e in hyperlink.xpath("./following-sibling::*"):
+ p.remove(e)
+
+ # -- remove this page-break from inside the hyperlink --
+ lrpb.getparent().remove(lrpb)
+
+ # -- that's it, the entire hyperlink goes into the preceding fragment so
+ # -- the hyperlink is not "split".
+ return p
+
+ @lazyproperty
+ def _preceding_frag_in_run(self) -> CT_P:
+ """Preceding CT_P fragment when break does not occur in a hyperlink.
+
+ Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
+ """
+ if self._is_in_hyperlink:
+ raise ValueError("only defined on a rendered page-break not in a hyperlink")
+
+ # -- work on a clone `w:p` so our mutations don't persist --
+ p = copy.deepcopy(self._enclosing_p)
+
+ # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
+ lrpb = self._first_lrpb_in_p(p)
+
+ # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
+ enclosing_r = lrpb.xpath("./parent::w:r")[0]
+
+ # -- delete all `w:p` inner-content following that run --
+ for e in enclosing_r.xpath("./following-sibling::*"):
+ p.remove(e)
+
+ # -- then delete all `w:r` inner-content following this lrpb in its run and
+ # -- also remove the page-break itself
+ for e in lrpb.xpath("./following-sibling::*"):
+ enclosing_r.remove(e)
+ enclosing_r.remove(lrpb)
+
+ return p
+
+ @lazyproperty
+ def _run_inner_content_xpath(self) -> str:
+ """XPath fragment matching any run inner-content elements."""
+ return (
+ "self::w:br"
+ " | self::w:cr"
+ " | self::w:drawing"
+ " | self::w:noBreakHyphen"
+ " | self::w:ptab"
+ " | self::w:t"
+ " | self::w:tab"
+ )
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py
new file mode 100644
index 00000000..63e96f31
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py
@@ -0,0 +1,106 @@
+# pyright: reportPrivateUsage=false
+
+"""Custom element classes related to paragraphs (CT_P)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, List, cast
+
+from docx.oxml.parser import OxmlElement
+from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
+
+if TYPE_CHECKING:
+ from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
+ from docx.oxml.section import CT_SectPr
+ from docx.oxml.text.hyperlink import CT_Hyperlink
+ from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+ from docx.oxml.text.parfmt import CT_PPr
+ from docx.oxml.text.run import CT_R
+
+
+class CT_P(BaseOxmlElement):
+ """`<w:p>` element, containing the properties and text for a paragraph."""
+
+ add_r: Callable[[], CT_R]
+ get_or_add_pPr: Callable[[], CT_PPr]
+ hyperlink_lst: List[CT_Hyperlink]
+ r_lst: List[CT_R]
+
+ pPr: CT_PPr | None = ZeroOrOne("w:pPr") # pyright: ignore[reportAssignmentType]
+ hyperlink = ZeroOrMore("w:hyperlink")
+ r = ZeroOrMore("w:r")
+
+ def add_p_before(self) -> CT_P:
+ """Return a new `<w:p>` element inserted directly prior to this one."""
+ new_p = cast(CT_P, OxmlElement("w:p"))
+ self.addprevious(new_p)
+ return new_p
+
+ @property
+ def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
+ """The value of the `<w:jc>` grandchild element or |None| if not present."""
+ pPr = self.pPr
+ if pPr is None:
+ return None
+ return pPr.jc_val
+
+ @alignment.setter
+ def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
+ pPr = self.get_or_add_pPr()
+ pPr.jc_val = value
+
+ def clear_content(self):
+ """Remove all child elements, except the `<w:pPr>` element if present."""
+ for child in self.xpath("./*[not(self::w:pPr)]"):
+ self.remove(child)
+
+ @property
+ def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
+ """Run and hyperlink children of the `w:p` element, in document order."""
+ return self.xpath("./w:r | ./w:hyperlink")
+
+ @property
+ def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+ """All `w:lastRenderedPageBreak` descendants of this paragraph.
+
+ Rendered page-breaks commonly occur in a run but can also occur in a run inside
+ a hyperlink. This returns both.
+ """
+ return self.xpath(
+ "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
+ )
+
+ def set_sectPr(self, sectPr: CT_SectPr):
+ """Unconditionally replace or add `sectPr` as grandchild in correct sequence."""
+ pPr = self.get_or_add_pPr()
+ pPr._remove_sectPr()
+ pPr._insert_sectPr(sectPr)
+
+ @property
+ def style(self) -> str | None:
+ """String contained in `w:val` attribute of `./w:pPr/w:pStyle` grandchild.
+
+ |None| if not present.
+ """
+ pPr = self.pPr
+ if pPr is None:
+ return None
+ return pPr.style
+
+ @style.setter
+ def style(self, style: str | None):
+ pPr = self.get_or_add_pPr()
+ pPr.style = style
+
+ @property
+ def text(self): # pyright: ignore[reportIncompatibleMethodOverride]
+ """The textual content of this paragraph.
+
+ Inner-content child elements like `w:r` and `w:hyperlink` are translated to
+ their text equivalent.
+ """
+ return "".join(e.text for e in self.xpath("w:r | w:hyperlink"))
+
+ def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
+ self.insert(0, pPr)
+ return pPr
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py
new file mode 100644
index 00000000..de560963
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/parfmt.py
@@ -0,0 +1,387 @@
+"""Custom element classes related to paragraph properties (CT_PPr)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+from docx.enum.text import (
+ WD_ALIGN_PARAGRAPH,
+ WD_LINE_SPACING,
+ WD_TAB_ALIGNMENT,
+ WD_TAB_LEADER,
+)
+from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure
+from docx.oxml.xmlchemy import (
+ BaseOxmlElement,
+ OneOrMore,
+ OptionalAttribute,
+ RequiredAttribute,
+ ZeroOrOne,
+)
+from docx.shared import Length
+
+if TYPE_CHECKING:
+ from docx.oxml.section import CT_SectPr
+ from docx.oxml.shared import CT_String
+
+
+class CT_Ind(BaseOxmlElement):
+ """``<w:ind>`` element, specifying paragraph indentation."""
+
+ # -- `w:left` and `w:right` are declared with the signed twips simple-type; they are
+ # -- read and written by `CT_PPr.ind_left` and `CT_PPr.ind_right` --
+ left: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:left", ST_SignedTwipsMeasure
+ )
+ right: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:right", ST_SignedTwipsMeasure
+ )
+ # -- `w:firstLine` and `w:hanging` are used as alternatives by
+ # -- `CT_PPr.first_line_indent`: its setter clears both, then stores a negative value
+ # -- (negated) in `hanging` and any other value in `firstLine` --
+ firstLine: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:firstLine", ST_TwipsMeasure
+ )
+ hanging: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:hanging", ST_TwipsMeasure
+ )
+
+
+class CT_Jc(BaseOxmlElement):
+ """``<w:jc>`` element, specifying paragraph justification."""
+
+ # -- `w:val` is declared as a required attribute mapped through `WD_ALIGN_PARAGRAPH`;
+ # -- `CT_PPr.jc_val` is the accessor that reads and assigns it --
+ val: WD_ALIGN_PARAGRAPH = RequiredAttribute( # pyright: ignore[reportAssignmentType]
+ "w:val", WD_ALIGN_PARAGRAPH
+ )
+
+
+class CT_PPr(BaseOxmlElement):
+ """``<w:pPr>`` element, containing the properties for a paragraph."""
+
+ get_or_add_ind: Callable[[], CT_Ind]
+ get_or_add_pStyle: Callable[[], CT_String]
+ _insert_sectPr: Callable[[CT_SectPr], None]
+ _remove_pStyle: Callable[[], None]
+ _remove_sectPr: Callable[[], None]
+
+ _tag_seq = (
+ "w:pStyle",
+ "w:keepNext",
+ "w:keepLines",
+ "w:pageBreakBefore",
+ "w:framePr",
+ "w:widowControl",
+ "w:numPr",
+ "w:suppressLineNumbers",
+ "w:pBdr",
+ "w:shd",
+ "w:tabs",
+ "w:suppressAutoHyphens",
+ "w:kinsoku",
+ "w:wordWrap",
+ "w:overflowPunct",
+ "w:topLinePunct",
+ "w:autoSpaceDE",
+ "w:autoSpaceDN",
+ "w:bidi",
+ "w:adjustRightInd",
+ "w:snapToGrid",
+ "w:spacing",
+ "w:ind",
+ "w:contextualSpacing",
+ "w:mirrorIndents",
+ "w:suppressOverlap",
+ "w:jc",
+ "w:textDirection",
+ "w:textAlignment",
+ "w:textboxTightWrap",
+ "w:outlineLvl",
+ "w:divId",
+ "w:cnfStyle",
+ "w:rPr",
+ "w:sectPr",
+ "w:pPrChange",
+ )
+ pStyle: CT_String | None = ZeroOrOne( # pyright: ignore[reportAssignmentType]
+ "w:pStyle", successors=_tag_seq[1:]
+ )
+ keepNext = ZeroOrOne("w:keepNext", successors=_tag_seq[2:])
+ keepLines = ZeroOrOne("w:keepLines", successors=_tag_seq[3:])
+ pageBreakBefore = ZeroOrOne("w:pageBreakBefore", successors=_tag_seq[4:])
+ widowControl = ZeroOrOne("w:widowControl", successors=_tag_seq[6:])
+ numPr = ZeroOrOne("w:numPr", successors=_tag_seq[7:])
+ tabs = ZeroOrOne("w:tabs", successors=_tag_seq[11:])
+ spacing = ZeroOrOne("w:spacing", successors=_tag_seq[22:])
+ ind: CT_Ind | None = ZeroOrOne( # pyright: ignore[reportAssignmentType]
+ "w:ind", successors=_tag_seq[23:]
+ )
+ jc = ZeroOrOne("w:jc", successors=_tag_seq[27:])
+ sectPr = ZeroOrOne("w:sectPr", successors=_tag_seq[35:])
+ del _tag_seq
+
+ @property
+ def first_line_indent(self) -> Length | None:
+ """A |Length| value calculated from the values of `w:ind/@w:firstLine` and
+ `w:ind/@w:hanging`.
+
+ Returns |None| if the `w:ind` child is not present.
+ """
+ ind = self.ind
+ if ind is None:
+ return None
+ hanging = ind.hanging
+ if hanging is not None:
+ return Length(-hanging)
+ firstLine = ind.firstLine
+ if firstLine is None:
+ return None
+ return firstLine
+
+ @first_line_indent.setter
+ def first_line_indent(self, value: Length | None):
+ if self.ind is None and value is None:
+ return
+ ind = self.get_or_add_ind()
+ ind.firstLine = ind.hanging = None
+ if value is None:
+ return
+ elif value < 0:
+ ind.hanging = -value
+ else:
+ ind.firstLine = value
+
+ @property
+ def ind_left(self) -> Length | None:
+ """The value of `w:ind/@w:left` or |None| if not present."""
+ ind = self.ind
+ if ind is None:
+ return None
+ return ind.left
+
+ @ind_left.setter
+ def ind_left(self, value: Length | None):
+ if value is None and self.ind is None:
+ return
+ ind = self.get_or_add_ind()
+ ind.left = value
+
+ @property
+ def ind_right(self) -> Length | None:
+ """The value of `w:ind/@w:right` or |None| if not present."""
+ ind = self.ind
+ if ind is None:
+ return None
+ return ind.right
+
+ @ind_right.setter
+ def ind_right(self, value: Length | None):
+ if value is None and self.ind is None:
+ return
+ ind = self.get_or_add_ind()
+ ind.right = value
+
+ @property
+ def jc_val(self) -> WD_ALIGN_PARAGRAPH | None:
+ """Value of the `<w:jc>` child element or |None| if not present."""
+ return self.jc.val if self.jc is not None else None
+
+ @jc_val.setter
+ def jc_val(self, value):
+ if value is None:
+ self._remove_jc()
+ return
+ self.get_or_add_jc().val = value
+
+ @property
+ def keepLines_val(self):
+ """The value of `keepLines/@val` or |None| if not present."""
+ keepLines = self.keepLines
+ if keepLines is None:
+ return None
+ return keepLines.val
+
+ @keepLines_val.setter
+ def keepLines_val(self, value):
+ if value is None:
+ self._remove_keepLines()
+ else:
+ self.get_or_add_keepLines().val = value
+
+ @property
+ def keepNext_val(self):
+ """The value of `keepNext/@val` or |None| if not present."""
+ keepNext = self.keepNext
+ if keepNext is None:
+ return None
+ return keepNext.val
+
+ @keepNext_val.setter
+ def keepNext_val(self, value):
+ if value is None:
+ self._remove_keepNext()
+ else:
+ self.get_or_add_keepNext().val = value
+
+ @property
+ def pageBreakBefore_val(self):
+ """The value of `pageBreakBefore/@val` or |None| if not present."""
+ pageBreakBefore = self.pageBreakBefore
+ if pageBreakBefore is None:
+ return None
+ return pageBreakBefore.val
+
+ @pageBreakBefore_val.setter
+ def pageBreakBefore_val(self, value):
+ if value is None:
+ self._remove_pageBreakBefore()
+ else:
+ self.get_or_add_pageBreakBefore().val = value
+
+ @property
+ def spacing_after(self):
+ """The value of `w:spacing/@w:after` or |None| if not present."""
+ spacing = self.spacing
+ if spacing is None:
+ return None
+ return spacing.after
+
+ @spacing_after.setter
+ def spacing_after(self, value):
+ if value is None and self.spacing is None:
+ return
+ self.get_or_add_spacing().after = value
+
+ @property
+ def spacing_before(self):
+ """The value of `w:spacing/@w:before` or |None| if not present."""
+ spacing = self.spacing
+ if spacing is None:
+ return None
+ return spacing.before
+
+ @spacing_before.setter
+ def spacing_before(self, value):
+ if value is None and self.spacing is None:
+ return
+ self.get_or_add_spacing().before = value
+
+ @property
+ def spacing_line(self):
+ """The value of `w:spacing/@w:line` or |None| if not present."""
+ spacing = self.spacing
+ if spacing is None:
+ return None
+ return spacing.line
+
+ @spacing_line.setter
+ def spacing_line(self, value):
+ if value is None and self.spacing is None:
+ return
+ self.get_or_add_spacing().line = value
+
+ @property
+ def spacing_lineRule(self):
+ """The value of `w:spacing/@w:lineRule` as a member of the :ref:`WdLineSpacing`
+ enumeration.
+
+ Only the `MULTIPLE`, `EXACTLY`, and `AT_LEAST` members are used. It is the
+ responsibility of the client to calculate the use of `SINGLE`, `DOUBLE`, and
+ `MULTIPLE` based on the value of `w:spacing/@w:line` if that behavior is
+ desired.
+ """
+ spacing = self.spacing
+ if spacing is None:
+ return None
+ lineRule = spacing.lineRule
+ if lineRule is None and spacing.line is not None:
+ return WD_LINE_SPACING.MULTIPLE
+ return lineRule
+
+ @spacing_lineRule.setter
+ def spacing_lineRule(self, value):
+ if value is None and self.spacing is None:
+ return
+ self.get_or_add_spacing().lineRule = value
+
+ @property
+ def style(self) -> str | None:
+ """String contained in `./w:pStyle/@val`, or None if child is not present."""
+ pStyle = self.pStyle
+ if pStyle is None:
+ return None
+ return pStyle.val
+
+ @style.setter
+ def style(self, style: str | None):
+ """Set `./w:pStyle/@val` `style`, adding a new element if necessary.
+
+ If `style` is |None|, remove `./w:pStyle` when present.
+ """
+ if style is None:
+ self._remove_pStyle()
+ return
+ pStyle = self.get_or_add_pStyle()
+ pStyle.val = style
+
+ @property
+ def widowControl_val(self):
+ """The value of `widowControl/@val` or |None| if not present."""
+ widowControl = self.widowControl
+ if widowControl is None:
+ return None
+ return widowControl.val
+
+ @widowControl_val.setter
+ def widowControl_val(self, value):
+ if value is None:
+ self._remove_widowControl()
+ else:
+ self.get_or_add_widowControl().val = value
+
+
+class CT_Spacing(BaseOxmlElement):
+ """``<w:spacing>`` element, specifying paragraph spacing attributes such as space
+ before and line spacing."""
+
+ # -- all four attributes are optional; they are read and written through the
+ # -- `spacing_*` properties of `CT_PPr` --
+ after = OptionalAttribute("w:after", ST_TwipsMeasure)
+ before = OptionalAttribute("w:before", ST_TwipsMeasure)
+ # -- unlike `after` and `before`, `line` is declared with the signed twips type --
+ line = OptionalAttribute("w:line", ST_SignedTwipsMeasure)
+ # -- when `lineRule` is absent but `line` is present, `CT_PPr.spacing_lineRule`
+ # -- reports `WD_LINE_SPACING.MULTIPLE` --
+ lineRule = OptionalAttribute("w:lineRule", WD_LINE_SPACING)
+
+
+class CT_TabStop(BaseOxmlElement):
+ """`<w:tab>` element, representing an individual tab stop.
+
+ Overloaded to use for a tab-character in a run, which also uses the w:tab tag but
+ only needs a __str__ method.
+ """
+
+ # -- tab alignment, declared as a required attribute mapped through `WD_TAB_ALIGNMENT` --
+ val: WD_TAB_ALIGNMENT = RequiredAttribute( # pyright: ignore[reportAssignmentType]
+ "w:val", WD_TAB_ALIGNMENT
+ )
+ # -- the leader is optional, with `WD_TAB_LEADER.SPACES` declared as its default --
+ leader: WD_TAB_LEADER | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:leader", WD_TAB_LEADER, default=WD_TAB_LEADER.SPACES
+ )
+ # -- `CT_TabStops.insert_tab_in_order()` compares this value to place a new tab stop --
+ pos: Length = RequiredAttribute( # pyright: ignore[reportAssignmentType]
+ "w:pos", ST_SignedTwipsMeasure
+ )
+
+ def __str__(self) -> str:
+ """Text equivalent of a `w:tab` element appearing in a run.
+
+ Allows text of run inner-content to be accessed consistently across all text
+ inner-content.
+ """
+ # -- constant result; no attribute of the element is consulted --
+ return "\t"
+
+
+class CT_TabStops(BaseOxmlElement):
+ """``<w:tabs>`` element, container for a sorted sequence of tab stops."""
+
+ tab = OneOrMore("w:tab", successors=())
+
+ def insert_tab_in_order(self, pos, align, leader):
+ """Insert a newly created `w:tab` child element in `pos` order."""
+ new_tab = self._new_tab()
+ new_tab.pos, new_tab.val, new_tab.leader = pos, align, leader
+ for tab in self.tab_lst:
+ if new_tab.pos < tab.pos:
+ tab.addprevious(new_tab)
+ return new_tab
+ self.append(new_tab)
+ return new_tab
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
new file mode 100644
index 00000000..88efae83
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
@@ -0,0 +1,276 @@
+"""Custom element classes related to text runs (CT_R)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, Iterator, List
+
+from docx.oxml.drawing import CT_Drawing
+from docx.oxml.ns import qn
+from docx.oxml.simpletypes import ST_BrClear, ST_BrType
+from docx.oxml.text.font import CT_RPr
+from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
+from docx.shared import TextAccumulator
+
+if TYPE_CHECKING:
+ from docx.oxml.shape import CT_Anchor, CT_Inline
+ from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+ from docx.oxml.text.parfmt import CT_TabStop
+
+# ------------------------------------------------------------------------------------
+# Run-level elements
+
+
+class CT_R(BaseOxmlElement):
+ """`<w:r>` element, containing the properties and text for a run."""
+
+ add_br: Callable[[], CT_Br]
+ add_tab: Callable[[], CT_TabStop]
+ get_or_add_rPr: Callable[[], CT_RPr]
+ _add_drawing: Callable[[], CT_Drawing]
+ _add_t: Callable[..., CT_Text]
+
+ rPr: CT_RPr | None = ZeroOrOne("w:rPr") # pyright: ignore[reportAssignmentType]
+ br = ZeroOrMore("w:br")
+ cr = ZeroOrMore("w:cr")
+ drawing = ZeroOrMore("w:drawing")
+ t = ZeroOrMore("w:t")
+ tab = ZeroOrMore("w:tab")
+
+ def add_t(self, text: str) -> CT_Text:
+ """Return a newly added `<w:t>` element containing `text`."""
+ t = self._add_t(text=text)
+ if len(text.strip()) < len(text):
+ t.set(qn("xml:space"), "preserve")
+ return t
+
+ def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
+ """Return newly appended `CT_Drawing` (`w:drawing`) child element.
+
+ The `w:drawing` element has `inline_or_anchor` as its child.
+ """
+ drawing = self._add_drawing()
+ drawing.append(inline_or_anchor)
+ return drawing
+
+ def clear_content(self) -> None:
+ """Remove all child elements except a `w:rPr` element if present."""
+ # -- remove all run inner-content except a `w:rPr` when present. --
+ for e in self.xpath("./*[not(self::w:rPr)]"):
+ self.remove(e)
+
+ @property
+ def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
+ """Text of run, possibly punctuated by `w:lastRenderedPageBreak` elements."""
+ from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+
+ accum = TextAccumulator()
+
+ def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
+ for e in self.xpath(
+ "w:br"
+ " | w:cr"
+ " | w:drawing"
+ " | w:lastRenderedPageBreak"
+ " | w:noBreakHyphen"
+ " | w:ptab"
+ " | w:t"
+ " | w:tab"
+ ):
+ if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
+ yield from accum.pop()
+ yield e
+ else:
+ accum.push(str(e))
+
+ # -- don't forget the "tail" string --
+ yield from accum.pop()
+
+ return list(iter_items())
+
+ @property
+ def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+ """All `w:lastRenderedPageBreaks` descendants of this run."""
+ return self.xpath("./w:lastRenderedPageBreak")
+
+ @property
+ def style(self) -> str | None:
+ """String contained in `w:val` attribute of `w:rStyle` grandchild.
+
+ |None| if that element is not present.
+ """
+ rPr = self.rPr
+ if rPr is None:
+ return None
+ return rPr.style
+
+ @style.setter
+ def style(self, style: str | None):
+ """Set character style of this `w:r` element to `style`.
+
+ If `style` is None, remove the style element.
+ """
+ rPr = self.get_or_add_rPr()
+ rPr.style = style
+
+ @property
+ def text(self) -> str:
+ """The textual content of this run.
+
+ Inner-content child elements like `w:tab` are translated to their text
+ equivalent.
+ """
+ return "".join(
+ str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
+ )
+
+ @text.setter
+ def text(self, text: str): # pyright: ignore[reportIncompatibleMethodOverride]
+ self.clear_content()
+ _RunContentAppender.append_to_run_from_text(self, text)
+
+ def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
+ self.insert(0, rPr)
+ return rPr
+
+
+# ------------------------------------------------------------------------------------
+# Run inner-content elements
+
+
+class CT_Br(BaseOxmlElement):
+ """`<w:br>` element, indicating a line, page, or column break in a run."""
+
+ type: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:type", ST_BrType, default="textWrapping"
+ )
+ clear: str | None = OptionalAttribute("w:clear", ST_BrClear) # pyright: ignore
+
+ def __str__(self) -> str:
+ """Text equivalent of this element. Actual value depends on break type.
+
+ A line break is translated as "\n". Column and page breaks produce the empty
+ string ("").
+
+ This allows the text of run inner-content to be accessed in a consistent way
+ for all run inner-context text elements.
+ """
+ return "\n" if self.type == "textWrapping" else ""
+
+
+class CT_Cr(BaseOxmlElement):
+ r"""`<w:cr>` element, representing a carriage-return (0x0D) character within a run.
+
+ In Word, this represents a "soft carriage-return" in the sense that it does not end
+ the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
+ text equivalent is considered to be newline ("\n") since in plain-text that's the
+ closest Python equivalent.
+
+ NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
+ `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
+ for many elements.
+ """
+
+ def __str__(self) -> str:
+ r"""Text equivalent of this element, a single newline ("\n")."""
+ return "\n"
+
+
+class CT_NoBreakHyphen(BaseOxmlElement):
+ """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.
+
+ This maps to a plain-text dash ("-").
+
+ NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
+ maps to `CT_Empty`. This name was added to give it behavior distinguished from the
+ many other elements represented in the schema by CT_Empty.
+ """
+
+ def __str__(self) -> str:
+ """Text equivalent of this element, a single dash character ("-")."""
+ # -- `CT_R.text` and `CT_R.inner_content_items` obtain this value via `str()` --
+ return "-"
+
+
+class CT_PTab(BaseOxmlElement):
+ """`<w:ptab>` element, representing an absolute-position tab character within a run.
+
+ This character advances the rendering position to the specified position regardless
+ of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
+ """
+
+ def __str__(self) -> str:
+ r"""Text equivalent of this element, a single tab ("\t") character.
+
+ This allows the text of run inner-content to be accessed in a consistent way
+ for all run inner-content text elements.
+ """
+ return "\t"
+
+
+# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
+# -- element class provides the __str__() method for this empty element, unconditionally
+# -- returning "\t".
+
+
+class CT_Text(BaseOxmlElement):
+ """`<w:t>` element, containing a sequence of characters within a run."""
+
+ def __str__(self) -> str:
+ """Text contained in this element, the empty string if it has no content.
+
+ This property allows this run inner-content element to be queried for its text
+ the same way as other run-content elements are. In particular, this never
+ returns None, as etree._Element does when there is no content.
+ """
+ return self.text or ""
+
+
+# ------------------------------------------------------------------------------------
+# Utility
+
+
+class _RunContentAppender:
+ """Translates a Python string into run content elements appended in a `w:r` element.
+
+ Contiguous sequences of regular characters are appended in a single `<w:t>` element.
+ Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a
+ newline or carriage return character ('\n', '\r') causes a `<w:cr>` element to be
+ appended.
+ """
+
+ def __init__(self, r: CT_R):
+ self._r = r
+ self._bfr: List[str] = []
+
+ @classmethod
+ def append_to_run_from_text(cls, r: CT_R, text: str):
+ """Append inner-content elements for `text` to `r` element."""
+ appender = cls(r)
+ appender.add_text(text)
+
+ def add_text(self, text: str):
+ """Append inner-content elements for `text` to the `w:r` element."""
+ for char in text:
+ self.add_char(char)
+ self.flush()
+
+ def add_char(self, char: str):
+ """Process next character of input through finite state maching (FSM).
+
+ There are two possible states, buffer pending and not pending, but those are
+ hidden behind the `.flush()` method which must be called at the end of text to
+ ensure any pending `<w:t>` element is written.
+ """
+ if char == "\t":
+ self.flush()
+ self._r.add_tab()
+ elif char in "\r\n":
+ self.flush()
+ self._r.add_br()
+ else:
+ self._bfr.append(char)
+
+ def flush(self):
+ text = "".join(self._bfr)
+ if text:
+ self._r.add_t(text)
+ self._bfr.clear()