# Provenance: derived from `.venv/lib/python3.12/site-packages/docx/oxml/text/run.py`
# as added by commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2
# (S. Solomon Darnell, Fri, 28 Mar 2025 21:52:21 -0500, "two version of R2R are here").

"""Custom element classes related to text runs (CT_R)."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Iterator, List

from docx.oxml.drawing import CT_Drawing
from docx.oxml.ns import qn
from docx.oxml.simpletypes import ST_BrClear, ST_BrType
from docx.oxml.text.font import CT_RPr
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
from docx.shared import TextAccumulator

if TYPE_CHECKING:
    from docx.oxml.shape import CT_Anchor, CT_Inline
    from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
    from docx.oxml.text.parfmt import CT_TabStop

# ------------------------------------------------------------------------------------
# Run-level elements


class CT_R(BaseOxmlElement):
    """`<w:r>` element, containing the properties and text for a run."""

    # -- Annotation-only declarations for the type-checker. Presumably these methods are
    # -- generated from the child-element declarations below by the xmlchemy machinery;
    # -- confirm against `docx.oxml.xmlchemy`.
    add_br: Callable[[], CT_Br]
    add_tab: Callable[[], CT_TabStop]
    get_or_add_rPr: Callable[[], CT_RPr]
    _add_drawing: Callable[[], CT_Drawing]
    _add_t: Callable[..., CT_Text]

    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
    br = ZeroOrMore("w:br")
    cr = ZeroOrMore("w:cr")
    drawing = ZeroOrMore("w:drawing")
    t = ZeroOrMore("w:t")
    tab = ZeroOrMore("w:tab")

    def add_t(self, text: str) -> CT_Text:
        """Return a newly added `<w:t>` element containing `text`.

        The `xml:space="preserve"` attribute is set on the new element whenever `text`
        has leading or trailing whitespace.
        """
        t = self._add_t(text=text)
        # -- `.strip()` can only shorten the string, so inequality here means there is
        # -- whitespace at one or both ends.
        if text != text.strip():
            t.set(qn("xml:space"), "preserve")
        return t

    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
        """Return newly appended `CT_Drawing` (`w:drawing`) child element.

        The `w:drawing` element has `inline_or_anchor` as its child.
        """
        drawing = self._add_drawing()
        drawing.append(inline_or_anchor)
        return drawing

    def clear_content(self) -> None:
        """Remove all child elements except a `w:rPr` element if present."""
        # -- remove all run inner-content except a `w:rPr` when present. --
        for e in self.xpath("./*[not(self::w:rPr)]"):
            self.remove(e)

    @property
    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
        """Text of run, punctuated by `w:drawing` and `w:lastRenderedPageBreak` elements.

        Each `CT_Drawing` or `CT_LastRenderedPageBreak` child is an item of its own. The
        text equivalent (`str(e)`) of every other matched child is pushed onto a
        `TextAccumulator`, which is popped before each such element and once more at the
        end. Presumably each pop yields the accumulated text as a single `str` item when
        there is any; confirm against `docx.shared.TextAccumulator`.
        """
        # -- imported here rather than at module level; the module-level import of this
        # -- name is under `TYPE_CHECKING` only, but `isinstance()` below needs the class
        # -- at run-time.
        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak

        accum = TextAccumulator()

        def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
            for e in self.xpath(
                "w:br"
                " | w:cr"
                " | w:drawing"
                " | w:lastRenderedPageBreak"
                " | w:noBreakHyphen"
                " | w:ptab"
                " | w:t"
                " | w:tab"
            ):
                if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
                    yield from accum.pop()
                    yield e
                else:
                    accum.push(str(e))

            # -- don't forget the "tail" string --
            yield from accum.pop()

        return list(iter_items())

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` child elements of this run."""
        return self.xpath("./w:lastRenderedPageBreak")

    @property
    def style(self) -> str | None:
        """String contained in `w:val` attribute of `w:rStyle` grandchild.

        |None| if that element is not present.
        """
        rPr = self.rPr
        if rPr is None:
            return None
        return rPr.style

    @style.setter
    def style(self, style: str | None):
        """Set character style of this `w:r` element to `style`.

        If `style` is None, remove the style element. A `w:rPr` child is added when not
        already present, including when `style` is None.
        """
        rPr = self.get_or_add_rPr()
        rPr.style = style

    @property
    def text(self) -> str:
        """The textual content of this run.

        Inner-content child elements like `w:tab` are translated to their text
        equivalent. Only `w:br`, `w:cr`, `w:noBreakHyphen`, `w:ptab`, `w:t` and `w:tab`
        children contribute; the result is the concatenation of `str()` of each.
        """
        return "".join(
            str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
        )

    @text.setter
    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
        """Replace run content, other than any `w:rPr`, with elements for `text`."""
        self.clear_content()
        _RunContentAppender.append_to_run_from_text(self, text)

    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
        """Insert `rPr` as the first child of this run and return it."""
        self.insert(0, rPr)
        return rPr


# ------------------------------------------------------------------------------------
# Run inner-content elements


class CT_Br(BaseOxmlElement):
    """`<w:br>` element, indicating a line, page, or column break in a run."""

    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", ST_BrType, default="textWrapping"
    )
    clear: str | None = OptionalAttribute("w:clear", ST_BrClear)  # pyright: ignore

    def __str__(self) -> str:
        r"""Text equivalent of this element. Actual value depends on break type.

        A line break is translated as "\n". Column and page breaks produce the empty
        string ("").

        This allows the text of run inner-content to be accessed in a consistent way
        for all run inner-context text elements.
        """
        return "\n" if self.type == "textWrapping" else ""


class CT_Cr(BaseOxmlElement):
    r"""`<w:cr>` element, representing a carriage-return (0x0D) character within a run.

    In Word, this represents a "soft carriage-return" in the sense that it does not end
    the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
    text equivalent is considered to be newline ("\n") since in plain-text that's the
    closest Python equivalent.

    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
    `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
    for many elements.
    """

    def __str__(self) -> str:
        r"""Text equivalent of this element, a single newline ("\n")."""
        return "\n"


class CT_NoBreakHyphen(BaseOxmlElement):
    """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.

    This maps to a plain-text dash ("-").

    NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
    maps to `CT_Empty`. This name was added to give it behavior distinguished from the
    many other elements represented in the schema by CT_Empty.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single dash character ("-")."""
        return "-"


class CT_PTab(BaseOxmlElement):
    """`<w:ptab>` element, representing an absolute-position tab character within a run.

    This character advances the rendering position to the specified position regardless
    of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
    """

    def __str__(self) -> str:
        r"""Text equivalent of this element, a single tab ("\t") character.

        This allows the text of run inner-content to be accessed in a consistent way
        for all run inner-context text elements.
        """
        return "\t"


# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
# -- element class provides the __str__() method for this empty element, unconditionally
# -- returning "\t".


class CT_Text(BaseOxmlElement):
    """`<w:t>` element, containing a sequence of characters within a run."""

    def __str__(self) -> str:
        """Text contained in this element, the empty string if it has no content.

        This property allows this run inner-content element to be queried for its text
        the same way as other run-content elements are. In particular, this never
        returns None, as etree._Element does when there is no content.
        """
        return self.text or ""


# ------------------------------------------------------------------------------------
# Utility


class _RunContentAppender:
    r"""Translates a Python string into run content elements appended in a `w:r` element.

    Contiguous sequences of regular characters are appended in a single `<w:t>` element.
    Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a
    newline or carriage return character ('\n', '\r') causes a `<w:br/>` element to be
    appended.
    """

    def __init__(self, r: CT_R):
        # -- the `w:r` element content is appended to --
        self._r = r
        # -- regular characters seen since the last flush, pending a `w:t` element --
        self._bfr: List[str] = []

    @classmethod
    def append_to_run_from_text(cls, r: CT_R, text: str):
        """Append inner-content elements for `text` to `r` element."""
        appender = cls(r)
        appender.add_text(text)

    def add_text(self, text: str):
        """Append inner-content elements for `text` to the `w:r` element."""
        for char in text:
            self.add_char(char)
        # -- write out any regular characters still pending after the last character --
        self.flush()

    def add_char(self, char: str):
        """Process next character of input through finite state machine (FSM).

        There are two possible states, buffer pending and not pending, but those are
        hidden behind the `.flush()` method which must be called at the end of text to
        ensure any pending `<w:t>` element is written.
        """
        if char == "\t":
            self.flush()
            self._r.add_tab()
        # -- NOTE(review): characters are handled one at a time, so a "\r\n" pair in the
        # -- input appends two breaks, one for each character.
        elif char in "\r\n":
            self.flush()
            self._r.add_br()
        else:
            self._bfr.append(char)

    def flush(self):
        """Append buffered characters, if any, as one `w:t` element; empty the buffer."""
        text = "".join(self._bfr)
        if text:
            self._r.add_t(text)
        self._bfr.clear()