aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/oxml/text/run.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/run.py276
1 files changed, 276 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
new file mode 100644
index 00000000..88efae83
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/run.py
@@ -0,0 +1,276 @@
+"""Custom element classes related to text runs (CT_R)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable, Iterator, List
+
+from docx.oxml.drawing import CT_Drawing
+from docx.oxml.ns import qn
+from docx.oxml.simpletypes import ST_BrClear, ST_BrType
+from docx.oxml.text.font import CT_RPr
+from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
+from docx.shared import TextAccumulator
+
+if TYPE_CHECKING:
+ from docx.oxml.shape import CT_Anchor, CT_Inline
+ from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+ from docx.oxml.text.parfmt import CT_TabStop
+
+# ------------------------------------------------------------------------------------
+# Run-level elements
+
+
+class CT_R(BaseOxmlElement):
+ """`<w:r>` element, containing the properties and text for a run."""
+
+ add_br: Callable[[], CT_Br]
+ add_tab: Callable[[], CT_TabStop]
+ get_or_add_rPr: Callable[[], CT_RPr]
+ _add_drawing: Callable[[], CT_Drawing]
+ _add_t: Callable[..., CT_Text]
+
+ rPr: CT_RPr | None = ZeroOrOne("w:rPr") # pyright: ignore[reportAssignmentType]
+ br = ZeroOrMore("w:br")
+ cr = ZeroOrMore("w:cr")
+ drawing = ZeroOrMore("w:drawing")
+ t = ZeroOrMore("w:t")
+ tab = ZeroOrMore("w:tab")
+
+ def add_t(self, text: str) -> CT_Text:
+ """Return a newly added `<w:t>` element containing `text`."""
+ t = self._add_t(text=text)
+ if len(text.strip()) < len(text):
+ t.set(qn("xml:space"), "preserve")
+ return t
+
+ def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
+ """Return newly appended `CT_Drawing` (`w:drawing`) child element.
+
+ The `w:drawing` element has `inline_or_anchor` as its child.
+ """
+ drawing = self._add_drawing()
+ drawing.append(inline_or_anchor)
+ return drawing
+
+ def clear_content(self) -> None:
+ """Remove all child elements except a `w:rPr` element if present."""
+ # -- remove all run inner-content except a `w:rPr` when present. --
+ for e in self.xpath("./*[not(self::w:rPr)]"):
+ self.remove(e)
+
+ @property
+ def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
+ """Text of run, possibly punctuated by `w:lastRenderedPageBreak` elements."""
+ from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
+
+ accum = TextAccumulator()
+
+ def iter_items() -> Iterator[str | CT_Drawing | CT_LastRenderedPageBreak]:
+ for e in self.xpath(
+ "w:br"
+ " | w:cr"
+ " | w:drawing"
+ " | w:lastRenderedPageBreak"
+ " | w:noBreakHyphen"
+ " | w:ptab"
+ " | w:t"
+ " | w:tab"
+ ):
+ if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
+ yield from accum.pop()
+ yield e
+ else:
+ accum.push(str(e))
+
+ # -- don't forget the "tail" string --
+ yield from accum.pop()
+
+ return list(iter_items())
+
+ @property
+ def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
+ """All `w:lastRenderedPageBreaks` descendants of this run."""
+ return self.xpath("./w:lastRenderedPageBreak")
+
+ @property
+ def style(self) -> str | None:
+ """String contained in `w:val` attribute of `w:rStyle` grandchild.
+
+ |None| if that element is not present.
+ """
+ rPr = self.rPr
+ if rPr is None:
+ return None
+ return rPr.style
+
+ @style.setter
+ def style(self, style: str | None):
+ """Set character style of this `w:r` element to `style`.
+
+ If `style` is None, remove the style element.
+ """
+ rPr = self.get_or_add_rPr()
+ rPr.style = style
+
+ @property
+ def text(self) -> str:
+ """The textual content of this run.
+
+ Inner-content child elements like `w:tab` are translated to their text
+ equivalent.
+ """
+ return "".join(
+ str(e) for e in self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
+ )
+
+ @text.setter
+ def text(self, text: str): # pyright: ignore[reportIncompatibleMethodOverride]
+ self.clear_content()
+ _RunContentAppender.append_to_run_from_text(self, text)
+
+ def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
+ self.insert(0, rPr)
+ return rPr
+
+
+# ------------------------------------------------------------------------------------
+# Run inner-content elements
+
+
+class CT_Br(BaseOxmlElement):
+ """`<w:br>` element, indicating a line, page, or column break in a run."""
+
+ type: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType]
+ "w:type", ST_BrType, default="textWrapping"
+ )
+ clear: str | None = OptionalAttribute("w:clear", ST_BrClear) # pyright: ignore
+
+ def __str__(self) -> str:
+ """Text equivalent of this element. Actual value depends on break type.
+
+ A line break is translated as "\n". Column and page breaks produce the empty
+ string ("").
+
+ This allows the text of run inner-content to be accessed in a consistent way
+ for all run inner-context text elements.
+ """
+ return "\n" if self.type == "textWrapping" else ""
+
+
+class CT_Cr(BaseOxmlElement):
+ """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.
+
+ In Word, this represents a "soft carriage-return" in the sense that it does not end
+ the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
+ text equivalent is considered to be newline ("\n") since in plain-text that's the
+ closest Python equivalent.
+
+ NOTE: this complex-type name does not exist in the schema, where `w:tab` maps to
+ `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
+ for many elements.
+ """
+
+ def __str__(self) -> str:
+ """Text equivalent of this element, a single newline ("\n")."""
+ return "\n"
+
+
+class CT_NoBreakHyphen(BaseOxmlElement):
+ """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.
+
+ This maps to a plain-text dash ("-").
+
+ NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
+ maps to `CT_Empty`. This name was added to give it behavior distinguished from the
+ many other elements represented in the schema by CT_Empty.
+ """
+
+ def __str__(self) -> str:
+ """Text equivalent of this element, a single dash character ("-")."""
+ return "-"
+
+
+class CT_PTab(BaseOxmlElement):
+ """`<w:ptab>` element, representing an absolute-position tab character within a run.
+
+ This character advances the rendering position to the specified position regardless
+ of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
+ """
+
+ def __str__(self) -> str:
+ """Text equivalent of this element, a single tab ("\t") character.
+
+ This allows the text of run inner-content to be accessed in a consistent way
+ for all run inner-context text elements.
+ """
+ return "\t"
+
+
+# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
+# -- element class provides the __str__() method for this empty element, unconditionally
+# -- returning "\t".
+
+
+class CT_Text(BaseOxmlElement):
+ """`<w:t>` element, containing a sequence of characters within a run."""
+
+ def __str__(self) -> str:
+ """Text contained in this element, the empty string if it has no content.
+
+ This property allows this run inner-content element to be queried for its text
+ the same way as other run-content elements are. In particular, this never
+ returns None, as etree._Element does when there is no content.
+ """
+ return self.text or ""
+
+
+# ------------------------------------------------------------------------------------
+# Utility
+
+
+class _RunContentAppender:
+ """Translates a Python string into run content elements appended in a `w:r` element.
+
+ Contiguous sequences of regular characters are appended in a single `<w:t>` element.
+ Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a
+ newline or carriage return character ('\n', '\r') causes a `<w:cr>` element to be
+ appended.
+ """
+
+ def __init__(self, r: CT_R):
+ self._r = r
+ self._bfr: List[str] = []
+
+ @classmethod
+ def append_to_run_from_text(cls, r: CT_R, text: str):
+ """Append inner-content elements for `text` to `r` element."""
+ appender = cls(r)
+ appender.add_text(text)
+
+ def add_text(self, text: str):
+ """Append inner-content elements for `text` to the `w:r` element."""
+ for char in text:
+ self.add_char(char)
+ self.flush()
+
+ def add_char(self, char: str):
+ """Process next character of input through finite state maching (FSM).
+
+ There are two possible states, buffer pending and not pending, but those are
+ hidden behind the `.flush()` method which must be called at the end of text to
+ ensure any pending `<w:t>` element is written.
+ """
+ if char == "\t":
+ self.flush()
+ self._r.add_tab()
+ elif char in "\r\n":
+ self.flush()
+ self._r.add_br()
+ else:
+ self._bfr.append(char)
+
+ def flush(self):
+ text = "".join(self._bfr)
+ if text:
+ self._r.add_t(text)
+ self._bfr.clear()