diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/oxml')
23 files changed, 5801 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/__init__.py b/.venv/lib/python3.12/site-packages/docx/oxml/__init__.py new file mode 100644 index 00000000..bf32932f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/__init__.py @@ -0,0 +1,243 @@ +"""Initializes oxml sub-package. + +This includes registering custom element classes corresponding to Open XML elements. +""" + +from __future__ import annotations + +from docx.oxml.drawing import CT_Drawing +from docx.oxml.parser import OxmlElement, parse_xml, register_element_cls +from docx.oxml.shape import ( + CT_Anchor, + CT_Blip, + CT_BlipFillProperties, + CT_GraphicalObject, + CT_GraphicalObjectData, + CT_Inline, + CT_NonVisualDrawingProps, + CT_Picture, + CT_PictureNonVisual, + CT_Point2D, + CT_PositiveSize2D, + CT_ShapeProperties, + CT_Transform2D, +) +from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String +from docx.oxml.text.hyperlink import CT_Hyperlink +from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak +from docx.oxml.text.run import ( + CT_R, + CT_Br, + CT_Cr, + CT_NoBreakHyphen, + CT_PTab, + CT_Text, +) + +# -- `OxmlElement` and `parse_xml()` are not used in this module but several downstream +# -- "extension" packages expect to find them here and there's no compelling reason +# -- not to republish them here so those keep working. 
+__all__ = ["OxmlElement", "parse_xml"] + +# --------------------------------------------------------------------------- +# DrawingML-related elements + +register_element_cls("a:blip", CT_Blip) +register_element_cls("a:ext", CT_PositiveSize2D) +register_element_cls("a:graphic", CT_GraphicalObject) +register_element_cls("a:graphicData", CT_GraphicalObjectData) +register_element_cls("a:off", CT_Point2D) +register_element_cls("a:xfrm", CT_Transform2D) +register_element_cls("pic:blipFill", CT_BlipFillProperties) +register_element_cls("pic:cNvPr", CT_NonVisualDrawingProps) +register_element_cls("pic:nvPicPr", CT_PictureNonVisual) +register_element_cls("pic:pic", CT_Picture) +register_element_cls("pic:spPr", CT_ShapeProperties) +register_element_cls("w:drawing", CT_Drawing) +register_element_cls("wp:anchor", CT_Anchor) +register_element_cls("wp:docPr", CT_NonVisualDrawingProps) +register_element_cls("wp:extent", CT_PositiveSize2D) +register_element_cls("wp:inline", CT_Inline) + +# --------------------------------------------------------------------------- +# hyperlink-related elements + +register_element_cls("w:hyperlink", CT_Hyperlink) + +# --------------------------------------------------------------------------- +# text-related elements + +register_element_cls("w:br", CT_Br) +register_element_cls("w:cr", CT_Cr) +register_element_cls("w:lastRenderedPageBreak", CT_LastRenderedPageBreak) +register_element_cls("w:noBreakHyphen", CT_NoBreakHyphen) +register_element_cls("w:ptab", CT_PTab) +register_element_cls("w:r", CT_R) +register_element_cls("w:t", CT_Text) + +# --------------------------------------------------------------------------- +# header/footer-related mappings + +register_element_cls("w:evenAndOddHeaders", CT_OnOff) +register_element_cls("w:titlePg", CT_OnOff) + +# --------------------------------------------------------------------------- +# other custom element class mappings + +from .coreprops import CT_CoreProperties # noqa + 
+register_element_cls("cp:coreProperties", CT_CoreProperties) + +from .document import CT_Body, CT_Document # noqa + +register_element_cls("w:body", CT_Body) +register_element_cls("w:document", CT_Document) + +from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr # noqa + +register_element_cls("w:abstractNumId", CT_DecimalNumber) +register_element_cls("w:ilvl", CT_DecimalNumber) +register_element_cls("w:lvlOverride", CT_NumLvl) +register_element_cls("w:num", CT_Num) +register_element_cls("w:numId", CT_DecimalNumber) +register_element_cls("w:numPr", CT_NumPr) +register_element_cls("w:numbering", CT_Numbering) +register_element_cls("w:startOverride", CT_DecimalNumber) + +from .section import ( # noqa + CT_HdrFtr, + CT_HdrFtrRef, + CT_PageMar, + CT_PageSz, + CT_SectPr, + CT_SectType, +) + +register_element_cls("w:footerReference", CT_HdrFtrRef) +register_element_cls("w:ftr", CT_HdrFtr) +register_element_cls("w:hdr", CT_HdrFtr) +register_element_cls("w:headerReference", CT_HdrFtrRef) +register_element_cls("w:pgMar", CT_PageMar) +register_element_cls("w:pgSz", CT_PageSz) +register_element_cls("w:sectPr", CT_SectPr) +register_element_cls("w:type", CT_SectType) + +from .settings import CT_Settings # noqa + +register_element_cls("w:settings", CT_Settings) + +from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles # noqa + +register_element_cls("w:basedOn", CT_String) +register_element_cls("w:latentStyles", CT_LatentStyles) +register_element_cls("w:locked", CT_OnOff) +register_element_cls("w:lsdException", CT_LsdException) +register_element_cls("w:name", CT_String) +register_element_cls("w:next", CT_String) +register_element_cls("w:qFormat", CT_OnOff) +register_element_cls("w:semiHidden", CT_OnOff) +register_element_cls("w:style", CT_Style) +register_element_cls("w:styles", CT_Styles) +register_element_cls("w:uiPriority", CT_DecimalNumber) +register_element_cls("w:unhideWhenUsed", CT_OnOff) + +from .table import ( # noqa + CT_Height, + CT_Row, + 
CT_Tbl, + CT_TblGrid, + CT_TblGridCol, + CT_TblLayoutType, + CT_TblPr, + CT_TblPrEx, + CT_TblWidth, + CT_Tc, + CT_TcPr, + CT_TrPr, + CT_VMerge, + CT_VerticalJc, +) + +register_element_cls("w:bidiVisual", CT_OnOff) +register_element_cls("w:gridAfter", CT_DecimalNumber) +register_element_cls("w:gridBefore", CT_DecimalNumber) +register_element_cls("w:gridCol", CT_TblGridCol) +register_element_cls("w:gridSpan", CT_DecimalNumber) +register_element_cls("w:tbl", CT_Tbl) +register_element_cls("w:tblGrid", CT_TblGrid) +register_element_cls("w:tblLayout", CT_TblLayoutType) +register_element_cls("w:tblPr", CT_TblPr) +register_element_cls("w:tblPrEx", CT_TblPrEx) +register_element_cls("w:tblStyle", CT_String) +register_element_cls("w:tc", CT_Tc) +register_element_cls("w:tcPr", CT_TcPr) +register_element_cls("w:tcW", CT_TblWidth) +register_element_cls("w:tr", CT_Row) +register_element_cls("w:trHeight", CT_Height) +register_element_cls("w:trPr", CT_TrPr) +register_element_cls("w:vAlign", CT_VerticalJc) +register_element_cls("w:vMerge", CT_VMerge) + +from .text.font import ( # noqa + CT_Color, + CT_Fonts, + CT_Highlight, + CT_HpsMeasure, + CT_RPr, + CT_Underline, + CT_VerticalAlignRun, +) + +register_element_cls("w:b", CT_OnOff) +register_element_cls("w:bCs", CT_OnOff) +register_element_cls("w:caps", CT_OnOff) +register_element_cls("w:color", CT_Color) +register_element_cls("w:cs", CT_OnOff) +register_element_cls("w:dstrike", CT_OnOff) +register_element_cls("w:emboss", CT_OnOff) +register_element_cls("w:highlight", CT_Highlight) +register_element_cls("w:i", CT_OnOff) +register_element_cls("w:iCs", CT_OnOff) +register_element_cls("w:imprint", CT_OnOff) +register_element_cls("w:noProof", CT_OnOff) +register_element_cls("w:oMath", CT_OnOff) +register_element_cls("w:outline", CT_OnOff) +register_element_cls("w:rFonts", CT_Fonts) +register_element_cls("w:rPr", CT_RPr) +register_element_cls("w:rStyle", CT_String) +register_element_cls("w:rtl", CT_OnOff) 
+register_element_cls("w:shadow", CT_OnOff) +register_element_cls("w:smallCaps", CT_OnOff) +register_element_cls("w:snapToGrid", CT_OnOff) +register_element_cls("w:specVanish", CT_OnOff) +register_element_cls("w:strike", CT_OnOff) +register_element_cls("w:sz", CT_HpsMeasure) +register_element_cls("w:u", CT_Underline) +register_element_cls("w:vanish", CT_OnOff) +register_element_cls("w:vertAlign", CT_VerticalAlignRun) +register_element_cls("w:webHidden", CT_OnOff) + +from .text.paragraph import CT_P # noqa + +register_element_cls("w:p", CT_P) + +from .text.parfmt import ( # noqa + CT_Ind, + CT_Jc, + CT_PPr, + CT_Spacing, + CT_TabStop, + CT_TabStops, +) + +register_element_cls("w:ind", CT_Ind) +register_element_cls("w:jc", CT_Jc) +register_element_cls("w:keepLines", CT_OnOff) +register_element_cls("w:keepNext", CT_OnOff) +register_element_cls("w:pageBreakBefore", CT_OnOff) +register_element_cls("w:pPr", CT_PPr) +register_element_cls("w:pStyle", CT_String) +register_element_cls("w:spacing", CT_Spacing) +register_element_cls("w:tab", CT_TabStop) +register_element_cls("w:tabs", CT_TabStops) +register_element_cls("w:widowControl", CT_OnOff) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/coreprops.py b/.venv/lib/python3.12/site-packages/docx/oxml/coreprops.py new file mode 100644 index 00000000..8ba9ff42 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/coreprops.py @@ -0,0 +1,298 @@ +"""Custom element classes for core properties-related XML elements.""" + +from __future__ import annotations + +import datetime as dt +import re +from typing import TYPE_CHECKING, Any, Callable + +from docx.oxml.ns import nsdecls, qn +from docx.oxml.parser import parse_xml +from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne + +if TYPE_CHECKING: + from lxml.etree import _Element as etree_Element # pyright: ignore[reportPrivateUsage] + + +class CT_CoreProperties(BaseOxmlElement): + """`<cp:coreProperties>` element, the root element of the Core Properties part. 
+ + Stored as `/docProps/core.xml`. Implements many of the Dublin Core document metadata + elements. String elements resolve to an empty string ("") if the element is not + present in the XML. String elements are limited in length to 255 unicode characters. + """ + + get_or_add_revision: Callable[[], etree_Element] + + category = ZeroOrOne("cp:category", successors=()) + contentStatus = ZeroOrOne("cp:contentStatus", successors=()) + created = ZeroOrOne("dcterms:created", successors=()) + creator = ZeroOrOne("dc:creator", successors=()) + description = ZeroOrOne("dc:description", successors=()) + identifier = ZeroOrOne("dc:identifier", successors=()) + keywords = ZeroOrOne("cp:keywords", successors=()) + language = ZeroOrOne("dc:language", successors=()) + lastModifiedBy = ZeroOrOne("cp:lastModifiedBy", successors=()) + lastPrinted = ZeroOrOne("cp:lastPrinted", successors=()) + modified = ZeroOrOne("dcterms:modified", successors=()) + revision: etree_Element | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "cp:revision", successors=() + ) + subject = ZeroOrOne("dc:subject", successors=()) + title = ZeroOrOne("dc:title", successors=()) + version = ZeroOrOne("cp:version", successors=()) + + _coreProperties_tmpl = "<cp:coreProperties %s/>\n" % nsdecls("cp", "dc", "dcterms") + + @classmethod + def new(cls): + """Return a new `<cp:coreProperties>` element.""" + xml = cls._coreProperties_tmpl + coreProperties = parse_xml(xml) + return coreProperties + + @property + def author_text(self): + """The text in the `dc:creator` child element.""" + return self._text_of_element("creator") + + @author_text.setter + def author_text(self, value: str): + self._set_element_text("creator", value) + + @property + def category_text(self) -> str: + return self._text_of_element("category") + + @category_text.setter + def category_text(self, value: str): + self._set_element_text("category", value) + + @property + def comments_text(self) -> str: + return 
self._text_of_element("description") + + @comments_text.setter + def comments_text(self, value: str): + self._set_element_text("description", value) + + @property + def contentStatus_text(self): + return self._text_of_element("contentStatus") + + @contentStatus_text.setter + def contentStatus_text(self, value: str): + self._set_element_text("contentStatus", value) + + @property + def created_datetime(self): + return self._datetime_of_element("created") + + @created_datetime.setter + def created_datetime(self, value: dt.datetime): + self._set_element_datetime("created", value) + + @property + def identifier_text(self): + return self._text_of_element("identifier") + + @identifier_text.setter + def identifier_text(self, value: str): + self._set_element_text("identifier", value) + + @property + def keywords_text(self): + return self._text_of_element("keywords") + + @keywords_text.setter + def keywords_text(self, value: str): + self._set_element_text("keywords", value) + + @property + def language_text(self): + return self._text_of_element("language") + + @language_text.setter + def language_text(self, value: str): + self._set_element_text("language", value) + + @property + def lastModifiedBy_text(self): + return self._text_of_element("lastModifiedBy") + + @lastModifiedBy_text.setter + def lastModifiedBy_text(self, value: str): + self._set_element_text("lastModifiedBy", value) + + @property + def lastPrinted_datetime(self): + return self._datetime_of_element("lastPrinted") + + @lastPrinted_datetime.setter + def lastPrinted_datetime(self, value: dt.datetime): + self._set_element_datetime("lastPrinted", value) + + @property + def modified_datetime(self) -> dt.datetime | None: + return self._datetime_of_element("modified") + + @modified_datetime.setter + def modified_datetime(self, value: dt.datetime): + self._set_element_datetime("modified", value) + + @property + def revision_number(self): + """Integer value of revision property.""" + revision = self.revision + if 
revision is None: + return 0 + revision_str = str(revision.text) + try: + revision = int(revision_str) + except ValueError: + # non-integer revision strings also resolve to 0 + revision = 0 + # as do negative integers + if revision < 0: + revision = 0 + return revision + + @revision_number.setter + def revision_number(self, value: int): + """Set revision property to string value of integer `value`.""" + if not isinstance(value, int) or value < 1: # pyright: ignore[reportUnnecessaryIsInstance] + tmpl = "revision property requires positive int, got '%s'" + raise ValueError(tmpl % value) + revision = self.get_or_add_revision() + revision.text = str(value) + + @property + def subject_text(self): + return self._text_of_element("subject") + + @subject_text.setter + def subject_text(self, value: str): + self._set_element_text("subject", value) + + @property + def title_text(self): + return self._text_of_element("title") + + @title_text.setter + def title_text(self, value: str): + self._set_element_text("title", value) + + @property + def version_text(self): + return self._text_of_element("version") + + @version_text.setter + def version_text(self, value: str): + self._set_element_text("version", value) + + def _datetime_of_element(self, property_name: str) -> dt.datetime | None: + element = getattr(self, property_name) + if element is None: + return None + datetime_str = element.text + try: + return self._parse_W3CDTF_to_datetime(datetime_str) + except ValueError: + # invalid datetime strings are ignored + return None + + def _get_or_add(self, prop_name: str) -> BaseOxmlElement: + """Return element returned by "get_or_add_" method for `prop_name`.""" + get_or_add_method_name = "get_or_add_%s" % prop_name + get_or_add_method = getattr(self, get_or_add_method_name) + element = get_or_add_method() + return element + + @classmethod + def _offset_dt(cls, dt_: dt.datetime, offset_str: str) -> dt.datetime: + """A |datetime| instance offset from `dt_` by timezone offset in 
`offset_str`. + + `offset_str` is like `"-07:00"`. + """ + match = cls._offset_pattern.match(offset_str) + if match is None: + raise ValueError("'%s' is not a valid offset string" % offset_str) + sign, hours_str, minutes_str = match.groups() + sign_factor = -1 if sign == "+" else 1 + hours = int(hours_str) * sign_factor + minutes = int(minutes_str) * sign_factor + td = dt.timedelta(hours=hours, minutes=minutes) + return dt_ + td + + _offset_pattern = re.compile(r"([+-])(\d\d):(\d\d)") + + @classmethod + def _parse_W3CDTF_to_datetime(cls, w3cdtf_str: str) -> dt.datetime: + # valid W3CDTF date cases: + # yyyy e.g. "2003" + # yyyy-mm e.g. "2003-12" + # yyyy-mm-dd e.g. "2003-12-31" + # UTC timezone e.g. "2003-12-31T10:14:55Z" + # numeric timezone e.g. "2003-12-31T10:14:55-08:00" + templates = ( + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%d", + "%Y-%m", + "%Y", + ) + # strptime isn't smart enough to parse literal timezone offsets like + # "-07:30", so we have to do it ourselves + parseable_part = w3cdtf_str[:19] + offset_str = w3cdtf_str[19:] + dt_ = None + for tmpl in templates: + try: + dt_ = dt.datetime.strptime(parseable_part, tmpl) + except ValueError: + continue + if dt_ is None: + tmpl = "could not parse W3CDTF datetime string '%s'" + raise ValueError(tmpl % w3cdtf_str) + if len(offset_str) == 6: + dt_ = cls._offset_dt(dt_, offset_str) + return dt_.replace(tzinfo=dt.timezone.utc) + + def _set_element_datetime(self, prop_name: str, value: dt.datetime): + """Set date/time value of child element having `prop_name` to `value`.""" + if not isinstance(value, dt.datetime): # pyright: ignore[reportUnnecessaryIsInstance] + tmpl = "property requires <type 'datetime.datetime'> object, got %s" + raise ValueError(tmpl % type(value)) + element = self._get_or_add(prop_name) + dt_str = value.strftime("%Y-%m-%dT%H:%M:%SZ") + element.text = dt_str + if prop_name in ("created", "modified"): + # These two require an explicit "xsi:type="dcterms:W3CDTF"" + # attribute. 
The first and last line are a hack required to add + # the xsi namespace to the root element rather than each child + # element in which it is referenced + self.set(qn("xsi:foo"), "bar") + element.set(qn("xsi:type"), "dcterms:W3CDTF") + del self.attrib[qn("xsi:foo")] + + def _set_element_text(self, prop_name: str, value: Any) -> None: + """Set string value of `name` property to `value`.""" + if not isinstance(value, str): + value = str(value) + + if len(value) > 255: + tmpl = "exceeded 255 char limit for property, got:\n\n'%s'" + raise ValueError(tmpl % value) + element = self._get_or_add(prop_name) + element.text = value + + def _text_of_element(self, property_name: str) -> str: + """The text in the element matching `property_name`. + + The empty string if the element is not present or contains no text. + """ + element = getattr(self, property_name) + if element is None: + return "" + if element.text is None: + return "" + return element.text diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/document.py b/.venv/lib/python3.12/site-packages/docx/oxml/document.py new file mode 100644 index 00000000..36819ef7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/document.py @@ -0,0 +1,88 @@ +"""Custom element classes that correspond to the document part, e.g. <w:document>.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable, List + +from docx.oxml.section import CT_SectPr +from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne + +if TYPE_CHECKING: + from docx.oxml.table import CT_Tbl + from docx.oxml.text.paragraph import CT_P + + +class CT_Document(BaseOxmlElement): + """``<w:document>`` element, the root element of a document.xml file.""" + + body: CT_Body = ZeroOrOne("w:body") # pyright: ignore[reportAssignmentType] + + @property + def sectPr_lst(self) -> List[CT_SectPr]: + """All `w:sectPr` elements directly accessible from document element. 
+ + Note this does not include a `sectPr` child in a paragraph wrapped in + revision marks or other intervening layer, perhaps `w:sdt` or customXml + elements. + + `w:sectPr` elements appear in document order. The last one is always + `w:body/w:sectPr`, all preceding are `w:p/w:pPr/w:sectPr`. + """ + xpath = "./w:body/w:p/w:pPr/w:sectPr | ./w:body/w:sectPr" + return self.xpath(xpath) + + +class CT_Body(BaseOxmlElement): + """`w:body`, the container element for the main document story in `document.xml`.""" + + add_p: Callable[[], CT_P] + get_or_add_sectPr: Callable[[], CT_SectPr] + p_lst: List[CT_P] + tbl_lst: List[CT_Tbl] + + _insert_tbl: Callable[[CT_Tbl], CT_Tbl] + + p = ZeroOrMore("w:p", successors=("w:sectPr",)) + tbl = ZeroOrMore("w:tbl", successors=("w:sectPr",)) + sectPr: CT_SectPr | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:sectPr", successors=() + ) + + def add_section_break(self) -> CT_SectPr: + """Return `w:sectPr` element for new section added at end of document. + + The last `w:sectPr` becomes the second-to-last, with the new `w:sectPr` being an + exact clone of the previous one, except that all header and footer references + are removed (and are therefore now "inherited" from the prior section). + + A copy of the previously-last `w:sectPr` will now appear in a new `w:p` at the + end of the document. The returned `w:sectPr` is the sentinel `w:sectPr` for the + document (and as implemented, `is` the prior sentinel `w:sectPr` with headers + and footers removed). 
+ """ + # ---get the sectPr at file-end, which controls last section (sections[-1])--- + sentinel_sectPr = self.get_or_add_sectPr() + # ---add exact copy to new `w:p` element; that is now second-to last section--- + self.add_p().set_sectPr(sentinel_sectPr.clone()) + # ---remove any header or footer references from "new" last section--- + for hdrftr_ref in sentinel_sectPr.xpath("w:headerReference|w:footerReference"): + sentinel_sectPr.remove(hdrftr_ref) + # ---the sentinel `w:sectPr` now controls the new last section--- + return sentinel_sectPr + + def clear_content(self): + """Remove all content child elements from this <w:body> element. + + Leave the <w:sectPr> element if it is present. + """ + for content_elm in self.xpath("./*[not(self::w:sectPr)]"): + self.remove(content_elm) + + @property + def inner_content_elements(self) -> List[CT_P | CT_Tbl]: + """Generate all `w:p` and `w:tbl` elements in this document-body. + + Elements appear in document order. Elements shaded by nesting in a `w:ins` or + other "wrapper" element will not be included. + """ + return self.xpath("./w:p | ./w:tbl") diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/drawing.py b/.venv/lib/python3.12/site-packages/docx/oxml/drawing.py new file mode 100644 index 00000000..5b627f97 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/drawing.py @@ -0,0 +1,11 @@ +"""Custom element-classes for DrawingML-related elements like `<w:drawing>`. + +For legacy reasons, many DrawingML-related elements are in `docx.oxml.shape`. Expect +those to move over here as we have reason to touch them. 
+""" + +from docx.oxml.xmlchemy import BaseOxmlElement + + +class CT_Drawing(BaseOxmlElement): + """`<w:drawing>` element, containing a DrawingML object like a picture or chart.""" diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/exceptions.py b/.venv/lib/python3.12/site-packages/docx/oxml/exceptions.py new file mode 100644 index 00000000..8919239a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/exceptions.py @@ -0,0 +1,10 @@ +"""Exceptions for oxml sub-package.""" + + +class XmlchemyError(Exception): + """Generic error class.""" + + +class InvalidXmlError(XmlchemyError): + """Raised when invalid XML is encountered, such as on attempt to access a missing + required child element.""" diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/ns.py b/.venv/lib/python3.12/site-packages/docx/oxml/ns.py new file mode 100644 index 00000000..5bed1e6a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/ns.py @@ -0,0 +1,109 @@ +"""Namespace-related objects.""" + +from __future__ import annotations + +from typing import Any, Dict + +nsmap = { + "a": "http://schemas.openxmlformats.org/drawingml/2006/main", + "c": "http://schemas.openxmlformats.org/drawingml/2006/chart", + "cp": "http://schemas.openxmlformats.org/package/2006/metadata/core-properties", + "dc": "http://purl.org/dc/elements/1.1/", + "dcmitype": "http://purl.org/dc/dcmitype/", + "dcterms": "http://purl.org/dc/terms/", + "dgm": "http://schemas.openxmlformats.org/drawingml/2006/diagram", + "m": "http://schemas.openxmlformats.org/officeDocument/2006/math", + "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture", + "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships", + "sl": "http://schemas.openxmlformats.org/schemaLibrary/2006/main", + "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main", + "w14": "http://schemas.microsoft.com/office/word/2010/wordml", + "wp": 
"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", + "xml": "http://www.w3.org/XML/1998/namespace", + "xsi": "http://www.w3.org/2001/XMLSchema-instance", +} + +pfxmap = {value: key for key, value in nsmap.items()} + + +class NamespacePrefixedTag(str): + """Value object that knows the semantics of an XML tag having a namespace prefix.""" + + def __new__(cls, nstag: str, *args: Any): + return super(NamespacePrefixedTag, cls).__new__(cls, nstag) + + def __init__(self, nstag: str): + self._pfx, self._local_part = nstag.split(":") + self._ns_uri = nsmap[self._pfx] + + @property + def clark_name(self) -> str: + return "{%s}%s" % (self._ns_uri, self._local_part) + + @classmethod + def from_clark_name(cls, clark_name: str) -> NamespacePrefixedTag: + nsuri, local_name = clark_name[1:].split("}") + nstag = "%s:%s" % (pfxmap[nsuri], local_name) + return cls(nstag) + + @property + def local_part(self) -> str: + """The local part of this tag. + + E.g. "foobar" is returned for tag "f:foobar". + """ + return self._local_part + + @property + def nsmap(self) -> Dict[str, str]: + """Single-member dict mapping prefix of this tag to it's namespace name. + + Example: `{"f": "http://foo/bar"}`. This is handy for passing to xpath calls + and other uses. + """ + return {self._pfx: self._ns_uri} + + @property + def nspfx(self) -> str: + """The namespace-prefix for this tag. + + For example, "f" is returned for tag "f:foobar". + """ + return self._pfx + + @property + def nsuri(self) -> str: + """The namespace URI for this tag. + + For example, "http://foo/bar" would be returned for tag "f:foobar" if the "f" + prefix maps to "http://foo/bar" in nsmap. + """ + return self._ns_uri + + +def nsdecls(*prefixes: str) -> str: + """Namespace declaration including each namespace-prefix in `prefixes`. + + Handy for adding required namespace declarations to a tree root element. 
+ """ + return " ".join(['xmlns:%s="%s"' % (pfx, nsmap[pfx]) for pfx in prefixes]) + + +def nspfxmap(*nspfxs: str) -> Dict[str, str]: + """Subset of namespace-prefix mappings specified by *nspfxs*. + + Any number of namespace prefixes can be supplied, e.g. nspfxmap("a", "r", "p"). + """ + return {pfx: nsmap[pfx] for pfx in nspfxs} + + +def qn(tag: str) -> str: + """Stands for "qualified name". + + This utility function converts a familiar namespace-prefixed tag name like "w:p" + into a Clark-notation qualified tag name for lxml. For example, `qn("w:p")` returns + "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p". + """ + prefix, tagroot = tag.split(":") + uri = nsmap[prefix] + return "{%s}%s" % (uri, tagroot) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/numbering.py b/.venv/lib/python3.12/site-packages/docx/oxml/numbering.py new file mode 100644 index 00000000..3512de65 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/numbering.py @@ -0,0 +1,109 @@ +"""Custom element classes related to the numbering part.""" + +from docx.oxml.parser import OxmlElement +from docx.oxml.shared import CT_DecimalNumber +from docx.oxml.simpletypes import ST_DecimalNumber +from docx.oxml.xmlchemy import ( + BaseOxmlElement, + OneAndOnlyOne, + RequiredAttribute, + ZeroOrMore, + ZeroOrOne, +) + + +class CT_Num(BaseOxmlElement): + """``<w:num>`` element, which represents a concrete list definition instance, having + a required child <w:abstractNumId> that references an abstract numbering definition + that defines most of the formatting details.""" + + abstractNumId = OneAndOnlyOne("w:abstractNumId") + lvlOverride = ZeroOrMore("w:lvlOverride") + numId = RequiredAttribute("w:numId", ST_DecimalNumber) + + def add_lvlOverride(self, ilvl): + """Return a newly added CT_NumLvl (<w:lvlOverride>) element having its ``ilvl`` + attribute set to `ilvl`.""" + return self._add_lvlOverride(ilvl=ilvl) + + @classmethod + def new(cls, num_id, abstractNum_id): + 
"""Return a new ``<w:num>`` element having numId of `num_id` and having a + ``<w:abstractNumId>`` child with val attribute set to `abstractNum_id`.""" + num = OxmlElement("w:num") + num.numId = num_id + abstractNumId = CT_DecimalNumber.new("w:abstractNumId", abstractNum_id) + num.append(abstractNumId) + return num + + +class CT_NumLvl(BaseOxmlElement): + """``<w:lvlOverride>`` element, which identifies a level in a list definition to + override with settings it contains.""" + + startOverride = ZeroOrOne("w:startOverride", successors=("w:lvl",)) + ilvl = RequiredAttribute("w:ilvl", ST_DecimalNumber) + + def add_startOverride(self, val): + """Return a newly added CT_DecimalNumber element having tagname + ``w:startOverride`` and ``val`` attribute set to `val`.""" + return self._add_startOverride(val=val) + + +class CT_NumPr(BaseOxmlElement): + """A ``<w:numPr>`` element, a container for numbering properties applied to a + paragraph.""" + + ilvl = ZeroOrOne("w:ilvl", successors=("w:numId", "w:numberingChange", "w:ins")) + numId = ZeroOrOne("w:numId", successors=("w:numberingChange", "w:ins")) + + # @ilvl.setter + # def _set_ilvl(self, val): + # """ + # Get or add a <w:ilvl> child and set its ``w:val`` attribute to `val`. + # """ + # ilvl = self.get_or_add_ilvl() + # ilvl.val = val + + # @numId.setter + # def numId(self, val): + # """ + # Get or add a <w:numId> child and set its ``w:val`` attribute to + # `val`. + # """ + # numId = self.get_or_add_numId() + # numId.val = val + + +class CT_Numbering(BaseOxmlElement): + """``<w:numbering>`` element, the root element of a numbering part, i.e. 
+ numbering.xml.""" + + num = ZeroOrMore("w:num", successors=("w:numIdMacAtCleanup",)) + + def add_num(self, abstractNum_id): + """Return a newly added CT_Num (<w:num>) element referencing the abstract + numbering definition identified by `abstractNum_id`.""" + next_num_id = self._next_numId + num = CT_Num.new(next_num_id, abstractNum_id) + return self._insert_num(num) + + def num_having_numId(self, numId): + """Return the ``<w:num>`` child element having ``numId`` attribute matching + `numId`.""" + xpath = './w:num[@w:numId="%d"]' % numId + try: + return self.xpath(xpath)[0] + except IndexError: + raise KeyError("no <w:num> element with numId %d" % numId) + + @property + def _next_numId(self): + """The first ``numId`` unused by a ``<w:num>`` element, starting at 1 and + filling any gaps in numbering between existing ``<w:num>`` elements.""" + numId_strs = self.xpath("./w:num/@w:numId") + num_ids = [int(numId_str) for numId_str in numId_strs] + for num in range(1, len(num_ids) + 2): + if num not in num_ids: + break + return num diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/parser.py b/.venv/lib/python3.12/site-packages/docx/oxml/parser.py new file mode 100644 index 00000000..e16ba30b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/parser.py @@ -0,0 +1,62 @@ +# pyright: reportImportCycles=false + +"""XML parser for python-docx.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, Type, cast + +from lxml import etree + +from docx.oxml.ns import NamespacePrefixedTag, nsmap + +if TYPE_CHECKING: + from docx.oxml.xmlchemy import BaseOxmlElement + + +# -- configure XML parser -- +element_class_lookup = etree.ElementNamespaceClassLookup() +oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False) +oxml_parser.set_element_class_lookup(element_class_lookup) + + +def parse_xml(xml: str | bytes) -> "BaseOxmlElement": + """Root lxml element obtained by parsing XML character string `xml`. 
+ + The custom parser is used, so custom element classes are produced for elements in + `xml` that have them. + """ + return cast("BaseOxmlElement", etree.fromstring(xml, oxml_parser)) + + +def register_element_cls(tag: str, cls: Type["BaseOxmlElement"]): + """Register an lxml custom element-class to use for `tag`. + + An instance of `cls` is constructed when the oxml parser encounters an element + with matching `tag`. `tag` is a string of the form `nspfx:tagroot`, e.g. + `'w:document'`. + """ + nspfx, tagroot = tag.split(":") + namespace = element_class_lookup.get_namespace(nsmap[nspfx]) + namespace[tagroot] = cls + + +def OxmlElement( + nsptag_str: str, + attrs: Dict[str, str] | None = None, + nsdecls: Dict[str, str] | None = None, +) -> BaseOxmlElement | etree._Element: # pyright: ignore[reportPrivateUsage] + """Return a 'loose' lxml element having the tag specified by `nsptag_str`. + + The tag in `nsptag_str` must contain the standard namespace prefix, e.g. `a:tbl`. + The resulting element is an instance of the custom element class for this tag name + if one is defined. A dictionary of attribute values may be provided as `attrs`; they + are set if present. All namespaces defined in the dict `nsdecls` are declared in the + element using the key as the prefix and the value as the namespace name. If + `nsdecls` is not provided, a single namespace declaration is added based on the + prefix on `nsptag_str`. 
+ """ + nsptag = NamespacePrefixedTag(nsptag_str) + if nsdecls is None: + nsdecls = nsptag.nsmap + return oxml_parser.makeelement(nsptag.clark_name, attrib=attrs, nsmap=nsdecls) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/section.py b/.venv/lib/python3.12/site-packages/docx/oxml/section.py new file mode 100644 index 00000000..71072e2d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/section.py @@ -0,0 +1,537 @@ +"""Section-related custom element classes.""" + +from __future__ import annotations + +from copy import deepcopy +from typing import Callable, Iterator, List, Sequence, cast + +from lxml import etree +from typing_extensions import TypeAlias + +from docx.enum.section import WD_HEADER_FOOTER, WD_ORIENTATION, WD_SECTION_START +from docx.oxml.ns import nsmap +from docx.oxml.shared import CT_OnOff +from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure, XsdString +from docx.oxml.table import CT_Tbl +from docx.oxml.text.paragraph import CT_P +from docx.oxml.xmlchemy import ( + BaseOxmlElement, + OptionalAttribute, + RequiredAttribute, + ZeroOrMore, + ZeroOrOne, +) +from docx.shared import Length, lazyproperty + +BlockElement: TypeAlias = "CT_P | CT_Tbl" + + +class CT_HdrFtr(BaseOxmlElement): + """`w:hdr` and `w:ftr`, the root element for header and footer part respectively.""" + + add_p: Callable[[], CT_P] + p_lst: List[CT_P] + tbl_lst: List[CT_Tbl] + + _insert_tbl: Callable[[CT_Tbl], CT_Tbl] + + p = ZeroOrMore("w:p", successors=()) + tbl = ZeroOrMore("w:tbl", successors=()) + + @property + def inner_content_elements(self) -> List[CT_P | CT_Tbl]: + """Generate all `w:p` and `w:tbl` elements in this header or footer. + + Elements appear in document order. Elements shaded by nesting in a `w:ins` or + other "wrapper" element will not be included. 
+ """ + return self.xpath("./w:p | ./w:tbl") + + +class CT_HdrFtrRef(BaseOxmlElement): + """`w:headerReference` and `w:footerReference` elements.""" + + type_: WD_HEADER_FOOTER = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "w:type", WD_HEADER_FOOTER + ) + rId: str = RequiredAttribute("r:id", XsdString) # pyright: ignore[reportAssignmentType] + + +class CT_PageMar(BaseOxmlElement): + """``<w:pgMar>`` element, defining page margins.""" + + top: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:top", ST_SignedTwipsMeasure + ) + right: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:right", ST_TwipsMeasure + ) + bottom: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:bottom", ST_SignedTwipsMeasure + ) + left: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:left", ST_TwipsMeasure + ) + header: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:header", ST_TwipsMeasure + ) + footer: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:footer", ST_TwipsMeasure + ) + gutter: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:gutter", ST_TwipsMeasure + ) + + +class CT_PageSz(BaseOxmlElement): + """``<w:pgSz>`` element, defining page dimensions and orientation.""" + + w: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:w", ST_TwipsMeasure + ) + h: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:h", ST_TwipsMeasure + ) + orient: WD_ORIENTATION = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:orient", WD_ORIENTATION, default=WD_ORIENTATION.PORTRAIT + ) + + +class CT_SectPr(BaseOxmlElement): + """`w:sectPr` element, the container element for section properties.""" + + get_or_add_pgMar: Callable[[], CT_PageMar] + get_or_add_pgSz: Callable[[], CT_PageSz] + get_or_add_titlePg: 
Callable[[], CT_OnOff] + get_or_add_type: Callable[[], CT_SectType] + _add_footerReference: Callable[[], CT_HdrFtrRef] + _add_headerReference: Callable[[], CT_HdrFtrRef] + _remove_titlePg: Callable[[], None] + _remove_type: Callable[[], None] + + _tag_seq = ( + "w:footnotePr", + "w:endnotePr", + "w:type", + "w:pgSz", + "w:pgMar", + "w:paperSrc", + "w:pgBorders", + "w:lnNumType", + "w:pgNumType", + "w:cols", + "w:formProt", + "w:vAlign", + "w:noEndnote", + "w:titlePg", + "w:textDirection", + "w:bidi", + "w:rtlGutter", + "w:docGrid", + "w:printerSettings", + "w:sectPrChange", + ) + headerReference = ZeroOrMore("w:headerReference", successors=_tag_seq) + footerReference = ZeroOrMore("w:footerReference", successors=_tag_seq) + type: CT_SectType | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:type", successors=_tag_seq[3:] + ) + pgSz: CT_PageSz | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:pgSz", successors=_tag_seq[4:] + ) + pgMar: CT_PageMar | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:pgMar", successors=_tag_seq[5:] + ) + titlePg: CT_OnOff | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:titlePg", successors=_tag_seq[14:] + ) + del _tag_seq + + def add_footerReference(self, type_: WD_HEADER_FOOTER, rId: str) -> CT_HdrFtrRef: + """Return newly added CT_HdrFtrRef element of `type_` with `rId`. + + The element tag is `w:footerReference`. + """ + footerReference = self._add_footerReference() + footerReference.type_ = type_ + footerReference.rId = rId + return footerReference + + def add_headerReference(self, type_: WD_HEADER_FOOTER, rId: str) -> CT_HdrFtrRef: + """Return newly added CT_HdrFtrRef element of `type_` with `rId`. + + The element tag is `w:headerReference`. 
+ """ + headerReference = self._add_headerReference() + headerReference.type_ = type_ + headerReference.rId = rId + return headerReference + + @property + def bottom_margin(self) -> Length | None: + """Value of the `w:bottom` attr of `<w:pgMar>` child element, as |Length|. + + |None| when either the element or the attribute is not present. + """ + pgMar = self.pgMar + if pgMar is None: + return None + return pgMar.bottom + + @bottom_margin.setter + def bottom_margin(self, value: int | Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.bottom = value if value is None or isinstance(value, Length) else Length(value) + + def clone(self) -> CT_SectPr: + """Return an exact duplicate of this ``<w:sectPr>`` element tree suitable for + use in adding a section break. + + All rsid* attributes are removed from the root ``<w:sectPr>`` element. + """ + cloned_sectPr = deepcopy(self) + cloned_sectPr.attrib.clear() + return cloned_sectPr + + @property + def footer(self) -> Length | None: + """Distance from bottom edge of page to bottom edge of the footer. + + This is the value of the `w:footer` attribute in the `w:pgMar` child element, + as a |Length| object, or |None| if either the element or the attribute is not + present. 
+ """ + pgMar = self.pgMar + if pgMar is None: + return None + return pgMar.footer + + @footer.setter + def footer(self, value: int | Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.footer = value if value is None or isinstance(value, Length) else Length(value) + + def get_footerReference(self, type_: WD_HEADER_FOOTER) -> CT_HdrFtrRef | None: + """Return footerReference element of `type_` or None if not present.""" + path = "./w:footerReference[@w:type='%s']" % WD_HEADER_FOOTER.to_xml(type_) + footerReferences = self.xpath(path) + if not footerReferences: + return None + return footerReferences[0] + + def get_headerReference(self, type_: WD_HEADER_FOOTER) -> CT_HdrFtrRef | None: + """Return headerReference element of `type_` or None if not present.""" + matching_headerReferences = self.xpath( + "./w:headerReference[@w:type='%s']" % WD_HEADER_FOOTER.to_xml(type_) + ) + if len(matching_headerReferences) == 0: + return None + return matching_headerReferences[0] + + @property + def gutter(self) -> Length | None: + """The value of the ``w:gutter`` attribute in the ``<w:pgMar>`` child element, + as a |Length| object, or |None| if either the element or the attribute is not + present.""" + pgMar = self.pgMar + if pgMar is None: + return None + return pgMar.gutter + + @gutter.setter + def gutter(self, value: int | Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.gutter = value if value is None or isinstance(value, Length) else Length(value) + + @property + def header(self) -> Length | None: + """Distance from top edge of page to top edge of header. + + This value comes from the `w:header` attribute on the `w:pgMar` child element. + |None| if either the element or the attribute is not present. 
+ """ + pgMar = self.pgMar + if pgMar is None: + return None + return pgMar.header + + @header.setter + def header(self, value: int | Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.header = value if value is None or isinstance(value, Length) else Length(value) + + def iter_inner_content(self) -> Iterator[CT_P | CT_Tbl]: + """Generate all `w:p` and `w:tbl` elements in this section. + + Elements appear in document order. Elements shaded by nesting in a `w:ins` or + other "wrapper" element will not be included. + """ + return _SectBlockElementIterator.iter_sect_block_elements(self) + + @property + def left_margin(self) -> Length | None: + """The value of the ``w:left`` attribute in the ``<w:pgMar>`` child element, as + a |Length| object, or |None| if either the element or the attribute is not + present.""" + pgMar = self.pgMar + if pgMar is None: + return None + return pgMar.left + + @left_margin.setter + def left_margin(self, value: int | Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.left = value if value is None or isinstance(value, Length) else Length(value) + + @property + def orientation(self) -> WD_ORIENTATION: + """`WD_ORIENTATION` member indicating page-orientation for this section. + + This is the value of the `orient` attribute on the `w:pgSz` child, or + `WD_ORIENTATION.PORTRAIT` if not present. + """ + pgSz = self.pgSz + if pgSz is None: + return WD_ORIENTATION.PORTRAIT + return pgSz.orient + + @orientation.setter + def orientation(self, value: WD_ORIENTATION | None): + pgSz = self.get_or_add_pgSz() + pgSz.orient = value if value else WD_ORIENTATION.PORTRAIT + + @property + def page_height(self) -> Length | None: + """Value in EMU of the `h` attribute of the `w:pgSz` child element. + + |None| if not present. 
+ """ + pgSz = self.pgSz + if pgSz is None: + return None + return pgSz.h + + @page_height.setter + def page_height(self, value: Length | None): + pgSz = self.get_or_add_pgSz() + pgSz.h = value + + @property + def page_width(self) -> Length | None: + """Value in EMU of the ``w`` attribute of the ``<w:pgSz>`` child element. + + |None| if not present. + """ + pgSz = self.pgSz + if pgSz is None: + return None + return pgSz.w + + @page_width.setter + def page_width(self, value: Length | None): + pgSz = self.get_or_add_pgSz() + pgSz.w = value + + @property + def preceding_sectPr(self) -> CT_SectPr | None: + """SectPr immediately preceding this one or None if this is the first.""" + # -- [1] predicate returns list of zero or one value -- + preceding_sectPrs = self.xpath("./preceding::w:sectPr[1]") + return preceding_sectPrs[0] if len(preceding_sectPrs) > 0 else None + + def remove_footerReference(self, type_: WD_HEADER_FOOTER) -> str: + """Return rId of w:footerReference child of `type_` after removing it.""" + footerReference = self.get_footerReference(type_) + if footerReference is None: + # -- should never happen, but to satisfy type-check and just in case -- + raise ValueError("CT_SectPr has no footer reference") + rId = footerReference.rId + self.remove(footerReference) + return rId + + def remove_headerReference(self, type_: WD_HEADER_FOOTER): + """Return rId of w:headerReference child of `type_` after removing it.""" + headerReference = self.get_headerReference(type_) + if headerReference is None: + # -- should never happen, but to satisfy type-check and just in case -- + raise ValueError("CT_SectPr has no header reference") + rId = headerReference.rId + self.remove(headerReference) + return rId + + @property + def right_margin(self) -> Length | None: + """The value of the ``w:right`` attribute in the ``<w:pgMar>`` child element, as + a |Length| object, or |None| if either the element or the attribute is not + present.""" + pgMar = self.pgMar + if pgMar is None: + 
return None + return pgMar.right + + @right_margin.setter + def right_margin(self, value: Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.right = value + + @property + def start_type(self) -> WD_SECTION_START: + """The member of the ``WD_SECTION_START`` enumeration corresponding to the value + of the ``val`` attribute of the ``<w:type>`` child element, or + ``WD_SECTION_START.NEW_PAGE`` if not present.""" + type = self.type + if type is None or type.val is None: + return WD_SECTION_START.NEW_PAGE + return type.val + + @start_type.setter + def start_type(self, value: WD_SECTION_START | None): + if value is None or value is WD_SECTION_START.NEW_PAGE: + self._remove_type() + return + type = self.get_or_add_type() + type.val = value + + @property + def titlePg_val(self) -> bool: + """Value of `w:titlePg/@val` or |False| if `./w:titlePg` is not present.""" + titlePg = self.titlePg + if titlePg is None: + return False + return titlePg.val + + @titlePg_val.setter + def titlePg_val(self, value: bool | None): + if value in [None, False]: + self._remove_titlePg() + else: + self.get_or_add_titlePg().val = True + + @property + def top_margin(self) -> Length | None: + """The value of the ``w:top`` attribute in the ``<w:pgMar>`` child element, as a + |Length| object, or |None| if either the element or the attribute is not + present.""" + pgMar = self.pgMar + if pgMar is None: + return None + return pgMar.top + + @top_margin.setter + def top_margin(self, value: Length | None): + pgMar = self.get_or_add_pgMar() + pgMar.top = value + + +class CT_SectType(BaseOxmlElement): + """``<w:sectType>`` element, defining the section start type.""" + + val: WD_SECTION_START | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:val", WD_SECTION_START + ) + + +# == HELPERS ========================================================================= + + +class _SectBlockElementIterator: + """Generates the block-item XML elements in a section. 
+ + A block-item element is a `CT_P` (paragraph) or a `CT_Tbl` (table). + """ + + _compiled_blocks_xpath: etree.XPath | None = None + _compiled_count_xpath: etree.XPath | None = None + + def __init__(self, sectPr: CT_SectPr): + self._sectPr = sectPr + + @classmethod + def iter_sect_block_elements(cls, sectPr: CT_SectPr) -> Iterator[BlockElement]: + """Generate each CT_P or CT_Tbl element within extents governed by `sectPr`.""" + return cls(sectPr)._iter_sect_block_elements() + + def _iter_sect_block_elements(self) -> Iterator[BlockElement]: + """Generate each CT_P or CT_Tbl element in section.""" + # -- General strategy is to get all block (<w;p> and <w:tbl>) elements from + # -- start of doc to and including this section, then compute the count of those + # -- elements that came from prior sections and skip that many to leave only the + # -- ones in this section. It's possible to express this "between here and + # -- there" (end of prior section and end of this one) concept in XPath, but it + # -- would be harder to follow because there are special cases (e.g. no prior + # -- section) and the boundary expressions are fairly hairy. I also believe it + # -- would be computationally more expensive than doing it this straighforward + # -- albeit (theoretically) slightly wasteful way. 
+ + sectPr, sectPrs = self._sectPr, self._sectPrs + sectPr_idx = sectPrs.index(sectPr) + + # -- count block items belonging to prior sections -- + n_blks_to_skip = ( + 0 + if sectPr_idx == 0 + else self._count_of_blocks_in_and_above_section(sectPrs[sectPr_idx - 1]) + ) + + # -- and skip those in set of all blks from doc start to end of this section -- + for element in self._blocks_in_and_above_section(sectPr)[n_blks_to_skip:]: + yield element + + def _blocks_in_and_above_section(self, sectPr: CT_SectPr) -> Sequence[BlockElement]: + """All ps and tbls in section defined by `sectPr` and all prior sections.""" + if self._compiled_blocks_xpath is None: + self._compiled_blocks_xpath = etree.XPath( + self._blocks_in_and_above_section_xpath, + namespaces=nsmap, + regexp=False, + ) + xpath = self._compiled_blocks_xpath + # -- XPath callable results are Any (basically), so need a cast. -- + return cast(Sequence[BlockElement], xpath(sectPr)) + + @lazyproperty + def _blocks_in_and_above_section_xpath(self) -> str: + """XPath expr for ps and tbls in context of a sectPr and all prior sectPrs.""" + # -- "p_sect" is a section with sectPr located at w:p/w:pPr/w:sectPr. + # -- "body_sect" is a section with sectPr located at w:body/w:sectPr. The last + # -- section in the document is a "body_sect". All others are of the "p_sect" + # -- variety. "term" means "terminal", like the last p or tbl in the section. + # -- "pred" means "predecessor", like a preceding p or tbl in the section. + + # -- the terminal block in a p-based sect is the p the sectPr appears in -- + p_sect_term_block = "./parent::w:pPr/parent::w:p" + # -- the terminus of a body-based sect is the sectPr itself (not a block) -- + body_sect_term = "self::w:sectPr[parent::w:body]" + # -- all the ps and tbls preceding (but not including) the context node -- + pred_ps_and_tbls = "preceding-sibling::*[self::w:p | self::w:tbl]" + + # -- p_sect_term_block and body_sect_term(inus) are mutually exclusive. 
So the + # -- result is either the union of nodes found by the first two selectors or the + # -- nodes found by the last selector, never both. + return ( + # -- include the p containing a sectPr -- + f"{p_sect_term_block}" + # -- along with all the blocks that precede it -- + f" | {p_sect_term_block}/{pred_ps_and_tbls}" + # -- or all the preceding blocks if sectPr is body-based (last sectPr) -- + f" | {body_sect_term}/{pred_ps_and_tbls}" + ) + + def _count_of_blocks_in_and_above_section(self, sectPr: CT_SectPr) -> int: + """All ps and tbls in section defined by `sectPr` and all prior sections.""" + if self._compiled_count_xpath is None: + self._compiled_count_xpath = etree.XPath( + f"count({self._blocks_in_and_above_section_xpath})", + namespaces=nsmap, + regexp=False, + ) + xpath = self._compiled_count_xpath + # -- numeric XPath results are always float, so need an int() conversion -- + return int(cast(float, xpath(sectPr))) + + @lazyproperty + def _sectPrs(self) -> Sequence[CT_SectPr]: + """All w:sectPr elements in document, in document-order.""" + return self._sectPr.xpath( + "/w:document/w:body/w:p/w:pPr/w:sectPr | /w:document/w:body/w:sectPr", + ) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/settings.py b/.venv/lib/python3.12/site-packages/docx/oxml/settings.py new file mode 100644 index 00000000..d5bb41a6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/settings.py @@ -0,0 +1,138 @@ +"""Custom element classes related to document settings.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne + +if TYPE_CHECKING: + from docx.oxml.shared import CT_OnOff + + +class CT_Settings(BaseOxmlElement): + """`w:settings` element, root element for the settings part.""" + + get_or_add_evenAndOddHeaders: Callable[[], CT_OnOff] + _remove_evenAndOddHeaders: Callable[[], None] + + _tag_seq = ( + "w:writeProtection", + "w:view", + "w:zoom", + 
"w:removePersonalInformation", + "w:removeDateAndTime", + "w:doNotDisplayPageBoundaries", + "w:displayBackgroundShape", + "w:printPostScriptOverText", + "w:printFractionalCharacterWidth", + "w:printFormsData", + "w:embedTrueTypeFonts", + "w:embedSystemFonts", + "w:saveSubsetFonts", + "w:saveFormsData", + "w:mirrorMargins", + "w:alignBordersAndEdges", + "w:bordersDoNotSurroundHeader", + "w:bordersDoNotSurroundFooter", + "w:gutterAtTop", + "w:hideSpellingErrors", + "w:hideGrammaticalErrors", + "w:activeWritingStyle", + "w:proofState", + "w:formsDesign", + "w:attachedTemplate", + "w:linkStyles", + "w:stylePaneFormatFilter", + "w:stylePaneSortMethod", + "w:documentType", + "w:mailMerge", + "w:revisionView", + "w:trackRevisions", + "w:doNotTrackMoves", + "w:doNotTrackFormatting", + "w:documentProtection", + "w:autoFormatOverride", + "w:styleLockTheme", + "w:styleLockQFSet", + "w:defaultTabStop", + "w:autoHyphenation", + "w:consecutiveHyphenLimit", + "w:hyphenationZone", + "w:doNotHyphenateCaps", + "w:showEnvelope", + "w:summaryLength", + "w:clickAndTypeStyle", + "w:defaultTableStyle", + "w:evenAndOddHeaders", + "w:bookFoldRevPrinting", + "w:bookFoldPrinting", + "w:bookFoldPrintingSheets", + "w:drawingGridHorizontalSpacing", + "w:drawingGridVerticalSpacing", + "w:displayHorizontalDrawingGridEvery", + "w:displayVerticalDrawingGridEvery", + "w:doNotUseMarginsForDrawingGridOrigin", + "w:drawingGridHorizontalOrigin", + "w:drawingGridVerticalOrigin", + "w:doNotShadeFormData", + "w:noPunctuationKerning", + "w:characterSpacingControl", + "w:printTwoOnOne", + "w:strictFirstAndLastChars", + "w:noLineBreaksAfter", + "w:noLineBreaksBefore", + "w:savePreviewPicture", + "w:doNotValidateAgainstSchema", + "w:saveInvalidXml", + "w:ignoreMixedContent", + "w:alwaysShowPlaceholderText", + "w:doNotDemarcateInvalidXml", + "w:saveXmlDataOnly", + "w:useXSLTWhenSaving", + "w:saveThroughXslt", + "w:showXMLTags", + "w:alwaysMergeEmptyNamespace", + "w:updateFields", + "w:hdrShapeDefaults", + 
"w:footnotePr", + "w:endnotePr", + "w:compat", + "w:docVars", + "w:rsids", + "m:mathPr", + "w:attachedSchema", + "w:themeFontLang", + "w:clrSchemeMapping", + "w:doNotIncludeSubdocsInStats", + "w:doNotAutoCompressPictures", + "w:forceUpgrade", + "w:captions", + "w:readModeInkLockDown", + "w:smartTagType", + "sl:schemaLibrary", + "w:shapeDefaults", + "w:doNotEmbedSmartTags", + "w:decimalSymbol", + "w:listSeparator", + ) + evenAndOddHeaders: CT_OnOff | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:evenAndOddHeaders", successors=_tag_seq[48:] + ) + del _tag_seq + + @property + def evenAndOddHeaders_val(self) -> bool: + """Value of `w:evenAndOddHeaders/@w:val` or |None| if not present.""" + evenAndOddHeaders = self.evenAndOddHeaders + if evenAndOddHeaders is None: + return False + return evenAndOddHeaders.val + + @evenAndOddHeaders_val.setter + def evenAndOddHeaders_val(self, value: bool | None): + if value is None or value is False: + self._remove_evenAndOddHeaders() + return + + self.get_or_add_evenAndOddHeaders().val = value diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/shape.py b/.venv/lib/python3.12/site-packages/docx/oxml/shape.py new file mode 100644 index 00000000..289d3557 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/shape.py @@ -0,0 +1,302 @@ +"""Custom element classes for shape-related elements like `<w:inline>`.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +from docx.oxml.ns import nsdecls +from docx.oxml.parser import parse_xml +from docx.oxml.simpletypes import ( + ST_Coordinate, + ST_DrawingElementId, + ST_PositiveCoordinate, + ST_RelationshipId, + XsdString, + XsdToken, +) +from docx.oxml.xmlchemy import ( + BaseOxmlElement, + OneAndOnlyOne, + OptionalAttribute, + RequiredAttribute, + ZeroOrOne, +) + +if TYPE_CHECKING: + from docx.shared import Length + + +class CT_Anchor(BaseOxmlElement): + """`<wp:anchor>` element, container for a "floating" shape.""" + + +class 
CT_Blip(BaseOxmlElement): + """``<a:blip>`` element, specifies image source and adjustments such as alpha and + tint.""" + + embed: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "r:embed", ST_RelationshipId + ) + link: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "r:link", ST_RelationshipId + ) + + +class CT_BlipFillProperties(BaseOxmlElement): + """``<pic:blipFill>`` element, specifies picture properties.""" + + blip: CT_Blip = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "a:blip", successors=("a:srcRect", "a:tile", "a:stretch") + ) + + +class CT_GraphicalObject(BaseOxmlElement): + """``<a:graphic>`` element, container for a DrawingML object.""" + + graphicData: CT_GraphicalObjectData = OneAndOnlyOne( # pyright: ignore[reportAssignmentType] + "a:graphicData" + ) + + +class CT_GraphicalObjectData(BaseOxmlElement): + """``<a:graphicData>`` element, container for the XML of a DrawingML object.""" + + pic: CT_Picture = ZeroOrOne("pic:pic") # pyright: ignore[reportAssignmentType] + uri: str = RequiredAttribute("uri", XsdToken) # pyright: ignore[reportAssignmentType] + + +class CT_Inline(BaseOxmlElement): + """`<wp:inline>` element, container for an inline shape.""" + + extent: CT_PositiveSize2D = OneAndOnlyOne("wp:extent") # pyright: ignore[reportAssignmentType] + docPr: CT_NonVisualDrawingProps = OneAndOnlyOne( # pyright: ignore[reportAssignmentType] + "wp:docPr" + ) + graphic: CT_GraphicalObject = OneAndOnlyOne( # pyright: ignore[reportAssignmentType] + "a:graphic" + ) + + @classmethod + def new(cls, cx: Length, cy: Length, shape_id: int, pic: CT_Picture) -> CT_Inline: + """Return a new ``<wp:inline>`` element populated with the values passed as + parameters.""" + inline = cast(CT_Inline, parse_xml(cls._inline_xml())) + inline.extent.cx = cx + inline.extent.cy = cy + inline.docPr.id = shape_id + inline.docPr.name = "Picture %d" % shape_id + inline.graphic.graphicData.uri = 
"http://schemas.openxmlformats.org/drawingml/2006/picture" + inline.graphic.graphicData._insert_pic(pic) + return inline + + @classmethod + def new_pic_inline( + cls, shape_id: int, rId: str, filename: str, cx: Length, cy: Length + ) -> CT_Inline: + """Create `wp:inline` element containing a `pic:pic` element. + + The contents of the `pic:pic` element is taken from the argument values. + """ + pic_id = 0 # Word doesn't seem to use this, but does not omit it + pic = CT_Picture.new(pic_id, filename, rId, cx, cy) + inline = cls.new(cx, cy, shape_id, pic) + inline.graphic.graphicData._insert_pic(pic) + return inline + + @classmethod + def _inline_xml(cls): + return ( + "<wp:inline %s>\n" + ' <wp:extent cx="914400" cy="914400"/>\n' + ' <wp:docPr id="666" name="unnamed"/>\n' + " <wp:cNvGraphicFramePr>\n" + ' <a:graphicFrameLocks noChangeAspect="1"/>\n' + " </wp:cNvGraphicFramePr>\n" + " <a:graphic>\n" + ' <a:graphicData uri="URI not set"/>\n' + " </a:graphic>\n" + "</wp:inline>" % nsdecls("wp", "a", "pic", "r") + ) + + +class CT_NonVisualDrawingProps(BaseOxmlElement): + """Used for ``<wp:docPr>`` element, and perhaps others. + + Specifies the id and name of a DrawingML drawing. 
+ """ + + id = RequiredAttribute("id", ST_DrawingElementId) + name = RequiredAttribute("name", XsdString) + + +class CT_NonVisualPictureProperties(BaseOxmlElement): + """``<pic:cNvPicPr>`` element, specifies picture locking and resize behaviors.""" + + +class CT_Picture(BaseOxmlElement): + """``<pic:pic>`` element, a DrawingML picture.""" + + nvPicPr: CT_PictureNonVisual = OneAndOnlyOne( # pyright: ignore[reportAssignmentType] + "pic:nvPicPr" + ) + blipFill: CT_BlipFillProperties = OneAndOnlyOne( # pyright: ignore[reportAssignmentType] + "pic:blipFill" + ) + spPr: CT_ShapeProperties = OneAndOnlyOne("pic:spPr") # pyright: ignore[reportAssignmentType] + + @classmethod + def new(cls, pic_id, filename, rId, cx, cy): + """Return a new ``<pic:pic>`` element populated with the minimal contents + required to define a viable picture element, based on the values passed as + parameters.""" + pic = parse_xml(cls._pic_xml()) + pic.nvPicPr.cNvPr.id = pic_id + pic.nvPicPr.cNvPr.name = filename + pic.blipFill.blip.embed = rId + pic.spPr.cx = cx + pic.spPr.cy = cy + return pic + + @classmethod + def _pic_xml(cls): + return ( + "<pic:pic %s>\n" + " <pic:nvPicPr>\n" + ' <pic:cNvPr id="666" name="unnamed"/>\n' + " <pic:cNvPicPr/>\n" + " </pic:nvPicPr>\n" + " <pic:blipFill>\n" + " <a:blip/>\n" + " <a:stretch>\n" + " <a:fillRect/>\n" + " </a:stretch>\n" + " </pic:blipFill>\n" + " <pic:spPr>\n" + " <a:xfrm>\n" + ' <a:off x="0" y="0"/>\n' + ' <a:ext cx="914400" cy="914400"/>\n' + " </a:xfrm>\n" + ' <a:prstGeom prst="rect"/>\n' + " </pic:spPr>\n" + "</pic:pic>" % nsdecls("pic", "a", "r") + ) + + +class CT_PictureNonVisual(BaseOxmlElement): + """``<pic:nvPicPr>`` element, non-visual picture properties.""" + + cNvPr = OneAndOnlyOne("pic:cNvPr") + + +class CT_Point2D(BaseOxmlElement): + """Used for ``<a:off>`` element, and perhaps others. + + Specifies an x, y coordinate (point). 
+ """ + + x = RequiredAttribute("x", ST_Coordinate) + y = RequiredAttribute("y", ST_Coordinate) + + +class CT_PositiveSize2D(BaseOxmlElement): + """Used for ``<wp:extent>`` element, and perhaps others later. + + Specifies the size of a DrawingML drawing. + """ + + cx: Length = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "cx", ST_PositiveCoordinate + ) + cy: Length = RequiredAttribute( # pyright: ignore[reportAssignmentType] + "cy", ST_PositiveCoordinate + ) + + +class CT_PresetGeometry2D(BaseOxmlElement): + """``<a:prstGeom>`` element, specifies an preset autoshape geometry, such as + ``rect``.""" + + +class CT_RelativeRect(BaseOxmlElement): + """``<a:fillRect>`` element, specifying picture should fill containing rectangle + shape.""" + + +class CT_ShapeProperties(BaseOxmlElement): + """``<pic:spPr>`` element, specifies size and shape of picture container.""" + + xfrm = ZeroOrOne( + "a:xfrm", + successors=( + "a:custGeom", + "a:prstGeom", + "a:ln", + "a:effectLst", + "a:effectDag", + "a:scene3d", + "a:sp3d", + "a:extLst", + ), + ) + + @property + def cx(self): + """Shape width as an instance of Emu, or None if not present.""" + xfrm = self.xfrm + if xfrm is None: + return None + return xfrm.cx + + @cx.setter + def cx(self, value): + xfrm = self.get_or_add_xfrm() + xfrm.cx = value + + @property + def cy(self): + """Shape height as an instance of Emu, or None if not present.""" + xfrm = self.xfrm + if xfrm is None: + return None + return xfrm.cy + + @cy.setter + def cy(self, value): + xfrm = self.get_or_add_xfrm() + xfrm.cy = value + + +class CT_StretchInfoProperties(BaseOxmlElement): + """``<a:stretch>`` element, specifies how picture should fill its containing + shape.""" + + +class CT_Transform2D(BaseOxmlElement): + """``<a:xfrm>`` element, specifies size and shape of picture container.""" + + off = ZeroOrOne("a:off", successors=("a:ext",)) + ext = ZeroOrOne("a:ext", successors=()) + + @property + def cx(self): + ext = self.ext + if ext is None: + 
return None + return ext.cx + + @cx.setter + def cx(self, value): + ext = self.get_or_add_ext() + ext.cx = value + + @property + def cy(self): + ext = self.ext + if ext is None: + return None + return ext.cy + + @cy.setter + def cy(self, value): + ext = self.get_or_add_ext() + ext.cy = value diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/shared.py b/.venv/lib/python3.12/site-packages/docx/oxml/shared.py new file mode 100644 index 00000000..8c2ebc9a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/shared.py @@ -0,0 +1,53 @@ +"""Objects shared by modules in the docx.oxml subpackage.""" + +from __future__ import annotations + +from typing import cast + +from docx.oxml.ns import qn +from docx.oxml.parser import OxmlElement +from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String +from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute + + +class CT_DecimalNumber(BaseOxmlElement): + """Used for ``<w:numId>``, ``<w:ilvl>``, ``<w:abstractNumId>`` and several others, + containing a text representation of a decimal number (e.g. 42) in its ``val`` + attribute.""" + + val: int = RequiredAttribute("w:val", ST_DecimalNumber) # pyright: ignore[reportAssignmentType] + + @classmethod + def new(cls, nsptagname: str, val: int): + """Return a new ``CT_DecimalNumber`` element having tagname `nsptagname` and + ``val`` attribute set to `val`.""" + return OxmlElement(nsptagname, attrs={qn("w:val"): str(val)}) + + +class CT_OnOff(BaseOxmlElement): + """Used for `w:b`, `w:i` elements and others. + + Contains a bool-ish string in its `val` attribute, xsd:boolean plus "on" and + "off". Defaults to `True`, so `<w:b>` for example means "bold is turned on". + """ + + val: bool = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:val", ST_OnOff, default=True + ) + + +class CT_String(BaseOxmlElement): + """Used for `w:pStyle` and `w:tblStyle` elements and others. 
+ + In those cases, it containing a style name in its `val` attribute. + """ + + val: str = RequiredAttribute("w:val", ST_String) # pyright: ignore[reportAssignmentType] + + @classmethod + def new(cls, nsptagname: str, val: str): + """Return a new ``CT_String`` element with tagname `nsptagname` and ``val`` + attribute set to `val`.""" + elm = cast(CT_String, OxmlElement(nsptagname)) + elm.val = val + return elm diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/simpletypes.py b/.venv/lib/python3.12/site-packages/docx/oxml/simpletypes.py new file mode 100644 index 00000000..dd10ab91 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/simpletypes.py @@ -0,0 +1,381 @@ +# pyright: reportImportCycles=false + +"""Simple-type classes, corresponding to ST_* schema items. + +These provide validation and format translation for values stored in XML element +attributes. Naming generally corresponds to the simple type in the associated XML +schema. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Tuple + +from docx.exceptions import InvalidXmlError +from docx.shared import Emu, Pt, RGBColor, Twips + +if TYPE_CHECKING: + from docx.shared import Length + + +class BaseSimpleType: + """Base class for simple-types.""" + + @classmethod + def from_xml(cls, xml_value: str) -> Any: + return cls.convert_from_xml(xml_value) + + @classmethod + def to_xml(cls, value: Any) -> str: + cls.validate(value) + str_value = cls.convert_to_xml(value) + return str_value + + @classmethod + def convert_from_xml(cls, str_value: str) -> Any: + return int(str_value) + + @classmethod + def convert_to_xml(cls, value: Any) -> str: ... + + @classmethod + def validate(cls, value: Any) -> None: ... 
+ + @classmethod + def validate_int(cls, value: object): + if not isinstance(value, int): + raise TypeError("value must be <type 'int'>, got %s" % type(value)) + + @classmethod + def validate_int_in_range(cls, value: int, min_inclusive: int, max_inclusive: int) -> None: + cls.validate_int(value) + if value < min_inclusive or value > max_inclusive: + raise ValueError( + "value must be in range %d to %d inclusive, got %d" + % (min_inclusive, max_inclusive, value) + ) + + @classmethod + def validate_string(cls, value: Any) -> str: + if not isinstance(value, str): + raise TypeError("value must be a string, got %s" % type(value)) + return value + + +class BaseIntType(BaseSimpleType): + @classmethod + def convert_from_xml(cls, str_value: str) -> int: + return int(str_value) + + @classmethod + def convert_to_xml(cls, value: int) -> str: + return str(value) + + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int(value) + + +class BaseStringType(BaseSimpleType): + @classmethod + def convert_from_xml(cls, str_value: str) -> str: + return str_value + + @classmethod + def convert_to_xml(cls, value: str) -> str: + return value + + @classmethod + def validate(cls, value: str): + cls.validate_string(value) + + +class BaseStringEnumerationType(BaseStringType): + _members: Tuple[str, ...] + + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_string(value) + if value not in cls._members: + raise ValueError("must be one of %s, got '%s'" % (cls._members, value)) + + +class XsdAnyUri(BaseStringType): + """There's a regex in the spec this is supposed to meet... + + but current assessment is that spending cycles on validating wouldn't be worth it + for the number of programming errors it would catch. 
+ """ + + +class XsdBoolean(BaseSimpleType): + @classmethod + def convert_from_xml(cls, str_value: str) -> bool: + if str_value not in ("1", "0", "true", "false"): + raise InvalidXmlError( + "value must be one of '1', '0', 'true' or 'false', got '%s'" % str_value + ) + return str_value in ("1", "true") + + @classmethod + def convert_to_xml(cls, value: bool) -> str: + return {True: "1", False: "0"}[value] + + @classmethod + def validate(cls, value: Any) -> None: + if value not in (True, False): + raise TypeError( + "only True or False (and possibly None) may be assigned, got" " '%s'" % value + ) + + +class XsdId(BaseStringType): + """String that must begin with a letter or underscore and cannot contain any colons. + + Not fully validated because not used in external API. + """ + + pass + + +class XsdInt(BaseIntType): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int_in_range(value, -2147483648, 2147483647) + + +class XsdLong(BaseIntType): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int_in_range(value, -9223372036854775808, 9223372036854775807) + + +class XsdString(BaseStringType): + pass + + +class XsdStringEnumeration(BaseStringEnumerationType): + """Set of enumerated xsd:string values.""" + + +class XsdToken(BaseStringType): + """Xsd:string with whitespace collapsing, e.g. 
multiple spaces reduced to one, + leading and trailing space stripped.""" + + pass + + +class XsdUnsignedInt(BaseIntType): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int_in_range(value, 0, 4294967295) + + +class XsdUnsignedLong(BaseIntType): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int_in_range(value, 0, 18446744073709551615) + + +class ST_BrClear(XsdString): + @classmethod + def validate(cls, value: str) -> None: + cls.validate_string(value) + valid_values = ("none", "left", "right", "all") + if value not in valid_values: + raise ValueError("must be one of %s, got '%s'" % (valid_values, value)) + + +class ST_BrType(XsdString): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_string(value) + valid_values = ("page", "column", "textWrapping") + if value not in valid_values: + raise ValueError("must be one of %s, got '%s'" % (valid_values, value)) + + +class ST_Coordinate(BaseIntType): + @classmethod + def convert_from_xml(cls, str_value: str) -> Length: + if "i" in str_value or "m" in str_value or "p" in str_value: + return ST_UniversalMeasure.convert_from_xml(str_value) + return Emu(int(str_value)) + + @classmethod + def validate(cls, value: Any) -> None: + ST_CoordinateUnqualified.validate(value) + + +class ST_CoordinateUnqualified(XsdLong): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int_in_range(value, -27273042329600, 27273042316900) + + +class ST_DecimalNumber(XsdInt): + pass + + +class ST_DrawingElementId(XsdUnsignedInt): + pass + + +class ST_HexColor(BaseStringType): + @classmethod + def convert_from_xml( # pyright: ignore[reportIncompatibleMethodOverride] + cls, str_value: str + ) -> RGBColor | str: + if str_value == "auto": + return ST_HexColorAuto.AUTO + return RGBColor.from_string(str_value) + + @classmethod + def convert_to_xml( # pyright: ignore[reportIncompatibleMethodOverride] + cls, value: RGBColor + ) -> str: + """Keep alpha hex numerals all 
uppercase just for consistency.""" + # expecting 3-tuple of ints in range 0-255 + return "%02X%02X%02X" % value + + @classmethod + def validate(cls, value: Any) -> None: + # must be an RGBColor object --- + if not isinstance(value, RGBColor): + raise ValueError( + "rgb color value must be RGBColor object, got %s %s" % (type(value), value) + ) + + +class ST_HexColorAuto(XsdStringEnumeration): + """Value for `w:color/[@val="auto"] attribute setting.""" + + AUTO = "auto" + + _members = (AUTO,) + + +class ST_HpsMeasure(XsdUnsignedLong): + """Half-point measure, e.g. 24.0 represents 12.0 points.""" + + @classmethod + def convert_from_xml(cls, str_value: str) -> Length: + if "m" in str_value or "n" in str_value or "p" in str_value: + return ST_UniversalMeasure.convert_from_xml(str_value) + return Pt(int(str_value) / 2.0) + + @classmethod + def convert_to_xml(cls, value: int | Length) -> str: + emu = Emu(value) + half_points = int(emu.pt * 2) + return str(half_points) + + +class ST_Merge(XsdStringEnumeration): + """Valid values for <w:xMerge val=""> attribute.""" + + CONTINUE = "continue" + RESTART = "restart" + + _members = (CONTINUE, RESTART) + + +class ST_OnOff(XsdBoolean): + @classmethod + def convert_from_xml(cls, str_value: str) -> bool: + if str_value not in ("1", "0", "true", "false", "on", "off"): + raise InvalidXmlError( + "value must be one of '1', '0', 'true', 'false', 'on', or 'o" + "ff', got '%s'" % str_value + ) + return str_value in ("1", "true", "on") + + +class ST_PositiveCoordinate(XsdLong): + @classmethod + def convert_from_xml(cls, str_value: str) -> Length: + return Emu(int(str_value)) + + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_int_in_range(value, 0, 27273042316900) + + +class ST_RelationshipId(XsdString): + pass + + +class ST_SignedTwipsMeasure(XsdInt): + @classmethod + def convert_from_xml(cls, str_value: str) -> Length: + if "i" in str_value or "m" in str_value or "p" in str_value: + return 
ST_UniversalMeasure.convert_from_xml(str_value) + return Twips(int(round(float(str_value)))) + + @classmethod + def convert_to_xml(cls, value: int | Length) -> str: + emu = Emu(value) + twips = emu.twips + return str(twips) + + +class ST_String(XsdString): + pass + + +class ST_TblLayoutType(XsdString): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_string(value) + valid_values = ("fixed", "autofit") + if value not in valid_values: + raise ValueError("must be one of %s, got '%s'" % (valid_values, value)) + + +class ST_TblWidth(XsdString): + @classmethod + def validate(cls, value: Any) -> None: + cls.validate_string(value) + valid_values = ("auto", "dxa", "nil", "pct") + if value not in valid_values: + raise ValueError("must be one of %s, got '%s'" % (valid_values, value)) + + +class ST_TwipsMeasure(XsdUnsignedLong): + @classmethod + def convert_from_xml(cls, str_value: str) -> Length: + if "i" in str_value or "m" in str_value or "p" in str_value: + return ST_UniversalMeasure.convert_from_xml(str_value) + return Twips(int(str_value)) + + @classmethod + def convert_to_xml(cls, value: int | Length) -> str: + emu = Emu(value) + twips = emu.twips + return str(twips) + + +class ST_UniversalMeasure(BaseSimpleType): + @classmethod + def convert_from_xml(cls, str_value: str) -> Emu: + float_part, units_part = str_value[:-2], str_value[-2:] + quantity = float(float_part) + multiplier = { + "mm": 36000, + "cm": 360000, + "in": 914400, + "pt": 12700, + "pc": 152400, + "pi": 152400, + }[units_part] + return Emu(int(round(quantity * multiplier))) + + +class ST_VerticalAlignRun(XsdStringEnumeration): + """Valid values for `w:vertAlign/@val`.""" + + BASELINE = "baseline" + SUPERSCRIPT = "superscript" + SUBSCRIPT = "subscript" + + _members = (BASELINE, SUPERSCRIPT, SUBSCRIPT) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/styles.py b/.venv/lib/python3.12/site-packages/docx/oxml/styles.py new file mode 100644 index 00000000..fb0e5d0d --- /dev/null 
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/styles.py @@ -0,0 +1,320 @@ +"""Custom element classes related to the styles part.""" + +from __future__ import annotations + +from docx.enum.style import WD_STYLE_TYPE +from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String +from docx.oxml.xmlchemy import ( + BaseOxmlElement, + OptionalAttribute, + RequiredAttribute, + ZeroOrMore, + ZeroOrOne, +) + + +def styleId_from_name(name): + """Return the style id corresponding to `name`, taking into account special-case + names such as 'Heading 1'.""" + return { + "caption": "Caption", + "heading 1": "Heading1", + "heading 2": "Heading2", + "heading 3": "Heading3", + "heading 4": "Heading4", + "heading 5": "Heading5", + "heading 6": "Heading6", + "heading 7": "Heading7", + "heading 8": "Heading8", + "heading 9": "Heading9", + }.get(name, name.replace(" ", "")) + + +class CT_LatentStyles(BaseOxmlElement): + """`w:latentStyles` element, defining behavior defaults for latent styles and + containing `w:lsdException` child elements that each override those defaults for a + named latent style.""" + + lsdException = ZeroOrMore("w:lsdException", successors=()) + + count = OptionalAttribute("w:count", ST_DecimalNumber) + defLockedState = OptionalAttribute("w:defLockedState", ST_OnOff) + defQFormat = OptionalAttribute("w:defQFormat", ST_OnOff) + defSemiHidden = OptionalAttribute("w:defSemiHidden", ST_OnOff) + defUIPriority = OptionalAttribute("w:defUIPriority", ST_DecimalNumber) + defUnhideWhenUsed = OptionalAttribute("w:defUnhideWhenUsed", ST_OnOff) + + def bool_prop(self, attr_name): + """Return the boolean value of the attribute having `attr_name`, or |False| if + not present.""" + value = getattr(self, attr_name) + if value is None: + return False + return value + + def get_by_name(self, name): + """Return the `w:lsdException` child having `name`, or |None| if not found.""" + found = self.xpath('w:lsdException[@w:name="%s"]' % name) + if not found: + return None 
+ return found[0] + + def set_bool_prop(self, attr_name, value): + """Set the on/off attribute having `attr_name` to `value`.""" + setattr(self, attr_name, bool(value)) + + +class CT_LsdException(BaseOxmlElement): + """``<w:lsdException>`` element, defining override visibility behaviors for a named + latent style.""" + + locked = OptionalAttribute("w:locked", ST_OnOff) + name = RequiredAttribute("w:name", ST_String) + qFormat = OptionalAttribute("w:qFormat", ST_OnOff) + semiHidden = OptionalAttribute("w:semiHidden", ST_OnOff) + uiPriority = OptionalAttribute("w:uiPriority", ST_DecimalNumber) + unhideWhenUsed = OptionalAttribute("w:unhideWhenUsed", ST_OnOff) + + def delete(self): + """Remove this `w:lsdException` element from the XML document.""" + self.getparent().remove(self) + + def on_off_prop(self, attr_name): + """Return the boolean value of the attribute having `attr_name`, or |None| if + not present.""" + return getattr(self, attr_name) + + def set_on_off_prop(self, attr_name, value): + """Set the on/off attribute having `attr_name` to `value`.""" + setattr(self, attr_name, value) + + +class CT_Style(BaseOxmlElement): + """A ``<w:style>`` element, representing a style definition.""" + + _tag_seq = ( + "w:name", + "w:aliases", + "w:basedOn", + "w:next", + "w:link", + "w:autoRedefine", + "w:hidden", + "w:uiPriority", + "w:semiHidden", + "w:unhideWhenUsed", + "w:qFormat", + "w:locked", + "w:personal", + "w:personalCompose", + "w:personalReply", + "w:rsid", + "w:pPr", + "w:rPr", + "w:tblPr", + "w:trPr", + "w:tcPr", + "w:tblStylePr", + ) + name = ZeroOrOne("w:name", successors=_tag_seq[1:]) + basedOn = ZeroOrOne("w:basedOn", successors=_tag_seq[3:]) + next = ZeroOrOne("w:next", successors=_tag_seq[4:]) + uiPriority = ZeroOrOne("w:uiPriority", successors=_tag_seq[8:]) + semiHidden = ZeroOrOne("w:semiHidden", successors=_tag_seq[9:]) + unhideWhenUsed = ZeroOrOne("w:unhideWhenUsed", successors=_tag_seq[10:]) + qFormat = ZeroOrOne("w:qFormat", 
successors=_tag_seq[11:]) + locked = ZeroOrOne("w:locked", successors=_tag_seq[12:]) + pPr = ZeroOrOne("w:pPr", successors=_tag_seq[17:]) + rPr = ZeroOrOne("w:rPr", successors=_tag_seq[18:]) + del _tag_seq + + type: WD_STYLE_TYPE | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:type", WD_STYLE_TYPE + ) + styleId: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:styleId", ST_String + ) + default = OptionalAttribute("w:default", ST_OnOff) + customStyle = OptionalAttribute("w:customStyle", ST_OnOff) + + @property + def basedOn_val(self): + """Value of `w:basedOn/@w:val` or |None| if not present.""" + basedOn = self.basedOn + if basedOn is None: + return None + return basedOn.val + + @basedOn_val.setter + def basedOn_val(self, value): + if value is None: + self._remove_basedOn() + else: + self.get_or_add_basedOn().val = value + + @property + def base_style(self): + """Sibling CT_Style element this style is based on or |None| if no base style or + base style not found.""" + basedOn = self.basedOn + if basedOn is None: + return None + styles = self.getparent() + base_style = styles.get_by_id(basedOn.val) + if base_style is None: + return None + return base_style + + def delete(self): + """Remove this `w:style` element from its parent `w:styles` element.""" + self.getparent().remove(self) + + @property + def locked_val(self): + """Value of `w:locked/@w:val` or |False| if not present.""" + locked = self.locked + if locked is None: + return False + return locked.val + + @locked_val.setter + def locked_val(self, value): + self._remove_locked() + if bool(value) is True: + locked = self._add_locked() + locked.val = value + + @property + def name_val(self): + """Value of ``<w:name>`` child or |None| if not present.""" + name = self.name + if name is None: + return None + return name.val + + @name_val.setter + def name_val(self, value): + self._remove_name() + if value is not None: + name = self._add_name() + name.val = value + 
+ @property + def next_style(self): + """Sibling CT_Style element identified by the value of `w:name/@w:val` or |None| + if no value is present or no style with that style id is found.""" + next = self.next + if next is None: + return None + styles = self.getparent() + return styles.get_by_id(next.val) # None if not found + + @property + def qFormat_val(self): + """Value of `w:qFormat/@w:val` or |False| if not present.""" + qFormat = self.qFormat + if qFormat is None: + return False + return qFormat.val + + @qFormat_val.setter + def qFormat_val(self, value): + self._remove_qFormat() + if bool(value): + self._add_qFormat() + + @property + def semiHidden_val(self): + """Value of ``<w:semiHidden>`` child or |False| if not present.""" + semiHidden = self.semiHidden + if semiHidden is None: + return False + return semiHidden.val + + @semiHidden_val.setter + def semiHidden_val(self, value): + self._remove_semiHidden() + if bool(value) is True: + semiHidden = self._add_semiHidden() + semiHidden.val = value + + @property + def uiPriority_val(self): + """Value of ``<w:uiPriority>`` child or |None| if not present.""" + uiPriority = self.uiPriority + if uiPriority is None: + return None + return uiPriority.val + + @uiPriority_val.setter + def uiPriority_val(self, value): + self._remove_uiPriority() + if value is not None: + uiPriority = self._add_uiPriority() + uiPriority.val = value + + @property + def unhideWhenUsed_val(self): + """Value of `w:unhideWhenUsed/@w:val` or |False| if not present.""" + unhideWhenUsed = self.unhideWhenUsed + if unhideWhenUsed is None: + return False + return unhideWhenUsed.val + + @unhideWhenUsed_val.setter + def unhideWhenUsed_val(self, value): + self._remove_unhideWhenUsed() + if bool(value) is True: + unhideWhenUsed = self._add_unhideWhenUsed() + unhideWhenUsed.val = value + + +class CT_Styles(BaseOxmlElement): + """``<w:styles>`` element, the root element of a styles part, i.e. 
styles.xml.""" + + _tag_seq = ("w:docDefaults", "w:latentStyles", "w:style") + latentStyles = ZeroOrOne("w:latentStyles", successors=_tag_seq[2:]) + style = ZeroOrMore("w:style", successors=()) + del _tag_seq + + def add_style_of_type(self, name, style_type, builtin): + """Return a newly added `w:style` element having `name` and `style_type`. + + `w:style/@customStyle` is set based on the value of `builtin`. + """ + style = self.add_style() + style.type = style_type + style.customStyle = None if builtin else True + style.styleId = styleId_from_name(name) + style.name_val = name + return style + + def default_for(self, style_type): + """Return `w:style[@w:type="*{style_type}*][-1]` or |None| if not found.""" + default_styles_for_type = [ + s for s in self._iter_styles() if s.type == style_type and s.default + ] + if not default_styles_for_type: + return None + # spec calls for last default in document order + return default_styles_for_type[-1] + + def get_by_id(self, styleId: str) -> CT_Style | None: + """`w:style` child where @styleId = `styleId`. + + |None| if not found. + """ + xpath = f'w:style[@w:styleId="{styleId}"]' + return next(iter(self.xpath(xpath)), None) + + def get_by_name(self, name: str) -> CT_Style | None: + """`w:style` child with `w:name` grandchild having value `name`. + + |None| if not found. 
+ """ + xpath = 'w:style[w:name/@w:val="%s"]' % name + return next(iter(self.xpath(xpath)), None) + + def _iter_styles(self): + """Generate each of the `w:style` child elements in document order.""" + return (style for style in self.xpath("w:style")) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/table.py b/.venv/lib/python3.12/site-packages/docx/oxml/table.py new file mode 100644 index 00000000..e38d5856 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/table.py @@ -0,0 +1,977 @@ +"""Custom element classes for tables.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable, cast + +from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT, WD_ROW_HEIGHT_RULE, WD_TABLE_DIRECTION +from docx.exceptions import InvalidSpanError +from docx.oxml.ns import nsdecls, qn +from docx.oxml.parser import parse_xml +from docx.oxml.shared import CT_DecimalNumber +from docx.oxml.simpletypes import ( + ST_Merge, + ST_TblLayoutType, + ST_TblWidth, + ST_TwipsMeasure, + XsdInt, +) +from docx.oxml.text.paragraph import CT_P +from docx.oxml.xmlchemy import ( + BaseOxmlElement, + OneAndOnlyOne, + OneOrMore, + OptionalAttribute, + RequiredAttribute, + ZeroOrMore, + ZeroOrOne, +) +from docx.shared import Emu, Length, Twips + +if TYPE_CHECKING: + from docx.enum.table import WD_TABLE_ALIGNMENT + from docx.enum.text import WD_ALIGN_PARAGRAPH + from docx.oxml.shared import CT_OnOff, CT_String + from docx.oxml.text.parfmt import CT_Jc + + +class CT_Height(BaseOxmlElement): + """Used for `w:trHeight` to specify a row height and row height rule.""" + + val: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:val", ST_TwipsMeasure + ) + hRule: WD_ROW_HEIGHT_RULE | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:hRule", WD_ROW_HEIGHT_RULE + ) + + +class CT_Row(BaseOxmlElement): + """``<w:tr>`` element.""" + + add_tc: Callable[[], CT_Tc] + get_or_add_trPr: Callable[[], CT_TrPr] + _add_trPr: 
Callable[[], CT_TrPr] + + tc_lst: list[CT_Tc] + # -- custom inserter below -- + tblPrEx: CT_TblPrEx | None = ZeroOrOne("w:tblPrEx") # pyright: ignore[reportAssignmentType] + # -- custom inserter below -- + trPr: CT_TrPr | None = ZeroOrOne("w:trPr") # pyright: ignore[reportAssignmentType] + tc = ZeroOrMore("w:tc") + + @property + def grid_after(self) -> int: + """The number of unpopulated layout-grid cells at the end of this row.""" + trPr = self.trPr + if trPr is None: + return 0 + return trPr.grid_after + + @property + def grid_before(self) -> int: + """The number of unpopulated layout-grid cells at the start of this row.""" + trPr = self.trPr + if trPr is None: + return 0 + return trPr.grid_before + + def tc_at_grid_offset(self, grid_offset: int) -> CT_Tc: + """The `tc` element in this tr at exact `grid offset`. + + Raises ValueError when this `w:tr` contains no `w:tc` with exact starting `grid_offset`. + """ + # -- account for omitted cells at the start of the row -- + remaining_offset = grid_offset - self.grid_before + + for tc in self.tc_lst: + # -- We've gone past grid_offset without finding a tc, no sense searching further. -- + if remaining_offset < 0: + break + # -- We've arrived at grid_offset, this is the `w:tc` we're looking for. -- + if remaining_offset == 0: + return tc + # -- We're not there yet, skip forward the number of layout-grid cells this cell + # -- occupies. 
+ remaining_offset -= tc.grid_span + + raise ValueError(f"no `tc` element at grid_offset={grid_offset}") + + @property + def tr_idx(self) -> int: + """Index of this `w:tr` element within its parent `w:tbl` element.""" + tbl = cast(CT_Tbl, self.getparent()) + return tbl.tr_lst.index(self) + + @property + def trHeight_hRule(self) -> WD_ROW_HEIGHT_RULE | None: + """The value of `./w:trPr/w:trHeight/@w:hRule`, or |None| if not present.""" + trPr = self.trPr + if trPr is None: + return None + return trPr.trHeight_hRule + + @trHeight_hRule.setter + def trHeight_hRule(self, value: WD_ROW_HEIGHT_RULE | None): + trPr = self.get_or_add_trPr() + trPr.trHeight_hRule = value + + @property + def trHeight_val(self): + """Return the value of `w:trPr/w:trHeight@w:val`, or |None| if not present.""" + trPr = self.trPr + if trPr is None: + return None + return trPr.trHeight_val + + @trHeight_val.setter + def trHeight_val(self, value: Length | None): + trPr = self.get_or_add_trPr() + trPr.trHeight_val = value + + def _insert_tblPrEx(self, tblPrEx: CT_TblPrEx): + self.insert(0, tblPrEx) + + def _insert_trPr(self, trPr: CT_TrPr): + tblPrEx = self.tblPrEx + if tblPrEx is not None: + tblPrEx.addnext(trPr) + else: + self.insert(0, trPr) + + def _new_tc(self): + return CT_Tc.new() + + +class CT_Tbl(BaseOxmlElement): + """``<w:tbl>`` element.""" + + add_tr: Callable[[], CT_Row] + tr_lst: list[CT_Row] + + tblPr: CT_TblPr = OneAndOnlyOne("w:tblPr") # pyright: ignore[reportAssignmentType] + tblGrid: CT_TblGrid = OneAndOnlyOne("w:tblGrid") # pyright: ignore[reportAssignmentType] + tr = ZeroOrMore("w:tr") + + @property + def bidiVisual_val(self) -> bool | None: + """Value of `./w:tblPr/w:bidiVisual/@w:val` or |None| if not present. + + Controls whether table cells are displayed right-to-left or left-to-right. 
+ """ + bidiVisual = self.tblPr.bidiVisual + if bidiVisual is None: + return None + return bidiVisual.val + + @bidiVisual_val.setter + def bidiVisual_val(self, value: WD_TABLE_DIRECTION | None): + tblPr = self.tblPr + if value is None: + tblPr._remove_bidiVisual() # pyright: ignore[reportPrivateUsage] + else: + tblPr.get_or_add_bidiVisual().val = bool(value) + + @property + def col_count(self): + """The number of grid columns in this table.""" + return len(self.tblGrid.gridCol_lst) + + def iter_tcs(self): + """Generate each of the `w:tc` elements in this table, left to right and top to + bottom. + + Each cell in the first row is generated, followed by each cell in the second + row, etc. + """ + for tr in self.tr_lst: + for tc in tr.tc_lst: + yield tc + + @classmethod + def new_tbl(cls, rows: int, cols: int, width: Length) -> CT_Tbl: + """Return a new `w:tbl` element having `rows` rows and `cols` columns. + + `width` is distributed evenly between the columns. + """ + return cast(CT_Tbl, parse_xml(cls._tbl_xml(rows, cols, width))) + + @property + def tblStyle_val(self) -> str | None: + """`w:tblPr/w:tblStyle/@w:val` (a table style id) or |None| if not present.""" + tblStyle = self.tblPr.tblStyle + if tblStyle is None: + return None + return tblStyle.val + + @tblStyle_val.setter + def tblStyle_val(self, styleId: str | None) -> None: + """Set the value of `w:tblPr/w:tblStyle/@w:val` (a table style id) to `styleId`. + + If `styleId` is None, remove the `w:tblStyle` element. 
+ """ + tblPr = self.tblPr + tblPr._remove_tblStyle() # pyright: ignore[reportPrivateUsage] + if styleId is None: + return + tblPr._add_tblStyle().val = styleId # pyright: ignore[reportPrivateUsage] + + @classmethod + def _tbl_xml(cls, rows: int, cols: int, width: Length) -> str: + col_width = Emu(width // cols) if cols > 0 else Emu(0) + return ( + f"<w:tbl {nsdecls('w')}>\n" + f" <w:tblPr>\n" + f' <w:tblW w:type="auto" w:w="0"/>\n' + f' <w:tblLook w:firstColumn="1" w:firstRow="1"\n' + f' w:lastColumn="0" w:lastRow="0" w:noHBand="0"\n' + f' w:noVBand="1" w:val="04A0"/>\n' + f" </w:tblPr>\n" + f"{cls._tblGrid_xml(cols, col_width)}" + f"{cls._trs_xml(rows, cols, col_width)}" + f"</w:tbl>\n" + ) + + @classmethod + def _tblGrid_xml(cls, col_count: int, col_width: Length) -> str: + xml = " <w:tblGrid>\n" + for _ in range(col_count): + xml += ' <w:gridCol w:w="%d"/>\n' % col_width.twips + xml += " </w:tblGrid>\n" + return xml + + @classmethod + def _trs_xml(cls, row_count: int, col_count: int, col_width: Length) -> str: + return f" <w:tr>\n{cls._tcs_xml(col_count, col_width)} </w:tr>\n" * row_count + + @classmethod + def _tcs_xml(cls, col_count: int, col_width: Length) -> str: + return ( + f" <w:tc>\n" + f" <w:tcPr>\n" + f' <w:tcW w:type="dxa" w:w="{col_width.twips}"/>\n' + f" </w:tcPr>\n" + f" <w:p/>\n" + f" </w:tc>\n" + ) * col_count + + +class CT_TblGrid(BaseOxmlElement): + """`w:tblGrid` element. + + Child of `w:tbl`, holds `w:gridCol> elements that define column count, width, etc. 
+ """ + + add_gridCol: Callable[[], CT_TblGridCol] + gridCol_lst: list[CT_TblGridCol] + + gridCol = ZeroOrMore("w:gridCol", successors=("w:tblGridChange",)) + + +class CT_TblGridCol(BaseOxmlElement): + """`w:gridCol` element, child of `w:tblGrid`, defines a table column.""" + + w: Length | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:w", ST_TwipsMeasure + ) + + @property + def gridCol_idx(self) -> int: + """Index of this `w:gridCol` element within its parent `w:tblGrid` element.""" + tblGrid = cast(CT_TblGrid, self.getparent()) + return tblGrid.gridCol_lst.index(self) + + +class CT_TblLayoutType(BaseOxmlElement): + """`w:tblLayout` element. + + Specifies whether column widths are fixed or can be automatically adjusted based on + content. + """ + + type: str | None = OptionalAttribute( # pyright: ignore[reportAssignmentType] + "w:type", ST_TblLayoutType + ) + + +class CT_TblPr(BaseOxmlElement): + """``<w:tblPr>`` element, child of ``<w:tbl>``, holds child elements that define + table properties such as style and borders.""" + + get_or_add_bidiVisual: Callable[[], CT_OnOff] + get_or_add_jc: Callable[[], CT_Jc] + get_or_add_tblLayout: Callable[[], CT_TblLayoutType] + _add_tblStyle: Callable[[], CT_String] + _remove_bidiVisual: Callable[[], None] + _remove_jc: Callable[[], None] + _remove_tblStyle: Callable[[], None] + + _tag_seq = ( + "w:tblStyle", + "w:tblpPr", + "w:tblOverlap", + "w:bidiVisual", + "w:tblStyleRowBandSize", + "w:tblStyleColBandSize", + "w:tblW", + "w:jc", + "w:tblCellSpacing", + "w:tblInd", + "w:tblBorders", + "w:shd", + "w:tblLayout", + "w:tblCellMar", + "w:tblLook", + "w:tblCaption", + "w:tblDescription", + "w:tblPrChange", + ) + tblStyle: CT_String | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:tblStyle", successors=_tag_seq[1:] + ) + bidiVisual: CT_OnOff | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:bidiVisual", successors=_tag_seq[4:] + ) + jc: CT_Jc | None = ZeroOrOne( # pyright: 
ignore[reportAssignmentType] + "w:jc", successors=_tag_seq[8:] + ) + tblLayout: CT_TblLayoutType | None = ZeroOrOne( # pyright: ignore[reportAssignmentType] + "w:tblLayout", successors=_tag_seq[13:] + ) + del _tag_seq + + @property + def alignment(self) -> WD_TABLE_ALIGNMENT | None: + """Horizontal alignment of table, |None| if `./w:jc` is not present.""" + jc = self.jc + if jc is None: + return None + return cast("WD_TABLE_ALIGNMENT | None", jc.val) + + @alignment.setter + def alignment(self, value: WD_TABLE_ALIGNMENT | None): + self._remove_jc() + if value is None: + return + jc = self.get_or_add_jc() + jc.val = cast("WD_ALIGN_PARAGRAPH", value) + + @property + def autofit(self) -> bool: + """|False| when there is a `w:tblLayout` child with `@w:type="fixed"`. + + Otherwise |True|. + """ + tblLayout = self.tblLayout + return True if tblLayout is None else tblLayout.type != "fixed" + + @autofit.setter + def autofit(self, value: bool): + tblLayout = self.get_or_add_tblLayout() + tblLayout.type = "autofit" if value else "fixed" + + @property + def style(self): + """Return the value of the ``val`` attribute of the ``<w:tblStyle>`` child or + |None| if not present.""" + tblStyle = self.tblStyle + if tblStyle is None: + return None + return tblStyle.val + + @style.setter + def style(self, value: str | None): + self._remove_tblStyle() + if value is None: + return + self._add_tblStyle().val = value + + +class CT_TblPrEx(BaseOxmlElement): + """`w:tblPrEx` element, exceptions to table-properties. + + Applied at a lower level, like a `w:tr` to modify the appearance. Possibly used when + two tables are merged. For more see: + http://officeopenxml.com/WPtablePropertyExceptions.php + """ + + +class CT_TblWidth(BaseOxmlElement): + """Used for `w:tblW` and `w:tcW` and others, specifies a table-related width.""" + + # the type for `w` attr is actually ST_MeasurementOrPercent, but using + # XsdInt for now because only dxa (twips) values are being used. 
@property
def bottom(self) -> int:
    """Row index one past the last row covered by this cell's vertical span.

    This works like the stop value of a slice over the cell's rows: a cell with no
    `vMerge` value in the row at index `n` has `bottom == n + 1`.
    """
    tc = self
    # -- walk down the grid column for as long as the cell below continues the merge --
    while tc.vMerge is not None:
        below = tc._tc_below
        if below is None or below.vMerge != ST_Merge.CONTINUE:
            break
        tc = below
    return tc._tr_idx + 1
@property
def inner_content_elements(self) -> list[CT_P | CT_Tbl]:
    """All `w:p` and `w:tbl` elements that are direct children of this cell.

    Elements appear in document order. Only direct children are matched, so elements
    nested inside a `w:ins` or other "wrapper" element are not included.
    """
    block_children = self.xpath("./w:p | ./w:tbl")
    return block_children
@property
def top(self) -> int:
    """The top-most row index in the vertical span of this cell.

    A cell with no `vMerge` value, or one that restarts a merge, is its own top. Any
    other cell defers to the cell above it in the same grid column, so looking that
    cell up propagates the |ValueError| `._tr_above` raises in the top-most row.
    """
    tc = self
    # -- climb the grid column until reaching the cell that starts the span --
    while not (tc.vMerge is None or tc.vMerge == ST_Merge.RESTART):
        tc = tc._tc_above
    return tc._tr_idx
@property
def _is_empty(self) -> bool:
    """True if this cell contains only a single empty `w:p` element.

    A paragraph is considered empty when its `r_lst` has no items. Raises |IndexError|
    when the cell has no block items at all, because the single item is fetched by
    index.
    """
    items = list(self.iter_block_items())
    if len(items) > 1:
        return False
    # -- a cell is expected to hold at least one block item, but that item may be a
    # -- `w:tbl` or `w:sdt` rather than a `w:p`
    sole_item = items[0]
    return isinstance(sole_item, CT_P) and len(sole_item.r_lst) == 0
def _remove_trailing_empty_p(self):
    """Remove last content element from this cell if it's an empty `w:p` element.

    A paragraph counts as empty when its `r_lst` has no items. Does nothing when the
    last block item is not a paragraph, when that paragraph has runs, or when the cell
    has no block items at all.
    """
    block_items = list(self.iter_block_items())
    # -- a cell stripped of its content (e.g. by `clear_content()`) has nothing to
    # -- trim; indexing `[-1]` on the empty list would raise IndexError
    if not block_items:
        return
    last_content_elm = block_items[-1]
    if not isinstance(last_content_elm, CT_P):
        return
    if len(last_content_elm.r_lst) > 0:
        return
    self.remove(last_content_elm)
def _span_to_width(self, grid_width: int, top_tc: CT_Tc, vMerge: str | None):
    """Incorporate `w:tc` elements to the right until this cell spans `grid_width`.

    Incorporated `w:tc` elements are removed (replaced by gridSpan value).

    Raises |InvalidSpanError| (from `._swallow_next_tc()`) if `grid_width` cannot be
    exactly achieved, such as when a merged cell would drive the span width greater
    than `grid_width` or if not enough grid columns are available to make this cell
    that wide. NOTE(review): this docstring previously named |ValueError|; whether
    `InvalidSpanError` derives from it is not visible here -- confirm.

    The content of this cell and of every incorporated cell is appended to `top_tc`.
    The val attribute of the vMerge element on the single remaining cell is set to
    `vMerge`. If `vMerge` is |None|, the vMerge element is removed if present.
    """
    # -- this cell's own content goes to `top_tc` first (a no-op when it is `top_tc`) --
    self._move_content_to(top_tc)
    # -- each pass absorbs exactly one following cell or raises --
    while self.grid_span < grid_width:
        self._swallow_next_tc(grid_width, top_tc)
    self.vMerge = vMerge
@property
def _tc_below(self) -> CT_Tc | None:
    """The `w:tc` at this cell's grid offset in the row below.

    |None| when there is no row below this cell's row.
    """
    next_row = self._tr_below
    return None if next_row is None else next_row.tc_at_grid_offset(self.grid_offset)
@property
def _tr_below(self) -> CT_Row | None:
    """The `w:tr` that follows this cell's row in the table's `tr_lst`.

    |None| when this cell appears in the last row.
    """
    rows = self._tbl.tr_lst
    next_idx = rows.index(self._tr) + 1
    return rows[next_idx] if next_idx < len(rows) else None
@property
def vAlign_val(self):
    """Value of the `w:val` attribute on the `w:vAlign` child.

    |None| when there is no `w:vAlign` child. The `w:val` attribute itself is required
    on `w:vAlign`.
    """
    vAlign_elm = self.vAlign
    return None if vAlign_elm is None else vAlign_elm.val


@vAlign_val.setter
def vAlign_val(self, value: WD_CELL_VERTICAL_ALIGNMENT | None):
    if value is not None:
        self.get_or_add_vAlign().val = value
        return
    # -- |None| means "no explicit vertical alignment": drop the element entirely --
    self._remove_vAlign()
@property
def trHeight_hRule(self) -> WD_ROW_HEIGHT_RULE | None:
    """Value of `w:trHeight/@w:hRule`, |None| when there is no `w:trHeight` child."""
    height_elm = self.trHeight
    if height_elm is None:
        return None
    return height_elm.hRule


@trHeight_hRule.setter
def trHeight_hRule(self, value: WD_ROW_HEIGHT_RULE | None):
    # -- nothing to clear when `w:trHeight` is absent; don't add an element for it --
    if value is None and self.trHeight is None:
        return
    self.get_or_add_trHeight().hRule = value
class CT_Color(BaseOxmlElement):
    """`w:color` element, specifying the color of a font and perhaps other objects."""

    # -- required `w:val` attribute, declared with the `ST_HexColor` simple type --
    val = RequiredAttribute("w:val", ST_HexColor)
    # -- optional `w:themeColor` attribute, declared with the `MSO_THEME_COLOR` enum --
    themeColor = OptionalAttribute("w:themeColor", MSO_THEME_COLOR)
class CT_HpsMeasure(BaseOxmlElement):
    """Used for `<w:sz>` element and others, specifying font size in half-points."""

    # -- required `w:val` attribute, declared with the `ST_HpsMeasure` simple type and
    # -- annotated as a `Length` on the Python side
    val: Length = RequiredAttribute(  # pyright: ignore[reportGeneralTypeIssues]
        "w:val", ST_HpsMeasure
    )
ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues] + "w:rFonts", successors=_tag_seq[2:] + ) + b: CT_OnOff | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues] + "w:b", successors=_tag_seq[3:] + ) + bCs = ZeroOrOne("w:bCs", successors=_tag_seq[4:]) + i = ZeroOrOne("w:i", successors=_tag_seq[5:]) + iCs = ZeroOrOne("w:iCs", successors=_tag_seq[6:]) + caps = ZeroOrOne("w:caps", successors=_tag_seq[7:]) + smallCaps = ZeroOrOne("w:smallCaps", successors=_tag_seq[8:]) + strike = ZeroOrOne("w:strike", successors=_tag_seq[9:]) + dstrike = ZeroOrOne("w:dstrike", successors=_tag_seq[10:]) + outline = ZeroOrOne("w:outline", successors=_tag_seq[11:]) + shadow = ZeroOrOne("w:shadow", successors=_tag_seq[12:]) + emboss = ZeroOrOne("w:emboss", successors=_tag_seq[13:]) + imprint = ZeroOrOne("w:imprint", successors=_tag_seq[14:]) + noProof = ZeroOrOne("w:noProof", successors=_tag_seq[15:]) + snapToGrid = ZeroOrOne("w:snapToGrid", successors=_tag_seq[16:]) + vanish = ZeroOrOne("w:vanish", successors=_tag_seq[17:]) + webHidden = ZeroOrOne("w:webHidden", successors=_tag_seq[18:]) + color = ZeroOrOne("w:color", successors=_tag_seq[19:]) + sz: CT_HpsMeasure | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues] + "w:sz", successors=_tag_seq[24:] + ) + highlight: CT_Highlight | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues] + "w:highlight", successors=_tag_seq[26:] + ) + u: CT_Underline | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues] + "w:u", successors=_tag_seq[27:] + ) + vertAlign: CT_VerticalAlignRun | None = ZeroOrOne( # pyright: ignore[reportGeneralTypeIssues] + "w:vertAlign", successors=_tag_seq[32:] + ) + rtl = ZeroOrOne("w:rtl", successors=_tag_seq[33:]) + cs = ZeroOrOne("w:cs", successors=_tag_seq[34:]) + specVanish = ZeroOrOne("w:specVanish", successors=_tag_seq[38:]) + oMath = ZeroOrOne("w:oMath", successors=_tag_seq[39:]) + del _tag_seq + + def _new_color(self): + """Override metaclass method to set `w:color/@val` to RGB 
@property
def highlight_val(self) -> WD_COLOR_INDEX | None:
    """Value of `./w:highlight/@val`.

    This is the highlight color of the font, or |None| when there is no `w:highlight`
    child.
    """
    highlight_elm = self.highlight
    return None if highlight_elm is None else highlight_elm.val


@highlight_val.setter
def highlight_val(self, value: WD_COLOR_INDEX | None) -> None:
    if value is not None:
        self.get_or_add_highlight().val = value
    else:
        # -- |None| removes the `w:highlight` element altogether --
        self._remove_highlight()
@property
def subscript(self) -> bool | None:
    """|True| if `./w:vertAlign/@w:val` is "subscript".

    |False| if `w:vertAlign/@w:val` has any other value. |None| if `w:vertAlign` is not
    present.
    """
    vert_align = self.vertAlign
    if vert_align is None:
        return None
    return vert_align.val == ST_VerticalAlignRun.SUBSCRIPT


@subscript.setter
def subscript(self, value: bool | None) -> None:
    if value is None:
        self._remove_vertAlign()
        return
    if value:
        self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT
        return
    # -- falsy value: only clear `w:vertAlign` when it currently says "subscript", so
    # -- any other setting is left untouched
    current = self.vertAlign
    if current is not None and current.val == ST_VerticalAlignRun.SUBSCRIPT:
        self._remove_vertAlign()
def _get_bool_val(self, name: str) -> bool | None:
    """Value of the boolean child element exposed as attribute `name`.

    `name` is the Python attribute name of the child element because it is passed to
    `getattr()`, e.g. "b", "i" or "smallCaps" rather than a prefixed tag like "w:b".
    |None| when that attribute is |None|.
    """
    child = getattr(self, name)
    return None if child is None else child.val
@property
def text(self) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
    """The textual content of this hyperlink.

    `CT_Hyperlink` stores the hyperlink-text as one or more `w:r` children; the text
    of each is concatenated in the order the XPath query returns them.
    """
    runs = self.xpath("w:r")
    return "".join(run.text for run in runs)
@property
def follows_all_content(self) -> bool:
    """True when this page-break element is the last "content" in the paragraph.

    This is a very uncommon case that may only occur in contrived documents or where
    the XML was edited by hand, but it is not precluded by the spec.
    """
    # -- a page-break inside a hyperlink never meets these criteria (for our
    # -- purposes at least) because it is considered "atomic" and always associated
    # -- with the page it starts on.
    if self._is_in_hyperlink:
        return False

    # -- XPath will match zero-or-one w:lastRenderedPageBreak element --
    trailing_lrpb_xpath = (
        # -- in last run of paragraph --
        "(./w:r)[last()]"
        # -- all page-breaks --
        "/w:lastRenderedPageBreak"
        # -- that are not followed by any content-bearing elements --
        f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
    )
    return bool(self._enclosing_p.xpath(trailing_lrpb_xpath))
+ """ + # -- a page-break inside a hyperlink never meets these criteria because there + # -- is always part of the hyperlink text before the page-break. + if self._is_in_hyperlink: + return False + + return bool( + # -- XPath will match zero-or-one w:lastRenderedPageBreak element -- + self._enclosing_p.xpath( + # -- in first run of paragraph -- + f"./w:r[1]" + # -- all page-breaks -- + f"/w:lastRenderedPageBreak" + # -- that are not preceded by any content-bearing elements -- + f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]" + ) + ) + + @property + def preceding_fragment_p(self) -> CT_P: + """A "loose" `CT_P` containing only the paragraph content before this break. + + Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered + paragraph in its paragraph. + + The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this + page-break with this `w:lastRenderedPageBreak` element and all its following + siblings removed. + """ + if not self == self._first_lrpb_in_p(self._enclosing_p): + raise ValueError("only defined on first rendered page-break in paragraph") + + # -- splitting approach is different when break is inside a hyperlink -- + return ( + self._preceding_frag_in_hlink + if self._is_in_hyperlink + else self._preceding_frag_in_run + ) + + def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink: + """The `w:hyperlink` grandparent of this `w:lastRenderedPageBreak`. + + Raises `IndexError` when this page-break has a `w:p` grandparent, so only call + when `._is_in_hyperlink` is True. + """ + return lrpb.xpath("./parent::w:r/parent::w:hyperlink")[0] + + @property + def _enclosing_p(self) -> CT_P: + """The `w:p` element parent or grandparent of this `w:lastRenderedPageBreak`.""" + return self.xpath("./ancestor::w:p[1]")[0] + + def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak: + """The first `w:lastRenderedPageBreak` element in `p`. 
+ + Raises `ValueError` if there are no rendered page-breaks in `p`. + """ + lrpbs = p.xpath( + "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak" + ) + if not lrpbs: + raise ValueError("no rendered page-breaks in paragraph element") + return lrpbs[0] + + @lazyproperty + def _following_frag_in_hlink(self) -> CT_P: + """Following CT_P fragment when break occurs within a hyperlink. + + Note this is a *partial-function* and raises when `lrpb` is not inside a + hyperlink. + """ + if not self._is_in_hyperlink: + raise ValueError("only defined on a rendered page-break in a hyperlink") + + # -- work on a clone `w:p` so our mutations don't persist -- + p = copy.deepcopy(self._enclosing_p) + + # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) -- + lrpb = self._first_lrpb_in_p(p) + + # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found -- + hyperlink = lrpb._enclosing_hyperlink(lrpb) + + # -- delete all w:p inner-content preceding the hyperlink -- + for e in hyperlink.xpath("./preceding-sibling::*[not(self::w:pPr)]"): + p.remove(e) + + # -- remove the whole hyperlink, it belongs to the preceding-fragment-p -- + hyperlink.getparent().remove(hyperlink) + + # -- that's it, return the remaining fragment of `w:p` clone -- + return p + + @lazyproperty + def _following_frag_in_run(self) -> CT_P: + """following CT_P fragment when break does not occur in a hyperlink. + + Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink. 
+ """ + if self._is_in_hyperlink: + raise ValueError("only defined on a rendered page-break not in a hyperlink") + + # -- work on a clone `w:p` so our mutations don't persist -- + p = copy.deepcopy(self._enclosing_p) + + # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) -- + lrpb = self._first_lrpb_in_p(p) + + # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found -- + enclosing_r = lrpb.xpath("./parent::w:r")[0] + + # -- delete all w:p inner-content preceding that run (but not w:pPr) -- + for e in enclosing_r.xpath("./preceding-sibling::*[not(self::w:pPr)]"): + p.remove(e) + + # -- then remove all run inner-content preceding this lrpb in its run (but not + # -- the `w:rPr`) and also remove the page-break itself + for e in lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"): + enclosing_r.remove(e) + enclosing_r.remove(lrpb) + + return p + + @lazyproperty + def _is_in_hyperlink(self) -> bool: + """True when this page-break is embedded in a hyperlink run.""" + return bool(self.xpath("./parent::w:r/parent::w:hyperlink")) + + @lazyproperty + def _preceding_frag_in_hlink(self) -> CT_P: + """Preceding CT_P fragment when break occurs within a hyperlink. + + Note this is a *partial-function* and raises when `lrpb` is not inside a + hyperlink. 
+ """ + if not self._is_in_hyperlink: + raise ValueError("only defined on a rendered page-break in a hyperlink") + + # -- work on a clone `w:p` so our mutations don't persist -- + p = copy.deepcopy(self._enclosing_p) + + # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) -- + lrpb = self._first_lrpb_in_p(p) + + # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found -- + hyperlink = lrpb._enclosing_hyperlink(lrpb) + + # -- delete all w:p inner-content following the hyperlink -- + for e in hyperlink.xpath("./following-sibling::*"): + p.remove(e) + + # -- remove this page-break from inside the hyperlink -- + lrpb.getparent().remove(lrpb) + + # -- that's it, the entire hyperlink goes into the preceding fragment so + # -- the hyperlink is not "split". + return p + + @lazyproperty + def _preceding_frag_in_run(self) -> CT_P: + """Preceding CT_P fragment when break does not occur in a hyperlink. + + Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink. 
+ """ + if self._is_in_hyperlink: + raise ValueError("only defined on a rendered page-break not in a hyperlink") + + # -- work on a clone `w:p` so our mutations don't persist -- + p = copy.deepcopy(self._enclosing_p) + + # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) -- + lrpb = self._first_lrpb_in_p(p) + + # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found -- + enclosing_r = lrpb.xpath("./parent::w:r")[0] + + # -- delete all `w:p` inner-content following that run -- + for e in enclosing_r.xpath("./following-sibling::*"): + p.remove(e) + + # -- then delete all `w:r` inner-content following this lrpb in its run and + # -- also remove the page-break itself + for e in lrpb.xpath("./following-sibling::*"): + enclosing_r.remove(e) + enclosing_r.remove(lrpb) + + return p + + @lazyproperty + def _run_inner_content_xpath(self) -> str: + """XPath fragment matching any run inner-content elements.""" + return ( + "self::w:br" + " | self::w:cr" + " | self::w:drawing" + " | self::w:noBreakHyphen" + " | self::w:ptab" + " | self::w:t" + " | self::w:tab" + ) diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py new file mode 100644 index 00000000..63e96f31 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/paragraph.py @@ -0,0 +1,106 @@ +# pyright: reportPrivateUsage=false + +"""Custom element classes related to paragraphs (CT_P).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable, List, cast + +from docx.oxml.parser import OxmlElement +from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne + +if TYPE_CHECKING: + from docx.enum.text import WD_PARAGRAPH_ALIGNMENT + from docx.oxml.section import CT_SectPr + from docx.oxml.text.hyperlink import CT_Hyperlink + from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak + from docx.oxml.text.parfmt import CT_PPr + from docx.oxml.text.run 
class CT_P(BaseOxmlElement):
    """`<w:p>` element, holding the properties and inner content of a paragraph."""

    add_r: Callable[[], CT_R]
    get_or_add_pPr: Callable[[], CT_PPr]
    hyperlink_lst: List[CT_Hyperlink]
    r_lst: List[CT_R]

    pPr: CT_PPr | None = ZeroOrOne("w:pPr")  # pyright: ignore[reportAssignmentType]
    hyperlink = ZeroOrMore("w:hyperlink")
    r = ZeroOrMore("w:r")

    def add_p_before(self) -> CT_P:
        """Insert a new, empty `<w:p>` immediately before this one and return it."""
        sibling_p = cast(CT_P, OxmlElement("w:p"))
        self.addprevious(sibling_p)
        return sibling_p

    @property
    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
        """Justification read from the `w:pPr` child, |None| when there is no `w:pPr`."""
        props = self.pPr
        return None if props is None else props.jc_val

    @alignment.setter
    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
        # -- a `w:pPr` child is created when not already present --
        self.get_or_add_pPr().jc_val = value

    def clear_content(self):
        """Remove every child element other than `<w:pPr>`."""
        removable = self.xpath("./*[not(self::w:pPr)]")
        for elm in removable:
            self.remove(elm)

    @property
    def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
        """`w:r` and `w:hyperlink` children of this paragraph, in document order."""
        xpath_expr = "./w:r | ./w:hyperlink"
        return self.xpath(xpath_expr)

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` descendants of this paragraph.

        Both breaks located in a direct-child run and breaks located in a run inside a
        hyperlink are included.
        """
        xpath_expr = (
            "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
        )
        return self.xpath(xpath_expr)

    def set_sectPr(self, sectPr: CT_SectPr):
        """Unconditionally replace or add `sectPr` as grandchild in correct sequence."""
        props = self.get_or_add_pPr()
        # -- drop any existing section properties before inserting the new ones --
        props._remove_sectPr()
        props._insert_sectPr(sectPr)

    @property
    def style(self) -> str | None:
        """Paragraph style read from the `w:pPr` child, |None| when there is no `w:pPr`."""
        props = self.pPr
        return None if props is None else props.style

    @style.setter
    def style(self, style: str | None):
        # -- a `w:pPr` child is created when not already present --
        self.get_or_add_pPr().style = style

    @property
    def text(self):  # pyright: ignore[reportIncompatibleMethodOverride]
        """The textual content of this paragraph.

        The `.text` values of the `w:r` and `w:hyperlink` children are concatenated in
        document order.
        """
        parts = [elm.text for elm in self.xpath("w:r | w:hyperlink")]
        return "".join(parts)

    def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
        # -- `w:pPr` is always placed as the first child --
        self.insert(0, pPr)
        return pPr
class CT_PPr(BaseOxmlElement):
    """``<w:pPr>`` element, containing the properties for a paragraph."""

    get_or_add_ind: Callable[[], CT_Ind]
    get_or_add_pStyle: Callable[[], CT_String]
    _insert_sectPr: Callable[[CT_SectPr], None]
    _remove_pStyle: Callable[[], None]
    _remove_sectPr: Callable[[], None]

    # -- child-element sequence; each declaration below passes the tags that follow
    # -- its own tag as `successors`, so the slice index is (position in tuple + 1) --
    _tag_seq = (
        "w:pStyle",
        "w:keepNext",
        "w:keepLines",
        "w:pageBreakBefore",
        "w:framePr",
        "w:widowControl",
        "w:numPr",
        "w:suppressLineNumbers",
        "w:pBdr",
        "w:shd",
        "w:tabs",
        "w:suppressAutoHyphens",
        "w:kinsoku",
        "w:wordWrap",
        "w:overflowPunct",
        "w:topLinePunct",
        "w:autoSpaceDE",
        "w:autoSpaceDN",
        "w:bidi",
        "w:adjustRightInd",
        "w:snapToGrid",
        "w:spacing",
        "w:ind",
        "w:contextualSpacing",
        "w:mirrorIndents",
        "w:suppressOverlap",
        "w:jc",
        "w:textDirection",
        "w:textAlignment",
        "w:textboxTightWrap",
        "w:outlineLvl",
        "w:divId",
        "w:cnfStyle",
        "w:rPr",
        "w:sectPr",
        "w:pPrChange",
    )
    pStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pStyle", successors=_tag_seq[1:]
    )
    keepNext = ZeroOrOne("w:keepNext", successors=_tag_seq[2:])
    keepLines = ZeroOrOne("w:keepLines", successors=_tag_seq[3:])
    pageBreakBefore = ZeroOrOne("w:pageBreakBefore", successors=_tag_seq[4:])
    widowControl = ZeroOrOne("w:widowControl", successors=_tag_seq[6:])
    numPr = ZeroOrOne("w:numPr", successors=_tag_seq[7:])
    tabs = ZeroOrOne("w:tabs", successors=_tag_seq[11:])
    spacing = ZeroOrOne("w:spacing", successors=_tag_seq[22:])
    ind: CT_Ind | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:ind", successors=_tag_seq[23:]
    )
    jc = ZeroOrOne("w:jc", successors=_tag_seq[27:])
    sectPr = ZeroOrOne("w:sectPr", successors=_tag_seq[35:])
    del _tag_seq

    @property
    def first_line_indent(self) -> Length | None:
        """First-line indent derived from `w:ind/@w:hanging` and `w:ind/@w:firstLine`.

        |None| when there is no `w:ind` child. A `w:hanging` value takes precedence and
        is reported as a negative |Length|; otherwise the `w:firstLine` value (possibly
        |None|) is returned.
        """
        ind = self.ind
        if ind is None:
            return None
        hanging = ind.hanging
        return ind.firstLine if hanging is None else Length(-hanging)

    @first_line_indent.setter
    def first_line_indent(self, value: Length | None):
        # -- nothing to clear and nothing to set --
        if value is None and self.ind is None:
            return
        ind = self.get_or_add_ind()
        # -- the two attributes are mutually exclusive here, so reset both first --
        ind.firstLine = None
        ind.hanging = None
        if value is None:
            return
        # -- a negative indent is stored as a positive hanging indent --
        if value < 0:
            ind.hanging = -value
        else:
            ind.firstLine = value

    @property
    def ind_left(self) -> Length | None:
        """Value of `w:ind/@w:left`, |None| when there is no `w:ind` child."""
        ind = self.ind
        return None if ind is None else ind.left

    @ind_left.setter
    def ind_left(self, value: Length | None):
        # -- don't create a `w:ind` element just to leave it empty --
        if self.ind is None and value is None:
            return
        self.get_or_add_ind().left = value

    @property
    def ind_right(self) -> Length | None:
        """Value of `w:ind/@w:right`, |None| when there is no `w:ind` child."""
        ind = self.ind
        return None if ind is None else ind.right

    @ind_right.setter
    def ind_right(self, value: Length | None):
        # -- don't create a `w:ind` element just to leave it empty --
        if self.ind is None and value is None:
            return
        self.get_or_add_ind().right = value

    @property
    def jc_val(self) -> WD_ALIGN_PARAGRAPH | None:
        """Value of the `<w:jc>` child element, |None| when that child is absent."""
        jc = self.jc
        return None if jc is None else jc.val

    @jc_val.setter
    def jc_val(self, value):
        # -- assigning None removes the `w:jc` child --
        if value is not None:
            self.get_or_add_jc().val = value
            return
        self._remove_jc()

    @property
    def keepLines_val(self):
        """Value of `keepLines/@val`, |None| when the `w:keepLines` child is absent."""
        elm = self.keepLines
        return None if elm is None else elm.val

    @keepLines_val.setter
    def keepLines_val(self, value):
        # -- assigning None removes the `w:keepLines` child --
        if value is None:
            self._remove_keepLines()
            return
        self.get_or_add_keepLines().val = value

    @property
    def keepNext_val(self):
        """Value of `keepNext/@val`, |None| when the `w:keepNext` child is absent."""
        elm = self.keepNext
        return None if elm is None else elm.val

    @keepNext_val.setter
    def keepNext_val(self, value):
        # -- assigning None removes the `w:keepNext` child --
        if value is None:
            self._remove_keepNext()
            return
        self.get_or_add_keepNext().val = value

    @property
    def pageBreakBefore_val(self):
        """Value of `pageBreakBefore/@val`, |None| when that child is absent."""
        elm = self.pageBreakBefore
        return None if elm is None else elm.val

    @pageBreakBefore_val.setter
    def pageBreakBefore_val(self, value):
        # -- assigning None removes the `w:pageBreakBefore` child --
        if value is None:
            self._remove_pageBreakBefore()
            return
        self.get_or_add_pageBreakBefore().val = value

    @property
    def spacing_after(self):
        """Value of `w:spacing/@w:after`, |None| when there is no `w:spacing` child."""
        spacing = self.spacing
        return None if spacing is None else spacing.after

    @spacing_after.setter
    def spacing_after(self, value):
        # -- don't create a `w:spacing` element just to leave it empty --
        if self.spacing is None and value is None:
            return
        self.get_or_add_spacing().after = value

    @property
    def spacing_before(self):
        """Value of `w:spacing/@w:before`, |None| when there is no `w:spacing` child."""
        spacing = self.spacing
        return None if spacing is None else spacing.before

    @spacing_before.setter
    def spacing_before(self, value):
        # -- don't create a `w:spacing` element just to leave it empty --
        if self.spacing is None and value is None:
            return
        self.get_or_add_spacing().before = value

    @property
    def spacing_line(self):
        """Value of `w:spacing/@w:line`, |None| when there is no `w:spacing` child."""
        spacing = self.spacing
        return None if spacing is None else spacing.line

    @spacing_line.setter
    def spacing_line(self, value):
        # -- don't create a `w:spacing` element just to leave it empty --
        if self.spacing is None and value is None:
            return
        self.get_or_add_spacing().line = value

    @property
    def spacing_lineRule(self):
        """Value of `w:spacing/@w:lineRule` as a member of the :ref:`WdLineSpacing`
        enumeration.

        |None| when there is no `w:spacing` child. When `w:lineRule` is absent but
        `w:line` is present, `WD_LINE_SPACING.MULTIPLE` is returned. Working out
        `SINGLE` or `DOUBLE` from the `w:line` value is left to the caller.
        """
        spacing = self.spacing
        if spacing is None:
            return None
        rule = spacing.lineRule
        if rule is not None:
            return rule
        # -- no explicit rule; a line value alone is reported as MULTIPLE --
        return None if spacing.line is None else WD_LINE_SPACING.MULTIPLE

    @spacing_lineRule.setter
    def spacing_lineRule(self, value):
        # -- don't create a `w:spacing` element just to leave it empty --
        if self.spacing is None and value is None:
            return
        self.get_or_add_spacing().lineRule = value

    @property
    def style(self) -> str | None:
        """String contained in `./w:pStyle/@val`, or None if child is not present."""
        pStyle = self.pStyle
        return None if pStyle is None else pStyle.val

    @style.setter
    def style(self, style: str | None):
        """Set `./w:pStyle/@val` to `style`, adding a new element if necessary.

        If `style` is |None|, remove `./w:pStyle` when present.
        """
        if style is not None:
            self.get_or_add_pStyle().val = style
            return
        self._remove_pStyle()

    @property
    def widowControl_val(self):
        """Value of `widowControl/@val`, |None| when that child is absent."""
        elm = self.widowControl
        return None if elm is None else elm.val

    @widowControl_val.setter
    def widowControl_val(self, value):
        # -- assigning None removes the `w:widowControl` child --
        if value is None:
            self._remove_widowControl()
            return
        self.get_or_add_widowControl().val = value
class CT_R(BaseOxmlElement):
    """`<w:r>` element, containing the properties and text for a run."""

    add_br: Callable[[], CT_Br]
    add_tab: Callable[[], CT_TabStop]
    get_or_add_rPr: Callable[[], CT_RPr]
    _add_drawing: Callable[[], CT_Drawing]
    _add_t: Callable[..., CT_Text]

    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
    br = ZeroOrMore("w:br")
    cr = ZeroOrMore("w:cr")
    drawing = ZeroOrMore("w:drawing")
    t = ZeroOrMore("w:t")
    tab = ZeroOrMore("w:tab")

    def add_t(self, text: str) -> CT_Text:
        """Return a newly added `<w:t>` element containing `text`.

        `xml:space="preserve"` is set on the new element when `text` has leading or
        trailing whitespace.
        """
        t = self._add_t(text=text)
        if text != text.strip():
            t.set(qn("xml:space"), "preserve")
        return t

    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
        """Return newly appended `CT_Drawing` (`w:drawing`) child element.

        `inline_or_anchor` is appended as a child of the new `w:drawing` element.
        """
        new_drawing = self._add_drawing()
        new_drawing.append(inline_or_anchor)
        return new_drawing

    def clear_content(self) -> None:
        """Remove all child elements except a `w:rPr` element if present."""
        removable = self.xpath("./*[not(self::w:rPr)]")
        for elm in removable:
            self.remove(elm)

    @property
    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
        """Text of run, possibly punctuated by `w:lastRenderedPageBreak` elements.

        `w:drawing` and `w:lastRenderedPageBreak` children appear in the result as
        elements; the `str()` of every other matched child is accumulated and emitted
        as text between them.
        """
        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak

        accum = TextAccumulator()
        items: List[str | CT_Drawing | CT_LastRenderedPageBreak] = []

        for child in self.xpath(
            "w:br | w:cr | w:drawing | w:lastRenderedPageBreak"
            " | w:noBreakHyphen | w:ptab | w:t | w:tab"
        ):
            if not isinstance(child, (CT_Drawing, CT_LastRenderedPageBreak)):
                accum.push(str(child))
                continue
            # -- emit any text gathered so far ahead of the non-text item --
            items.extend(accum.pop())
            items.append(child)

        # -- don't forget the "tail" string --
        items.extend(accum.pop())
        return items

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` children of this run."""
        xpath_expr = "./w:lastRenderedPageBreak"
        return self.xpath(xpath_expr)

    @property
    def style(self) -> str | None:
        """Character style read from the `w:rPr` child, |None| when there is no `w:rPr`."""
        props = self.rPr
        return None if props is None else props.style

    @style.setter
    def style(self, style: str | None):
        """Set character style of this `w:r` element to `style`.

        A `w:rPr` child is created when not already present, then `style` (which may be
        None) is assigned to it.
        """
        self.get_or_add_rPr().style = style

    @property
    def text(self) -> str:
        """The textual content of this run.

        The `str()` values of the text-bearing inner-content children (`w:br`, `w:cr`,
        `w:noBreakHyphen`, `w:ptab`, `w:t` and `w:tab`) are concatenated.
        """
        text_elms = self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
        return "".join(map(str, text_elms))

    @text.setter
    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
        # -- existing inner-content is replaced, any `w:rPr` is retained --
        self.clear_content()
        _RunContentAppender.append_to_run_from_text(self, text)

    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
        # -- `w:rPr` is always placed as the first child --
        self.insert(0, rPr)
        return rPr
class CT_Cr(BaseOxmlElement):
    """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.

    In Word, this represents a "soft carriage-return" in the sense that it does not end
    the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
    text equivalent is considered to be newline ("\n") since in plain-text that's the
    closest Python equivalent.

    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
    `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
    for many elements.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single newline ("\n")."""
        return "\n"
+ + This property allows this run inner-content element to be queried for its text + the same way as other run-content elements are. In particular, this never + returns None, as etree._Element does when there is no content. + """ + return self.text or "" + + +# ------------------------------------------------------------------------------------ +# Utility + + +class _RunContentAppender: + """Translates a Python string into run content elements appended in a `w:r` element. + + Contiguous sequences of regular characters are appended in a single `<w:t>` element. + Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a + newline or carriage return character ('\n', '\r') causes a `<w:cr>` element to be + appended. + """ + + def __init__(self, r: CT_R): + self._r = r + self._bfr: List[str] = [] + + @classmethod + def append_to_run_from_text(cls, r: CT_R, text: str): + """Append inner-content elements for `text` to `r` element.""" + appender = cls(r) + appender.add_text(text) + + def add_text(self, text: str): + """Append inner-content elements for `text` to the `w:r` element.""" + for char in text: + self.add_char(char) + self.flush() + + def add_char(self, char: str): + """Process next character of input through finite state maching (FSM). + + There are two possible states, buffer pending and not pending, but those are + hidden behind the `.flush()` method which must be called at the end of text to + ensure any pending `<w:t>` element is written. 
+ """ + if char == "\t": + self.flush() + self._r.add_tab() + elif char in "\r\n": + self.flush() + self._r.add_br() + else: + self._bfr.append(char) + + def flush(self): + text = "".join(self._bfr) + if text: + self._r.add_t(text) + self._bfr.clear() diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/xmlchemy.py b/.venv/lib/python3.12/site-packages/docx/oxml/xmlchemy.py new file mode 100644 index 00000000..077bcd58 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/oxml/xmlchemy.py @@ -0,0 +1,709 @@ +# pyright: reportImportCycles=false + +"""Enabling declarative definition of lxml custom element classes.""" + +from __future__ import annotations + +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Sequence, + Tuple, + Type, + TypeVar, +) + +from lxml import etree +from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage] + +from docx.oxml.exceptions import InvalidXmlError +from docx.oxml.ns import NamespacePrefixedTag, nsmap, qn +from docx.shared import lazyproperty + +if TYPE_CHECKING: + from docx.enum.base import BaseXmlEnum + from docx.oxml.simpletypes import BaseSimpleType + + +def serialize_for_reading(element: ElementBase): + """Serialize `element` to human-readable XML suitable for tests. + + No XML declaration. 
class XmlString(str):
    """Provides string comparison override suitable for serialized XML that is useful
    for tests.

    Two values compare equal when they have the same number of lines and each pair of
    corresponding lines is equivalent. Element lines are equivalent when they differ at
    most in the order of their whitespace-separated attributes; any other line must
    match exactly.

    Because `__eq__` is overridden without defining `__hash__`, instances are not
    hashable.
    """

    # '    <w:xyz xmlns:a="http://ns/decl/a" attr_name="val">text</w:xyz>'
    #      |    |                                          ||           |
    #      +----+------------------------------------------++-----------+
    #       front  attrs                                   | text
    #                                                    close

    _xml_elm_line_patt = re.compile(r"( *</?[\w:]+)(.*?)(/?>)([^<]*</[\w:]+>)?$")

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, str):
            return False
        lines = self.splitlines()
        lines_other = other.splitlines()
        if len(lines) != len(lines_other):
            return False
        return all(
            self._eq_elm_strs(line, line_other)
            for line, line_other in zip(lines, lines_other)
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def _attr_seq(self, attrs: str) -> List[str]:
        """Return a sorted sequence of attribute strings parsed from `attrs`.

        `attrs` is split on whitespace, so each attribute string is free of whitespace
        on both ends.
        """
        return sorted(attrs.split())

    def _eq_elm_strs(self, line: str, line_2: str) -> bool:
        """Return True if the element in `line_2` is XML equivalent to the element in
        `line`.

        A line that is not recognized as an element line (bare text or an XML
        declaration, for example) has no attributes to reorder, so such lines are
        compared verbatim.
        """
        patt = self._xml_elm_line_patt
        if patt.match(line) is None or patt.match(line_2) is None:
            # -- without this, every unparseable line would parse to the same empty
            # -- 4-tuple and any two of them would wrongly compare equal --
            return line == line_2

        front, attrs, close, text = self._parse_line(line)
        front_2, attrs_2, close_2, text_2 = self._parse_line(line_2)
        return (
            front == front_2
            and self._attr_seq(attrs) == self._attr_seq(attrs_2)
            and close == close_2
            and text == text_2
        )

    @classmethod
    def _parse_line(cls, line: str) -> Tuple[str, str, str, str]:
        """(front, attrs, close, text) 4-tuple result of parsing XML element `line`.

        All four items are the empty string when `line` is not an element line. `text`
        is the empty string when the element line has no text-and-closing-tag portion.
        """
        match = cls._xml_elm_line_patt.match(line)
        if match is None:
            return "", "", "", ""
        front, attrs, close, text = match.groups()
        # -- the optional text group is None when absent; honor the `str` contract --
        return front, attrs, close, text or ""
+ """ + + def __init__(self, attr_name: str, simple_type: Type[BaseXmlEnum] | Type[BaseSimpleType]): + super(BaseAttribute, self).__init__() + self._attr_name = attr_name + self._simple_type = simple_type + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Add the appropriate methods to `element_cls`.""" + self._element_cls = element_cls + self._prop_name = prop_name + + self._add_attr_property() + + def _add_attr_property(self): + """Add a read/write `.{prop_name}` property to the element class. + + The property returns the interpreted value of this attribute on access and + changes the attribute value to its ST_* counterpart on assignment. + """ + property_ = property(self._getter, self._setter, None) + # -- assign unconditionally to overwrite element name definition -- + setattr(self._element_cls, self._prop_name, property_) + + @property + def _clark_name(self): + if ":" in self._attr_name: + return qn(self._attr_name) + return self._attr_name + + @property + def _getter(self) -> Callable[[BaseOxmlElement], Any | None]: ... + + @property + def _setter( + self, + ) -> Callable[[BaseOxmlElement, Any | None], None]: ... + + +class OptionalAttribute(BaseAttribute): + """Defines an optional attribute on a custom element class. + + An optional attribute returns a default value when not present for reading. When + assigned |None|, the attribute is removed, but still returns the default value when + one is specified. 
+ """ + + def __init__( + self, + attr_name: str, + simple_type: Type[BaseXmlEnum] | Type[BaseSimpleType], + default: BaseXmlEnum | BaseSimpleType | str | bool | None = None, + ): + super(OptionalAttribute, self).__init__(attr_name, simple_type) + self._default = default + + @property + def _docstring(self): + """String to use as `__doc__` attribute of attribute property.""" + return ( + f"{self._simple_type.__name__} type-converted value of" + f" ``{self._attr_name}`` attribute, or |None| (or specified default" + f" value) if not present. Assigning the default value causes the" + f" attribute to be removed from the element." + ) + + @property + def _getter( + self, + ) -> Callable[[BaseOxmlElement], Any | None]: + """Function suitable for `__get__()` method on attribute property descriptor.""" + + def get_attr_value( + obj: BaseOxmlElement, + ) -> Any | None: + attr_str_value = obj.get(self._clark_name) + if attr_str_value is None: + return self._default + return self._simple_type.from_xml(attr_str_value) + + get_attr_value.__doc__ = self._docstring + return get_attr_value + + @property + def _setter(self) -> Callable[[BaseOxmlElement, Any], None]: + """Function suitable for `__set__()` method on attribute property descriptor.""" + + def set_attr_value(obj: BaseOxmlElement, value: Any | None): + if value is None or value == self._default: + if self._clark_name in obj.attrib: + del obj.attrib[self._clark_name] + return + str_value = self._simple_type.to_xml(value) + if str_value is None: + if self._clark_name in obj.attrib: + del obj.attrib[self._clark_name] + return + obj.set(self._clark_name, str_value) + + return set_attr_value + + +class RequiredAttribute(BaseAttribute): + """Defines a required attribute on a custom element class. + + A required attribute is assumed to be present for reading, so does not have a + default value; its actual value is always used. If missing on read, an + |InvalidXmlError| is raised. 
It also does not remove the attribute if |None| is + assigned. Assigning |None| raises |TypeError| or |ValueError|, depending on the + simple type of the attribute. + """ + + @property + def _docstring(self): + """Return the string to use as the ``__doc__`` attribute of the property for + this attribute.""" + return "%s type-converted value of ``%s`` attribute." % ( + self._simple_type.__name__, + self._attr_name, + ) + + @property + def _getter(self) -> Callable[[BaseOxmlElement], Any]: + """function object suitable for "get" side of attr property descriptor.""" + + def get_attr_value(obj: BaseOxmlElement) -> Any | None: + attr_str_value = obj.get(self._clark_name) + if attr_str_value is None: + raise InvalidXmlError( + "required '%s' attribute not present on element %s" % (self._attr_name, obj.tag) + ) + return self._simple_type.from_xml(attr_str_value) + + get_attr_value.__doc__ = self._docstring + return get_attr_value + + @property + def _setter(self) -> Callable[[BaseOxmlElement, Any], None]: + """function object suitable for "set" side of attribute property descriptor.""" + + def set_attr_value(obj: BaseOxmlElement, value: Any): + str_value = self._simple_type.to_xml(value) + if str_value is None: + raise ValueError(f"cannot assign {value} to this required attribute") + obj.set(self._clark_name, str_value) + + return set_attr_value + + +class _BaseChildElement: + """Base class for the child-element classes. + + The child-element sub-classes correspond to varying cardinalities, such as ZeroOrOne + and ZeroOrMore. + """ + + def __init__(self, nsptagname: str, successors: Tuple[str, ...] 
= ()): + super(_BaseChildElement, self).__init__() + self._nsptagname = nsptagname + self._successors = successors + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Baseline behavior for adding the appropriate methods to `element_cls`.""" + self._element_cls = element_cls + self._prop_name = prop_name + + def _add_adder(self): + """Add an ``_add_x()`` method to the element class for this child element.""" + + def _add_child(obj: BaseOxmlElement, **attrs: Any): + new_method = getattr(obj, self._new_method_name) + child = new_method() + for key, value in attrs.items(): + setattr(child, key, value) + insert_method = getattr(obj, self._insert_method_name) + insert_method(child) + return child + + _add_child.__doc__ = ( + "Add a new ``<%s>`` child element unconditionally, inserted in t" + "he correct sequence." % self._nsptagname + ) + self._add_to_class(self._add_method_name, _add_child) + + def _add_creator(self): + """Add a ``_new_{prop_name}()`` method to the element class that creates a new, + empty element of the correct type, having no attributes.""" + creator = self._creator + creator.__doc__ = ( + 'Return a "loose", newly created ``<%s>`` element having no attri' + "butes, text, or children." 
% self._nsptagname + ) + self._add_to_class(self._new_method_name, creator) + + def _add_getter(self): + """Add a read-only ``{prop_name}`` property to the element class for this child + element.""" + property_ = property(self._getter, None, None) + # -- assign unconditionally to overwrite element name definition -- + setattr(self._element_cls, self._prop_name, property_) + + def _add_inserter(self): + """Add an ``_insert_x()`` method to the element class for this child element.""" + + def _insert_child(obj: BaseOxmlElement, child: BaseOxmlElement): + obj.insert_element_before(child, *self._successors) + return child + + _insert_child.__doc__ = ( + "Return the passed ``<%s>`` element after inserting it as a chil" + "d in the correct sequence." % self._nsptagname + ) + self._add_to_class(self._insert_method_name, _insert_child) + + def _add_list_getter(self): + """Add a read-only ``{prop_name}_lst`` property to the element class to retrieve + a list of child elements matching this type.""" + prop_name = "%s_lst" % self._prop_name + property_ = property(self._list_getter, None, None) + setattr(self._element_cls, prop_name, property_) + + @lazyproperty + def _add_method_name(self): + return "_add_%s" % self._prop_name + + def _add_public_adder(self): + """Add a public ``add_x()`` method to the parent element class.""" + + def add_child(obj: BaseOxmlElement): + private_add_method = getattr(obj, self._add_method_name) + child = private_add_method() + return child + + add_child.__doc__ = ( + "Add a new ``<%s>`` child element unconditionally, inserted in t" + "he correct sequence." 
% self._nsptagname + ) + self._add_to_class(self._public_add_method_name, add_child) + + def _add_to_class(self, name: str, method: Callable[..., Any]): + """Add `method` to the target class as `name`, unless `name` is already defined + on the class.""" + if hasattr(self._element_cls, name): + return + setattr(self._element_cls, name, method) + + @property + def _creator(self) -> Callable[[BaseOxmlElement], BaseOxmlElement]: + """Callable that creates an empty element of the right type, with no attrs.""" + from docx.oxml.parser import OxmlElement + + def new_child_element(obj: BaseOxmlElement): + return OxmlElement(self._nsptagname) + + return new_child_element + + @property + def _getter(self): + """Return a function object suitable for the "get" side of the property + descriptor. + + This default getter returns the child element with matching tag name or |None| + if not present. + """ + + def get_child_element(obj: BaseOxmlElement): + return obj.find(qn(self._nsptagname)) + + get_child_element.__doc__ = ( + "``<%s>`` child element or |None| if not present." % self._nsptagname + ) + return get_child_element + + @lazyproperty + def _insert_method_name(self): + return "_insert_%s" % self._prop_name + + @property + def _list_getter(self): + """Return a function object suitable for the "get" side of a list property + descriptor.""" + + def get_child_element_list(obj: BaseOxmlElement): + return obj.findall(qn(self._nsptagname)) + + get_child_element_list.__doc__ = ( + "A list containing each of the ``<%s>`` child elements, in the o" + "rder they appear." % self._nsptagname + ) + return get_child_element_list + + @lazyproperty + def _public_add_method_name(self): + """add_childElement() is public API for a repeating element, allowing new + elements to be added to the sequence. + + May be overridden to provide a friendlier API to clients having domain + appropriate parameter names for required attributes. 
+ """ + return "add_%s" % self._prop_name + + @lazyproperty + def _remove_method_name(self): + return "_remove_%s" % self._prop_name + + @lazyproperty + def _new_method_name(self): + return "_new_%s" % self._prop_name + + +class Choice(_BaseChildElement): + """Defines a child element belonging to a group, only one of which may appear as a + child.""" + + @property + def nsptagname(self): + return self._nsptagname + + def populate_class_members( # pyright: ignore[reportIncompatibleMethodOverride] + self, + element_cls: MetaOxmlElement, + group_prop_name: str, + successors: Tuple[str, ...], + ) -> None: + """Add the appropriate methods to `element_cls`.""" + self._element_cls = element_cls + self._group_prop_name = group_prop_name + self._successors = successors + + self._add_getter() + self._add_creator() + self._add_inserter() + self._add_adder() + self._add_get_or_change_to_method() + + def _add_get_or_change_to_method(self): + """Add a ``get_or_change_to_x()`` method to the element class for this child + element.""" + + def get_or_change_to_child(obj: BaseOxmlElement): + child = getattr(obj, self._prop_name) + if child is not None: + return child + remove_group_method = getattr(obj, self._remove_group_method_name) + remove_group_method() + add_method = getattr(obj, self._add_method_name) + child = add_method() + return child + + get_or_change_to_child.__doc__ = ( + "Return the ``<%s>`` child, replacing any other group element if" " found." + ) % self._nsptagname + self._add_to_class(self._get_or_change_to_method_name, get_or_change_to_child) + + @property + def _prop_name(self): + """Property name computed from tag name, e.g. 
a:schemeClr -> schemeClr.""" + start = self._nsptagname.index(":") + 1 if ":" in self._nsptagname else 0 + return self._nsptagname[start:] + + @lazyproperty + def _get_or_change_to_method_name(self): + return "get_or_change_to_%s" % self._prop_name + + @lazyproperty + def _remove_group_method_name(self): + return "_remove_%s" % self._group_prop_name + + +class OneAndOnlyOne(_BaseChildElement): + """Defines a required child element for MetaOxmlElement.""" + + def __init__(self, nsptagname: str): + super(OneAndOnlyOne, self).__init__(nsptagname, ()) + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Add the appropriate methods to `element_cls`.""" + super(OneAndOnlyOne, self).populate_class_members(element_cls, prop_name) + self._add_getter() + + @property + def _getter(self): + """Return a function object suitable for the "get" side of the property + descriptor.""" + + def get_child_element(obj: BaseOxmlElement): + child = obj.find(qn(self._nsptagname)) + if child is None: + raise InvalidXmlError( + "required ``<%s>`` child element not present" % self._nsptagname + ) + return child + + get_child_element.__doc__ = "Required ``<%s>`` child element." 
% self._nsptagname + return get_child_element + + +class OneOrMore(_BaseChildElement): + """Defines a repeating child element for MetaOxmlElement that must appear at least + once.""" + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Add the appropriate methods to `element_cls`.""" + super(OneOrMore, self).populate_class_members(element_cls, prop_name) + self._add_list_getter() + self._add_creator() + self._add_inserter() + self._add_adder() + self._add_public_adder() + delattr(element_cls, prop_name) + + +class ZeroOrMore(_BaseChildElement): + """Defines an optional repeating child element for MetaOxmlElement.""" + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Add the appropriate methods to `element_cls`.""" + super(ZeroOrMore, self).populate_class_members(element_cls, prop_name) + self._add_list_getter() + self._add_creator() + self._add_inserter() + self._add_adder() + self._add_public_adder() + delattr(element_cls, prop_name) + + +class ZeroOrOne(_BaseChildElement): + """Defines an optional child element for MetaOxmlElement.""" + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Add the appropriate methods to `element_cls`.""" + super(ZeroOrOne, self).populate_class_members(element_cls, prop_name) + self._add_getter() + self._add_creator() + self._add_inserter() + self._add_adder() + self._add_get_or_adder() + self._add_remover() + + def _add_get_or_adder(self): + """Add a ``get_or_add_x()`` method to the element class for this child + element.""" + + def get_or_add_child(obj: BaseOxmlElement): + child = getattr(obj, self._prop_name) + if child is None: + add_method = getattr(obj, self._add_method_name) + child = add_method() + return child + + get_or_add_child.__doc__ = ( + "Return the ``<%s>`` child element, newly added if not present." 
+ ) % self._nsptagname + self._add_to_class(self._get_or_add_method_name, get_or_add_child) + + def _add_remover(self): + """Add a ``_remove_x()`` method to the element class for this child element.""" + + def _remove_child(obj: BaseOxmlElement): + obj.remove_all(self._nsptagname) + + _remove_child.__doc__ = ("Remove all ``<%s>`` child elements.") % self._nsptagname + self._add_to_class(self._remove_method_name, _remove_child) + + @lazyproperty + def _get_or_add_method_name(self): + return "get_or_add_%s" % self._prop_name + + +class ZeroOrOneChoice(_BaseChildElement): + """Correspondes to an ``EG_*`` element group where at most one of its members may + appear as a child.""" + + def __init__(self, choices: Sequence[Choice], successors: Tuple[str, ...] = ()): + self._choices = choices + self._successors = successors + + def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None: + """Add the appropriate methods to `element_cls`.""" + super(ZeroOrOneChoice, self).populate_class_members(element_cls, prop_name) + self._add_choice_getter() + for choice in self._choices: + choice.populate_class_members(element_cls, self._prop_name, self._successors) + self._add_group_remover() + + def _add_choice_getter(self): + """Add a read-only ``{prop_name}`` property to the element class that returns + the present member of this group, or |None| if none are present.""" + property_ = property(self._choice_getter, None, None) + # assign unconditionally to overwrite element name definition + setattr(self._element_cls, self._prop_name, property_) + + def _add_group_remover(self): + """Add a ``_remove_eg_x()`` method to the element class for this choice + group.""" + + def _remove_choice_group(obj: BaseOxmlElement): + for tagname in self._member_nsptagnames: + obj.remove_all(tagname) + + _remove_choice_group.__doc__ = "Remove the current choice group child element if present." 
+ self._add_to_class(self._remove_choice_group_method_name, _remove_choice_group) + + @property + def _choice_getter(self): + """Return a function object suitable for the "get" side of the property + descriptor.""" + + def get_group_member_element(obj: BaseOxmlElement): + return obj.first_child_found_in(*self._member_nsptagnames) + + get_group_member_element.__doc__ = ( + "Return the child element belonging to this element group, or " + "|None| if no member child is present." + ) + return get_group_member_element + + @lazyproperty + def _member_nsptagnames(self): + """Sequence of namespace-prefixed tagnames, one for each of the member elements + of this choice group.""" + return [choice.nsptagname for choice in self._choices] + + @lazyproperty + def _remove_choice_group_method_name(self): + return "_remove_%s" % self._prop_name + + +# -- lxml typing isn't quite right here, just ignore this error on _Element -- +class BaseOxmlElement(etree.ElementBase, metaclass=MetaOxmlElement): + """Effective base class for all custom element classes. + + Adds standardized behavior to all classes in one place. + """ + + def __repr__(self): + return "<%s '<%s>' at 0x%0x>" % ( + self.__class__.__name__, + self._nsptag, + id(self), + ) + + def first_child_found_in(self, *tagnames: str) -> _Element | None: + """First child with tag in `tagnames`, or None if not found.""" + for tagname in tagnames: + child = self.find(qn(tagname)) + if child is not None: + return child + return None + + def insert_element_before(self, elm: ElementBase, *tagnames: str): + successor = self.first_child_found_in(*tagnames) + if successor is not None: + successor.addprevious(elm) + else: + self.append(elm) + return elm + + def remove_all(self, *tagnames: str) -> None: + """Remove child elements with tagname (e.g. 
"a:p") in `tagnames`.""" + for tagname in tagnames: + matching = self.findall(qn(tagname)) + for child in matching: + self.remove(child) + + @property + def xml(self) -> str: + """XML string for this element, suitable for testing purposes. + + Pretty printed for readability and without an XML declaration at the top. + """ + return serialize_for_reading(self) + + def xpath(self, xpath_str: str) -> Any: # pyright: ignore[reportIncompatibleMethodOverride] + """Override of `lxml` _Element.xpath() method. + + Provides standard Open XML namespace mapping (`nsmap`) in centralized location. + """ + return super().xpath(xpath_str, namespaces=nsmap) + + @property + def _nsptag(self) -> str: + return NamespacePrefixedTag.from_clark_name(self.tag) |