diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/text')
8 files changed, 1486 insertions, 0 deletions
class Font(ElementProxy):
    """Character-formatting proxy for the element that owns a `<w:rPr>` child.

    Gives read/write access to character properties such as typeface name, font
    size, bold, and subscript. Every getter returns |None| when the wrapped element
    has no `<w:rPr>` child; every setter goes through `get_or_add_rPr()`.
    """

    def __init__(self, r: CT_R, parent: Any | None = None):
        super().__init__(r, parent)
        # -- the same element is kept under both names used by this class --
        self._r = r
        self._element = r

    @property
    def all_caps(self) -> bool | None:
        """Read/write.

        When |True|, text in this font appears in capital letters.
        """
        return self._get_bool_prop("caps")

    @all_caps.setter
    def all_caps(self, value: bool | None) -> None:
        self._set_bool_prop("caps", value)

    @property
    def bold(self) -> bool | None:
        """Read/write.

        When |True|, text in this font appears in bold.
        """
        return self._get_bool_prop("b")

    @bold.setter
    def bold(self, value: bool | None) -> None:
        self._set_bool_prop("b", value)

    @property
    def color(self):
        """|ColorFormat| object used to get and set the text color of this font.

        A new |ColorFormat| wrapping this font's element is built on each access.
        """
        return ColorFormat(self._element)

    @property
    def complex_script(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the characters in the run are treated as complex script
        regardless of their Unicode values.
        """
        return self._get_bool_prop("cs")

    @complex_script.setter
    def complex_script(self, value: bool | None) -> None:
        self._set_bool_prop("cs", value)

    @property
    def cs_bold(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, complex script characters in the run are displayed in bold
        typeface.
        """
        return self._get_bool_prop("bCs")

    @cs_bold.setter
    def cs_bold(self, value: bool | None) -> None:
        self._set_bool_prop("bCs", value)

    @property
    def cs_italic(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, complex script characters in the run are displayed in italic
        typeface.
        """
        return self._get_bool_prop("iCs")

    @cs_italic.setter
    def cs_italic(self, value: bool | None) -> None:
        self._set_bool_prop("iCs", value)

    @property
    def double_strike(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run appears with double strikethrough.
        """
        return self._get_bool_prop("dstrike")

    @double_strike.setter
    def double_strike(self, value: bool | None) -> None:
        self._set_bool_prop("dstrike", value)

    @property
    def emboss(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run appears as if raised off the page in
        relief.
        """
        return self._get_bool_prop("emboss")

    @emboss.setter
    def emboss(self, value: bool | None) -> None:
        self._set_bool_prop("emboss", value)

    @property
    def hidden(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run is hidden from display, unless application
        settings force hidden text to be shown.
        """
        return self._get_bool_prop("vanish")

    @hidden.setter
    def hidden(self, value: bool | None) -> None:
        self._set_bool_prop("vanish", value)

    @property
    def highlight_color(self) -> WD_COLOR_INDEX | None:
        """Color of the highlighting applied, or |None| if not highlighted."""
        run_props = self._element.rPr
        return None if run_props is None else run_props.highlight_val

    @highlight_color.setter
    def highlight_color(self, value: WD_COLOR_INDEX | None):
        self._element.get_or_add_rPr().highlight_val = value

    @property
    def italic(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text of the run appears in italics. |None| indicates the
        effective value is inherited from the style hierarchy.
        """
        return self._get_bool_prop("i")

    @italic.setter
    def italic(self, value: bool | None) -> None:
        self._set_bool_prop("i", value)

    @property
    def imprint(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run appears as if pressed into the page.
        """
        return self._get_bool_prop("imprint")

    @imprint.setter
    def imprint(self, value: bool | None) -> None:
        self._set_bool_prop("imprint", value)

    @property
    def math(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, specifies this run contains WML that should be handled as
        though it was Office Open XML Math.
        """
        return self._get_bool_prop("oMath")

    @math.setter
    def math(self, value: bool | None) -> None:
        self._set_bool_prop("oMath", value)

    @property
    def name(self) -> str | None:
        """The typeface name for this |Font|.

        Text controlled by this font appears in the named typeface when a matching
        font is found. |None| indicates the typeface is inherited from the style
        hierarchy. The value read is the `rFonts_ascii` value of `<w:rPr>`.
        """
        run_props = self._element.rPr
        return None if run_props is None else run_props.rFonts_ascii

    @name.setter
    def name(self, value: str | None) -> None:
        run_props = self._element.get_or_add_rPr()
        # -- the same typeface is written to both the `ascii` and `hAnsi` values --
        run_props.rFonts_ascii = value
        run_props.rFonts_hAnsi = value

    @property
    def no_proof(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the contents of this run should not report any errors when the
        document is scanned for spelling and grammar.
        """
        return self._get_bool_prop("noProof")

    @no_proof.setter
    def no_proof(self, value: bool | None) -> None:
        self._set_bool_prop("noProof", value)

    @property
    def outline(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the characters in the run appear as if they have an outline,
        drawn as a one pixel wide border around the inside and outside borders of
        each character glyph.
        """
        return self._get_bool_prop("outline")

    @outline.setter
    def outline(self, value: bool | None) -> None:
        self._set_bool_prop("outline", value)

    @property
    def rtl(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run has right-to-left characteristics.
        """
        return self._get_bool_prop("rtl")

    @rtl.setter
    def rtl(self, value: bool | None) -> None:
        self._set_bool_prop("rtl", value)

    @property
    def shadow(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run appears as if each character has a shadow.
        """
        return self._get_bool_prop("shadow")

    @shadow.setter
    def shadow(self, value: bool | None) -> None:
        self._set_bool_prop("shadow", value)

    @property
    def size(self) -> Length | None:
        """Font height in English Metric Units (EMU).

        |None| indicates the font size should be inherited from the style hierarchy.
        |Length| is a subclass of |int| having properties for convenient conversion
        into points or other length units. The :class:`docx.shared.Pt` class allows
        convenient specification of point values::

            >>> font.size = Pt(24)
            >>> font.size
            304800
            >>> font.size.pt
            24.0

        """
        run_props = self._element.rPr
        return None if run_props is None else run_props.sz_val

    @size.setter
    def size(self, emu: int | Length | None) -> None:
        run_props = self._element.get_or_add_rPr()
        if emu is None:
            run_props.sz_val = None
        else:
            # -- any other value is wrapped in |Emu| before it is stored --
            run_props.sz_val = Emu(emu)

    @property
    def small_caps(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the lowercase characters in the run appear as capital letters
        two points smaller than the font size specified for the run.
        """
        return self._get_bool_prop("smallCaps")

    @small_caps.setter
    def small_caps(self, value: bool | None) -> None:
        self._set_bool_prop("smallCaps", value)

    @property
    def snap_to_grid(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the run uses the document grid characters-per-line settings
        defined in the docGrid element when laying out the characters in this run.
        """
        return self._get_bool_prop("snapToGrid")

    @snap_to_grid.setter
    def snap_to_grid(self, value: bool | None) -> None:
        self._set_bool_prop("snapToGrid", value)

    @property
    def spec_vanish(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the run always behaves as if it is hidden, even when hidden
        text is being displayed in the current document. The property has a very
        narrow, specialized use related to the table of contents. Consult the spec
        (§17.3.2.36) for more details.
        """
        return self._get_bool_prop("specVanish")

    @spec_vanish.setter
    def spec_vanish(self, value: bool | None) -> None:
        self._set_bool_prop("specVanish", value)

    @property
    def strike(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the text in the run appears with a single horizontal line
        through the center of the line.
        """
        return self._get_bool_prop("strike")

    @strike.setter
    def strike(self, value: bool | None) -> None:
        self._set_bool_prop("strike", value)

    @property
    def subscript(self) -> bool | None:
        """Boolean indicating whether the characters in this |Font| appear as
        subscript.

        |None| indicates the subscript/superscript value is inherited from the style
        hierarchy.
        """
        run_props = self._element.rPr
        return None if run_props is None else run_props.subscript

    @subscript.setter
    def subscript(self, value: bool | None) -> None:
        self._element.get_or_add_rPr().subscript = value

    @property
    def superscript(self) -> bool | None:
        """Boolean indicating whether the characters in this |Font| appear as
        superscript.

        |None| indicates the subscript/superscript value is inherited from the style
        hierarchy.
        """
        run_props = self._element.rPr
        return None if run_props is None else run_props.superscript

    @superscript.setter
    def superscript(self, value: bool | None) -> None:
        self._element.get_or_add_rPr().superscript = value

    @property
    def underline(self) -> bool | WD_UNDERLINE | None:
        """The underline style for this |Font|.

        The value is one of |None|, |True|, |False|, or a member of
        :ref:`WdUnderline`.

        |None| indicates the font inherits its underline value from the style
        hierarchy. |False| indicates no underline. |True| indicates single
        underline. The values from :ref:`WdUnderline` are used to specify other
        underline styles such as double, wavy, and dotted.
        """
        run_props = self._element.rPr
        if run_props is None:
            return None
        u_val = run_props.u_val
        # -- three stored values are reported as |None| / |True| / |False| --
        if u_val == WD_UNDERLINE.INHERITED:
            return None
        if u_val == WD_UNDERLINE.SINGLE:
            return True
        if u_val == WD_UNDERLINE.NONE:
            return False
        return u_val

    @underline.setter
    def underline(self, value: bool | WD_UNDERLINE | None) -> None:
        run_props = self._element.get_or_add_rPr()
        # -- works fine without these two mappings, but only because True == 1 and
        # -- False == 0, which happen to match the mapping for WD_UNDERLINE.SINGLE
        # -- and .NONE respectively.
        if value is True:
            run_props.u_val = WD_UNDERLINE.SINGLE
        elif value is False:
            run_props.u_val = WD_UNDERLINE.NONE
        else:
            run_props.u_val = value

    @property
    def web_hidden(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, the contents of this run are hidden when the document is
        displayed in web page view.
        """
        return self._get_bool_prop("webHidden")

    @web_hidden.setter
    def web_hidden(self, value: bool | None) -> None:
        self._set_bool_prop("webHidden", value)

    def _get_bool_prop(self, name: str) -> bool | None:
        """Return the value of the boolean child of `w:rPr` having `name`.

        Returns |None| without consulting the child when there is no `w:rPr`.
        """
        run_props = self._element.rPr
        if run_props is None:
            return None
        return run_props._get_bool_val(name)  # pyright: ignore[reportPrivateUsage]

    def _set_bool_prop(self, name: str, value: bool | None):
        """Assign `value` to the boolean child `name` of `w:rPr`."""
        run_props = self._element.get_or_add_rPr()
        run_props._set_bool_val(name, value)  # pyright: ignore[reportPrivateUsage]
class Hyperlink(Parented):
    """Proxy object wrapping a `<w:hyperlink>` element.

    A hyperlink occurs as a child of a paragraph, at the same level as a Run. A
    hyperlink itself contains runs, which is where the visible text of the hyperlink
    is stored.
    """

    def __init__(self, hyperlink: CT_Hyperlink, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._parent = parent
        self._hyperlink = self._element = hyperlink

    @property
    def address(self) -> str:
        """The "URL" of the hyperlink (but not necessarily a web link).

        While commonly a web link like "https://google.com", the hyperlink address
        can take a variety of forms including "internal links" to bookmarked
        locations within the document. When this hyperlink is an internal "jump" to,
        for example, a heading from the table-of-contents (TOC), the address is
        blank. The bookmark reference (like "_Toc147925734") is stored in the
        `.fragment` property.
        """
        rId = self._hyperlink.rId
        # -- no relationship id means there is no external target to look up --
        return self._parent.part.rels[rId].target_ref if rId else ""

    @property
    def contains_page_break(self) -> bool:
        """True when the text of this hyperlink is broken across page boundaries.

        This is not uncommon and can happen for example when the hyperlink text is
        multiple words and occurs in the last line of a page. Theoretically, a
        hyperlink can contain more than one page break but that would be extremely
        uncommon in practice. Still, this value should be understood to mean that
        "one-or-more" rendered page breaks are present.
        """
        return bool(self._hyperlink.lastRenderedPageBreaks)

    @property
    def fragment(self) -> str:
        """Reference like `#glossary` at end of URL that refers to a sub-resource.

        Note that this value does not include the fragment-separator character
        ("#").

        This value is known as a "named anchor" in an HTML context and "anchor" in
        the MS API, but an "anchor" element (`<a>`) represents a full hyperlink in
        HTML so we avoid confusion by using the more precise RFC 3986 naming "URI
        fragment".

        These are also used to refer to bookmarks within the same document, in which
        case the `.address` value will be blank ("") and this property will hold a
        value like "_Toc147925734".

        To reliably get an entire web URL you will need to concatenate this with the
        `.address` value, separated by "#" when both are present. Consider using the
        `.url` property for that purpose.

        Word sometimes stores a fragment in this property (an XML attribute) and
        sometimes with the address, depending on how the URL is inserted, so don't
        depend on this field being empty to indicate no fragment is present.
        """
        return self._hyperlink.anchor or ""

    @property
    def runs(self) -> list[Run]:
        """List of |Run| instances in this hyperlink.

        Together these define the visible text of the hyperlink. The text of a
        hyperlink is typically contained in a single run but will be broken into
        multiple runs if, for example, part of the hyperlink is bold or the text was
        changed after the document was saved.
        """
        return [Run(r, self._parent) for r in self._hyperlink.r_lst]

    @property
    def text(self) -> str:
        """String formed by concatenating the text of each run in the hyperlink.

        Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters
        respectively. Note that rendered page-breaks can occur within a hyperlink
        but they are not reflected in this text.
        """
        return self._hyperlink.text

    @property
    def url(self) -> str:
        """Convenience property to get web URLs from hyperlinks that contain them.

        This value is the empty string ("") when there is no address portion, so its
        boolean value can also be used to distinguish external URIs from internal
        "jump" hyperlinks like those found in a table-of-contents.

        Note that this value may also be a link to a file, so if you only want
        web-urls you'll need to check for a protocol prefix like `https://`.

        When both an address and fragment are present, the return value joins the
        two separated by the fragment-separator hash ("#"). Otherwise this value is
        the same as that of the `.address` property.
        """
        address, fragment = self.address, self.fragment
        if not address:
            return ""
        return f"{address}#{fragment}" if fragment else address


class RenderedPageBreak(Parented):
    """A page-break inserted by Word during page-layout for print or display purposes.

    This usually does not correspond to a "hard" page-break inserted by the document
    author, rather just that Word ran out of room on one page and needed to start
    another. The position of these can change depending on the printer and
    page-size, as well as margins, etc. They also will change in response to edits,
    but not until Word loads and saves the document.

    Note these are never inserted by `python-docx` because it has no rendering
    function. These are generally only useful for text-extraction of existing
    documents when `python-docx` is being used solely as a document "reader".

    NOTE: a rendered page-break can occur within a hyperlink; consider a multi-word
    hyperlink like "excellent Wikipedia article on LLMs" that happens to fall close
    to the end of the last line on a page such that the page breaks between
    "Wikipedia" and "article". In such a "page-breaks-in-hyperlink" case, THESE
    METHODS WILL "MOVE" THE PAGE-BREAK to occur after the hyperlink, such that the
    entire hyperlink appears in the paragraph returned by
    `.preceding_paragraph_fragment`. While this places the "tail" text of the
    hyperlink on the "wrong" page, it avoids having two hyperlinks each with a
    fragment of the actual text and pointing to the same address.
    """

    def __init__(
        self,
        lastRenderedPageBreak: CT_LastRenderedPageBreak,
        parent: t.ProvidesStoryPart,
    ):
        super().__init__(parent)
        self._element = lastRenderedPageBreak
        self._lastRenderedPageBreak = lastRenderedPageBreak

    @property
    def preceding_paragraph_fragment(self) -> Paragraph | None:
        """A "loose" paragraph containing the content preceding this page-break.

        Compare `.following_paragraph_fragment` as these two are intended to be used
        together.

        This value is `None` when no content precedes this page-break. This case is
        common and occurs whenever a page breaks on an even paragraph boundary.
        Returning `None` for this case avoids "inserting" a non-existent paragraph
        into the content stream. Note that content can include DrawingML items like
        images or charts.

        Note the returned paragraph *is divorced from the document body*. Any
        changes made to it will not be reflected in the document. It is intended to
        provide a familiar container (`Paragraph`) to interrogate for the content
        preceding this page-break in the paragraph in which it occurred.

        Contains the entire hyperlink when this break occurs within a hyperlink.
        """
        if self._lastRenderedPageBreak.precedes_all_content:
            return None

        # -- imported at call time rather than at module top; presumably because
        # -- the paragraph module imports this one (confirm before hoisting)
        from docx.text.paragraph import Paragraph

        return Paragraph(self._lastRenderedPageBreak.preceding_fragment_p, self._parent)

    @property
    def following_paragraph_fragment(self) -> Paragraph | None:
        """A "loose" paragraph containing the content following this page-break.

        HAS POTENTIALLY SURPRISING BEHAVIORS so read carefully to be sure this is
        what you want. This is primarily targeted toward text-extraction use-cases
        for which precisely associating text with the page it occurs on is
        important.

        Compare `.preceding_paragraph_fragment` as these two are intended to be used
        together.

        This value is `None` when no content follows this page-break. This case is
        unlikely to occur in practice because Word places even-paragraph-boundary
        page-breaks on the paragraph *following* the page-break. Still, it is
        possible and must be checked for. Returning `None` for this case avoids
        "inserting" an extra, non-existent paragraph into the content stream. Note
        that content can include DrawingML items like images or charts, not just
        text.

        The returned paragraph *is divorced from the document body*. Any changes
        made to it will not be reflected in the document. It is intended to provide
        a container (`Paragraph`) with familiar properties and methods that can be
        used to characterize the paragraph content following a mid-paragraph
        page-break.

        Contains no portion of the hyperlink when this break occurs within a
        hyperlink.
        """
        if self._lastRenderedPageBreak.follows_all_content:
            return None

        # -- imported at call time rather than at module top; presumably because
        # -- the paragraph module imports this one (confirm before hoisting)
        from docx.text.paragraph import Paragraph

        return Paragraph(self._lastRenderedPageBreak.following_fragment_p, self._parent)


class Paragraph(StoryChild):
    """Proxy object wrapping a `<w:p>` element."""

    def __init__(self, p: CT_P, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._p = self._element = p

    def add_run(self, text: str | None = None, style: str | CharacterStyle | None = None) -> Run:
        """Append run containing `text` and having character-style `style`.

        `text` can contain tab (``\\t``) characters, which are converted to the
        appropriate XML form for a tab. `text` can also include newline (``\\n``) or
        carriage return (``\\r``) characters, each of which is converted to a line
        break. When `text` is `None` (or empty), the new run is empty. `style` is
        only applied when it is truthy.
        """
        r = self._p.add_r()
        run = Run(r, self)
        if text:
            run.text = text
        if style:
            run.style = style
        return run

    @property
    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
        """A member of the :ref:`WdParagraphAlignment` enumeration specifying the
        justification setting for this paragraph.

        A value of |None| indicates the paragraph has no directly-applied alignment
        value and will inherit its alignment value from its style hierarchy.
        Assigning |None| to this property removes any directly-applied alignment
        value.
        """
        return self._p.alignment

    @alignment.setter
    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT | None) -> None:
        # -- |None| is a documented assignment (removes direct alignment), so the
        # -- annotation accepts it
        self._p.alignment = value

    def clear(self) -> Paragraph:
        """Return this same paragraph after removing all its content.

        Paragraph-level formatting, such as style, is preserved.
        """
        self._p.clear_content()
        return self

    @property
    def contains_page_break(self) -> bool:
        """`True` when one or more rendered page-breaks occur in this paragraph."""
        return bool(self._p.lastRenderedPageBreaks)

    @property
    def hyperlinks(self) -> List[Hyperlink]:
        """A |Hyperlink| instance for each hyperlink in this paragraph."""
        return [Hyperlink(hyperlink, self) for hyperlink in self._p.hyperlink_lst]

    def insert_paragraph_before(
        self, text: str | None = None, style: str | ParagraphStyle | None = None
    ) -> Paragraph:
        """Return a newly created paragraph, inserted directly before this paragraph.

        If `text` is supplied, the new paragraph contains that text in a single run.
        If `style` is provided, that style is assigned to the new paragraph.
        """
        paragraph = self._insert_paragraph_before()
        if text:
            paragraph.add_run(text)
        if style is not None:
            paragraph.style = style
        return paragraph

    def iter_inner_content(self) -> Iterator[Run | Hyperlink]:
        """Generate the runs and hyperlinks in this paragraph, in the order they appear.

        The content in a paragraph consists of both runs and hyperlinks. This method
        allows accessing each of those separately, in document order, for when the
        precise position of the hyperlink within the paragraph text is important.
        Note that a hyperlink itself contains runs.
        """
        for r_or_hlink in self._p.inner_content_elements:
            # -- anything that is not a `CT_R` is wrapped as a hyperlink --
            yield (
                Run(r_or_hlink, self)
                if isinstance(r_or_hlink, CT_R)
                else Hyperlink(r_or_hlink, self)
            )

    @property
    def paragraph_format(self) -> ParagraphFormat:
        """The |ParagraphFormat| object providing access to the formatting properties
        for this paragraph, such as line spacing and indentation."""
        return ParagraphFormat(self._element)

    @property
    def rendered_page_breaks(self) -> List[RenderedPageBreak]:
        """All rendered page-breaks in this paragraph.

        Most often an empty list, sometimes contains one page-break, but can contain
        more than one in rare or contrived cases.
        """
        return [RenderedPageBreak(lrpb, self) for lrpb in self._p.lastRenderedPageBreaks]

    @property
    def runs(self) -> List[Run]:
        """Sequence of |Run| instances corresponding to the <w:r> elements in this
        paragraph."""
        return [Run(r, self) for r in self._p.r_lst]

    @property
    def style(self) -> ParagraphStyle | None:
        """Read/Write.

        |ParagraphStyle| object representing the style assigned to this paragraph.
        If no explicit style is assigned to this paragraph, its value is the default
        paragraph style for the document. A paragraph style name can be assigned in
        lieu of a paragraph style object. Assigning |None| removes any applied
        style, making its effective value the default paragraph style for the
        document.
        """
        style_id = self._p.style
        style = self.part.get_style(style_id, WD_STYLE_TYPE.PARAGRAPH)
        return cast(ParagraphStyle, style)

    @style.setter
    def style(self, style_or_name: str | ParagraphStyle | None) -> None:
        style_id = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.PARAGRAPH)
        self._p.style = style_id

    @property
    def text(self) -> str:
        """The textual content of this paragraph.

        The text includes the visible-text portion of any hyperlinks in the
        paragraph. Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n``
        characters respectively.

        Assigning text to this property causes all existing paragraph content to be
        replaced with a single run containing the assigned text. A ``\\t`` character
        in the text is mapped to a ``<w:tab/>`` element and each ``\\n`` or ``\\r``
        character is mapped to a line break. Paragraph-level formatting, such as
        style, is preserved. All run-level formatting, such as bold or italic, is
        removed.
        """
        return self._p.text

    @text.setter
    def text(self, text: str | None) -> None:
        # -- a run is always added after clearing, even when `text` is |None| --
        self.clear()
        self.add_run(text)

    def _insert_paragraph_before(self) -> Paragraph:
        """Return a newly created paragraph, inserted directly before this paragraph.

        The new paragraph is given this paragraph's parent.
        """
        p = self._p.add_p_before()
        return Paragraph(p, self._parent)
+ """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.jc_val + + @alignment.setter + def alignment(self, value): + pPr = self._element.get_or_add_pPr() + pPr.jc_val = value + + @property + def first_line_indent(self): + """|Length| value specifying the relative difference in indentation for the + first line of the paragraph. + + A positive value causes the first line to be indented. A negative value produces + a hanging indent. |None| indicates first line indentation is inherited from the + style hierarchy. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.first_line_indent + + @first_line_indent.setter + def first_line_indent(self, value): + pPr = self._element.get_or_add_pPr() + pPr.first_line_indent = value + + @property + def keep_together(self): + """|True| if the paragraph should be kept "in one piece" and not broken across a + page boundary when the document is rendered. + + |None| indicates its effective value is inherited from the style hierarchy. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.keepLines_val + + @keep_together.setter + def keep_together(self, value): + self._element.get_or_add_pPr().keepLines_val = value + + @property + def keep_with_next(self): + """|True| if the paragraph should be kept on the same page as the subsequent + paragraph when the document is rendered. + + For example, this property could be used to keep a section heading on the same + page as its first paragraph. |None| indicates its effective value is inherited + from the style hierarchy. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.keepNext_val + + @keep_with_next.setter + def keep_with_next(self, value): + self._element.get_or_add_pPr().keepNext_val = value + + @property + def left_indent(self): + """|Length| value specifying the space between the left margin and the left side + of the paragraph. 
+ + |None| indicates the left indent value is inherited from the style hierarchy. + Use an |Inches| value object as a convenient way to apply indentation in units + of inches. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.ind_left + + @left_indent.setter + def left_indent(self, value): + pPr = self._element.get_or_add_pPr() + pPr.ind_left = value + + @property + def line_spacing(self): + """|float| or |Length| value specifying the space between baselines in + successive lines of the paragraph. + + A value of |None| indicates line spacing is inherited from the style hierarchy. + A float value, e.g. ``2.0`` or ``1.75``, indicates spacing is applied in + multiples of line heights. A |Length| value such as ``Pt(12)`` indicates spacing + is a fixed height. The |Pt| value class is a convenient way to apply line + spacing in units of points. Assigning |None| resets line spacing to inherit from + the style hierarchy. + """ + pPr = self._element.pPr + if pPr is None: + return None + return self._line_spacing(pPr.spacing_line, pPr.spacing_lineRule) + + @line_spacing.setter + def line_spacing(self, value): + pPr = self._element.get_or_add_pPr() + if value is None: + pPr.spacing_line = None + pPr.spacing_lineRule = None + elif isinstance(value, Length): + pPr.spacing_line = value + if pPr.spacing_lineRule != WD_LINE_SPACING.AT_LEAST: + pPr.spacing_lineRule = WD_LINE_SPACING.EXACTLY + else: + pPr.spacing_line = Emu(value * Twips(240)) + pPr.spacing_lineRule = WD_LINE_SPACING.MULTIPLE + + @property + def line_spacing_rule(self): + """A member of the :ref:`WdLineSpacing` enumeration indicating how the value of + :attr:`line_spacing` should be interpreted. + + Assigning any of the :ref:`WdLineSpacing` members :attr:`SINGLE`, + :attr:`DOUBLE`, or :attr:`ONE_POINT_FIVE` will cause the value of + :attr:`line_spacing` to be updated to produce the corresponding line spacing. 
+ """ + pPr = self._element.pPr + if pPr is None: + return None + return self._line_spacing_rule(pPr.spacing_line, pPr.spacing_lineRule) + + @line_spacing_rule.setter + def line_spacing_rule(self, value): + pPr = self._element.get_or_add_pPr() + if value == WD_LINE_SPACING.SINGLE: + pPr.spacing_line = Twips(240) + pPr.spacing_lineRule = WD_LINE_SPACING.MULTIPLE + elif value == WD_LINE_SPACING.ONE_POINT_FIVE: + pPr.spacing_line = Twips(360) + pPr.spacing_lineRule = WD_LINE_SPACING.MULTIPLE + elif value == WD_LINE_SPACING.DOUBLE: + pPr.spacing_line = Twips(480) + pPr.spacing_lineRule = WD_LINE_SPACING.MULTIPLE + else: + pPr.spacing_lineRule = value + + @property + def page_break_before(self): + """|True| if the paragraph should appear at the top of the page following the + prior paragraph. + + |None| indicates its effective value is inherited from the style hierarchy. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.pageBreakBefore_val + + @page_break_before.setter + def page_break_before(self, value): + self._element.get_or_add_pPr().pageBreakBefore_val = value + + @property + def right_indent(self): + """|Length| value specifying the space between the right margin and the right + side of the paragraph. + + |None| indicates the right indent value is inherited from the style hierarchy. + Use a |Cm| value object as a convenient way to apply indentation in units of + centimeters. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.ind_right + + @right_indent.setter + def right_indent(self, value): + pPr = self._element.get_or_add_pPr() + pPr.ind_right = value + + @property + def space_after(self): + """|Length| value specifying the spacing to appear between this paragraph and + the subsequent paragraph. + + |None| indicates this value is inherited from the style hierarchy. 
|Length| + objects provide convenience properties, such as :attr:`~.Length.pt` and + :attr:`~.Length.inches`, that allow easy conversion to various length units. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.spacing_after + + @space_after.setter + def space_after(self, value): + self._element.get_or_add_pPr().spacing_after = value + + @property + def space_before(self): + """|Length| value specifying the spacing to appear between this paragraph and + the prior paragraph. + + |None| indicates this value is inherited from the style hierarchy. |Length| + objects provide convenience properties, such as :attr:`~.Length.pt` and + :attr:`~.Length.cm`, that allow easy conversion to various length units. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.spacing_before + + @space_before.setter + def space_before(self, value): + self._element.get_or_add_pPr().spacing_before = value + + @lazyproperty + def tab_stops(self): + """|TabStops| object providing access to the tab stops defined for this + paragraph format.""" + pPr = self._element.get_or_add_pPr() + return TabStops(pPr) + + @property + def widow_control(self): + """|True| if the first and last lines in the paragraph remain on the same page + as the rest of the paragraph when Word repaginates the document. + + |None| indicates its effective value is inherited from the style hierarchy. + """ + pPr = self._element.pPr + if pPr is None: + return None + return pPr.widowControl_val + + @widow_control.setter + def widow_control(self, value): + self._element.get_or_add_pPr().widowControl_val = value + + @staticmethod + def _line_spacing(spacing_line, spacing_lineRule): + """Return the line spacing value calculated from the combination of + `spacing_line` and `spacing_lineRule`. + + Returns a |float| number of lines when `spacing_lineRule` is + ``WD_LINE_SPACING.MULTIPLE``, otherwise a |Length| object of absolute line + height is returned. 
Returns |None| when `spacing_line` is |None|. + """ + if spacing_line is None: + return None + if spacing_lineRule == WD_LINE_SPACING.MULTIPLE: + return spacing_line / Pt(12) + return spacing_line + + @staticmethod + def _line_spacing_rule(line, lineRule): + """Return the line spacing rule value calculated from the combination of `line` + and `lineRule`. + + Returns special members of the :ref:`WdLineSpacing` enumeration when line + spacing is single, double, or 1.5 lines. + """ + if lineRule == WD_LINE_SPACING.MULTIPLE: + if line == Twips(240): + return WD_LINE_SPACING.SINGLE + if line == Twips(360): + return WD_LINE_SPACING.ONE_POINT_FIVE + if line == Twips(480): + return WD_LINE_SPACING.DOUBLE + return lineRule diff --git a/.venv/lib/python3.12/site-packages/docx/text/run.py b/.venv/lib/python3.12/site-packages/docx/text/run.py new file mode 100644 index 00000000..0e2f5bc1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/text/run.py @@ -0,0 +1,245 @@ +"""Run-related proxy objects for python-docx, Run in particular.""" + +from __future__ import annotations + +from typing import IO, TYPE_CHECKING, Iterator, cast + +from docx.drawing import Drawing +from docx.enum.style import WD_STYLE_TYPE +from docx.enum.text import WD_BREAK +from docx.oxml.drawing import CT_Drawing +from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak +from docx.shape import InlineShape +from docx.shared import StoryChild +from docx.styles.style import CharacterStyle +from docx.text.font import Font +from docx.text.pagebreak import RenderedPageBreak + +if TYPE_CHECKING: + import docx.types as t + from docx.enum.text import WD_UNDERLINE + from docx.oxml.text.run import CT_R, CT_Text + from docx.shared import Length + + +class Run(StoryChild): + """Proxy object wrapping `<w:r>` element. + + Several of the properties on Run take a tri-state value, |True|, |False|, or |None|. + |True| and |False| correspond to on and off respectively. 
class Run(StoryChild):
    """Proxy object wrapping `<w:r>` element.

    Several of the properties on Run take a tri-state value, |True|, |False|, or |None|.
    |True| and |False| correspond to on and off respectively. |None| indicates the
    property is not specified directly on the run and its effective value is taken from
    the style hierarchy.
    """

    def __init__(self, r: CT_R, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        # -- the same `<w:r>` element is published under three attribute names --
        self._r = self._element = self.element = r

    def add_break(self, break_type: WD_BREAK = WD_BREAK.LINE) -> None:
        """Add a break element of `break_type` to this run.

        `break_type` can take the values `WD_BREAK.LINE`, `WD_BREAK.PAGE`,
        `WD_BREAK.COLUMN`, `WD_BREAK.LINE_CLEAR_LEFT`, `WD_BREAK.LINE_CLEAR_RIGHT` and
        `WD_BREAK.LINE_CLEAR_ALL`, where `WD_BREAK` is imported from `docx.enum.text`.
        `break_type` defaults to `WD_BREAK.LINE`.

        Raises |KeyError| for any other `break_type`.
        """
        # -- maps each supported break-type to its (`type`, `clear`) attribute pair;
        # -- None means the attribute is left unset on the new break element
        type_, clear = {
            WD_BREAK.LINE: (None, None),
            WD_BREAK.PAGE: ("page", None),
            WD_BREAK.COLUMN: ("column", None),
            WD_BREAK.LINE_CLEAR_LEFT: ("textWrapping", "left"),
            WD_BREAK.LINE_CLEAR_RIGHT: ("textWrapping", "right"),
            WD_BREAK.LINE_CLEAR_ALL: ("textWrapping", "all"),
        }[break_type]
        br = self._r.add_br()
        if type_ is not None:
            br.type = type_
        if clear is not None:
            br.clear = clear

    def add_picture(
        self,
        image_path_or_stream: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ) -> InlineShape:
        """Return |InlineShape| containing image identified by `image_path_or_stream`.

        The picture is added to the end of this run.

        `image_path_or_stream` can be a path (a string) or a file-like object containing
        a binary image.

        If neither width nor height is specified, the picture appears at
        its native size. If only one is specified, it is used to compute a scaling
        factor that is then applied to the unspecified dimension, preserving the aspect
        ratio of the image. The native size of the picture is calculated using the dots-
        per-inch (dpi) value specified in the image file, defaulting to 72 dpi if no
        value is specified, as is often the case.
        """
        inline = self.part.new_pic_inline(image_path_or_stream, width, height)
        self._r.add_drawing(inline)
        return InlineShape(inline)

    def add_tab(self) -> None:
        """Add a ``<w:tab/>`` element at the end of the run, which Word interprets as a
        tab character."""
        self._r.add_tab()

    def add_text(self, text: str) -> _Text:
        """Return a newly appended |_Text| object (corresponding to a new ``<w:t>``
        child element) to the run, containing `text`.

        Compare with the possibly more friendly approach of assigning text to the
        :attr:`Run.text` property.
        """
        t = self._r.add_t(text)
        return _Text(t)

    @property
    def bold(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text of the run to appear in bold face. When |False|,
        the text unconditionally appears non-bold. When |None| the bold setting for this
        run is inherited from the style hierarchy.
        """
        return self.font.bold

    @bold.setter
    def bold(self, value: bool | None) -> None:
        self.font.bold = value

    def clear(self) -> Run:
        """Return reference to this run after removing all its content.

        All run formatting is preserved.
        """
        self._r.clear_content()
        return self

    @property
    def contains_page_break(self) -> bool:
        """`True` when one or more rendered page-breaks occur in this run.

        Note that "hard" page-breaks inserted by the author are not included. A hard
        page-break gives rise to a rendered page-break in the right position so if those
        were included that page-break would be "double-counted".

        It would be very rare for multiple rendered page-breaks to occur in a single
        run, but it is possible.
        """
        return bool(self._r.lastRenderedPageBreaks)

    @property
    def font(self) -> Font:
        """The |Font| object providing access to the character formatting properties for
        this run, such as font name and size.

        A new |Font| proxy is constructed on each access.
        """
        return Font(self._element)

    @property
    def italic(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text of the run to appear in italics. When |False|, the
        text unconditionally appears non-italic. When |None| the italic setting for this
        run is inherited from the style hierarchy.
        """
        return self.font.italic

    @italic.setter
    def italic(self, value: bool | None) -> None:
        self.font.italic = value

    def iter_inner_content(self) -> Iterator[str | Drawing | RenderedPageBreak]:
        """Generate the content-items in this run in the order they appear.

        NOTE: only content-types currently supported by `python-docx` are generated. In
        this version, that is text and rendered page-breaks. Drawing is included but
        currently only provides access to its XML element (CT_Drawing) on its
        `._drawing` attribute. `Drawing` attributes and methods may be expanded in
        future releases.

        There are a number of element-types that can appear inside a run, but most of
        those (w:br, w:cr, w:noBreakHyphen, w:t, w:tab) have a clear plain-text
        equivalent. Any contiguous range of such elements is generated as a single
        `str`. Rendered page-break and drawing elements are generated individually. Any
        other elements are ignored.
        """
        for item in self._r.inner_content_items:
            if isinstance(item, str):
                yield item
            elif isinstance(item, CT_LastRenderedPageBreak):
                yield RenderedPageBreak(item, self)
            elif isinstance(item, CT_Drawing):  # pyright: ignore[reportUnnecessaryIsInstance]
                yield Drawing(item, self)

    @property
    def style(self) -> CharacterStyle:
        """Read/write.

        A |CharacterStyle| object representing the character style applied to this run.
        The default character style for the document (often `Default Character Font`) is
        returned if the run has no directly-applied character style. Setting this
        property to |None| removes any directly-applied character style.
        """
        style_id = self._r.style
        return cast(CharacterStyle, self.part.get_style(style_id, WD_STYLE_TYPE.CHARACTER))

    @style.setter
    def style(self, style_or_name: str | CharacterStyle | None) -> None:
        style_id = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.CHARACTER)
        self._r.style = style_id

    @property
    def text(self) -> str:
        """String formed by concatenating the text equivalent of each content item in
        this run.

        Each `<w:t>` element adds the text characters it contains. A `<w:tab/>` element
        adds a `\\t` character. A `<w:cr/>` or `<w:br>` element each add a `\\n`
        character. Note that a `<w:br>` element can indicate a page break or column
        break as well as a line break. Only line-break `<w:br>` elements translate to
        a `\\n` character. Others are ignored. All other content child elements, such as
        `<w:drawing>`, are ignored.

        Assigning text to this property has the reverse effect, translating each `\\t`
        character to a `<w:tab/>` element and each `\\n` or `\\r` character to a
        `<w:cr/>` element. Any existing run content is replaced. Run formatting is
        preserved.
        """
        return self._r.text

    @text.setter
    def text(self, text: str) -> None:
        self._r.text = text

    @property
    def underline(self) -> bool | WD_UNDERLINE | None:
        """The underline style for this |Run|.

        Value is one of |None|, |True|, |False|, or a member of :ref:`WdUnderline`.

        A value of |None| indicates the run has no directly-applied underline value and
        so will inherit the underline value of its containing paragraph. Assigning
        |None| to this property removes any directly-applied underline value.

        A value of |False| indicates a directly-applied setting of no underline,
        overriding any inherited value.

        A value of |True| indicates single underline.

        The values from :ref:`WdUnderline` are used to specify other underline styles
        such as double, wavy, and dotted.
        """
        return self.font.underline

    @underline.setter
    def underline(self, value: bool | WD_UNDERLINE | None) -> None:
        # -- accepts the same value set the getter can return --
        self.font.underline = value
+ """ + return self.font.underline + + @underline.setter + def underline(self, value: bool): + self.font.underline = value + + +class _Text: + """Proxy object wrapping `<w:t>` element.""" + + def __init__(self, t_elm: CT_Text): + super(_Text, self).__init__() + self._t = t_elm diff --git a/.venv/lib/python3.12/site-packages/docx/text/tabstops.py b/.venv/lib/python3.12/site-packages/docx/text/tabstops.py new file mode 100644 index 00000000..824085d2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/text/tabstops.py @@ -0,0 +1,125 @@ +"""Tabstop-related proxy types.""" + +from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER +from docx.shared import ElementProxy + + +class TabStops(ElementProxy): + """A sequence of |TabStop| objects providing access to the tab stops of a paragraph + or paragraph style. + + Supports iteration, indexed access, del, and len(). It is accesed using the + :attr:`~.ParagraphFormat.tab_stops` property of ParagraphFormat; it is not intended + to be constructed directly. 
class TabStops(ElementProxy):
    """A sequence of |TabStop| objects providing access to the tab stops of a paragraph
    or paragraph style.

    Supports iteration, indexed access, del, and len(). It is accessed using the
    :attr:`~.ParagraphFormat.tab_stops` property of ParagraphFormat; it is not intended
    to be constructed directly.
    """

    def __init__(self, element):
        super().__init__(element, None)
        self._pPr = element

    def __delitem__(self, idx):
        """Remove the tab at offset `idx` in this sequence.

        Raises |IndexError| when there is no tab at `idx`, including when no tabs
        container is present at all.
        """
        tabs = self._pPr.tabs
        try:
            tabs.remove(tabs[idx])
        except (AttributeError, IndexError):
            # -- `tabs` is None when no tabs container is present, which surfaces as
            # -- AttributeError on `tabs.remove`; both failures are reported uniformly
            # -- and the internal exception is suppressed from the traceback
            raise IndexError("tab index out of range") from None

        # -- drop the container once its last tab has been removed --
        if len(tabs) == 0:
            self._pPr.remove(tabs)

    def __getitem__(self, idx):
        """Enables list-style access by index.

        Raises |IndexError| when no tabs container is present.
        """
        tabs = self._pPr.tabs
        if tabs is None:
            raise IndexError("TabStops object is empty")
        tab = tabs.tab_lst[idx]
        return TabStop(tab)

    def __iter__(self):
        """Generate a TabStop object for each of the w:tab elements, in XML document
        order."""
        tabs = self._pPr.tabs
        if tabs is not None:
            for tab in tabs.tab_lst:
                yield TabStop(tab)

    def __len__(self):
        """Number of tab stops; 0 when no tabs container is present."""
        tabs = self._pPr.tabs
        if tabs is None:
            return 0
        return len(tabs.tab_lst)

    def add_tab_stop(
        self, position, alignment=WD_TAB_ALIGNMENT.LEFT, leader=WD_TAB_LEADER.SPACES
    ):
        """Add a new tab stop at `position`, a |Length| object specifying the location
        of the tab stop relative to the paragraph edge.

        A negative `position` value is valid and appears in hanging indentation. Tab
        alignment defaults to left, but may be specified by passing a member of the
        :ref:`WdTabAlignment` enumeration as `alignment`. An optional leader character
        can be specified by passing a member of the :ref:`WdTabLeader` enumeration as
        `leader`.

        Returns the |TabStop| proxy for the newly inserted tab.
        """
        tabs = self._pPr.get_or_add_tabs()
        tab = tabs.insert_tab_in_order(position, alignment, leader)
        return TabStop(tab)

    def clear_all(self):
        """Remove all custom tab stops."""
        self._pPr._remove_tabs()
class TabStop(ElementProxy):
    """A single tab stop belonging to a paragraph or style.

    Instances are obtained by indexing or iterating the owning |TabStops| object.
    """

    def __init__(self, element):
        super().__init__(element, None)
        self._tab = element

    @property
    def alignment(self):
        """Read/write :ref:`WdTabAlignment` member giving this tab stop's alignment."""
        return self._tab.val

    @alignment.setter
    def alignment(self, value):
        self._tab.val = value

    @property
    def leader(self):
        """Read/write :ref:`WdTabLeader` member naming the repeating "leader"
        character that fills the space spanned by this tab.

        Assigning |None| produces the same result as assigning `WD_TAB_LEADER.SPACES`.
        """
        return self._tab.leader

    @leader.setter
    def leader(self, value):
        self._tab.leader = value

    @property
    def position(self):
        """Read/write |Length| distance of this tab stop from the inside edge of the
        paragraph.

        The value may be positive or negative.
        """
        return self._tab.pos

    @position.setter
    def position(self, value):
        old_tab = self._tab
        container = old_tab.getparent()
        # -- a replacement tab carrying the old alignment and leader is inserted at the
        # -- new position first; only then is the old tab element removed
        self._tab = container.insert_tab_in_order(value, old_tab.val, old_tab.leader)
        container.remove(old_tab)