aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py284
1 files changed, 284 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py b/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
new file mode 100644
index 00000000..943f9b6c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/text/pagebreak.py
@@ -0,0 +1,284 @@
+"""Custom element class for rendered page-break (CT_LastRenderedPageBreak)."""
+
+from __future__ import annotations
+
+import copy
+from typing import TYPE_CHECKING
+
+from docx.oxml.xmlchemy import BaseOxmlElement
+from docx.shared import lazyproperty
+
+if TYPE_CHECKING:
+ from docx.oxml.text.hyperlink import CT_Hyperlink
+ from docx.oxml.text.paragraph import CT_P
+
+
class CT_LastRenderedPageBreak(BaseOxmlElement):
    """`<w:lastRenderedPageBreak>` element, indicating page break inserted by renderer.

    A rendered page-break is one inserted by the renderer when it runs out of room on a
    page. It is an empty element (no attrs or children) and is a child of CT_R, peer to
    CT_Text.

    NOTE: this complex-type name does not exist in the schema, where
    `w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
    distinguished behavior. CT_Empty is used for many elements.
    """

    @property
    def following_fragment_p(self) -> CT_P:
        """A "loose" `CT_P` containing only the paragraph content after this break.

        Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
        page-break in its paragraph.

        The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
        page-break with this `w:lastRenderedPageBreak` element and all content preceding
        it removed. When the break is inside a hyperlink, the entire hyperlink is
        removed as well because it is assigned to the preceding fragment.

        NOTE: this `w:p` can itself contain one or more `w:lastRenderedPageBreak`
        elements (when the paragraph contained more than one). While this is rare, the
        caller should treat this paragraph the same as other paragraphs and split it if
        necessary in a following step or recursion.
        """
        if self != self._first_lrpb_in_p(self._enclosing_p):
            raise ValueError("only defined on first rendered page-break in paragraph")

        # -- splitting approach is different when break is inside a hyperlink --
        return (
            self._following_frag_in_hlink
            if self._is_in_hyperlink
            else self._following_frag_in_run
        )

    @property
    def follows_all_content(self) -> bool:
        """True when this page-break element is the last "content" in the paragraph.

        This is a very uncommon case and may only occur in contrived cases or where
        the XML is edited by hand, but it is not precluded by the spec.
        """
        # -- a page-break inside a hyperlink never meets these criteria (for our
        # -- purposes at least) because it is considered "atomic" and always associated
        # -- with the page it starts on.
        if self._is_in_hyperlink:
            return False

        # -- NOTE(review): the XPath below is evaluated from the enclosing `w:p` and is
        # -- not anchored to `self`, and it only considers `w:r` elements that are
        # -- direct children of the paragraph (content in a trailing `w:hyperlink` is
        # -- not examined). Confirm callers only rely on this for the single-break,
        # -- no-trailing-hyperlink case.
        return bool(
            # -- truthy when the XPath matches at least one element --
            self._enclosing_p.xpath(
                # -- in last run of paragraph --
                "(./w:r)[last()]"
                # -- all page-breaks --
                "/w:lastRenderedPageBreak"
                # -- that are not followed by any content-bearing elements --
                f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
            )
        )

    @property
    def precedes_all_content(self) -> bool:
        """True when a `w:lastRenderedPageBreak` precedes all paragraph content.

        This is a common case; it occurs whenever the page breaks on an even paragraph
        boundary.
        """
        # -- a page-break inside a hyperlink never meets these criteria because there
        # -- is always part of the hyperlink text before the page-break.
        if self._is_in_hyperlink:
            return False

        # -- NOTE(review): the XPath below is evaluated from the enclosing `w:p` and is
        # -- not anchored to `self`, and it only considers the first `w:r` that is a
        # -- direct child of the paragraph (content in a leading `w:hyperlink` is not
        # -- examined). Confirm callers only rely on this for the single-break,
        # -- no-leading-hyperlink case.
        return bool(
            # -- truthy when the XPath matches at least one element --
            self._enclosing_p.xpath(
                # -- in first run of paragraph --
                "./w:r[1]"
                # -- all page-breaks --
                "/w:lastRenderedPageBreak"
                # -- that are not preceded by any content-bearing elements --
                f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]"
            )
        )

    @property
    def preceding_fragment_p(self) -> CT_P:
        """A "loose" `CT_P` containing only the paragraph content before this break.

        Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
        page-break in its paragraph.

        The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
        page-break with this `w:lastRenderedPageBreak` element and all content
        following it removed. When the break is inside a hyperlink, the entire
        hyperlink is retained (minus the break itself) so the hyperlink is not split.
        """
        if self != self._first_lrpb_in_p(self._enclosing_p):
            raise ValueError("only defined on first rendered page-break in paragraph")

        # -- splitting approach is different when break is inside a hyperlink --
        return (
            self._preceding_frag_in_hlink
            if self._is_in_hyperlink
            else self._preceding_frag_in_run
        )

    def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
        """The `w:hyperlink` grandparent of the `lrpb` page-break element.

        `lrpb` is passed explicitly (rather than using `self`) so this can be applied
        to the page-break located in a cloned paragraph.

        Raises `IndexError` when `lrpb` is not inside a `w:hyperlink` (e.g. it has a
        `w:p` grandparent), so only call when the break is known to be in a hyperlink.
        """
        return lrpb.xpath("./parent::w:r/parent::w:hyperlink")[0]

    @property
    def _enclosing_p(self) -> CT_P:
        """The nearest `w:p` ancestor of this `w:lastRenderedPageBreak`.

        Raises `IndexError` when this element has no `w:p` ancestor.
        """
        return self.xpath("./ancestor::w:p[1]")[0]

    def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
        """The first `w:lastRenderedPageBreak` element in `p`.

        Both breaks in plain runs and breaks in runs nested in a `w:hyperlink` are
        considered.

        Raises `ValueError` if there are no rendered page-breaks in `p`.
        """
        lrpbs = p.xpath(
            "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
        )
        if not lrpbs:
            raise ValueError("no rendered page-breaks in paragraph element")
        return lrpbs[0]

    @lazyproperty
    def _following_frag_in_hlink(self) -> CT_P:
        """Following CT_P fragment when break occurs within a hyperlink.

        Note this is a *partial-function* and raises `ValueError` when this break is
        not inside a hyperlink.
        """
        if not self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self); this
        # -- relies on `self` being the first rendered page-break in its paragraph
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
        hyperlink = lrpb._enclosing_hyperlink(lrpb)

        # -- delete all w:p inner-content preceding the hyperlink (but not w:pPr) --
        for e in hyperlink.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
            p.remove(e)

        # -- remove the whole hyperlink, it belongs to the preceding-fragment-p --
        hyperlink.getparent().remove(hyperlink)

        # -- that's it, return the remaining fragment of `w:p` clone --
        return p

    @lazyproperty
    def _following_frag_in_run(self) -> CT_P:
        """Following CT_P fragment when break does not occur in a hyperlink.

        Note this is a *partial-function* and raises `ValueError` when this break is
        inside a hyperlink.
        """
        if self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break not in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self); this
        # -- relies on `self` being the first rendered page-break in its paragraph
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
        enclosing_r = lrpb.xpath("./parent::w:r")[0]

        # -- delete all w:p inner-content preceding that run (but not w:pPr) --
        for e in enclosing_r.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
            p.remove(e)

        # -- then remove all run inner-content preceding this lrpb in its run (but not
        # -- the `w:rPr`) and also remove the page-break itself
        for e in lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"):
            enclosing_r.remove(e)
        enclosing_r.remove(lrpb)

        return p

    @lazyproperty
    def _is_in_hyperlink(self) -> bool:
        """True when this page-break is embedded in a hyperlink run."""
        return bool(self.xpath("./parent::w:r/parent::w:hyperlink"))

    @lazyproperty
    def _preceding_frag_in_hlink(self) -> CT_P:
        """Preceding CT_P fragment when break occurs within a hyperlink.

        Note this is a *partial-function* and raises `ValueError` when this break is
        not inside a hyperlink.
        """
        if not self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self); this
        # -- relies on `self` being the first rendered page-break in its paragraph
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
        hyperlink = lrpb._enclosing_hyperlink(lrpb)

        # -- delete all w:p inner-content following the hyperlink --
        for e in hyperlink.xpath("./following-sibling::*"):
            p.remove(e)

        # -- remove this page-break from inside the hyperlink --
        lrpb.getparent().remove(lrpb)

        # -- that's it, the entire hyperlink goes into the preceding fragment so
        # -- the hyperlink is not "split".
        return p

    @lazyproperty
    def _preceding_frag_in_run(self) -> CT_P:
        """Preceding CT_P fragment when break does not occur in a hyperlink.

        Note this is a *partial-function* and raises `ValueError` when this break is
        inside a hyperlink.
        """
        if self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break not in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self); this
        # -- relies on `self` being the first rendered page-break in its paragraph
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
        enclosing_r = lrpb.xpath("./parent::w:r")[0]

        # -- delete all `w:p` inner-content following that run --
        for e in enclosing_r.xpath("./following-sibling::*"):
            p.remove(e)

        # -- then delete all `w:r` inner-content following this lrpb in its run and
        # -- also remove the page-break itself
        for e in lrpb.xpath("./following-sibling::*"):
            enclosing_r.remove(e)
        enclosing_r.remove(lrpb)

        return p

    @lazyproperty
    def _run_inner_content_xpath(self) -> str:
        """XPath fragment matching any run inner-content elements.

        Intended for use inside a predicate, e.g. `*[{fragment}]`, where it selects
        elements that are one of the listed content-bearing run children.
        """
        return (
            "self::w:br"
            " | self::w:cr"
            " | self::w:drawing"
            " | self::w:noBreakHyphen"
            " | self::w:ptab"
            " | self::w:t"
            " | self::w:tab"
        )