about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/text/hyperlink.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/text/hyperlink.py')
-rw-r--r--.venv/lib/python3.12/site-packages/docx/text/hyperlink.py121
1 files changed, 121 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py b/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py
new file mode 100644
index 00000000..a23df1c7
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py
@@ -0,0 +1,121 @@
+"""Hyperlink-related proxy objects for python-docx, Hyperlink in particular.
+
+A hyperlink occurs in a paragraph, at the same level as a Run, and a hyperlink itself
+contains runs, which is where the visible text of the hyperlink is stored. So it's kind
+of in-between, less than a paragraph and more than a run. So it gets its own module.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from docx.shared import Parented
+from docx.text.run import Run
+
+if TYPE_CHECKING:
+    import docx.types as t
+    from docx.oxml.text.hyperlink import CT_Hyperlink
+
+
+class Hyperlink(Parented):
+    """Proxy object wrapping a `<w:hyperlink>` element.
+
+    A hyperlink occurs as a child of a paragraph, at the same level as a Run. A
+    hyperlink itself contains runs, which is where the visible text of the hyperlink is
+    stored.
+    """
+
+    def __init__(self, hyperlink: CT_Hyperlink, parent: t.ProvidesStoryPart):
+        super().__init__(parent)
+        self._parent = parent
+        self._hyperlink = self._element = hyperlink
+
+    @property
+    def address(self) -> str:
+        """The "URL" of the hyperlink (but not necessarily a web link).
+
+        While commonly a web link like "https://google.com" the hyperlink address can
+        take a variety of forms including "internal links" to bookmarked locations
+        within the document. When this hyperlink is an internal "jump" to for example a
+        heading from the table-of-contents (TOC), the address is blank. The bookmark
+        reference (like "_Toc147925734") is stored in the `.fragment` property.
+        """
+        rId = self._hyperlink.rId
+        return self._parent.part.rels[rId].target_ref if rId else ""
+
+    @property
+    def contains_page_break(self) -> bool:
+        """True when the text of this hyperlink is broken across page boundaries.
+
+        This is not uncommon and can happen for example when the hyperlink text is
+        multiple words and occurs in the last line of a page. Theoretically, a hyperlink
+        can contain more than one page break but that would be extremely uncommon in
+        practice. Still, this value should be understood to mean that "one-or-more"
+        rendered page breaks are present.
+        """
+        return bool(self._hyperlink.lastRenderedPageBreaks)
+
+    @property
+    def fragment(self) -> str:
+        """Reference like `#glossary` at end of URL that refers to a sub-resource.
+
+        Note that this value does not include the fragment-separator character ("#").
+
+        This value is known as a "named anchor" in an HTML context and "anchor" in the
+        MS API, but an "anchor" element (`<a>`) represents a full hyperlink in HTML so
+        we avoid confusion by using the more precise RFC 3986 naming "URI fragment".
+
+        These are also used to refer to bookmarks within the same document, in which
+        case the `.address` value will be blank ("") and this property will hold a
+        value like "_Toc147925734".
+
+        To reliably get an entire web URL you will need to concatenate this with the
+        `.address` value, separated by "#" when both are present. Consider using the
+        `.url` property for that purpose.
+
+        Word sometimes stores a fragment in this property (an XML attribute) and
+        sometimes with the address, depending on how the URL is inserted, so don't
+        depend on this field being empty to indicate no fragment is present.
+        """
+        return self._hyperlink.anchor or ""
+
+    @property
+    def runs(self) -> list[Run]:
+        """List of |Run| instances in this hyperlink.
+
+        Together these define the visible text of the hyperlink. The text of a hyperlink
+        is typically contained in a single run but will be broken into multiple runs if,
+        for example, part of the hyperlink is bold or the text was changed after the
+        document was saved.
+        """
+        return [Run(r, self._parent) for r in self._hyperlink.r_lst]
+
+    @property
+    def text(self) -> str:
+        """String formed by concatenating the text of each run in the hyperlink.
+
+        Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters
+        respectively. Note that rendered page-breaks can occur within a hyperlink but
+        they are not reflected in this text.
+        """
+        return self._hyperlink.text
+
+    @property
+    def url(self) -> str:
+        """Convenience property to get web URLs from hyperlinks that contain them.
+
+        This value is the empty string ("") when there is no address portion, so its
+        boolean value can also be used to distinguish external URIs from internal "jump"
+        hyperlinks like those found in a table-of-contents.
+
+        Note that this value may also be a link to a file, so if you only want web-urls
+        you'll need to check for a protocol prefix like `https://`.
+
+        When both an address and fragment are present, the return value joins the two
+        separated by the fragment-separator hash ("#"). Otherwise this value is the same
+        as that of the `.address` property.
+        """
+        address, fragment = self.address, self.fragment
+        if not address:
+            return ""
+        return f"{address}#{fragment}" if fragment else address