diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/text/hyperlink.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/text/hyperlink.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/docx/text/hyperlink.py | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py b/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py new file mode 100644 index 00000000..a23df1c7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/docx/text/hyperlink.py @@ -0,0 +1,121 @@ +"""Hyperlink-related proxy objects for python-docx, Hyperlink in particular. + +A hyperlink occurs in a paragraph, at the same level as a Run, and a hyperlink itself +contains runs, which is where the visible text of the hyperlink is stored. So it's kind +of in-between, less than a paragraph and more than a run. So it gets its own module. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from docx.shared import Parented +from docx.text.run import Run + +if TYPE_CHECKING: + import docx.types as t + from docx.oxml.text.hyperlink import CT_Hyperlink + + +class Hyperlink(Parented): + """Proxy object wrapping a `<w:hyperlink>` element. + + A hyperlink occurs as a child of a paragraph, at the same level as a Run. A + hyperlink itself contains runs, which is where the visible text of the hyperlink is + stored. + """ + + def __init__(self, hyperlink: CT_Hyperlink, parent: t.ProvidesStoryPart): + super().__init__(parent) + self._parent = parent + self._hyperlink = self._element = hyperlink + + @property + def address(self) -> str: + """The "URL" of the hyperlink (but not necessarily a web link). + + While commonly a web link like "https://google.com" the hyperlink address can + take a variety of forms including "internal links" to bookmarked locations + within the document. When this hyperlink is an internal "jump" to for example a + heading from the table-of-contents (TOC), the address is blank. The bookmark + reference (like "_Toc147925734") is stored in the `.fragment` property. + """ + rId = self._hyperlink.rId + return self._parent.part.rels[rId].target_ref if rId else "" + + @property + def contains_page_break(self) -> bool: + """True when the text of this hyperlink is broken across page boundaries. + + This is not uncommon and can happen for example when the hyperlink text is + multiple words and occurs in the last line of a page. Theoretically, a hyperlink + can contain more than one page break but that would be extremely uncommon in + practice. Still, this value should be understood to mean that "one-or-more" + rendered page breaks are present. + """ + return bool(self._hyperlink.lastRenderedPageBreaks) + + @property + def fragment(self) -> str: + """Reference like `#glossary` at end of URL that refers to a sub-resource. + + Note that this value does not include the fragment-separator character ("#"). + + This value is known as a "named anchor" in an HTML context and "anchor" in the + MS API, but an "anchor" element (`<a>`) represents a full hyperlink in HTML so + we avoid confusion by using the more precise RFC 3986 naming "URI fragment". + + These are also used to refer to bookmarks within the same document, in which + case the `.address` value with be blank ("") and this property will hold a + value like "_Toc147925734". + + To reliably get an entire web URL you will need to concatenate this with the + `.address` value, separated by "#" when both are present. Consider using the + `.url` property for that purpose. + + Word sometimes stores a fragment in this property (an XML attribute) and + sometimes with the address, depending on how the URL is inserted, so don't + depend on this field being empty to indicate no fragment is present. + """ + return self._hyperlink.anchor or "" + + @property + def runs(self) -> list[Run]: + """List of |Run| instances in this hyperlink. + + Together these define the visible text of the hyperlink. The text of a hyperlink + is typically contained in a single run will be broken into multiple runs if for + example part of the hyperlink is bold or the text was changed after the document + was saved. + """ + return [Run(r, self._parent) for r in self._hyperlink.r_lst] + + @property + def text(self) -> str: + """String formed by concatenating the text of each run in the hyperlink. + + Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters + respectively. Note that rendered page-breaks can occur within a hyperlink but + they are not reflected in this text. + """ + return self._hyperlink.text + + @property + def url(self) -> str: + """Convenience property to get web URLs from hyperlinks that contain them. + + This value is the empty string ("") when there is no address portion, so its + boolean value can also be used to distinguish external URIs from internal "jump" + hyperlinks like those found in a table-of-contents. + + Note that this value may also be a link to a file, so if you only want web-urls + you'll need to check for a protocol prefix like `https://`. + + When both an address and fragment are present, the return value joins the two + separated by the fragment-separator hash ("#"). Otherwise this value is the same + as that of the `.address` property. + """ + address, fragment = self.address, self.fragment + if not address: + return "" + return f"{address}#{fragment}" if fragment else address |