aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/docx/oxml/parser.py
diff options
context:
space:
mode:
author S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/docx/oxml/parser.py
parent cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/docx/oxml/parser.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/docx/oxml/parser.py 62
1 files changed, 62 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/docx/oxml/parser.py b/.venv/lib/python3.12/site-packages/docx/oxml/parser.py
new file mode 100644
index 00000000..e16ba30b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/docx/oxml/parser.py
@@ -0,0 +1,62 @@
+# pyright: reportImportCycles=false
+
+"""XML parser for python-docx."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Dict, Type, cast
+
+from lxml import etree
+
+from docx.oxml.ns import NamespacePrefixedTag, nsmap
+
+if TYPE_CHECKING:
+ from docx.oxml.xmlchemy import BaseOxmlElement
+
+
+# -- configure XML parser --
+# Namespace-keyed registry of custom element classes; `register_element_cls()` adds
+# entries to it.
+element_class_lookup = etree.ElementNamespaceClassLookup()
+# Shared parser used by `parse_xml()` and `OxmlElement()`. Per the lxml docs,
+# `remove_blank_text=True` discards whitespace-only text between tags and
+# `resolve_entities=False` leaves entity references unexpanded.
+oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
+# Attach the registry so elements created through this parser use registered classes.
+oxml_parser.set_element_class_lookup(element_class_lookup)
+
+
+def parse_xml(xml: str | bytes) -> "BaseOxmlElement":
+ """Root lxml element obtained by parsing XML character string `xml`.
+
+ The custom parser is used, so custom element classes are produced for elements in
+ `xml` that have them.
+ """
+ return cast("BaseOxmlElement", etree.fromstring(xml, oxml_parser))
+
+
+def register_element_cls(tag: str, cls: Type["BaseOxmlElement"]):
+ """Register an lxml custom element-class to use for `tag`.
+
+ A instance of `cls` to be constructed when the oxml parser encounters an element
+ with matching `tag`. `tag` is a string of the form `nspfx:tagroot`, e.g.
+ `'w:document'`.
+ """
+ nspfx, tagroot = tag.split(":")
+ namespace = element_class_lookup.get_namespace(nsmap[nspfx])
+ namespace[tagroot] = cls
+
+
+def OxmlElement(
+ nsptag_str: str,
+ attrs: Dict[str, str] | None = None,
+ nsdecls: Dict[str, str] | None = None,
+) -> BaseOxmlElement | etree._Element: # pyright: ignore[reportPrivateUsage]
+ """Return a 'loose' lxml element having the tag specified by `nsptag_str`.
+
+ The tag in `nsptag_str` must contain the standard namespace prefix, e.g. `a:tbl`.
+ The resulting element is an instance of the custom element class for this tag name
+ if one is defined. A dictionary of attribute values may be provided as `attrs`; they
+ are set if present. All namespaces defined in the dict `nsdecls` are declared in the
+ element using the key as the prefix and the value as the namespace name. If
+ `nsdecls` is not provided, a single namespace declaration is added based on the
+ prefix on `nsptag_str`.
+ """
+ nsptag = NamespacePrefixedTag(nsptag_str)
+ if nsdecls is None:
+ nsdecls = nsptag.nsmap
+ return oxml_parser.makeelement(nsptag.clark_name, attrib=attrs, nsmap=nsdecls)