about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi
diff options
context:
space:
mode:
author S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/cleanup.pxi
parent cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/cleanup.pxi')
-rw-r--r-- .venv/lib/python3.12/site-packages/lxml/cleanup.pxi 215
1 files changed, 215 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi b/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi
new file mode 100644
index 00000000..8e266b33
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi
@@ -0,0 +1,215 @@
+# functions for tree cleanup and removing elements from subtrees
+
+def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
+    """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)
+
+    Remove all namespace declarations from a subtree that are not used
+    by any of the elements or attributes in that tree.
+
+    If a 'top_nsmap' is provided, it must be a mapping from prefixes
+    to namespace URIs.  These namespaces will be declared on the top
+    element of the subtree before running the cleanup, which allows
+    moving namespace declarations to the top of the tree.
+
+    If a 'keep_ns_prefixes' is provided, it must be a list of prefixes.
+    These prefixes will not be removed as part of the cleanup.
+    """
+    element = _rootNodeOrRaise(tree_or_element)
+    c_element = element._c_node
+
+    if top_nsmap:
+        doc = element._doc
+        # declare namespaces from nsmap, then apply them to the subtree
+        _setNodeNamespaces(c_element, doc, None, top_nsmap)
+        moveNodeToDocument(doc, c_element.doc, c_element)
+
+    # Run each prefix through _utf8() once and collect the results
+    # directly into a set (no intermediate list); an empty or missing
+    # argument is passed on as None.
+    keep_ns_prefixes = (
+        {_utf8(prefix) for prefix in keep_ns_prefixes}
+        if keep_ns_prefixes else None)
+
+    _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes)
+
+
+def strip_attributes(tree_or_element, *attribute_names):
+    """strip_attributes(tree_or_element, *attribute_names)
+
+    Remove the attributes with the given names from an Element (or
+    ElementTree) and from all of its descendants.
+
+    Attribute names can contain wildcards as in `_Element.iter`.
+
+    Example usage::
+
+        strip_attributes(root_element,
+                         'simpleattr',
+                         '{http://some/ns}attrname',
+                         '{http://other/ns}*')
+    """
+    cdef _MultiTagMatcher attr_matcher
+    # resolve the root first so that this call runs even without names
+    root = _rootNodeOrRaise(tree_or_element)
+    if not attribute_names:
+        # nothing was requested
+        return
+
+    attr_matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names)
+    attr_matcher.cacheTags(root._doc)
+    # only walk the tree if the matcher can accept at least one attribute
+    if not attr_matcher.rejectsAllAttributes():
+        _strip_attributes(root._c_node, attr_matcher)
+
+
+cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
+    # Remove every attribute accepted by 'matcher' from the element nodes
+    # visited by the tree iteration macros, starting from 'c_node'.
+    cdef xmlAttr* c_attr
+    cdef xmlAttr* c_next_attr
+    # NOTE(review): the trailing 1 presumably makes the walk include the
+    # start node itself -- confirm against the macro definition.
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        c_attr = c_node.properties
+        while c_attr is not NULL:
+            # fetch the successor first, c_attr may be removed below
+            c_next_attr = c_attr.next
+            if matcher.matchesAttribute(c_attr):
+                tree.xmlRemoveProp(c_attr)
+            c_attr = c_next_attr
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
+def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
+    """strip_elements(tree_or_element, *tag_names, with_tail=True)
+
+    Delete all elements with the provided tag names from a tree or
+    subtree.  This will remove the elements and their entire subtree,
+    including all their attributes, text content and descendants.  It
+    will also remove the tail text of the element unless you
+    explicitly set the ``with_tail`` keyword argument option to False.
+
+    Tag names can contain wildcards as in `_Element.iter`.
+
+    Note that this will not delete the element (or ElementTree root
+    element) that you passed even if it matches.  It will only treat
+    its descendants.  If you want to include the root element, check
+    its tag name directly before even calling this function.
+
+    Example usage::
+
+        strip_elements(some_element,
+            'simpletagname',             # non-namespaced tag
+            '{http://some/ns}tagname',   # namespaced tag
+            '{http://some/other/ns}*',   # any tag from a namespace
+            lxml.etree.Comment           # comments
+            )
+    """
+    cdef _MultiTagMatcher matcher
+    # Both lookups happen before the early return below, so they run
+    # even when no tag names were passed.
+    doc = _documentOrRaise(tree_or_element)
+    element = _rootNodeOrRaise(tree_or_element)
+    if not tag_names:
+        return
+
+    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
+    matcher.cacheTags(doc)
+    if matcher.rejectsAll():
+        return
+
+    if isinstance(tree_or_element, _ElementTree):
+        # include PIs and comments next to the root node
+        if matcher.matchesType(tree.XML_COMMENT_NODE):
+            _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail)
+        if matcher.matchesType(tree.XML_PI_NODE):
+            _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail)
+    _strip_elements(doc, element._c_node, matcher, with_tail)
+
+cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
+                     bint with_tail):
+    # For each element node visited by the tree iteration macros, starting
+    # from 'c_node', remove those of its children that 'matcher' accepts.
+    # 'with_tail' selects whether text following a removed node goes too.
+    cdef xmlNode* c_child
+    cdef xmlNode* c_next
+
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        # we run through the children here to prevent any problems
+        # with the tree iteration which would occur if we unlinked the
+        # c_node itself
+        c_child = _findChildForwards(c_node, 0)
+        while c_child is not NULL:
+            # look up the successor before c_child is possibly detached
+            c_next = _nextElement(c_child)
+            if matcher.matches(c_child):
+                if c_child.type == tree.XML_ELEMENT_NODE:
+                    if not with_tail:
+                        # NOTE(review): detaching the node first presumably
+                        # leaves its tail text behind in the parent when
+                        # _removeNode() runs -- confirm against _removeNode().
+                        tree.xmlUnlinkNode(c_child)
+                    _removeNode(doc, c_child)
+                else:
+                    # matched node that is not an element
+                    if with_tail:
+                        _removeText(c_child.next)
+                    tree.xmlUnlinkNode(c_child)
+                    attemptDeallocation(c_child)
+            c_child = c_next
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
+
+
+def strip_tags(tree_or_element, *tag_names):
+    """strip_tags(tree_or_element, *tag_names)
+
+    Delete all elements with the provided tag names from a tree or
+    subtree.  This will remove the elements and their attributes, but
+    *not* their text/tail content or descendants.  Instead, it will
+    merge the text content and children of the element into its
+    parent.
+
+    Tag names can contain wildcards as in `_Element.iter`.
+
+    Note that this will not delete the element (or ElementTree root
+    element) that you passed even if it matches.  It will only treat
+    its descendants.
+
+    Example usage::
+
+        strip_tags(some_element,
+            'simpletagname',             # non-namespaced tag
+            '{http://some/ns}tagname',   # namespaced tag
+            '{http://some/other/ns}*',   # any tag from a namespace
+            Comment                      # comments (including their text!)
+            )
+    """
+    cdef _MultiTagMatcher matcher
+    # Both lookups happen before the early return below, so they run
+    # even when no tag names were passed.
+    doc = _documentOrRaise(tree_or_element)
+    element = _rootNodeOrRaise(tree_or_element)
+    if not tag_names:
+        return
+
+    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
+    matcher.cacheTags(doc)
+    if matcher.rejectsAll():
+        return
+
+    if isinstance(tree_or_element, _ElementTree):
+        # include PIs and comments next to the root node
+        if matcher.matchesType(tree.XML_COMMENT_NODE):
+            _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0)
+        if matcher.matchesType(tree.XML_PI_NODE):
+            _removeSiblings(element._c_node, tree.XML_PI_NODE, 0)
+    _strip_tags(doc, element._c_node, matcher)
+
+cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
+    # For each element node visited by the tree iteration macros, starting
+    # from 'c_node', process those of its children that 'matcher' accepts:
+    # matching elements are replaced by their own children, other matching
+    # nodes are unlinked.
+    cdef xmlNode* c_child
+    cdef xmlNode* c_next
+
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        # we run through the children here to prevent any problems
+        # with the tree iteration which would occur if we unlinked the
+        # c_node itself
+        c_child = _findChildForwards(c_node, 0)
+        while c_child is not NULL:
+            if not matcher.matches(c_child):
+                c_child = _nextElement(c_child)
+                continue
+            if c_child.type == tree.XML_ELEMENT_NODE:
+                # continue with the first child of the replaced element if
+                # it has one, otherwise with the next sibling; both are
+                # looked up before the replacement happens
+                c_next = _findChildForwards(c_child, 0) or _nextElement(c_child)
+                _replaceNodeByChildren(doc, c_child)
+                # NOTE(review): a false result presumably means the node
+                # is still in use and was kept alive -- confirm against
+                # attemptDeallocation().
+                if not attemptDeallocation(c_child):
+                    if c_child.nsDef is not NULL:
+                        # make namespaces absolute
+                        moveNodeToDocument(doc, doc._c_doc, c_child)
+                c_child = c_next
+            else:
+                # matched node that is not an element: look up the
+                # successor first, then detach the node
+                c_next = _nextElement(c_child)
+                tree.xmlUnlinkNode(c_child)
+                attemptDeallocation(c_child)
+                c_child = c_next
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)